tensor([ -9.5982,  -6.7168,  -8.4494, -12.2851,  -6.1446,  -7.5963,  -9.4666,
         -7.3661,  -8.4889, -12.2915,  -6.2534,  -6.9247,  -7.2414,  -7.9814,
         -7.6375,  -9.3655,  -9.3500,  -7.2926,  -8.1894,  -9.3901],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1839,  -6.9198,  -6.6942,  -6.7191,  -7.3350,  -7.9744,  -9.0513,
         -8.5566, -12.3829,  -5.4620,  -7.8333,  -9.6143,  -8.0100,  -8.2738,
        -12.4180,  -6.5841,  -6.8444,  -8.1199,  -7.1302,  -7.1837],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8876, -11.3783,  -6.1855,  -6.3989,  -8.4240,  -9.2373,  -7.4831,
         -8.4789, -11.3468,  -6.0682,  -7.0385, -11.1374,  -8.3564,  -7.7946,
        -11.8992,  -5.9173,  -7.5338,  -9.9515,  -8.7550,  -6.9915],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2741,  -7.0882, -10.0454,  -8.7620,  -7.9890,  -9.1120,  -6.5625,
         -6.8147,  -7.7829,  -8.8203,  -6.8206,  -8.6278, -10.4559,  -5.6873,
         -7.8341,  -8.7555,  -8.4757,  -8.0076, -12.2150,  -5.6929],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0182,  -8.4774, -11.6910,  -6.1920,  -6.7215,  -7.5901,  -7.6737,
         -7.0334,  -8.0551, -10.7058,  -6.2313,  -7.2810,  -8.9634,  -8.1467,
         -8.2370, -11.1047,  -6.5995,  -6.4438,  -6.1808,  -6.7091],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.3777,  -6.1094,  -8.0851, -10.3840,  -7.3097,  -8.6163,  -8.8149,
         -6.3814,  -6.2482,  -6.4856,  -7.1221,  -7.5227,  -8.3932,  -7.9179,
         -8.0346, -12.4472,  -5.9983,  -7.1457,  -9.4769,  -8.9990],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1046, -12.0168,  -5.6881,  -6.6939,  -9.4854,  -9.4777,  -7.5348,
         -8.4338, -12.1570,  -6.3748,  -7.0540,  -7.8334,  -9.4877,  -7.7454,
        -12.2830,  -6.1841,  -7.9232,  -9.6302,  -6.9595,  -9.4785],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6907, -10.0456,  -7.7839,  -9.0203, -12.2448,  -7.1650,  -8.2434,
         -9.9196,  -6.2366,  -8.4869, -12.1417,  -6.4584,  -6.9543,  -8.3779,
        -10.3412,  -7.4362,  -8.0393, -11.9422,  -5.9528,  -8.0098],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5611,  -6.5598,  -5.8394,  -9.9189,  -8.6672,  -9.1182, -14.2180,
         -6.4637,  -7.3762,  -9.3870,  -7.6800,  -9.4367,  -8.6762,  -6.4790,
         -8.7592,  -8.3388,  -6.9503,  -7.2821,  -6.9149,  -7.8902],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0677,  -8.9030,  -9.3033,  -7.0585,  -8.3416,  -7.8711,  -6.8181,
         -9.6191,  -9.2163,  -5.8630,  -6.1535,  -5.9147,  -6.3249,  -7.2826,
         -8.5006,  -8.8770,  -8.2119, -11.5685,  -5.7513,  -7.7249],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4012,  -7.7239,  -8.9320,  -9.7360,  -5.7705,  -7.1803,  -9.8022,
         -8.8567,  -7.9764, -11.7562,  -5.7900,  -7.8713,  -8.8672,  -7.3410,
         -7.2071,  -7.9940, -10.7679,  -6.2672,  -7.9597, -10.2241],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7066,  -9.0718, -12.6599,  -6.7145,  -7.4141,  -7.6863,  -8.9407,
         -8.2325, -10.9255,  -6.2481,  -8.8289,  -8.0956,  -6.7937,  -9.6835,
         -9.6060,  -6.0509,  -7.1014,  -8.1491,  -8.2171,  -7.9724],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9841,  -5.6091,  -6.5772,  -7.4064,  -7.3158,  -7.7070,  -7.8584,
        -11.3597,  -6.5510,  -7.1580,  -8.1202,  -9.6598,  -7.6708,  -8.4389,
        -12.0560,  -5.4592,  -6.9224,  -6.8542,  -5.1108,  -8.8132],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1572,  -9.2322,  -6.6957,  -8.8991,  -9.8031,  -6.6357,  -6.4414,
         -9.1426,  -7.9632,  -7.8525,  -9.3670, -10.5294,  -5.9269,  -6.9109,
         -8.8347,  -8.0124,  -8.1415, -11.4368,  -6.6295,  -6.9372],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.7160,  -6.8220,  -8.2579,  -9.5525,  -8.1836,  -8.4228, -11.6311,
         -5.9486,  -7.4816,  -8.6907,  -9.1293,  -6.5683,  -8.7305, -12.0056,
         -5.8572,  -8.5443,  -9.4298,  -8.8227,  -8.1183, -12.4264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.8670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8977,  -6.5816,  -7.6046,  -8.7985,  -8.9524,  -9.6291,  -6.5900,
         -7.5971,  -8.2916,  -7.2247,  -9.3297, -12.9320,  -6.1597,  -7.7736,
         -9.7917,  -9.1788,  -7.6345,  -8.2166, -11.7474,  -5.6244],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3278, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4194,  -7.4993,  -8.9392,  -7.1468,  -8.5921, -11.9334,  -6.0396,
         -7.7421,  -9.4381,  -7.0096,  -9.4761,  -8.8345,  -6.5237,  -6.9775,
         -6.8258,  -6.3059,  -7.8786,  -8.5218,  -7.9158,  -8.5630],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5488,  -9.0977,  -5.4901,  -8.4468,  -9.1536,  -7.2179,  -9.3611,
         -8.9501,  -6.4203,  -7.0035,  -7.6329,  -8.3921,  -7.3962,  -8.4351,
        -10.7480,  -5.9927,  -7.0714,  -9.1295,  -8.5710,  -7.0579],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1389,  -7.3581,  -8.8351,  -8.6747,  -7.6991,  -8.4009, -11.0870,
         -6.2060,  -6.7519,  -9.3253,  -9.6809,  -6.7409,  -8.1354,  -9.5442,
         -7.3398,  -8.7234,  -9.6308,  -7.1826,  -8.8230, -11.9281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0551,  -6.9344,  -8.5220, -11.8698,  -5.7916,  -8.9717,  -8.6673,
         -6.6701,  -9.3411,  -9.0121,  -7.2184,  -7.0247,  -8.7182,  -7.5134,
         -7.1513,  -7.7271, -10.3522,  -6.7230,  -6.6136,  -7.9246],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0109,  -8.1366,  -9.0712,  -7.1722,  -9.9064,  -9.5690,  -5.8782,
         -7.0970,  -5.9454,  -6.1561,  -6.4601,  -8.3506,  -7.0319,  -8.7877,
         -7.7527,  -8.0341,  -7.8585, -10.9851,  -7.6159,  -8.7807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4172,  -6.1330,  -6.5836,  -7.0696,  -6.8641,  -8.6406,  -7.8422,
         -8.4418, -12.8158,  -5.8948,  -7.5141, -10.1888,  -7.6037,  -9.7244,
         -8.9357,  -6.6787,  -7.4175, -10.0850,  -7.7094,  -7.2549],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9908, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.0303,  -6.0931,  -7.3913,  -8.6760,  -8.5895,  -8.2900,  -9.1343,
        -10.3527,  -5.4382,  -6.9197,  -9.4959,  -6.7414,  -9.3475,  -9.2485,
         -6.8185,  -6.1853,  -8.7531,  -9.3833,  -8.1105,  -8.5994],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6347,  -7.2754,  -8.0337, -12.9951,  -6.1620,  -7.9211,  -8.8047,
         -7.0924,  -9.0842,  -8.8213,  -6.3242,  -6.2092,  -6.8659,  -7.0527,
         -6.9217,  -8.0621,  -7.4277,  -7.2890, -10.3951,  -6.2917],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2210,  -8.0268,  -9.3848,  -7.8279,  -7.3881,  -8.5332, -12.3259,
         -5.7748,  -7.8253,  -7.8540,  -8.0703,  -8.4188, -11.8286,  -6.1040,
         -8.3016,  -8.4655,  -8.7870,  -7.9000, -10.4366,  -5.2309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.3535,  -5.4919,  -9.2198,  -7.2284,  -8.4814,  -8.8797,  -7.5014,
         -6.4880,  -9.7801,  -9.2650,  -7.7791,  -8.1564, -10.8222,  -6.6791,
         -6.2777,  -7.5471,  -9.2952,  -7.4492,  -8.1286, -10.7959],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3321, -11.0037,  -6.2606,  -7.6215,  -8.8388,  -9.5922,  -7.1757,
         -8.9891,  -9.1816,  -6.4348,  -6.6359,  -9.5504,  -9.2580,  -7.3891,
         -9.2386, -10.4134,  -6.1219,  -8.0092,  -9.0768,  -8.6046],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6157,  -8.1273, -11.4012,  -6.6300,  -7.2742,  -8.7473,  -7.4996,
         -6.7114,  -8.2762,  -8.2825,  -7.5623,  -8.8792, -12.8541,  -6.4127,
         -6.7777,  -7.6846,  -8.5220,  -7.6735,  -9.9963,  -5.5439],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3312,  -8.2058, -11.5269,  -5.9631,  -7.4136,  -8.8598,  -7.9460,
         -8.1187,  -8.2812, -10.9630,  -6.0343,  -9.5768,  -8.6523,  -6.3909,
         -8.9428,  -9.3139,  -6.8444,  -6.8943,  -9.3866,  -8.9026],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5642,  -7.4422,  -7.6249, -11.0161,  -6.3355,  -7.1639,  -9.2001,
         -9.2067,  -7.3366,  -8.5478, -12.1360,  -6.4527,  -7.6338,  -8.8832,
         -7.9422,  -7.8646, -10.8375,  -6.2325,  -8.2427,  -9.3015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7134,  -9.1635,  -9.2775,  -8.8487, -10.3887,  -6.8619,  -7.4419,
         -9.0379,  -7.0940,  -8.1073, -10.3862,  -6.8229,  -7.6390,  -9.9956,
         -7.2975,  -8.0998, -11.8204,  -6.3983,  -6.5192,  -7.1381],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0926, -12.2454,  -6.1550,  -6.5475,  -7.7293,  -8.8787,  -8.5456,
        -10.2986,  -6.8907,  -6.9528,  -7.1141,  -8.7795,  -7.3966,  -8.8429,
        -10.5639,  -5.6005,  -6.9857,  -9.2164,  -9.2978,  -6.5959],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.8928,  -8.1107,  -7.6663, -11.6016,  -6.4275,  -6.8668,  -8.8849,
         -9.6132,  -7.4070,  -8.8479, -10.2577,  -6.4455,  -7.4586,  -9.9700,
         -7.5071,  -7.8994, -12.3305,  -6.4709,  -7.1760,  -8.4448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2470,  -7.4464,  -7.3543,  -8.1093, -12.7229,  -6.2979,  -6.5530,
         -8.7309,  -8.4576,  -7.4254,  -9.1403, -10.0215,  -6.6983,  -7.4497,
         -8.6424,  -8.5775,  -7.4692,  -8.4596, -11.9151,  -7.2374],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2455, -10.1742,  -5.7569,  -8.0235,  -8.0939,  -8.3061,  -8.6333,
        -10.9124,  -6.3430,  -6.0471,  -6.0008,  -7.3957,  -7.0367,  -7.6391,
         -7.4350,  -8.2311, -12.0973,  -6.0897,  -7.9475,  -9.0746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5251,  -8.3459, -13.6254,  -5.6491,  -7.9942,  -9.5594,  -8.8799,
         -8.9353, -12.5095,  -6.8340,  -7.0972,  -6.2547,  -7.2487,  -7.7930,
         -8.2725,  -7.9829,  -8.5022, -12.6097,  -6.2636,  -8.9160],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4223, -8.3224, -8.9843, -8.7690, -8.7574, -6.2600, -6.0562, -6.7879,
        -8.0254, -5.8266, -9.5769, -8.1741, -8.9999, -6.8146, -6.2990, -5.9744,
        -5.7079, -6.5439, -7.7818, -6.9101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4397,  -7.1124,  -5.8704,  -7.3717,  -6.6147,  -8.0515,  -8.1192,
         -8.5178, -11.2668,  -9.1603,  -7.6200,  -8.2684,  -9.5196,  -8.8902,
         -8.0352, -11.4107,  -5.6502,  -7.6090,  -9.9807,  -8.2532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3895, -12.7562,  -5.0257,  -8.6528,  -8.6486,  -7.0017,  -9.4462,
         -9.7887,  -7.2746,  -6.6349,  -8.3435,  -6.6342,  -7.9229,  -7.5119,
         -7.0832,  -8.1845, -10.8858,  -6.7820,  -7.8527,  -9.3530],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6966,  -7.1408,  -7.1958,  -7.1498,  -7.6664,  -9.0577,  -9.7383,
         -6.7278,  -7.9750,  -8.9474,  -7.6051,  -7.7717, -11.3261,  -5.4508,
         -8.4864,  -9.1969,  -7.5595,  -9.0237, -12.7068,  -5.9990],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9259,  -9.5516,  -8.9380,  -5.9253,  -6.6353,  -6.1145,  -6.2965,
         -7.3330,  -6.5292,  -8.0637,  -7.2848,  -8.5227, -11.3229,  -8.7448,
         -6.5279,  -7.3823,  -7.2128,  -7.9054,  -7.2276,  -8.1544],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4157,  -5.8169,  -7.9227,  -9.5943,  -8.2120,  -8.8050, -12.6796,
         -5.9635,  -7.4679,  -9.0583,  -9.4917,  -7.8428,  -8.5682, -13.2523,
         -6.8502,  -6.4411,  -7.9595,  -8.1038,  -5.8941,  -7.8631],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5076,  -9.3689,  -8.6942,  -6.3439,  -6.9547,  -6.8641,  -6.2840,
         -7.4291,  -7.3518,  -8.9234,  -7.5468,  -7.4928,  -9.2607,  -8.9627,
         -5.6676,  -8.0875,  -9.8313,  -8.4835,  -7.8923, -11.7543],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1737,  -8.1330, -11.0362,  -5.4000,  -7.2688,  -9.9347,  -6.5457,
         -8.2861, -11.1672,  -6.2976,  -7.3845,  -8.0329,  -9.6423,  -8.2997,
        -11.2397,  -6.5004,  -7.4382,  -8.0285,  -7.2753,  -7.2545],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8661, -5.9525, -8.4287, -9.4716, -7.5145, -8.5168, -7.5324, -6.7241,
        -5.9080, -7.1583, -6.4005, -7.4647, -6.8731, -7.2475, -8.7320, -8.8597,
        -5.9925, -6.5973, -7.9516, -7.2277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4710, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0941,  -7.5386,  -8.1855, -10.2152,  -6.2524,  -6.9760,  -6.7900,
         -7.2681,  -8.0656,  -8.8917, -10.1830,  -6.2911,  -7.0724,  -7.9461,
         -7.6062,  -8.1895,  -9.9711,  -5.9874,  -7.0877,  -6.6338],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2959,  -7.6413,  -7.2251,  -8.1509,  -8.7567, -10.1287,  -6.1162,
         -7.4107,  -8.8257,  -7.0959,  -8.8057, -11.5944,  -6.8943,  -6.3767,
         -9.9402,  -8.0265,  -8.4032, -10.0428,  -5.7414,  -6.8962],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6061,  -8.1991,  -9.0945,  -9.9738,  -6.8061,  -6.3476,  -7.0294,
         -7.6034,  -7.3302,  -8.1082, -10.2273,  -6.4984,  -7.0021, -10.7239,
         -7.3976,  -7.7130,  -8.5648, -11.4441,  -5.5551,  -7.8479],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.4533,  -6.7303,  -6.8383,  -9.0167,  -8.5260,  -8.6321, -13.6580,
         -6.1332,  -6.6900,  -7.5368,  -6.6931,  -8.3059,  -8.8147,  -7.8949,
         -8.3773,  -9.5540,  -5.7348,  -8.2324,  -8.7100,  -7.0407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.6450,  -9.0627,  -6.5677,  -7.2315,  -6.1427,  -5.5468,  -6.3297,
         -8.1591,  -7.0966,  -8.0426,  -7.5818,  -8.6149,  -8.7007,  -6.1189,
         -7.2478,  -9.6907,  -7.7471,  -7.4197,  -8.0897, -10.2425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8385, -11.9477,  -6.4688,  -7.6768, -10.2209,  -9.7252,  -7.6122,
         -8.3725, -11.4541,  -6.0830,  -8.1328,  -8.7565,  -9.4322,  -7.1811,
         -8.5641, -11.7379,  -6.5939,  -8.8689,  -6.9613,  -8.5490],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.9209,  -7.4448,  -8.1796, -10.7934,  -6.1547,  -8.2202,  -9.2312,
         -8.9366,  -7.9234, -12.6084,  -6.3391,  -8.2507,  -9.5589,  -7.7106,
         -7.1890,  -8.3749, -10.4387,  -6.3533,  -7.1888,  -8.4000],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.3832,  -6.2632,  -6.6749,  -7.2195,  -8.8921,  -7.6955,  -8.3758,
        -11.0774,  -6.5159,  -7.2306,  -9.0939,  -8.9541,  -7.3044,  -8.0584,
        -10.3997,  -5.5903,  -6.7577,  -9.7154,  -7.1252,  -9.4805],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0269,  -8.6885, -13.4878,  -5.4314,  -8.9924,  -9.3215,  -7.2176,
         -9.7070,  -9.7280,  -6.3083,  -6.3900,  -6.6093,  -5.7424,  -6.4638,
         -6.3299,  -7.2023,  -8.8388,  -9.0464,  -8.2951,  -7.1606],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.7510,  -6.5198,  -6.5236,  -7.5432,  -7.8542,  -7.9507,  -9.6613,
        -10.0793,  -6.2709,  -8.3524,  -8.9346,  -8.4125,  -8.5993, -13.2711,
         -6.7657,  -7.0331,  -6.2209,  -6.3383,  -8.4044,  -8.9924],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5974,  -6.6438,  -9.3306,  -7.0450,  -8.5222, -13.5750,  -6.8859,
         -8.5613,  -7.4221,  -8.1520,  -8.1452,  -7.0369,  -6.2280,  -8.8464,
         -7.7439,  -7.2488,  -8.1684, -10.3394,  -5.6898,  -7.6228],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5430,  -7.5313,  -8.2455, -10.1742,  -5.7569,  -8.0235,  -8.0939,
         -8.3061,  -8.6333, -10.9124,  -6.3430,  -6.0471,  -6.0008,  -7.3957,
         -7.0367,  -7.6391,  -7.4350,  -8.2311, -12.0973,  -6.0897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4855,  -9.2567,  -8.4728, -11.8263,  -6.9380,  -8.0211,  -8.2014,
         -7.0667,  -7.7812,  -9.1952,  -9.6790,  -6.0827,  -8.2997,  -7.7399,
         -7.9924,  -8.9963, -12.4622,  -5.7489,  -7.3004,  -9.0977],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9730,  -8.4305,  -7.3052,  -7.9912, -10.8026,  -5.8510,  -6.7496,
         -7.3681,  -8.6592,  -7.2100,  -9.0583,  -9.4950,  -7.0690,  -6.9668,
         -9.0201,  -8.6806,  -8.2931, -11.2477,  -6.4271,  -6.8380],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4328,  -8.9007,  -7.8911,  -8.8934,  -7.9027,  -9.3256,  -6.5834,
         -7.9514,  -8.1735,  -7.4112,  -8.2542, -10.1145,  -6.6170,  -7.3040,
         -8.2444,  -8.8908,  -8.5779, -12.4633,  -6.0457,  -7.3107],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2620,  -7.1493,  -9.2864, -12.4071,  -6.4785,  -7.2827,  -7.7433,
         -7.7910,  -8.3948, -11.8414,  -6.2036,  -6.7180,  -9.1639,  -8.2173,
         -8.2204,  -9.1303,  -9.5344,  -6.0082,  -8.0750,  -9.8113],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2733,  -8.3456, -10.8850,  -6.0442,  -7.1892,  -9.5567,  -8.9028,
         -7.8071, -10.7055,  -6.1768,  -7.3909,  -9.4174,  -9.7038,  -7.4080,
         -8.5043, -12.8185,  -5.7562,  -7.9313,  -9.4737,  -9.8736],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2006,  -8.2037,  -7.7442,  -8.2975, -10.6174,  -8.9269,  -6.7652,
         -7.2187,  -7.6761,  -7.1152,  -6.3903,  -8.8321,  -7.5559,  -8.3888,
        -13.6069,  -5.5824,  -7.9376,  -7.6780,  -7.9756,  -8.8722],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3626,  -9.2921,  -7.1581,  -8.8817, -11.4235,  -6.0217,  -7.1782,
         -7.7125,  -8.2538,  -8.0365, -11.0615,  -5.4859,  -7.2532, -10.0557,
         -9.1011,  -8.8644, -13.4864,  -5.9995,  -7.2593,  -8.3876],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6639, -10.3956,  -8.8356,  -7.5796,  -8.4085, -10.5305,  -6.3111,
         -7.8117,  -7.4374,  -8.7001,  -8.6109, -12.4852,  -6.1888,  -6.5931,
         -6.8420,  -8.4907,  -8.3638,  -7.7266,  -8.8413, -13.4168],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3954,  -8.6368, -10.4292,  -5.6278,  -8.6774,  -9.5270,  -7.0728,
         -9.2751,  -9.3697,  -5.9961,  -6.8176,  -8.1868,  -9.2806,  -7.0824,
         -8.5687, -13.4038,  -6.6296,  -6.9672,  -9.8735,  -8.4281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7072,  -8.6634,  -5.8806,  -8.2422,  -9.0686,  -6.8431,  -8.7567,
        -11.6094,  -6.7405,  -6.5644,  -8.0184,  -8.0860,  -8.5742, -10.2522,
         -6.2459,  -8.1268,  -9.3408,  -7.2799,  -9.5087,  -8.9389],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1645,  -8.6960,  -7.9547, -12.4904,  -6.3019,  -8.3722,  -7.6955,
         -7.7407,  -8.5521,  -8.8253,  -6.6387,  -6.3460,  -6.1782,  -6.9935,
         -7.5996,  -8.4161,  -7.4197,  -7.9387, -12.3336,  -5.6678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3870,  -9.0413,  -6.9766,  -9.4445,  -8.3157,  -6.0587,  -7.9049,
         -7.8857,  -8.0147,  -8.1007, -10.3251,  -6.2890,  -7.3622,  -8.5701,
         -7.8284,  -8.1148, -11.0767,  -6.2550,  -6.6065,  -9.1373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5576,  -7.8499,  -9.7710,  -8.2523,  -8.2063, -12.1299,  -6.4374,
         -7.5971,  -7.8991,  -8.3750,  -7.1638,  -8.9220, -12.2505,  -5.9236,
         -7.5565,  -8.5780,  -6.9767,  -8.6116, -12.0966,  -6.0478],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7241, -6.1559, -7.1899, -6.4312, -8.0358, -6.9508, -8.8349, -8.1575,
        -5.4075, -8.3849, -5.9803, -6.1912, -6.7533, -7.3354, -7.4182, -8.6934,
        -7.8518, -5.7451, -7.8444, -8.5401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.4471,  -5.8448,  -7.5404,  -7.9436,  -8.2561,  -7.9210, -12.1930,
         -6.1611,  -8.0321,  -9.4870,  -9.0746,  -7.9785, -12.1955,  -6.1883,
         -6.8863,  -7.3567,  -7.7195,  -7.1699,  -9.2570,  -9.8262],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5882, -12.8221,  -5.6567,  -6.8600,  -8.3174,  -8.4257,  -7.6257,
         -8.8505, -11.9824,  -6.6282,  -7.0216,  -7.4127,  -9.0915,  -7.6515,
         -8.5952, -12.2801,  -5.3923,  -8.1382,  -7.2511,  -8.9049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8220,  -8.8669, -10.2234,  -5.9041,  -8.3672,  -9.1829,  -7.6210,
         -9.6151,  -9.2475,  -5.8126,  -7.0925,  -9.7552,  -7.3653,  -7.7223,
         -9.7444,  -6.1624,  -7.5933,  -8.2785,  -8.8974,  -8.3438],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4175,  -6.9352,  -8.4371, -11.7685,  -5.7625,  -6.6945,  -6.5456,
         -6.7946,  -8.5329,  -6.9605,  -7.0223,  -8.5368, -12.1733,  -5.8796,
         -8.4251,  -9.3227,  -7.4195,  -9.1614, -12.5758,  -5.6645],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6750,  -7.5269,  -8.3779, -11.9665,  -6.3246,  -7.4626,  -7.5166,
        -10.7506,  -7.6071,  -8.0579, -11.7031,  -6.7303,  -7.3576, -10.3399,
         -9.5429,  -6.8274,  -8.7102, -11.5351,  -6.1398,  -7.4230],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3898,  -8.0002,  -8.4996, -12.5809,  -5.9241,  -6.5734,  -6.1022,
         -6.5627,  -7.9186,  -8.6259,  -7.8382,  -8.3770,  -9.2947,  -5.7856,
         -8.1748,  -9.0092,  -7.4504,  -9.7891,  -9.8047,  -6.8661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7533,  -6.9764,  -7.9116,  -9.3766,  -5.9717,  -7.6883,  -9.2010,
         -7.0028,  -8.2616,  -9.3991,  -6.0085,  -7.8110,  -6.0584,  -6.5135,
         -7.7005,  -6.4287,  -8.3106,  -7.1614,  -8.5158, -12.1320],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9835,  -9.4598,  -9.3754,  -8.5748, -11.6824,  -8.0592,  -7.2583,
         -8.4509,  -6.3570,  -8.3860,  -7.7923,  -8.3111,  -8.2091,  -8.4517,
         -9.1880,  -5.8883,  -6.6950,  -9.3016,  -8.6724,  -7.7008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7995,  -5.8212,  -6.7287,  -8.3241,  -8.6551,  -7.8166,  -8.7404,
        -11.6093,  -6.1025,  -6.8641,  -9.9807,  -8.5728,  -6.9517,  -8.5046,
        -11.9240,  -6.0047,  -8.2794,  -9.8099,  -7.0394,  -9.3304],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8689,  -7.5437,  -8.5529, -10.9007,  -5.2023,  -7.9083,  -9.2521,
         -9.3369,  -6.5187,  -8.8950, -10.1099,  -6.7694,  -6.8905,  -9.3685,
         -7.3469,  -9.0483, -13.0518,  -5.8385,  -7.0218,  -9.6679],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2008,  -7.6434,  -9.0349, -12.6885,  -6.2972,  -7.0094,  -7.8796,
        -10.3557,  -7.3400,  -8.1429, -10.6171,  -6.2271,  -7.4771,  -8.0169,
         -7.5776,  -8.1843, -11.9744,  -6.4255,  -6.8947,  -9.5844],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6045, -13.5468,  -5.1254,  -8.8189,  -8.5874,  -6.4746,  -8.2291,
         -7.9906, -10.7660,  -6.7771,  -6.8883,  -7.2336,  -5.6944,  -7.7089,
         -6.8006,  -8.5393,  -7.2487,  -8.4199, -11.9371,  -6.6316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0645,  -8.1313,  -7.5661,  -8.7454, -10.0658,  -5.7499,  -6.9765,
        -10.1307,  -7.8026,  -8.5867, -12.1244,  -6.0501,  -6.9667,  -9.3455,
         -8.3393,  -8.5101, -12.0411,  -6.6656,  -6.8504,  -9.9465],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1443, -11.4058,  -5.8614,  -8.3054, -10.2316,  -7.1823,  -9.3358,
         -8.9673,  -7.2015,  -6.8153,  -8.5773,  -7.1443,  -7.5140,  -8.0565,
         -8.0143,  -8.6171, -12.9605,  -5.8348,  -8.1702,  -9.7313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8373, -12.8253,  -6.4174,  -6.7979,  -7.6477,  -8.4322,  -7.0625,
         -8.7049, -10.2931,  -6.2302,  -6.5376,  -9.8354,  -9.9618,  -7.3393,
         -8.5005, -10.7890,  -6.6365,  -7.5650,  -7.8828,  -7.8647],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4853,  -9.0868, -12.3549,  -6.0686,  -7.0112,  -9.5965,  -9.6701,
         -7.4401,  -8.1289, -10.8967,  -6.2740,  -7.3904,  -7.7470,  -8.6648,
         -7.9788, -12.7321,  -6.3284,  -7.0717,  -8.4306,  -9.6462],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2663, -13.2173,  -6.4954,  -6.5320,  -6.5547,  -6.9891,  -7.5302,
         -8.7354,  -7.6788,  -8.7759,  -9.2408,  -5.6593,  -8.5107,  -8.9790,
         -7.2004,  -9.4276,  -9.0511,  -6.4561,  -6.3359,  -7.0469],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7726,  -6.9857,  -6.3280,  -6.9084,  -7.6821,  -8.2127,  -8.2607,
         -8.5068, -12.9837,  -5.4649,  -9.2014,  -7.1897,  -6.7930,  -9.4002,
         -9.7603,  -6.2941,  -8.0043,  -6.4950,  -6.8335,  -6.7109],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7677,  -8.3412, -11.7836,  -5.9544,  -8.1824,  -9.2064,  -7.1588,
         -9.7060,  -9.1826,  -6.0509,  -6.6369,  -8.1552,  -7.8971,  -8.0750,
        -10.6517,  -6.0918,  -8.4376,  -8.0704,  -7.8497,  -8.2756],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3050, -11.0193,  -6.2038,  -6.8918,  -9.5783,  -8.0858,  -8.1312,
        -12.0851,  -6.5429,  -6.7795,  -9.4608,  -9.5274,  -7.0077,  -8.2716,
        -10.5645,  -6.1288,  -7.8594,  -8.9624,  -8.1602,  -9.2326],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.3738,  -6.8346,  -6.3212,  -6.8229,  -6.6264,  -7.6665,  -7.3660,
         -6.6989,  -9.0364,  -9.4322,  -6.0348,  -7.7330,  -9.5541,  -9.4385,
         -8.9795, -12.3411,  -7.0883,  -8.1722,  -7.8294,  -7.5396],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7889, -11.0413,  -5.6437,  -7.3797, -10.5328, -10.6025,  -7.5612,
         -8.9556, -13.5809,  -6.0897,  -6.4711,  -6.8995,  -7.0181,  -7.8314,
         -9.1623,  -7.9560,  -8.3339, -13.4576,  -6.7926,  -8.5986],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0676,  -9.9044,  -9.2995,  -7.8817,  -8.0910, -12.9317,  -5.9312,
         -7.7812,  -9.5393,  -7.8503,  -6.7755,  -8.7185,  -9.2334,  -6.5643,
         -7.2662,  -8.8581,  -7.6961,  -9.1037, -12.5719,  -5.9432],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3743,  -6.9897,  -6.9420,  -8.0162,  -8.9640,  -8.7597,  -9.2865,
         -8.9941,  -6.0726,  -8.0946,  -9.7866,  -7.5653,  -9.1480, -11.9139,
         -6.2512,  -6.7274,  -9.6411,  -8.4080,  -7.7527,  -9.0007],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0280,  -7.2502,  -7.0779,  -6.8115,  -7.9153,  -8.6250,  -7.2808,
         -6.6911, -10.7887,  -6.6591,  -6.8570,  -9.0585,  -7.5116,  -7.0050,
         -7.9382, -10.4914,  -5.7889,  -6.9713,  -8.9720,  -9.8366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6025, -10.3762,  -5.6278,  -8.4999,  -7.9608,  -7.9854,  -8.8985,
        -12.8337,  -6.3937,  -6.9507,  -5.7348,  -6.4232,  -7.5166,  -9.0179,
         -7.7949,  -8.5642,  -9.3819,  -5.9694,  -7.8973,  -9.0442],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1880,  -8.1530,  -8.5273,  -9.9853,  -6.4171,  -7.1331,  -9.3865,
         -6.4124,  -8.4917, -12.8079,  -5.8518,  -8.4266,  -8.8656,  -8.6048,
         -7.1214, -12.0260,  -6.1759,  -7.2918,  -8.0162,  -9.3909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1502, -11.2381,  -6.1883,  -7.1021,  -9.9226,  -8.6522,  -8.5908,
        -13.1279,  -6.3061,  -7.2240,  -8.9658,  -9.1835,  -7.5126,  -8.7513,
        -12.6091,  -5.5185,  -7.8141,  -8.5231,  -7.9255,  -7.8578],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5931,  -8.0070,  -7.5821,  -9.8044,  -6.3874,  -6.8888,  -7.5064,
         -7.6583,  -8.0566, -10.5702,  -6.8611,  -8.9693,  -6.3401,  -6.7502,
         -7.1192,  -8.0362,  -7.0964,  -7.9012, -10.8351,  -6.4909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0721,  -8.1253,  -8.2189, -12.2355,  -6.0640,  -7.4285,  -9.7270,
         -8.3854,  -8.5833, -12.2821,  -6.6147,  -6.6611,  -8.2029,  -7.5683,
         -7.0446,  -8.6897,  -9.4146,  -7.5064,  -8.1206,  -8.8334],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3102,  -7.0983,  -8.1803, -10.4916,  -5.9163,  -7.5236,  -9.1717,
        -10.4185,  -7.3744,  -8.7117, -12.1814,  -5.7155,  -7.4429,  -6.7715,
         -7.6787,  -8.1380,  -8.8831,  -7.8266,  -8.7090,  -9.1925],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6106,  -8.5734,  -8.0714,  -9.1745,  -9.6990,  -6.4469,  -6.6595,
         -8.1403,  -7.5135,  -8.4731, -12.1876,  -6.3873,  -6.8260,  -8.6275,
         -8.6431,  -8.2066, -10.9492,  -6.5207,  -6.9264,  -8.2487],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8965,  -6.8059, -10.0626,  -7.3037,  -8.4162,  -9.5137,  -6.2698,
         -8.1089,  -7.8303,  -6.3856,  -8.4026,  -9.2932,  -6.0409,  -6.7495,
         -7.1789,  -8.4131,  -7.0348,  -8.3754, -11.5027,  -6.5236],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7793,  -6.7373,  -7.3821,  -9.2062,  -7.2018,  -8.4048, -11.4824,
         -5.3515,  -9.4586,  -8.9087,  -8.7904,  -8.3037,  -8.3180, -12.7168,
         -7.1962,  -6.3946,  -7.7139,  -5.6366,  -9.1922,  -7.5372],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2268,  -6.0076,  -5.6806,  -7.3554,  -7.2869,  -8.7369,  -7.8513,
         -8.7453,  -8.7495,  -6.3410,  -8.1036,  -7.1316,  -7.8746,  -8.1679,
        -12.2597,  -5.9055,  -7.4902,  -8.1160,  -8.7323,  -8.0297],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8487, -12.3283,  -5.5733,  -6.4488,  -7.7300, -10.0116,  -6.5456,
         -8.5917, -12.2052,  -6.3657,  -6.3283,  -6.0438,  -6.6802,  -7.8568,
         -8.7575,  -8.7822,  -8.9155,  -9.3997,  -5.6118,  -8.3821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5742,  -8.4800,  -8.4137,  -7.5373,  -9.0514, -10.3620,  -5.6808,
         -8.0980,  -8.0758,  -6.5951,  -9.0397, -12.6511,  -5.8091,  -7.3692,
         -8.0007,  -7.2765,  -7.2741,  -6.6155,  -9.6640,  -9.8885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7993,  -8.7782,  -7.5975,  -8.5072, -11.6831,  -5.5561,  -7.8518,
         -8.1835,  -7.7367,  -8.7809, -11.8812,  -6.2267,  -6.8789,  -9.4231,
         -8.7889,  -7.4850,  -8.6428, -10.2635,  -6.1444,  -6.8355],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6364,  -9.0500,  -8.1725, -11.2675,  -6.6394,  -7.1492,  -8.9600,
         -7.9539,  -7.0109,  -8.4844, -10.3443,  -5.1490,  -7.5742,  -9.1302,
         -7.2484,  -9.0108, -12.3515,  -6.6234,  -6.7783,  -6.9706],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5956,  -8.5716,  -7.5144,  -8.5939, -10.0915,  -5.9118,  -8.5408,
         -8.2726, -12.2883,  -6.8103,  -9.5064,  -9.0144,  -6.7439,  -6.5021,
        -10.1197,  -8.2607,  -8.6082,  -9.8049,  -6.4687,  -8.6037],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6253,  -9.6561,  -7.2475,  -8.1539, -11.0018,  -6.1337,  -7.7430,
         -8.1444,  -8.3463,  -8.9100, -12.6062,  -6.3557,  -6.7314,  -6.4344,
         -6.1661,  -8.9948,  -7.8277,  -8.7876, -13.7260,  -6.0076],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4834, -11.6140,  -5.7508,  -7.2441,  -9.8656,  -8.9172,  -8.5314,
        -12.7200,  -6.2199,  -7.4051,  -9.6344,  -9.7130,  -7.5497,  -8.2561,
        -10.9507,  -6.7362,  -6.6160,  -8.4863,  -8.9779,  -7.6982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.8535,  -6.8186,  -8.3275,  -9.8709,  -8.0669,  -8.1246, -11.7996,
         -6.2880,  -7.7773,  -9.1743,  -9.0914,  -9.0087, -10.4422,  -6.6976,
         -6.6626,  -7.7945,  -8.4077,  -7.9642, -11.9779,  -5.9454],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0795,  -8.3287, -12.4782,  -5.6532,  -7.8000,  -9.8898, -10.3802,
         -6.9002,  -8.6232, -12.2743,  -6.2817,  -9.0076,  -8.6232,  -7.3458,
         -8.8234, -11.8992,  -6.9705,  -7.3549,  -8.0910,  -8.2624],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0787,  -8.7270,  -8.2757, -10.6185,  -6.8492,  -7.7344,  -7.9043,
         -8.8211,  -7.2317,  -8.4400, -12.0600,  -6.3348,  -7.0774,  -9.6493,
         -9.0908,  -8.7286,  -9.8826,  -6.4392,  -6.4030,  -9.1497],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3729,  -9.8384,  -8.4028,  -6.7929,  -6.5861,  -6.3105,  -6.5322,
         -7.1768,  -7.7056,  -8.1851,  -8.2375, -12.0598,  -6.8108,  -7.2415,
         -8.4511,  -7.5529,  -6.9544,  -8.2549, -11.4214,  -6.3008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0339,  -6.7474,  -7.0063,  -6.9534,  -6.9794,  -7.3207,  -8.2039,
         -7.7479,  -8.3191, -10.9261,  -5.9566,  -6.8641,  -8.9666,  -7.4816,
         -7.9745, -11.1650,  -5.4786,  -8.2299,  -9.5065,  -8.0516],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1318,  -9.8061,  -7.9133,  -7.6404, -11.9276,  -6.3237,  -7.2563,
         -8.2516,  -7.8100,  -6.9114, -10.9307,  -6.3796,  -6.3578,  -7.9738,
         -8.6040,  -7.4572,  -8.4032,  -9.5951,  -6.5024,  -7.5597],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.8607,  -5.6659,  -7.9437,  -9.4251,  -8.0604,  -8.9158, -12.9186,
         -6.0934,  -6.5972, -10.2342,  -8.0448,  -7.4621,  -8.8297, -10.5574,
         -6.6335,  -6.7355,  -9.0110,  -7.0937,  -8.3167, -12.3332],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0290,  -9.2756,  -9.4056,  -7.4157,  -8.2853, -12.3463,  -6.4614,
         -6.8742,  -8.4257,  -9.4697,  -7.4424,  -8.6672, -12.2206,  -6.0051,
         -8.7600,  -9.6541,  -6.9704,  -9.6911,  -9.6517,  -6.1316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8093,  -8.1777, -10.9781,  -6.0671,  -6.4694,  -9.2291,  -8.8531,
         -6.5704,  -8.5112, -11.6762,  -5.8893,  -8.3101,  -8.5466,  -7.6373,
         -8.9077, -12.2777,  -6.1549,  -7.3115,  -7.8343,  -8.8663],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1016,  -7.4417,  -9.4522,  -8.6140,  -7.7691, -10.7853,  -6.3243,
         -6.4997,  -8.0507,  -8.6077,  -8.0327, -10.9429,  -7.0371,  -7.1709,
         -8.0958,  -9.2953,  -7.8623,  -7.9361, -11.7994,  -5.4056],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5842, -5.9245, -8.3985, -7.1842, -8.2042, -8.3932, -6.9434, -5.8662,
        -6.2795, -6.7406, -6.9320, -6.6127, -8.2606, -7.1798, -6.8494, -9.1320,
        -7.0629, -5.9950, -7.0494, -8.3220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8946,  -9.3961,  -6.7413,  -7.2773,  -7.1707,  -6.7345,  -6.0159,
         -8.3884, -11.2988,  -5.4898,  -8.4543,  -7.7994,  -8.3686,  -8.3261,
        -13.3743,  -6.7753,  -6.7498,  -7.0318, -11.0434,  -8.1311],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9647,  -5.8589,  -7.6113,  -8.6445,  -9.4231,  -7.2167,  -8.2867,
         -9.8234,  -7.6204,  -7.6297,  -9.9513,  -6.3180,  -7.3788,  -7.2996,
         -9.3516,  -7.1129,  -8.1367, -11.0600,  -6.3521,  -6.8170],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1804,  -5.4972,  -8.3402,  -7.1884,  -8.7535,  -7.9843,  -7.8639,
         -7.3386, -10.6880,  -6.9149,  -6.8104,  -8.0952,  -7.8955,  -7.0813,
         -8.4264, -11.9527,  -5.9417,  -6.8944,  -9.0308,  -8.6821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0639, -11.5834,  -5.6428,  -7.1906,  -8.8496,  -8.6208,  -7.4057,
         -8.7519,  -9.2399,  -7.1083,  -6.9441,  -7.7455,  -8.0990,  -8.0058,
        -11.6177,  -5.4968,  -8.0014,  -8.4540,  -7.5110,  -9.1596],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0058, -8.1370, -8.0216, -7.6300, -7.5545, -8.7714, -8.6432, -7.2721,
        -7.7059, -7.7335, -7.7629, -9.2024, -7.7328, -6.6147, -6.6527, -7.1116,
        -7.0185, -7.7140, -6.9491, -7.7504], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4081,  -7.4991,  -7.8354, -10.1088,  -5.5594,  -6.9911,  -8.5250,
         -8.7069,  -6.8348,  -9.0998,  -8.5344,  -6.3046,  -7.0974,  -6.4446,
         -6.6103,  -6.8835,  -8.5225,  -8.4841,  -8.4313,  -8.2627],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9267,  -8.0968,  -8.7434, -10.8854,  -6.1892,  -6.8536,  -7.5751,
         -8.3984,  -7.3023,  -8.4510,  -9.4773,  -6.0990,  -7.1319,  -7.4020,
         -6.7561,  -8.1977, -10.4890,  -5.7860,  -7.3819,  -7.6856],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0868, -10.5174,  -6.1802,  -7.0847,  -7.9095,  -6.6364,  -8.5257,
        -10.9543,  -6.2907,  -6.8466,  -7.8906,  -7.6538,  -7.6796,  -8.7906,
         -9.0899,  -6.1631,  -7.2388,  -8.3374,  -7.8437,  -8.6817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4604,  -6.4074,  -6.8654,  -8.1184,  -8.2179,  -8.7933,  -8.8982,
         -6.3043,  -7.8472,  -7.4035,  -6.8948,  -9.0741, -11.0218,  -6.2992,
         -7.1357,  -7.8184,  -7.8419,  -7.0441,  -8.2808,  -9.6778],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8202, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.8225, -7.0060, -6.9854, -6.1479, -7.6647, -7.3089, -8.3374, -7.0896,
        -8.8145, -8.3125, -5.8175, -7.3882, -8.5430, -8.5373, -6.7719, -8.4680,
        -8.5849, -5.9669, -7.2573, -7.1121], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1397,  -8.0416,  -7.5885,  -8.2994, -11.1243,  -5.6485,  -7.4421,
         -8.6340,  -8.9783,  -7.2878,  -8.2951, -10.5160,  -6.5515,  -6.5188,
         -6.9322,  -7.8339,  -8.1881,  -9.6558,  -6.2369,  -7.6353],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6062, -5.8138, -7.3597, -8.0875, -8.1471, -7.2382, -8.3586, -8.9642,
        -6.0211, -8.0688, -8.3170, -6.8852, -8.9984, -8.3867, -6.2429, -6.1230,
        -6.6881, -6.8280, -7.4107, -7.2521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2190,  -6.0943,  -6.5781,  -7.7616,  -8.3455,  -8.4648, -10.8682,
         -6.4313,  -6.8550,  -7.7772,  -7.2354,  -8.5315,  -9.1043,  -5.1058,
         -8.1246,  -7.3249,  -8.0147,  -9.2516, -11.3100,  -6.0208],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3014,  -7.1533,  -8.3528,  -7.2383,  -8.8163, -11.2554,  -6.6668,
         -7.7164,  -7.3946,  -8.4941,  -7.0058,  -8.3772,  -9.0050,  -6.4274,
         -7.3394,  -8.1772,  -8.2100,  -7.7655,  -9.8454,  -5.9469],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0079,  -8.9317, -11.1372,  -6.0708,  -6.8671,  -7.9992,  -8.5274,
         -7.2638,  -8.0680,  -9.7402,  -7.1559,  -7.1979,  -8.4594,  -8.7166,
         -7.6166,  -9.3940,  -5.9478,  -6.8114,  -7.8131,  -8.5676],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1413, -10.3324,  -7.2977,  -6.6319,  -7.3276,  -6.1584,  -7.7310,
         -7.1091,  -7.4010, -10.4432,  -6.2943,  -7.3003,  -8.3675,  -8.8073,
         -7.7117,  -7.0607,  -6.6815,  -7.9652,  -7.0628,  -6.8099],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6906, -8.9255, -6.4655, -7.0083, -7.4501, -7.4005, -7.3198, -8.1394,
        -9.5882, -5.5416, -6.9739, -7.1255, -7.9984, -8.1117, -9.1183, -5.5578,
        -7.3721, -8.3439, -8.2890, -8.1851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1080, -6.1730, -6.9517, -7.1215, -7.7716, -7.3942, -6.9428, -6.2182,
        -6.8245, -6.6941, -7.9376, -7.1299, -8.3413, -7.8644, -5.3741, -7.4921,
        -8.1458, -6.8227, -7.9435, -6.9847], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7345,  -8.4584,  -7.9086,  -7.9434,  -9.2849,  -6.5459,  -6.1260,
         -6.3834,  -6.2591,  -8.5157,  -7.3390,  -8.9432, -11.4458,  -5.9220,
         -7.2834,  -7.5893,  -6.6137,  -8.5907,  -9.9286,  -6.4429],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0944,  -7.9542, -10.3950,  -6.6130,  -6.2608,  -6.6767,  -6.1604,
         -8.6429,  -6.9439,  -8.5039, -11.3213,  -5.9633,  -7.6033,  -8.6977,
         -7.2443,  -6.7163,  -8.1506,  -8.7050,  -6.1263,  -7.1959],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6732, -7.3103, -8.2798, -8.0229, -8.9168, -8.2112, -6.6325, -6.3102,
        -6.2802, -7.3090, -7.8754, -7.7807, -6.6696, -6.1136, -8.5180, -9.0504,
        -6.1381, -7.7937, -8.0801, -8.1454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7852,  -8.3643, -10.1903,  -6.4961,  -6.9329,  -6.7042,  -5.9588,
         -8.1087,  -6.8499,  -8.4535, -11.1823,  -6.3974,  -7.2848,  -8.0899,
         -8.1051,  -8.0780, -10.0160,  -7.0287,  -6.7735,  -6.7214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3923,  -8.2542, -10.2376,  -6.1850,  -6.8239,  -7.2591,  -7.5475,
         -8.5572,  -9.9426,  -7.7450,  -6.9313,  -6.5365,  -6.9013,  -7.7002,
         -8.9366,  -8.8838,  -6.3778,  -6.6019,  -8.1806,  -7.8380],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4563,  -7.2416,  -7.3582,  -8.6265,  -9.3793,  -5.6581,  -6.8370,
         -7.9945,  -6.7737,  -7.6190, -11.2420,  -5.6423,  -6.7409,  -8.3305,
         -7.7982,  -7.7203,  -8.7675,  -9.4856,  -6.6526,  -7.8236],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5229,  -6.7577,  -8.6927, -10.7883,  -6.1760,  -6.9352,  -8.4820,
         -6.7507,  -8.4578,  -9.3685,  -6.4872,  -6.9577,  -8.5311,  -6.8776,
         -7.7567, -10.1976,  -6.2801,  -6.8971,  -7.5391,  -8.0528],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8761,  -7.3044,  -7.5577,  -6.8958,  -7.8565,  -9.9607,  -5.9944,
         -6.6217,  -8.4646,  -8.0688,  -7.6407,  -8.6308, -10.9548,  -6.9766,
         -6.8499,  -6.7200,  -7.9362,  -8.5625,  -6.8636,  -7.9559],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7346, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9452,  -7.3963,  -8.2923, -10.3290,  -5.8541,  -7.3499,  -8.6392,
         -7.4872,  -8.3856, -10.8015,  -5.8826,  -6.5898,  -8.4095,  -8.3809,
         -7.9042,  -9.9267,  -6.3969,  -6.3084,  -8.1615,  -7.0899],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3363,  -8.0340,  -7.8549,  -7.8519, -10.2489,  -6.2378,  -6.6032,
         -8.3004,  -8.3883,  -6.8768,  -8.3659,  -9.8646,  -5.5985,  -6.7418,
         -8.6093,  -8.6176,  -6.6207,  -8.3031, -10.3528,  -5.4503],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3096,  -6.8841,  -8.2034,  -8.0724,  -8.9142,  -8.5975,  -6.7183,
         -6.6329,  -7.3998,  -6.4612,  -7.5073,  -7.3126,  -6.9633,  -8.6652,
        -10.5314,  -5.8248,  -7.2025,  -8.6672,  -8.1126,  -7.1581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2847,  -6.3629,  -8.4264,  -8.2138,  -7.1713,  -8.0091, -10.0136,
         -5.7082,  -7.1756,  -7.1758,  -6.5322,  -8.7972,  -8.3430,  -6.1370,
         -6.9266,  -6.7136,  -6.8509,  -7.4077,  -8.4166,  -7.9843],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0791, -7.9367, -9.0817, -8.7634, -5.9523, -7.5226, -7.1077, -7.4572,
        -9.2052, -8.0807, -6.2898, -6.6862, -8.5979, -7.6062, -8.1619, -9.3178,
        -6.4042, -8.2371, -8.3963, -8.7678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8326, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1486,  -8.8569,  -6.9002,  -8.8371, -10.8883,  -6.4225,  -6.6059,
         -7.5046,  -8.3944,  -7.4103,  -8.6183, -10.4964,  -6.1125,  -7.6394,
         -7.9926,  -6.6315,  -8.5043, -10.8478,  -7.2351,  -7.0129],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1853, -6.5679, -8.4310, -7.3439, -7.6722, -8.9589, -8.1199, -7.3439,
        -7.6379, -7.1758, -7.4717, -9.0304, -8.0965, -6.9620, -6.5128, -7.1625,
        -8.4227, -6.8014, -7.5993, -8.8675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1383,  -6.8693,  -8.1549,  -7.2350,  -7.4044,  -8.0453, -10.0487,
         -5.9981,  -7.1353,  -7.1779,  -7.5503,  -8.2870, -10.8345,  -5.9385,
         -7.2243,  -8.7278,  -7.1845,  -7.8602,  -8.7530,  -9.0065],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8787, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9695,  -8.7438,  -6.8247,  -8.0998, -10.7635,  -6.0396,  -6.9938,
         -7.3796,  -8.5266,  -7.0492,  -7.7474,  -9.8601,  -6.0845,  -7.2561,
         -8.4077,  -7.7659,  -8.0528,  -9.9895,  -6.0002,  -6.8338],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1046,  -6.4860,  -8.2859, -10.8076,  -6.6556,  -6.5232,  -7.2991,
         -7.4756,  -7.9434,  -9.7266,  -5.6594,  -7.5246,  -8.3123,  -6.2840,
         -8.8586,  -8.1966,  -7.2335,  -6.8507,  -7.7453,  -5.3765],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5703,  -8.4367, -11.1590,  -6.4164,  -7.8470,  -7.0760,  -8.9236,
         -7.9399,  -9.5164,  -6.0551,  -6.9088,  -7.2501,  -7.3668,  -7.8185,
        -10.2141,  -6.5752,  -6.8669,  -8.4475,  -8.5873,  -7.3354],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6523, -10.3651,  -5.7657,  -7.7330,  -9.1695,  -7.7632,  -7.4695,
         -8.4048, -11.1527,  -6.2298,  -6.7518,  -7.1858,  -7.8103,  -8.0475,
         -9.0576,  -9.0960,  -6.1676,  -6.7565,  -8.2881,  -7.8109],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5198,  -8.3967,  -6.5296,  -8.2403, -10.5898,  -6.3595,  -7.2746,
         -7.9507,  -8.5491,  -6.5649,  -9.3065,  -9.3350,  -6.3287,  -6.9464,
         -8.4731,  -7.4484,  -7.2604,  -8.2055, -10.5153,  -6.8868],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2126,  -8.2391, -10.8245,  -6.0109,  -7.5413,  -7.7208,  -7.0046,
         -8.9661,  -8.1372,  -6.5788,  -6.0196,  -7.0386,  -6.1328,  -8.0576,
         -7.0446,  -8.1406, -10.6741,  -5.7895,  -7.2911,  -7.1354],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4377,  -7.1537,  -7.8611,  -9.9150,  -6.3786,  -7.7223,  -7.5242,
         -8.5658,  -8.4399, -10.4578,  -6.0538,  -7.1281,  -8.3979,  -8.2452,
         -7.1238,  -8.1414, -10.1763,  -5.3113,  -7.2468,  -6.9813],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4617,  -7.2250,  -8.9776, -10.9804,  -6.6923,  -7.4343,  -6.8526,
         -6.9065,  -8.0224,  -8.1728,  -7.5141,  -8.4922, -11.2910,  -5.6116,
         -7.9251,  -7.8872,  -6.9301,  -9.1562,  -8.6177,  -6.6846],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1235,  -7.9775, -10.4661,  -5.8240,  -7.3931,  -8.1576,  -7.1525,
         -9.3879,  -8.6650,  -5.9481,  -6.4227,  -8.4921,  -7.8983,  -6.8264,
         -8.4772,  -9.2420,  -5.5466,  -6.5962,  -8.0875,  -7.5116],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9585,  -8.0745,  -8.2988,  -7.7729,  -8.6549,  -8.4058,  -5.8107,
         -7.6953,  -7.4912,  -7.1599,  -8.8055, -10.8531,  -6.5711,  -7.0746,
         -8.2505,  -8.7758,  -7.4161,  -8.5591,  -9.7972,  -7.1062],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8847, -11.0186,  -5.8812,  -7.6210,  -8.1868,  -8.4617,  -8.7834,
        -10.7633,  -6.5517,  -6.4599,  -7.5234,  -7.7402,  -8.1928,  -9.3250,
         -5.7639,  -7.9755,  -7.6572,  -6.8990,  -9.3754,  -8.6722],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4113, -7.7418, -8.1095, -8.4599, -8.9914, -7.4769, -7.8119, -7.0394,
        -6.9290, -7.3896, -8.3565, -8.9894, -5.8147, -7.6400, -7.9639, -7.0020,
        -9.5025, -8.7150, -6.6234, -7.4686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2687,  -6.5319,  -6.3155,  -6.9602,  -8.0634,  -7.3012,  -7.9122,
        -10.3504,  -5.6809,  -7.7690,  -7.2910,  -7.2452,  -9.2706,  -8.2922,
         -6.1741,  -6.3430,  -5.9645,  -7.7800,  -8.2571,  -8.3988],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8013,  -7.4136, -10.0290,  -6.6365,  -7.1291,  -7.7565,  -6.7053,
         -6.9895,  -8.5616,  -7.3382,  -8.7250, -11.5096,  -6.0767,  -6.9106,
         -8.7648,  -8.3431,  -8.0546,  -9.9701,  -6.1151,  -7.4277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5673, -10.3324,  -5.8174,  -6.9822,  -8.2228,  -8.4436,  -7.3353,
         -8.6349, -10.7558,  -5.9436,  -7.1823,  -7.6924,  -6.9307,  -8.8699,
        -10.3158,  -6.8909,  -6.6082,  -8.0953,  -7.8192,  -8.1079],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0748, -10.8556,  -5.9840,  -7.1513,  -8.0965,  -8.3170,  -6.7737,
         -9.4770,  -6.3729,  -6.6748,  -7.3777,  -8.7054,  -7.3528,  -7.8755,
        -10.0052,  -6.4983,  -7.0063,  -7.7379,  -8.1123,  -8.3992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1792,  -7.5626,  -8.4225,  -7.3015,  -8.4903,  -7.4711,  -7.2319,
         -6.8139,  -8.8069,  -6.9984,  -7.2012, -10.1819,  -5.8077,  -6.7411,
         -8.4781,  -8.6750,  -7.2131,  -8.2495, -10.1329,  -6.7154],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5437,  -7.5735,  -8.9402,  -8.0051,  -5.6818,  -6.6767,  -7.1444,
         -7.0235,  -7.9862, -10.0040,  -6.4002,  -6.2633,  -6.1114,  -6.6460,
         -6.6329,  -8.5350,  -7.4102,  -8.7424, -11.7627,  -5.6772],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2607,  -7.8564,  -7.9547,  -7.2012,  -8.7360, -10.9363,  -5.9714,
         -6.6264,  -7.2175,  -8.1809,  -7.4869,  -8.7050, -11.2198,  -6.2354,
         -6.9902,  -7.5446,  -8.5561,  -6.7740,  -8.3831, -10.6086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4485,  -6.2992,  -8.8103,  -8.8817,  -6.4886,  -6.8568,  -7.7281,
         -8.2012,  -7.3938,  -8.6508, -10.8454,  -6.0938,  -7.2157,  -8.0352,
         -9.0002,  -6.9042,  -8.8463, -10.8642,  -6.3105,  -7.6295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8924,  -7.3819,  -8.6042,  -9.0731,  -6.5809,  -6.5771,  -7.1423,
         -7.7772,  -7.3921,  -8.5325, -10.3663,  -5.6533,  -7.0761,  -8.1999,
         -7.0369,  -8.5759, -10.2965,  -5.8549,  -6.6280,  -8.2580],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9380,  -7.3191,  -8.8652,  -9.0674,  -6.7627,  -6.2540,  -8.1715,
         -8.5972,  -7.2213,  -8.3701,  -9.5221,  -5.8476,  -7.3524,  -8.9770,
         -7.5853,  -8.3185, -10.5764,  -6.2906,  -6.9228,  -6.2410],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4895, -7.0950, -8.6397, -8.3053, -9.3288, -6.0831, -7.2653, -8.6008,
        -8.3814, -8.8755, -8.8735, -6.8585, -7.0273, -7.1161, -7.0553, -7.8453,
        -8.6640, -6.4682, -6.5933, -6.8234], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9257,  -7.4137,  -6.5896,  -9.3402,  -9.5769,  -6.4486,  -7.5278,
         -8.9162,  -6.5684,  -8.8098,  -8.5354,  -6.2253,  -7.1586,  -8.4421,
         -7.7599,  -7.9858, -11.0584,  -5.4059,  -7.5833,  -8.2411],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0155,  -7.3274,  -8.6296, -10.4530,  -5.9227,  -7.2039,  -8.2217,
         -8.0432,  -8.3376, -10.5794,  -7.0697,  -6.7951,  -7.1706,  -7.8583,
         -7.7439,  -8.1678, -10.1081,  -6.3929,  -6.5966,  -7.4264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7301, -11.4215,  -6.1380,  -6.9226,  -7.5440,  -8.4814,  -6.8421,
         -8.2271, -10.8758,  -6.4499,  -7.1731,  -8.0232,  -8.6259,  -6.5367,
         -8.5270, -10.6083,  -5.7617,  -6.6666,  -8.8294,  -8.7997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2079,  -6.3956,  -5.9895,  -7.1741,  -7.5353,  -8.3655,  -7.5127,
         -8.5325, -11.6965,  -6.2454,  -7.6907,  -8.2759,  -7.1380,  -9.4190,
         -7.9396,  -6.3037,  -6.4181,  -6.1461,  -6.6466,  -7.0819],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8741,  -6.7887,  -7.6110,  -7.1278,  -8.1230, -10.9103,  -5.8664,
         -7.7233,  -8.2936,  -7.4181,  -9.3094,  -8.8121,  -6.4125,  -6.5486,
         -8.5826,  -7.8543,  -7.3707,  -8.4732,  -8.9143,  -6.3692],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2424,  -9.7590,  -6.1199,  -7.3633,  -7.8307,  -8.1227,  -8.5422,
        -11.4189,  -6.6708,  -7.4229,  -7.9056,  -8.2806,  -8.2349,  -9.5824,
         -6.0473,  -7.5198,  -6.9699,  -7.6400,  -8.1777, -10.4527],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6284,  -8.7349,  -8.1589,  -8.2038, -10.4373,  -6.7931,  -6.7632,
         -6.8073,  -6.9507,  -7.7553,  -8.7799,  -8.6779,  -6.7383,  -6.5170,
         -7.0030,  -6.6461,  -7.9189,  -8.2594,  -9.2779,  -6.2169],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2874,  -5.3282,  -7.6781,  -8.5443,  -7.0736,  -9.4157,  -8.3895,
         -6.4377,  -6.4206,  -7.2793,  -6.5933,  -7.7015,  -9.2957,  -6.4484,
         -7.3729,  -9.1986,  -7.2895,  -7.9067,  -9.4865,  -5.8123],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7764,  -6.3914,  -7.8769,  -8.3583,  -8.7281,  -8.4147, -11.3480,
         -6.3806,  -7.5396,  -8.2964,  -8.6875,  -7.2608,  -8.7977,  -8.5899,
         -7.0693,  -7.4273,  -8.2136,  -6.9675,  -8.0527,  -8.5700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9373, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4566,  -8.4190,  -6.3290,  -6.9495,  -6.6974,  -7.2058,  -7.9089,
         -8.3193,  -7.0409,  -8.2649, -10.9326,  -5.6788,  -7.4023,  -8.2054,
         -7.2026,  -9.3722, -10.4877,  -6.3367,  -7.0321,  -8.2159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8729, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4631,  -7.8451,  -8.1136, -10.9107,  -6.1427,  -6.5209,  -8.1953,
         -7.6360,  -7.2167,  -9.9944,  -7.0434,  -6.9599,  -7.0564,  -7.8370,
         -8.9742,  -7.5334,  -8.2687, -11.2536,  -6.2642,  -7.5799],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3369,  -7.8485,  -9.7911,  -6.1080,  -6.7962,  -9.0154,  -8.2504,
         -8.3617,  -8.2407,  -6.1893,  -6.6136,  -8.0244,  -8.0828,  -8.2328,
        -10.6922,  -6.6694,  -6.4155,  -8.1397,  -8.4105,  -7.4511],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8335, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1856,  -6.6164,  -7.0914,  -6.5627,  -6.3216,  -7.8771,  -8.2602,
         -7.2847,  -7.4569,  -6.2375,  -8.1055,  -7.0641,  -8.5285, -10.9270,
         -5.6467,  -7.6434,  -7.7431,  -6.5516,  -7.0571,  -8.9614],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9434, -6.8412, -7.9564, -7.1641, -8.7950, -9.9882, -6.6864, -7.0354,
        -6.4024, -7.1197, -7.4176, -8.9211, -8.7316, -6.7624, -8.3113, -7.2528,
        -7.4736, -9.6408, -8.2951, -6.4364], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6587, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9324,  -6.9324,  -9.2671,  -8.1299,  -6.0372,  -6.4170,  -8.0385,
         -7.1476,  -7.4784,  -8.0457,  -9.8732,  -5.3565,  -7.4888,  -8.5608,
         -7.4877,  -8.0876, -10.3876,  -6.0984,  -7.4427,  -8.2405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5029,  -6.2495,  -6.7917,  -6.3391,  -8.5947,  -7.0953,  -7.2283,
         -8.7479, -10.9269,  -6.2146,  -7.4086,  -8.6444,  -7.7683,  -8.6271,
        -10.4132,  -6.8159,  -7.3232,  -8.7301,  -8.6642,  -7.1810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9927,  -7.9664,  -8.2737,  -9.7668,  -6.2103,  -6.4614,  -8.8439,
         -8.2535,  -7.6550,  -9.6522,  -6.4464,  -7.4434,  -8.2124,  -7.6786,
         -8.2089, -10.5357,  -6.1817,  -6.7185,  -7.1408,  -8.5116],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4906,  -8.1642,  -7.0031,  -8.1860, -10.2053,  -6.0841,  -8.8430,
         -7.5216,  -7.7028,  -9.7434,  -9.4187,  -6.7702,  -6.6240,  -7.3524,
         -6.2431,  -8.2333,  -7.3272,  -7.5551, -10.9393,  -6.8847],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0773,  -6.7428,  -7.4266,  -8.4963,  -6.5877,  -9.3267, -10.9980,
         -6.1640,  -6.6840,  -7.7300,  -8.1208,  -7.5832,  -8.6011,  -8.9186,
         -7.9449,  -7.5251,  -7.8479,  -7.8596,  -8.8685, -10.5414],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5254, -7.1824, -7.5722, -9.4051, -6.1603, -6.3105, -7.7295, -8.7233,
        -7.3455, -7.7831, -9.9792, -5.6820, -7.6217, -7.2629, -7.1179, -9.0896,
        -8.7381, -7.0812, -6.3065, -6.4215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6696, -6.7764, -6.2624, -7.0317, -7.9706, -7.0460, -8.5136, -9.0827,
        -7.2368, -8.3901, -7.6159, -7.2205, -8.9944, -7.6174, -6.4926, -6.5839,
        -6.4083, -7.0822, -7.2597, -8.1200], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0125,  -8.1106,  -6.9413,  -8.1268, -10.1196,  -6.4108,  -6.4264,
         -8.5906,  -8.4262,  -7.2666,  -8.7097,  -8.2708,  -5.5836,  -7.8552,
         -6.7961,  -6.9478,  -9.6319,  -8.6835,  -6.5630,  -6.7405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1226,  -8.3610,  -7.9491,  -8.8517, -10.7196,  -6.4130,  -6.6379,
         -6.7528,  -7.9541,  -7.0319,  -7.1585,  -8.7761,  -8.6121,  -7.0685,
         -7.4435,  -8.3042,  -7.1750,  -8.7607,  -8.2632,  -7.4721],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8872,  -8.0398,  -7.9954,  -7.9950,  -8.7452, -10.3279,  -6.5775,
         -6.6749,  -6.8947,  -7.5768,  -7.4739,  -8.3954,  -9.3488,  -6.8509,
         -7.1522,  -8.7660,  -6.6580,  -9.1411,  -8.6719,  -6.0305],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0899,  -6.1367,  -6.8230,  -8.0719,  -8.2543,  -8.5859,  -7.7439,
         -6.2731,  -5.9398,  -5.7186,  -6.9991,  -6.5901,  -8.2085,  -7.0819,
         -7.9527, -10.6762,  -5.6344,  -6.9683,  -7.9040,  -7.8726],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.6808,  -6.0446,  -6.7798,  -7.8261,  -8.3492,  -7.1443,  -8.7125,
        -11.1885,  -5.3497,  -8.5956,  -7.9650,  -8.2784,  -8.9333,  -6.0566,
         -7.1730,  -6.6456,  -6.6679,  -7.4341,  -7.6739,  -7.4393],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4564,  -8.6423, -11.5661,  -5.7607,  -7.7747,  -8.0900,  -7.4937,
         -9.6294,  -8.5955,  -5.9951,  -6.8853,  -6.4484,  -5.9499,  -8.6600,
         -7.3766,  -7.2712, -10.4787,  -6.1036,  -6.5172,  -6.3951],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9888,  -8.6849, -11.0894,  -5.8364,  -7.3424,  -9.0147,  -8.6737,
         -8.8807, -10.4638,  -6.7552,  -6.2184,  -8.4398,  -7.4910,  -8.3853,
         -9.4602,  -6.7062,  -6.9248,  -7.8987,  -8.4972,  -8.0944],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7256,  -9.9785,  -5.8937,  -7.4879,  -7.9575,  -7.4607,  -8.0750,
         -9.9887,  -5.7462,  -6.6900,  -7.9110,  -7.9598,  -7.0454,  -8.2562,
        -10.2958,  -5.6460,  -8.0514,  -7.4189,  -7.6157,  -9.2143],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7720,  -6.8499,  -9.0419,  -8.9126,  -7.2956,  -6.8054,  -7.2731,
         -5.9854,  -8.4162,  -7.4131,  -8.2170, -11.2798,  -6.4098,  -6.7347,
         -7.1916,  -6.7572,  -6.4039,  -8.4749,  -8.3937,  -6.0254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6905,  -9.1792, -11.1354,  -6.7609,  -6.3985,  -6.6300,  -7.6302,
         -8.6025,  -8.9801,  -7.1146,  -7.8298,  -7.3045,  -7.1552,  -9.6283,
         -8.3354,  -6.3457,  -6.5236,  -6.6830,  -7.2118,  -7.6506],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6075,  -8.2515,  -7.2176,  -9.2684,  -8.6954,  -6.9216,  -7.7116,
         -7.4139,  -4.7432,  -7.7682,  -7.5323,  -6.7284, -10.5648,  -6.9506,
         -6.9250,  -7.8566,  -8.2566,  -8.2835,  -9.0359,  -6.3757],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6210,  -8.1764, -10.0736,  -6.4092,  -6.6020,  -7.8792,  -8.1046,
         -7.1832,  -8.3959,  -8.8585,  -6.3261,  -7.5228,  -9.0537,  -7.5886,
         -7.4023, -10.7586,  -6.7323,  -6.3559,  -8.0672,  -5.5561],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1960,  -7.8547, -10.0682,  -5.9171,  -6.3689,  -8.6516,  -7.8605,
         -8.0480, -10.5605,  -6.5864,  -7.2684,  -7.4652,  -7.5103,  -7.4392,
         -8.5066, -10.6311,  -5.6627,  -6.3104,  -7.7491,  -8.0218],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4591,  -6.7606,  -7.5506,  -7.6451,  -7.3943,  -7.9554, -10.6509,
         -6.0775,  -6.6648,  -8.8987,  -8.6932,  -7.0804,  -8.4631, -10.4993,
         -6.2553,  -6.8459,  -6.4272,  -6.3376,  -9.0475,  -7.4869],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0372,  -6.7795,  -7.5228,  -9.5318,  -5.8826,  -7.7692,  -7.5289,
         -7.3576,  -9.1801, -10.5578,  -6.4876,  -6.7508,  -6.0285,  -6.9194,
         -6.8100,  -8.6268,  -8.6483,  -8.8182,  -8.6477,  -5.8455],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3069, -8.0756, -7.4306, -8.6503, -9.3980, -7.0915, -8.1436, -7.0688,
        -7.7458, -7.9892, -9.9267, -6.0948, -6.0376, -6.8916, -5.8801, -7.1855,
        -7.1229, -8.5388, -7.2999, -8.6218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8822,  -6.0771,  -6.2221,  -6.8179,  -8.2879,  -6.6631,  -8.6395,
        -11.2058,  -5.9433,  -7.1049,  -6.8031,  -7.7568,  -8.3624,  -9.5352,
         -6.4963,  -6.4342,  -7.9465,  -8.5146,  -8.8503,  -9.3099],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0928,  -7.5306,  -8.6643,  -7.3102,  -8.0343,  -9.1643,  -5.5985,
         -7.8006,  -7.6974,  -7.5658,  -8.3939, -10.4514,  -6.8535,  -8.2081,
         -6.7291,  -6.5189,  -7.4271,  -7.4998,  -7.9418,  -7.4063],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2745,  -7.0044,  -6.5862,  -7.5676,  -8.9848,  -8.6159,  -7.5133,
         -8.6109, -11.2481,  -5.9388,  -7.2497,  -8.7002,  -8.1469,  -7.0599,
         -8.9930, -11.2660,  -6.0732,  -6.7417,  -8.0900,  -8.5670],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9849,  -8.2660,  -7.1879,  -8.0303, -10.1183,  -5.5016,  -7.7432,
         -7.6088,  -6.9731,  -9.0584,  -8.4962,  -5.8657,  -6.4382,  -7.5082,
         -8.1202,  -7.1954,  -8.3396,  -9.2702,  -7.9679,  -7.2676],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3568,  -6.7956,  -6.9328,  -7.7025,  -8.2550,  -7.4606,  -8.1131,
        -10.8979,  -6.3989,  -7.2783,  -7.8162,  -8.7599,  -7.1529,  -7.5319,
         -9.6830,  -6.0316,  -6.9261,  -8.4485,  -8.5396,  -7.2321],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7910,  -8.0964,  -8.3215,  -9.2794,  -6.6204,  -6.6287,  -8.1505,
         -8.3857,  -8.3698,  -8.9855,  -6.2206,  -6.4916,  -8.7863,  -8.1761,
         -8.4703, -11.1919,  -6.1074,  -6.8375,  -7.1796,  -8.5424],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8678, -10.5376,  -6.1700,  -6.9733,  -8.3111,  -7.2737,  -7.2043,
         -8.5143,  -9.5878,  -6.1132,  -7.0259,  -7.7206,  -7.2770,  -8.9605,
        -10.7718,  -5.9739,  -6.6145,  -7.2342,  -8.0809,  -7.6727],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.6771,  -6.3024,  -7.1621,  -8.2541,  -8.2705,  -8.6118, -11.1503,
         -7.0754,  -7.5480,  -8.5956,  -6.4088,  -8.9248, -10.6768,  -6.0537,
         -7.4376,  -7.9225,  -7.8693,  -7.5590,  -8.7233,  -8.6423],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8364,  -8.6462,  -8.4021,  -6.3795,  -7.7782,  -8.4809,  -7.0660,
         -8.0186, -10.4768,  -6.9756,  -6.4624,  -6.3570,  -7.5133,  -8.0131,
         -7.1976,  -6.6758,  -8.5061, -10.6204,  -5.6342,  -7.7972],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9686,  -7.1131,  -8.8860,  -7.3083,  -8.2111, -10.4297,  -6.0400,
         -6.7167,  -7.8175,  -8.1161,  -7.6507,  -9.9154,  -5.9003,  -7.2414,
         -7.8515,  -8.6874,  -6.9936,  -9.2250,  -8.6520,  -6.4155],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4649,  -7.2191,  -8.5341,  -8.6143,  -7.2005,  -8.6659, -10.8354,
         -6.1934,  -6.7883,  -7.5623,  -7.1843,  -7.7702,  -9.8926,  -6.5869,
         -8.0596,  -8.0880,  -7.1787,  -9.5132,  -8.7653,  -6.3629],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8740, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8378,  -7.2154,  -6.2656,  -6.8745,  -8.5617,  -7.7563,  -8.1728,
         -9.9184,  -5.9561,  -6.8976,  -7.6376,  -8.2678,  -6.9662,  -8.3094,
        -11.1380,  -7.2370,  -7.3942,  -8.6716,  -7.4505,  -7.8105],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9504,  -6.8512,  -9.4377,  -9.0183,  -6.0019,  -6.6747,  -8.4783,
         -8.6879,  -8.4035, -10.0269,  -6.6193,  -8.1241,  -8.5363,  -7.2841,
         -7.8739, -10.2224,  -6.0686,  -7.1268,  -8.0259,  -6.9257],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4918, -6.8643, -6.7695, -7.7722, -7.3980, -7.9738, -9.6542, -6.2648,
        -7.5389, -8.0439, -7.5452, -9.1482, -8.4769, -6.5217, -6.4183, -7.9507,
        -6.5073, -7.4712, -8.7003, -8.7498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2489,  -9.5705,  -6.2556,  -7.1626,  -7.8054,  -7.8203,  -7.3861,
         -8.4424, -10.5896,  -6.3400,  -7.9327,  -7.0247,  -7.4137,  -8.3300,
        -10.2456,  -6.1561,  -6.0859,  -7.5826,  -7.7631,  -7.1997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1563,  -7.4412,  -7.6144,  -7.5074,  -8.4027,  -9.9756,  -6.2637,
         -7.2033,  -8.2057,  -8.8188,  -7.3112,  -8.3795, -10.2398,  -5.5575,
         -7.2343,  -8.9956,  -7.1594,  -8.5551, -10.6360,  -6.0395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9939,  -7.5626,  -8.4413,  -7.7029,  -9.0571,  -8.4534,  -5.9059,
         -6.5829,  -8.0019,  -7.5493,  -7.2070,  -8.0132,  -9.3836,  -5.7702,
         -7.5509,  -8.1949,  -8.9162,  -7.1812,  -8.8497, -10.6604],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0817,  -6.8261,  -8.6263,  -8.7299,  -6.6031,  -6.8138,  -7.1843,
         -7.9804,  -7.4188,  -8.7522,  -9.5441,  -6.2070,  -6.8432,  -7.3971,
         -6.3773,  -8.1851, -10.6522,  -6.7714,  -7.1163,  -6.5315],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2429,  -7.0007,  -6.5469,  -7.4181,  -8.5884,  -6.9863,  -8.0815,
        -10.7922,  -6.1095,  -7.4462,  -7.8461,  -7.4725,  -7.4000,  -8.0993,
         -7.6714,  -7.9897,  -7.9397,  -6.3880,  -6.7936,  -7.0687],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7275,  -8.2567, -10.5360,  -5.7866,  -7.7445,  -7.3649,  -6.9225,
         -9.2121,  -8.8296,  -6.6361,  -7.1786,  -6.8601,  -5.6812,  -7.7530,
         -7.2514,  -8.2692, -11.1559,  -5.6896,  -7.5431,  -8.4049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2021,  -8.7874,  -7.1634,  -7.8231,  -9.8758,  -6.2947,  -7.2696,
         -7.7556,  -7.5515,  -8.3667, -10.8946,  -5.5210,  -7.6014,  -7.6238,
         -6.4562,  -9.2566,  -8.7757,  -6.3683,  -6.9271,  -6.6915],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0838,  -7.6989,  -9.1022,  -6.0644,  -7.7282,  -8.2726,  -7.2097,
         -8.4363, -11.0740,  -6.4716,  -6.6963,  -7.9940,  -8.1912,  -7.1240,
         -8.5009,  -8.8751,  -6.0637,  -7.3379,  -8.1535,  -7.4941],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4252,  -4.8648,  -7.8504,  -7.2754,  -8.3538, -11.3634,  -5.1642,
         -8.4475,  -7.8133,  -6.9623,  -8.8745,  -7.7431,  -6.1370,  -7.0164,
         -7.2095,  -6.9269,  -7.4284,  -8.4734,  -7.6288,  -8.3456],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5428,  -8.3663, -10.1474,  -5.2694,  -6.7999,  -8.2788,  -7.8742,
         -6.9827,  -8.6322, -11.1587,  -6.3939,  -6.9120,  -8.6958,  -8.9333,
         -7.3223,  -8.0607, -10.3011,  -5.6042,  -7.9017,  -8.3298],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9254, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1389,  -9.0040,  -5.8057,  -6.8871,  -7.7224,  -8.3292,  -8.1662,
         -9.9688,  -7.0002,  -6.9911,  -8.0017,  -7.0956,  -7.4227,  -9.8078,
         -6.7151,  -7.2932,  -7.2183,  -8.4378,  -8.7390, -10.8207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1296,  -8.6139,  -7.7873,  -8.4646,  -9.9212,  -6.5516,  -8.0184,
         -8.2453,  -6.3122,  -8.7642, -11.0860,  -6.0298,  -6.6874,  -8.4125,
         -8.5816,  -7.3727,  -7.9737, -10.2501,  -6.0049,  -8.3052],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6726,  -8.5096,  -7.0855,  -8.7401, -10.7176,  -5.7398,  -6.7981,
         -8.2504,  -8.3224,  -8.2039, -10.1921,  -7.0265,  -6.8613,  -8.2937,
         -7.2202,  -7.4092,  -8.0210,  -9.8721,  -5.5159,  -7.1938],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0968, -8.0739, -5.7702, -6.7669, -7.4693, -8.0074, -7.5109, -8.1868,
        -9.1383, -5.5118, -7.4115, -7.4371, -7.1804, -9.4922, -8.3151, -6.3699,
        -7.2720, -7.0379, -7.0237, -8.3277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.8835,  -6.2299,  -6.9615,  -8.2984,  -7.1376,  -8.5536,  -8.9102,
         -6.0272,  -6.4107,  -7.3639,  -7.8513,  -7.9385,  -9.4856,  -6.1545,
         -6.4676,  -7.4295,  -7.5869,  -8.1745, -10.0836,  -5.6756],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9355, -7.0600, -6.5390, -9.1220, -8.7107, -5.9648, -7.1900, -8.1322,
        -8.7961, -7.0019, -8.3691, -9.7408, -6.3248, -7.9755, -8.0541, -6.6066,
        -9.2045, -8.3638, -6.2671, -6.8121], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8992, -6.4307, -7.6096, -7.0008, -8.2797, -8.0768, -8.2452, -9.5583,
        -6.6837, -6.0744, -6.7186, -7.7577, -8.2424, -8.6449, -6.7677, -8.2707,
        -7.9656, -7.8044, -8.9783, -9.0712], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4633,  -6.7423,  -8.3939, -11.6044,  -6.0688,  -7.5492,  -8.0880,
         -6.5823,  -8.1868,  -9.7791,  -6.6319,  -6.7261,  -8.0733,  -7.2292,
         -6.2120,  -8.0686,  -9.9211,  -5.8920,  -6.7942,  -7.8296],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3314,  -5.3209,  -7.4123,  -7.9268,  -7.5256,  -7.7241, -10.1093,
         -6.3200,  -7.4622,  -8.5062,  -7.8532,  -7.8775, -10.0821,  -6.3207,
         -6.6067,  -7.8252,  -7.5707,  -7.1783,  -7.6361, -10.1761],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7690, -11.2624,  -5.6893,  -7.0321,  -8.8338,  -6.9981,  -7.7744,
        -10.1993,  -6.1602,  -6.6368,  -9.0042,  -8.6655,  -7.5071,  -8.3661,
        -10.5138,  -6.2337,  -6.4669,  -8.0238,  -8.3556,  -8.2441],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9762,  -8.0392,  -7.0435,  -7.7989,  -9.6506,  -6.2323,  -7.2213,
         -8.2118,  -8.2876,  -8.1902, -10.3508,  -6.1221,  -6.5340,  -7.5071,
         -8.2631,  -7.5291,  -7.8398, -10.0997,  -6.1923,  -7.0232],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4344,  -8.5082, -10.4348,  -6.7139,  -6.7372,  -7.8920,  -8.1698,
         -8.4703,  -9.6365,  -6.5951,  -8.2360,  -6.9190,  -7.3872,  -8.3158,
        -10.8306,  -6.5738,  -6.6026,  -6.7412,  -5.8034,  -8.9532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5946, -7.2671, -7.3089, -8.0788, -7.9615, -9.6584, -5.2999, -7.7525,
        -8.5088, -6.7557, -9.3356, -8.3359, -6.9569, -7.1679, -7.5351, -6.7548,
        -7.6571, -8.4447, -8.7235, -7.0927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3734,  -6.1191,  -7.6172,  -8.0240,  -8.0678,  -8.4823, -10.1056,
         -6.5996,  -6.5369,  -7.0189,  -7.3704,  -8.4908,  -9.2093,  -6.2590,
         -6.9817,  -8.5204,  -7.4387,  -7.7354, -10.0621,  -5.5673],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4157, -9.9114, -5.9365, -6.5999, -7.4571, -7.4530, -7.6831, -9.5327,
        -6.5039, -6.3063, -7.5160, -8.0557, -6.6537, -8.1312, -6.9024, -8.3926,
        -9.6695, -6.1207, -6.7770, -6.8692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1031,  -7.7320,  -7.9592,  -8.0916,  -6.4713,  -7.1551,  -6.4180,
         -7.8441,  -8.6605, -10.0298,  -7.2008,  -7.4147,  -7.5259,  -7.5013,
         -7.8658,  -9.5257,  -6.6700,  -6.7446,  -8.2501,  -6.4345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5822, -7.2975, -7.6188, -8.0255, -8.2561, -6.8887, -7.2950, -7.0509,
        -7.3490, -8.1631, -9.1632, -6.1862, -6.6110, -6.6211, -7.4360, -7.8750,
        -9.2086, -6.3946, -6.6665, -7.4349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4062, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4593, -7.2305, -6.7786, -7.2720, -8.9672, -7.6042, -7.3244, -6.0782,
        -6.6162, -7.3824, -8.0308, -7.8338, -6.5487, -7.2380, -9.6284, -5.5540,
        -7.2503, -6.7411, -7.6397, -8.2502], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3677, -6.3318, -6.3187, -6.7630, -6.7905, -7.5862, -8.1766, -7.1010,
        -7.0069, -7.3497, -7.2004, -8.8270, -9.8022, -6.5710, -6.6739, -6.5883,
        -7.2309, -7.6344, -8.6098, -8.3737], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6443, -9.0769, -5.7090, -7.1527, -7.2683, -7.9135, -6.6182, -8.5083,
        -9.6905, -6.5879, -6.3999, -7.4263, -6.6695, -8.3262, -9.7047, -5.7570,
        -6.6387, -7.9282, -7.6631, -6.9400], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7611, -6.8062, -6.8227, -7.8307, -9.2067, -5.7396, -6.9955, -8.0899,
        -7.6783, -8.0998, -9.3169, -7.0643, -6.6350, -7.0225, -6.9633, -7.3217,
        -7.0386, -7.4737, -9.0462, -6.2844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9623, -6.7191, -7.3140, -7.7733, -7.1598, -7.8313, -9.3864, -5.3858,
        -6.6900, -6.9076, -6.8031, -8.5368, -9.3740, -6.4328, -6.9168, -7.1136,
        -7.9895, -7.9267, -9.1014, -5.8131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2067, -8.0181, -8.9288, -6.2400, -6.4043, -6.7461, -6.8599, -6.5358,
        -8.2220, -6.3652, -6.9360, -7.8497, -7.0255, -7.1425, -7.9049, -9.4083,
        -5.9537, -7.0789, -7.4990, -6.4216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3048, -7.9783, -8.4585, -5.5665, -6.9527, -7.0233, -7.8661, -9.0613,
        -9.9365, -6.1695, -6.7779, -7.5208, -7.3168, -7.4851, -9.3071, -5.1897,
        -7.5590, -6.9428, -6.0851, -7.8750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4101,  -7.3177,  -8.5957,  -9.5122,  -6.3090,  -6.9471,  -6.2386,
         -7.8358,  -7.4772,  -8.3614,  -7.0952,  -8.5382, -10.0169,  -5.8329,
         -7.4641,  -7.7061,  -7.2302,  -9.0655,  -7.9580,  -6.6102],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4560, -6.3558, -6.6931, -7.7874, -6.9208, -7.9465, -9.1611, -5.8407,
        -6.8951, -7.4089, -6.3506, -8.5233, -9.6488, -6.2812, -7.2310, -7.5491,
        -7.8142, -7.8922, -9.7859, -6.3044], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3770,  -7.0577,  -7.3522,  -6.7253,  -8.4304,  -9.5913,  -6.9606,
         -7.0107,  -7.8459,  -7.8640,  -6.8363,  -8.3590, -10.2304,  -6.2171,
         -7.1670,  -7.2767,  -7.7029,  -7.8254,  -8.6586,  -5.8421],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4328, -7.2209, -7.4705, -8.4405, -8.7034, -6.3215, -7.3135, -7.4749,
        -8.1846, -7.5701, -9.0294, -6.3744, -6.2471, -6.8582, -7.8571, -7.1661,
        -8.0427, -9.6687, -6.3434, -6.8545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4787, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2869, -7.8695, -7.9503, -6.9368, -7.7700, -9.4802, -6.1099, -6.4597,
        -7.6428, -7.0009, -8.0880, -9.6507, -6.0757, -6.7299, -7.2534, -7.6716,
        -7.4465, -7.8541, -9.2434, -5.7393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4422, -7.0818, -7.8585, -7.3329, -8.1635, -9.2905, -6.5856, -7.2530,
        -7.7236, -7.0435, -9.2160, -8.3674, -7.0962, -6.4281, -6.7930, -7.1923,
        -7.6106, -9.0333, -5.6683, -6.9326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3192,  -6.2988,  -5.8985,  -7.1476,  -6.9781,  -8.6086,  -7.3729,
         -8.9675, -10.6832,  -5.8910,  -7.6151,  -7.2341,  -7.1000,  -8.4041,
         -8.3137,  -6.4798,  -7.0418,  -6.7624,  -7.5380,  -8.2632],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1673, -6.8142, -6.2258, -5.7240, -7.4479, -6.9752, -8.0602, -6.5973,
        -6.2918, -8.4444, -7.9435, -6.3112, -7.1200, -7.2183, -6.7374, -8.0925,
        -8.6148, -5.9428, -6.7251, -7.7115], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0583, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8412, -7.8329, -5.8994, -6.2271, -6.5060, -7.2589, -6.9645, -7.7671,
        -8.7645, -6.2114, -6.6698, -7.1738, -6.6435, -8.4543, -9.3703, -6.7208,
        -6.4184, -7.3264, -7.2773, -8.1816], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6731, -10.0941,  -5.6552,  -6.6397,  -6.8030,  -7.3440,  -7.4247,
         -8.4470,  -9.4107,  -6.4119,  -6.9298,  -6.8719,  -7.8736,  -7.2991,
         -8.2490,  -9.5757,  -5.4281,  -7.1569,  -6.5095,  -8.0345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3120,  -6.8701,  -5.5294,  -7.3395,  -7.1063,  -8.1341,  -8.1738,
         -7.8959,  -8.5289, -10.7895,  -5.9147,  -7.0712,  -6.8400,  -7.0636,
         -8.4251,  -7.9223,  -6.5110,  -6.2523,  -6.4577,  -6.4924],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5002, -8.1221, -9.1482, -5.7906, -7.1975, -6.9083, -6.7203, -8.8030,
        -9.7617, -6.2980, -6.2053, -7.1318, -7.2061, -8.4744, -8.5248, -6.6346,
        -6.7662, -6.7827, -7.2383, -8.1493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8136, -6.5187, -6.1736, -6.1363, -7.1599, -7.1634, -7.3711, -6.6799,
        -7.1223, -8.6398, -5.8091, -6.8254, -7.6011, -7.4993, -8.3602, -6.9458,
        -6.7235, -6.5954, -5.6458, -7.1326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7686, -6.6220, -6.0108, -5.9753, -7.1236, -7.2379, -8.3465, -8.3527,
        -8.4617, -7.9330, -6.0520, -6.9747, -7.8602, -7.6671, -6.3499, -8.7742,
        -7.9480, -6.3312, -6.5523, -7.3732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7422, -6.7668, -8.9591, -7.9417, -6.6602, -6.6207, -6.1290, -7.0634,
        -7.8800, -7.9556, -7.4617, -8.1409, -8.1520, -5.8932, -7.4345, -7.8366,
        -6.4416, -8.5527, -8.1490, -6.4568], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4250, -7.1780, -7.4512, -6.8804, -7.1610, -8.1627, -9.8898, -5.6922,
        -6.7825, -6.6258, -7.5303, -8.1472, -9.3574, -6.1395, -7.3316, -6.9846,
        -7.4393, -7.7885, -8.9538, -5.3165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5165, -10.0925,  -5.7916,  -6.8405,  -6.7942,  -6.8516,  -7.3039,
         -7.8398,  -9.3289,  -5.8233,  -7.2474,  -7.1679,  -7.2442,  -9.0003,
         -7.4552,  -6.2427,  -6.4926,  -7.2957,  -7.5334,  -7.1303],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2348, -8.8613, -6.2872, -6.6313, -7.1135, -7.8872, -7.0332, -8.2353,
        -9.5472, -5.7821, -7.2297, -6.6604, -7.5658, -9.2546, -8.2412, -7.2392,
        -6.9574, -8.3468, -6.5745, -6.9714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9545, -7.5211, -7.3727, -7.0096, -8.5852, -9.7982, -7.0830, -6.3513,
        -7.6060, -8.3526, -8.3149, -9.1779, -6.8641, -7.2353, -6.8836, -7.6608,
        -6.8293, -8.2345, -9.4125, -5.8258], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6536, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3537,  -8.6515,  -6.3745,  -6.9660,  -7.4555,  -6.7796,  -9.0681,
        -10.2069,  -6.6158,  -6.7294,  -7.8293,  -7.6347,  -7.4040,  -8.1968,
         -8.9878,  -5.2117,  -7.1923,  -7.2772,  -6.4768,  -9.0526],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.3354, -6.5227, -6.4150, -6.8993, -7.5103, -7.5248, -8.4162, -8.5716,
        -6.5994, -6.4780, -7.5569, -6.6438, -8.2078, -9.8685, -6.3907, -6.8259,
        -7.2537, -6.9489, -7.8173, -9.8458], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0194, -8.0281, -8.4341, -6.3466, -6.7752, -7.1907, -7.0827, -8.6570,
        -9.4057, -6.2063, -6.5214, -7.2901, -6.9874, -7.1088, -8.2347, -9.3048,
        -6.2407, -7.3696, -7.4072, -7.5965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9076,  -8.1127,  -8.9667,  -5.8644,  -7.6383,  -7.2826,  -6.9682,
         -9.2452,  -7.9535,  -7.0051,  -6.6873,  -7.1976,  -6.4886,  -7.5275,
         -7.4553,  -7.4933,  -7.1413,  -8.4729, -10.4575,  -6.6666],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8980, -8.0258, -7.8123, -6.5215, -7.6575, -9.9220, -6.6975, -6.2065,
        -7.0247, -6.0404, -6.6060, -8.1917, -7.6348, -8.3216, -9.4392, -7.8985,
        -6.4482, -7.3338, -7.3370, -6.9424], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1641, -8.2938, -7.5005, -8.0747, -8.8965, -6.6030, -6.4382, -6.7022,
        -5.6149, -5.4308, -6.2957, -7.7784, -6.7037, -7.6908, -8.2831, -8.5157,
        -8.3092, -5.4577, -6.5771, -8.0346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6132, -8.4621, -7.6137, -5.7164, -5.2866, -6.1987, -6.1716, -7.0915,
        -6.5309, -8.4239, -6.5022, -8.6000, -7.4024, -5.8003, -6.3435, -7.4238,
        -7.1940, -7.8836, -8.8393, -6.5335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5433, -8.7517, -6.4188, -6.3651, -6.5576, -6.3713, -7.1793, -8.6945,
        -8.4848, -6.2907, -6.6238, -7.9293, -7.6031, -7.5687, -8.9299, -6.5678,
        -6.7793, -7.6602, -7.9560, -7.0772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7256, -6.8018, -7.3996, -6.8386, -8.2256, -7.3687, -6.8614, -6.0296,
        -6.4030, -6.8204, -7.8226, -7.8341, -6.9314, -8.0768, -9.9565, -6.4471,
        -7.2811, -7.4223, -7.7572, -8.3651], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8683, -9.2015, -8.3635, -6.1472, -6.8852, -6.9032, -7.1926, -7.6588,
        -7.6983, -8.9478, -6.0146, -7.3201, -7.5659, -7.5606, -8.9852, -8.0337,
        -6.4041, -6.0960, -6.8221, -7.0326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1422, -8.0187, -9.7508, -5.6385, -6.8462, -6.8252, -6.5806, -7.6624,
        -8.9429, -6.2188, -7.3599, -7.7239, -6.5772, -8.8066, -8.3034, -6.6878,
        -6.9600, -5.9094, -6.9442, -7.3830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3141, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9461, -7.6831, -9.9591, -7.3652, -6.5305, -6.2770, -7.2529, -7.0244,
        -8.4894, -9.0766, -6.5229, -7.4758, -7.2294, -8.4106, -7.5936, -6.7265,
        -6.1412, -7.0646, -6.2504, -7.4386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9006, -5.9289, -8.0373, -7.0401, -8.4117, -8.1216, -6.1389, -6.9645,
        -7.4721, -7.5375, -8.1345, -9.2199, -6.1570, -6.4041, -7.4621, -6.9751,
        -6.9722, -8.1418, -9.7691, -5.9215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8569, -7.6558, -7.7224, -7.0951, -8.0476, -9.8027, -6.5149, -6.4199,
        -7.4235, -7.4828, -8.1820, -8.8578, -6.7197, -6.5503, -7.1415, -7.5119,
        -7.2331, -7.8963, -9.4527, -5.9534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5242, -7.1726, -7.6509, -6.6849, -8.0633, -8.8763, -6.6892, -6.1194,
        -7.9351, -6.5077, -8.0623, -8.5859, -6.5099, -6.9347, -7.2863, -7.2312,
        -8.6605, -9.7820, -6.1751, -6.4917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9145, -7.1279, -8.1260, -8.9128, -7.2090, -7.4033, -7.4521, -7.4319,
        -8.7476, -8.0156, -6.1405, -6.7136, -7.1913, -7.8517, -7.0099, -7.7420,
        -9.5534, -6.2570, -6.4940, -7.6995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1822, -8.3606, -9.6724, -6.6260, -7.0016, -7.6443, -8.2149, -6.9759,
        -8.0757, -9.9845, -6.7452, -7.1301, -7.8087, -6.9323, -9.1551, -8.3175,
        -6.5003, -6.4079, -6.5713, -7.6456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6421,  -6.8538,  -8.0912, -10.1056,  -5.7857,  -7.4467,  -7.1674,
         -7.4144,  -8.7598,  -8.8255,  -6.4986,  -5.8290,  -7.5118,  -6.7929,
         -7.6636,  -6.7996,  -7.9398,  -9.7266,  -5.8647,  -6.9663],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4843, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0219,  -7.1859,  -8.7800, -10.2133,  -6.2080,  -6.7426,  -7.4502,
         -7.6499,  -6.5892,  -8.5302,  -9.6772,  -6.4772,  -6.7968,  -7.7403,
         -7.6393,  -8.3296,  -9.4614,  -6.5373,  -6.4256,  -6.7605],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8831, -6.6743, -7.1808, -7.8880, -7.7144, -6.8593, -7.4173, -9.6125,
        -6.0566, -6.3182, -7.4984, -7.9582, -7.2196, -7.8609, -9.4727, -6.1777,
        -6.4405, -7.7119, -8.0420, -7.1331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.7388, -6.0263, -6.8773, -7.3081, -6.8857, -7.3711, -8.0376, -8.6765,
        -6.2874, -6.7364, -7.7708, -7.4442, -8.2863, -9.2197, -6.6331, -6.6958,
        -7.0122, -7.1722, -7.0505, -7.5400], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2320,  -9.5923,  -6.7333,  -7.3436,  -7.4628,  -6.4210,  -8.4975,
        -10.1103,  -6.3889,  -6.6597,  -7.6726,  -8.1450,  -8.4738,  -9.3546,
         -6.0313,  -7.7355,  -7.0537,  -6.9199,  -9.0134, -10.1185],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9413, -9.4561, -6.1841, -7.0543, -7.2849, -7.6073, -7.0348, -7.9669,
        -9.5266, -6.8538, -6.4238, -6.9234, -7.6480, -7.3232, -8.0946, -8.8327,
        -6.6057, -6.6778, -7.5637, -7.5493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9651, -7.3745, -7.2058, -8.2988, -9.1058, -6.6908, -6.6864, -6.6789,
        -6.2332, -7.1046, -7.8183, -8.7001, -5.6098, -7.2583, -6.6726, -8.3972,
        -8.7204, -6.9299, -5.9687, -7.9277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8053, -9.3382, -5.8638, -6.5552, -7.1689, -7.3293, -8.2526, -9.8651,
        -5.6474, -6.6077, -7.0869, -7.7178, -7.3924, -6.9326, -9.1209, -5.8582,
        -6.8686, -7.2767, -7.4610, -6.6272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7280, -8.8144, -9.2266, -6.6761, -6.6117, -7.1494, -7.0575, -7.4509,
        -9.2909, -5.9682, -6.5025, -6.9568, -7.8119, -7.1283, -8.0548, -8.7191,
        -6.8917, -6.9128, -6.8444, -8.3380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0341, -5.3509, -6.8254, -7.7982, -6.0777, -8.3046, -9.7205, -6.6637,
        -6.2331, -6.6955, -6.4999, -7.8122, -8.1127, -7.4477, -8.7810, -9.8153,
        -5.9993, -6.2802, -7.3530, -8.3989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9724, -8.3332, -7.4794, -8.4768, -8.8816, -6.5512, -6.9005, -7.0053,
        -7.2584, -8.0458, -9.7505, -6.6655, -6.9640, -7.9316, -6.7376, -7.0151,
        -8.2723, -9.9812, -6.1166, -7.0483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6854,  -6.4881,  -7.2807,  -6.5016,  -7.5724, -10.2172,  -5.5989,
         -6.6521,  -7.5514,  -7.3349,  -7.5720,  -8.4947,  -8.9205,  -6.6666,
         -7.3495,  -6.5142,  -7.1902,  -7.9195,  -8.4976,  -6.5631],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3985,  -9.6547,  -6.7185,  -6.4584,  -6.8477,  -6.5180,  -7.9043,
         -7.0019,  -8.4145,  -6.9696,  -8.4679, -10.3319,  -6.3388,  -7.2859,
         -7.3169,  -7.6205,  -9.2016,  -7.7949,  -6.6278,  -7.0989],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5716,  -7.1665,  -7.6164,  -7.1285,  -8.2899,  -9.4665,  -6.6286,
         -6.3948,  -6.5863,  -6.8954,  -8.5072,  -6.9914,  -8.4168, -10.5857,
         -6.3108,  -7.4327,  -7.3819,  -7.6507,  -6.6243,  -8.5937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4597, -7.7706, -7.4472, -8.4504, -7.3290, -8.5058, -8.6843, -5.6203,
        -7.7807, -7.3848, -6.8994, -8.9120, -7.9993, -6.7401, -6.7699, -7.1242,
        -6.9455, -7.9963, -7.0720, -8.1405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3233, -7.2533, -7.2901, -8.6215, -9.8960, -6.3166, -6.9911, -7.2805,
        -6.7118, -7.3239, -8.2601, -7.0842, -7.1063, -6.6568, -7.3199, -8.7059,
        -9.6992, -6.4248, -6.8336, -6.8776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4988, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8167, -6.8534, -7.4413, -7.0070, -8.8770, -8.7405, -6.4470, -7.2411,
        -7.4561, -8.5748, -7.0404, -7.8196, -9.3672, -6.1976, -7.3657, -7.1248,
        -7.7507, -8.3667, -9.6726, -5.9333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4590, -6.7808, -7.2559, -7.7386, -6.5005, -8.0758, -9.2157, -5.8248,
        -6.8689, -7.5430, -7.5680, -8.0623, -9.8311, -5.5771, -6.9204, -7.4335,
        -6.5150, -8.7551, -9.7279, -6.2330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9905, -7.1062, -7.9659, -9.4704, -6.7965, -6.5535, -6.9164, -7.7928,
        -6.8794, -7.8354, -9.1881, -6.0059, -7.5479, -7.1164, -6.3601, -9.1346,
        -8.1849, -6.8364, -6.6453, -7.2156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.5434, -6.3943, -6.8210, -7.3450, -7.1904, -7.8419, -9.4066, -6.3669,
        -6.9908, -7.4539, -7.8602, -7.6847, -8.7361, -5.6239, -7.1598, -7.7693,
        -6.3772, -6.1158, -9.1617, -7.3116], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8326, -6.2917, -6.4823, -6.1262, -7.3168, -8.5208, -7.3933, -8.7047,
        -9.9667, -6.1228, -7.0050, -7.2971, -7.2571, -7.5014, -8.0927, -9.0617,
        -6.7557, -6.8910, -7.4732, -7.4920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.4756, -6.4135, -6.3647, -6.4183, -7.5230, -8.6981, -9.1506, -7.3782,
        -6.6874, -7.2890, -7.3670, -6.8616, -8.3643, -8.2947, -6.3536, -6.8021,
        -6.9931, -7.3961, -9.1082, -9.5294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1346, -7.3119, -7.4960, -7.8330, -9.0918, -6.0614, -7.0467, -7.6541,
        -7.0191, -8.3359, -9.3199, -5.6474, -6.5085, -7.5291, -7.2847, -8.1819,
        -9.1534, -6.1263, -6.7811, -6.7127], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4115, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7757, -7.2172, -8.3962, -7.2739, -8.7655, -8.7264, -5.9667, -7.1059,
        -6.9741, -6.9711, -7.8232, -8.9376, -6.2578, -6.3046, -6.8107, -6.7724,
        -8.3088, -8.4314, -6.7045, -7.1739], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9126, -7.8323, -9.4875, -5.9558, -7.3059, -8.2511, -6.5818, -8.7022,
        -9.5241, -6.3572, -6.8450, -7.7783, -7.3374, -7.1637, -7.9163, -8.7305,
        -6.6430, -6.1533, -7.4772, -7.3348], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1149, -6.5428, -7.6924, -7.6196, -6.7428, -8.0166, -9.2785, -6.7093,
        -6.0580, -6.8119, -7.9302, -7.4475, -7.8019, -9.0975, -6.0990, -6.2106,
        -7.5999, -7.8573, -7.5267, -7.8964], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5425, -9.7969, -5.8232, -7.0023, -7.0181, -6.7910, -7.6477, -8.8028,
        -6.4507, -7.1985, -7.1171, -7.2570, -8.9125, -9.7221, -5.9434, -6.4513,
        -7.5342, -7.3480, -7.8007, -9.5523], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0114,  -6.4340,  -7.1906,  -6.0513,  -7.2063,  -7.0444,  -7.8724,
         -7.6638,  -6.9431,  -8.8147, -10.1522,  -6.1992,  -6.6154,  -6.9763,
         -7.3609,  -6.5666,  -7.7013,  -9.0686,  -5.6776,  -7.1579],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0744, -8.5813, -6.4186, -6.1352, -6.9983, -6.0787, -7.4615, -6.9808,
        -8.5810, -9.7966, -6.0595, -7.1303, -7.1292, -6.4431, -8.1109, -8.7254,
        -6.4145, -6.3015, -6.8253, -6.5613], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2404, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4436, -6.8115, -6.8362, -7.1098, -7.4052, -9.1448, -5.7983, -6.6345,
        -7.8685, -7.5545, -7.5925, -7.9274, -9.4817, -6.0757, -7.0373, -7.2746,
        -6.9851, -9.1648, -8.1002, -6.2265], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0508,  -6.6989,  -6.1814,  -8.0674,  -7.0207,  -7.9719, -10.4004,
         -6.6854,  -7.5091,  -7.9234,  -7.2101,  -8.7193,  -9.4649,  -7.1127,
         -7.4892,  -7.0840,  -6.9127,  -7.5160,  -8.3578,  -8.3921],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9282, -7.8164, -7.8999, -7.3821, -6.3147, -6.9643, -7.6644, -6.7969,
        -6.7027, -7.4921, -7.2474, -8.2386, -7.4517, -5.7675, -5.7806, -6.0774,
        -7.2540, -6.6464, -8.0146, -7.2263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2009, -6.9551, -6.4527, -8.1595, -7.5404, -7.9289, -8.3337, -8.4214,
        -6.3765, -6.3586, -7.9794, -7.2593, -8.5395, -9.4328, -6.7626, -7.3385,
        -7.1185, -7.8022, -8.0853, -8.9242], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1868,  -7.7236,  -8.2250,  -8.6351,  -5.4258,  -7.3779,  -6.9584,
         -7.3793,  -9.0889,  -7.8432,  -6.5082,  -6.0716,  -7.2220,  -6.7657,
         -8.3396,  -7.0171,  -8.3735, -10.4585,  -6.0787,  -7.2972],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4988, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9879,  -9.3845,  -5.8971,  -6.0565,  -6.8845,  -6.7212,  -9.0136,
         -6.8885,  -8.4131, -10.0713,  -6.3407,  -6.8576,  -7.1758,  -7.1002,
         -8.7085,  -9.4106,  -6.1368,  -6.1860,  -6.7792,  -7.6950],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6648,  -7.2615,  -8.7891,  -7.3679,  -6.8015,  -6.1889,  -7.2621,
         -6.2103,  -8.5878,  -6.6683,  -8.4348,  -7.1230,  -8.4695, -10.4369,
         -5.8809,  -6.5635,  -7.2528,  -8.0159,  -7.4435,  -8.5352],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4254, -6.3125, -6.3748, -7.0785, -7.6504, -7.3518, -8.8196, -7.2899,
        -8.6883, -7.7509, -6.1284, -7.3463, -6.6568, -6.9981, -7.9368, -9.1986,
        -6.7003, -6.5955, -7.3781, -6.9231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2448, -6.6025, -6.9679, -7.7493, -7.1923, -8.3073, -9.5285, -6.5843,
        -6.2878, -7.6814, -7.2217, -6.8946, -8.5538, -8.9487, -6.6139, -6.5686,
        -7.3460, -6.4461, -8.1504, -9.5571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3240, -8.8935, -6.4357, -7.2983, -6.8565, -8.6135, -8.7277, -9.9172,
        -5.6045, -7.0621, -6.8856, -7.3598, -7.0102, -8.1983, -9.5642, -5.5538,
        -7.1088, -6.7143, -6.1805, -8.4782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8565,  -8.5993, -10.2057,  -6.3208,  -6.8767,  -6.8091,  -7.3051,
         -8.0823,  -8.8860,  -6.1134,  -7.5634,  -7.0522,  -7.4360,  -8.4425,
         -9.7916,  -6.4720,  -7.0145,  -7.4643,  -7.6462,  -7.2400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1846, -7.6805, -7.3905, -7.3931, -8.4694, -8.8507, -5.4132, -7.1946,
        -7.1687, -7.3045, -9.0434, -8.1571, -6.2831, -7.3017, -7.4476, -6.3261,
        -7.7208, -9.5730, -6.0208, -6.8695], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7970, -8.3560, -8.5534, -7.2384, -6.7107, -7.4015, -7.1727, -7.6050,
        -9.0049, -6.3729, -7.5059, -7.1395, -6.6867, -7.8752, -8.8608, -6.1763,
        -6.7822, -7.6143, -8.1548, -8.1022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5555, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5038, -8.3322, -8.6402, -7.6272, -6.5388, -6.7077, -7.1096, -7.9000,
        -6.9944, -8.1469, -6.8703, -7.8559, -9.3499, -7.1481, -6.4285, -7.0212,
        -7.5854, -7.4882, -8.2581, -9.8217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2181, -7.4507, -7.4175, -8.0390, -9.5836, -5.8957, -7.0811, -7.2810,
        -8.0763, -8.5993, -9.9896, -7.6717, -6.2733, -6.5731, -7.5089, -7.5134,
        -7.9526, -6.6251, -7.5427, -8.9219], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9488,  -8.3948,  -7.0403,  -8.4834, -10.5542,  -5.9864,  -7.5356,
         -6.5801,  -7.4693,  -8.4240,  -9.3919,  -6.5653,  -6.7784,  -7.5665,
         -6.7043,  -7.1622,  -8.5061,  -8.2211,  -5.9354,  -6.9503],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1905,  -6.9863,  -9.3474,  -7.6154,  -7.0305,  -7.1894,  -5.9221,
         -6.1347,  -7.3364,  -7.1724,  -8.2957,  -6.4936,  -8.7163, -10.2142,
         -5.7508,  -6.5166,  -6.8114,  -7.0726,  -7.3745,  -8.1713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4811, -7.1597, -7.2757, -8.8298, -8.0015, -6.3686, -7.1414, -6.1041,
        -6.9985, -7.7505, -7.9966, -6.8939, -8.0688, -9.7354, -5.9208, -6.9010,
        -7.5786, -7.4122, -8.3180, -9.0470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9408, -6.9852, -7.8775, -9.3681, -7.4298, -6.7507, -6.8554, -7.9384,
        -7.2361, -8.6031, -8.8494, -5.5217, -7.3273, -6.9616, -7.1948, -9.3271,
        -8.0870, -7.1269, -7.5533, -8.1197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3295,  -7.7085,  -7.7712,  -7.9296,  -6.7543,  -8.2313, -10.2178,
         -6.4773,  -6.8800,  -6.6626,  -6.5899,  -7.0387,  -8.0500,  -9.3212,
         -6.3337,  -6.8575,  -7.8581,  -7.1830,  -8.1479,  -9.0713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4911, -6.9194, -7.2799, -8.3237, -9.5273, -6.0851, -6.7120, -7.9518,
        -7.4712, -7.4958, -8.3673, -8.6236, -5.4286, -7.2723, -7.2484, -6.2827,
        -9.2056, -8.2174, -6.5408, -6.1836], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1943, -6.4440, -5.8021, -6.7063, -7.5531, -6.7642, -7.7659, -8.2107,
        -8.7581, -9.0420, -6.9704, -7.8018, -6.7004, -8.3693, -8.0603, -5.5447,
        -6.9141, -6.8416, -6.0919, -8.0342], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0044, -7.6185, -6.9253, -9.3285, -8.0870, -5.9332, -6.5229, -7.1218,
        -7.0043, -6.8102, -7.8479, -9.1988, -6.2500, -6.8973, -7.4878, -8.2452,
        -8.7510, -9.7977, -7.3128, -6.7107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0456, -6.3536, -7.3454, -6.8904, -7.7043, -8.1406, -6.2752, -7.2251,
        -6.8231, -7.4744, -8.4606, -9.1408, -7.0086, -6.5675, -7.5599, -6.9796,
        -7.4284, -8.2585, -8.5657, -5.4447], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5723, -7.4162, -7.8534, -9.1903, -6.3023, -7.3319, -7.5129, -6.8179,
        -8.4469, -9.6593, -6.2794, -6.3900, -7.3705, -7.2761, -7.2551, -7.7441,
        -9.5407, -5.3347, -6.4750, -7.4470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0899, -7.1982, -6.9499, -7.8762, -9.7129, -6.2076, -6.7641, -8.0332,
        -6.7508, -8.3895, -9.9213, -6.5553, -6.5927, -6.0691, -7.4870, -8.8759,
        -9.7956, -7.8493, -7.3007, -7.1769], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6810, -7.1760, -7.4342, -6.7645, -7.1605, -6.9157, -7.6315, -8.0443,
        -6.3655, -7.2297, -9.5004, -5.9228, -6.5592, -7.1003, -7.0993, -7.7320,
        -7.9671, -9.3217, -5.8067, -6.8213], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3475, -7.7381, -7.2988, -9.0374, -7.7548, -6.8118, -6.3960, -7.1811,
        -6.9985, -6.6437, -7.9400, -6.8088, -7.2402, -8.0327, -9.4910, -5.9093,
        -7.2950, -7.2584, -6.9283, -8.2092], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4843, -7.1796, -8.2963, -9.5498, -5.7005, -7.0603, -7.6966, -7.5790,
        -8.7884, -9.5106, -5.9464, -6.7775, -7.0502, -8.4170, -8.3971, -9.4623,
        -6.5490, -7.3040, -6.8838, -8.1386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8035, -6.8297, -6.4103, -7.9527, -6.9780, -7.7577, -6.8508, -7.5780,
        -9.8505, -6.3422, -6.5828, -7.6396, -7.8187, -8.1356, -8.5377, -6.6285,
        -6.4488, -7.3600, -7.6589, -6.9386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1066, -6.7566, -7.9541, -9.8968, -6.0239, -7.5628, -6.7893, -8.1250,
        -7.2581, -7.3638, -7.4253, -6.6810, -6.5910, -6.0343, -7.1602, -6.8888,
        -6.2406, -7.4084, -7.2322, -7.1410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8458,  -8.6423,  -9.5844,  -5.9601,  -7.0265,  -6.9694,  -7.6949,
         -6.3775,  -8.5031, -10.0895,  -6.1617,  -6.5324,  -7.8892,  -7.8457,
         -7.5018,  -9.6585,  -6.0850,  -6.5835,  -8.1140,  -7.5770],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9998, -6.3807, -7.0596, -8.4923, -8.0360, -8.3251, -9.7287, -6.0843,
        -6.7263, -7.8587, -7.1005, -7.9727, -8.9725, -6.5473, -7.0049, -6.6169,
        -6.4953, -7.4611, -8.2017, -6.9520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6201, -7.7301, -7.4008, -7.9793, -9.4926, -6.0916, -7.2730, -7.4376,
        -6.3607, -8.3442, -9.8170, -6.6555, -6.7036, -6.7536, -6.6206, -8.1695,
        -8.4399, -8.2016, -8.3822, -8.8548], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3861, -7.3856, -7.5581, -7.1715, -7.9860, -8.9851, -6.1953, -6.3831,
        -7.9145, -7.7241, -7.9131, -9.6291, -5.9732, -6.6661, -7.2599, -6.6386,
        -8.5037, -9.6543, -6.0205, -6.9794], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5428, -7.3283, -6.5240, -7.8529, -9.4330, -5.8839, -6.9621, -7.7170,
        -6.7449, -8.4141, -7.7273, -6.7075, -6.1153, -6.6865, -7.4879, -7.3353,
        -8.0444, -6.8767, -8.1734, -9.9214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9273, -6.8168, -9.0589, -7.9255, -6.2758, -7.4010, -6.5783, -7.6135,
        -7.0053, -8.7153, -8.6581, -5.5772, -7.2721, -7.0206, -6.6475, -8.9564,
        -8.2694, -6.3184, -6.4847, -7.7410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7090, -7.4682, -7.4348, -8.5057, -9.5433, -6.1022, -6.6635, -7.0848,
        -7.8058, -6.1645, -8.2726, -8.3373, -6.4736, -6.8401, -7.7885, -7.7219,
        -7.6731, -9.2951, -5.7933, -6.6215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9583,  -9.4085,  -5.3603,  -6.4230,  -7.8681,  -7.2382,  -8.1944,
        -10.0850,  -6.2425,  -6.9126,  -7.5545,  -7.2224,  -7.2922,  -7.7180,
         -9.0013,  -5.8483,  -7.0804,  -7.2301,  -6.3074,  -8.2176],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9713, -7.0308, -8.2105, -7.8784, -8.3157, -8.3558, -5.8525, -7.3747,
        -8.4361, -7.4925, -6.7971, -6.6266, -7.1184, -8.1853, -8.0756, -6.5828,
        -7.0695, -6.0979, -6.6873, -9.0089], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2484,  -8.1425,  -7.5601,  -8.0807, -10.6967,  -6.5513,  -7.2166,
         -7.2482,  -8.4269,  -7.8944,  -9.1114,  -6.1430,  -6.8142,  -7.0272,
         -7.1179,  -8.2691,  -9.9454,  -6.4272,  -6.5453,  -7.4302],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7597, -8.5675, -8.2504, -6.4283, -6.2202, -7.8605, -7.5739, -6.1755,
        -7.8978, -9.6759, -6.1829, -7.3568, -6.7148, -6.4270, -8.5778, -8.2250,
        -6.8576, -6.8872, -6.8020, -7.3356], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2353, -7.0781, -7.2095, -6.6986, -8.6181, -9.0991, -6.4062, -6.1219,
        -6.3841, -5.5828, -8.3197, -8.9987, -7.0315, -7.2712, -6.9906, -6.6207,
        -8.0325, -8.9714, -6.6841, -6.8477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4879, -6.8360, -7.2753, -7.9945, -8.4064, -8.3939, -6.3631, -6.9429,
        -7.2736, -7.0945, -8.3960, -9.4040, -5.7953, -6.8547, -7.0716, -8.0402,
        -6.8294, -7.7706, -9.4103, -6.2544], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1674, -6.4805, -6.9645, -8.0058, -7.4119, -8.4890, -8.7820, -5.6092,
        -7.2629, -7.3451, -7.4733, -9.4939, -8.2175, -6.3695, -6.2278, -6.0082,
        -6.4959, -7.0142, -8.0271, -7.8328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5423, -8.1654, -7.2527, -8.4349, -8.0775, -6.0932, -7.2754, -7.0741,
        -6.7665, -8.4585, -8.9774, -6.5344, -6.0809, -6.6109, -6.8291, -8.2798,
        -6.6475, -6.5344, -8.5412, -8.1858], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4181, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1379, -7.5387, -6.7502, -6.0178, -8.2279, -9.5494, -6.3387, -7.1608,
        -7.2015, -6.3098, -7.2175, -8.1837, -7.8923, -8.3821, -9.9953, -6.5049,
        -6.6923, -6.6765, -7.4235, -8.0848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0254, -7.1248, -8.1906, -9.0331, -6.9947, -6.0963, -7.6716, -7.2750,
        -7.8053, -9.3833, -6.2854, -6.7958, -7.5770, -7.5793, -7.9394, -9.2155,
        -6.4323, -6.4529, -7.5880, -7.8144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4718, -7.8584, -8.4607, -6.8074, -6.6683, -6.2087, -6.6080, -8.5241,
        -6.9534, -8.0043, -9.3291, -6.3581, -7.7592, -7.3514, -8.8646, -8.5466,
        -8.4946, -6.3650, -8.2182, -6.8369], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1407, -6.2044, -6.8222, -7.8702, -7.3616, -7.8960, -9.6934, -5.2956,
        -7.2425, -6.5631, -7.4838, -6.4569, -8.6243, -8.3083, -6.8139, -7.1230,
        -7.3476, -6.0808, -7.9525, -7.1627], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0585, -6.9255, -6.3577, -9.4876, -8.3687, -6.1040, -7.3323, -7.2478,
        -7.4977, -8.3308, -8.2503, -6.3895, -6.5280, -7.7641, -6.6067, -8.2550,
        -8.7871, -6.8280, -6.6829, -6.8885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7565, -9.2531, -6.5069, -7.1301, -7.1816, -7.4425, -6.8177, -8.0222,
        -9.6680, -6.0956, -6.5669, -7.5115, -6.5298, -7.1184, -8.0575, -9.4018,
        -5.7357, -6.6869, -7.8798, -7.4255], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4685,  -9.3205,  -8.2842,  -6.9818,  -6.6948,  -7.3474,  -7.5584,
         -7.2832,  -8.3954,  -8.9917,  -5.6382,  -6.9844,  -7.3646,  -6.9653,
         -8.0204, -10.1541,  -6.0391,  -6.5830,  -7.3966,  -7.4272],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7628, -7.1123, -6.7385, -6.6552, -8.9684, -7.7759, -6.5219, -6.2184,
        -7.0966, -7.4693, -6.9339, -7.5943, -8.1279, -5.4998, -6.8793, -6.8438,
        -7.0204, -7.8792, -9.5846, -5.7592], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3170, -6.5715, -5.1000, -7.3479, -6.8492, -7.9655, -9.2268, -6.6265,
        -6.0064, -6.5335, -6.6612, -6.3354, -8.0247, -6.2754, -7.0632, -6.9046,
        -7.1394, -7.0567, -8.0825, -9.6247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5341, -7.8912, -6.9496, -8.2734, -6.7032, -8.4694, -9.5993, -6.4059,
        -7.0491, -6.6886, -7.5003, -8.9299, -7.4271, -6.6693, -7.1554, -7.2706,
        -5.8512, -7.6124, -6.7147, -7.7865], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3689, -6.8614, -6.9327, -7.0350, -8.6134, -7.9146, -5.7217, -6.8807,
        -6.8978, -6.8708, -7.9504, -9.3423, -6.0923, -6.6113, -7.6044, -7.2418,
        -6.5356, -8.3134, -7.6763, -6.4358], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4728, -5.9188, -6.9821, -7.6037, -7.3766, -7.3828, -7.3137, -6.9255,
        -7.6448, -6.6807, -6.9349, -8.6514, -7.9614, -7.3151, -7.0473, -6.2490,
        -6.6560, -6.4901, -8.3325, -7.1022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7779, -6.6456, -7.2477, -6.4655, -6.6774, -8.2151, -8.3129, -6.3709,
        -6.6869, -7.0385, -6.7310, -8.8921, -9.2385, -6.5752, -6.6389, -7.2890,
        -7.1396, -7.3244, -8.0436, -8.5768], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6511, -6.6798, -7.0879, -6.4265, -7.9369, -8.4330, -7.0593, -6.1123,
        -6.7043, -7.4270, -7.6282, -8.4446, -8.2922, -6.3476, -6.6841, -6.6488,
        -6.7633, -6.8816, -7.4117, -8.1625], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1923, -9.4691, -6.6172, -7.5766, -7.7315, -7.4911, -7.2113, -7.4384,
        -8.7676, -6.0234, -6.4746, -7.7826, -7.6589, -7.1255, -8.0163, -9.0935,
        -6.8414, -6.0782, -6.1900, -7.1340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2484, -8.2688, -7.9909, -6.2926, -6.4997, -6.9451, -7.1330, -7.9190,
        -8.5705, -6.4455, -6.8507, -6.6964, -7.7585, -7.7743, -8.4187, -6.2418,
        -6.3447, -7.0344, -7.2890, -6.6965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6664, -6.4119, -7.1414, -7.2075, -7.0261, -8.6752, -7.5495, -6.1232,
        -6.0805, -6.9810, -6.8504, -8.0391, -8.3828, -5.5705, -6.6236, -6.8335,
        -6.9073, -8.3921, -7.6877, -6.1059], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2071, -6.7276, -7.3443, -8.1825, -7.3075, -7.3520, -6.4048, -5.7764,
        -7.2586, -6.3050, -7.5141, -7.1709, -7.8362, -7.4253, -5.9866, -6.1263,
        -7.3397, -6.4527, -7.3844, -7.3030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1743, -6.9205, -6.7609, -8.2880, -9.3744, -6.1874, -6.8132, -6.6163,
        -7.3660, -8.5848, -7.8003, -6.3927, -6.2776, -5.9550, -6.4132, -6.9354,
        -8.2651, -8.2694, -7.6005, -6.8230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7526, -5.9867, -6.4530, -7.1022, -7.5319, -7.0582, -8.2383, -8.9506,
        -5.7050, -7.1389, -6.8551, -6.4811, -8.6301, -8.5744, -6.3843, -6.2604,
        -6.5041, -7.5109, -6.9589, -8.0715], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2175, -7.5130, -7.7297, -8.2824, -8.4267, -6.6383, -5.9800, -6.2517,
        -7.0984, -7.1727, -8.3901, -8.1123, -5.9434, -6.2650, -6.7827, -6.1831,
        -8.4958, -8.9187, -6.0245, -6.8092], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5707, -9.0410, -6.2233, -7.2993, -6.9979, -7.3082, -8.4297, -8.6096,
        -6.2867, -7.4027, -6.4959, -7.1201, -7.1295, -8.2629, -7.0024, -8.5907,
        -9.5451, -5.6856, -6.8970, -6.7591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4278, -7.9667, -6.5681, -6.9220, -6.6564, -7.5091, -7.2806, -7.9452,
        -6.9409, -8.4372, -9.6848, -5.7138, -7.0118, -6.8173, -7.2403, -8.7661,
        -7.7818, -6.6273, -6.3392, -7.1470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3655, -7.6203, -8.4925, -6.0996, -6.9293, -6.9448, -6.1541, -8.2733,
        -8.7691, -6.1759, -6.5364, -7.6255, -7.7223, -7.0171, -8.0514, -9.3074,
        -6.7031, -6.2593, -6.2602, -6.9386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8060, -7.5067, -6.5846, -8.2235, -9.1072, -6.6099, -6.2140, -6.9475,
        -6.9644, -7.8582, -8.4310, -5.8789, -6.9483, -6.4345, -7.2599, -7.6453,
        -8.3934, -6.2137, -7.0153, -6.5535], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7280, -7.4005, -8.9896, -8.0796, -7.0522, -6.3441, -7.1167, -7.3785,
        -6.8278, -8.2512, -6.4657, -8.4651, -9.6116, -6.4037, -7.1424, -6.4912,
        -7.1256, -8.6076, -7.3179, -6.7725], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9556, -8.8270, -7.7995, -6.6088, -5.9251, -5.7854, -7.7705, -8.3316,
        -8.9037, -7.5544, -6.8151, -6.6874, -7.5798, -6.9585, -8.4153, -8.4046,
        -6.1042, -6.8306, -6.6291, -6.4637], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8021, -6.3215, -7.5105, -9.0163, -7.8293, -7.0424, -6.8919, -7.9156,
        -6.5959, -6.9953, -7.5453, -6.5682, -8.4768, -9.7538, -5.8225, -6.7449,
        -6.7277, -7.4430, -7.3816, -7.7517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1800, -7.6105, -8.5494, -9.5299, -6.8469, -6.6760, -6.8708, -6.3173,
        -7.7211, -8.5523, -6.3526, -6.7503, -6.6274, -6.5836, -8.1899, -9.1180,
        -6.2230, -6.2869, -7.1059, -7.5728], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6437, -7.0321, -7.4813, -8.2339, -5.6636, -6.6874, -6.9081, -6.2488,
        -8.4541, -9.0048, -5.8876, -6.3352, -6.6247, -7.3654, -6.9522, -7.9192,
        -6.7315, -8.3427, -9.6392, -6.2710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6318, -6.6916, -6.9615, -8.3959, -8.6127, -6.4177, -6.9693, -6.6988,
        -6.8714, -7.8786, -8.8157, -5.9355, -7.2091, -7.1022, -6.9564, -8.7975,
        -7.7233, -5.9151, -6.2084, -6.5076], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8431, -6.0963, -6.8555, -6.9422, -6.4184, -8.3102, -8.7656, -6.1095,
        -6.6889, -7.0152, -7.0782, -8.0601, -8.4572, -5.8775, -6.6333, -6.7028,
        -6.9779, -6.8588, -8.0244, -8.6019], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9569, -6.4743, -6.7703, -8.9322, -7.6454, -6.5495, -6.8079, -6.1658,
        -7.6423, -8.7886, -8.3736, -6.3731, -6.6291, -6.3758, -6.6886, -7.5916,
        -8.3407, -6.9149, -6.4147, -5.9195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4615, -6.6046, -7.4567, -7.5877, -7.3381, -8.4880, -8.7402, -6.1893,
        -6.6084, -6.6082, -7.2640, -7.7691, -8.4847, -5.9769, -6.8205, -7.6288,
        -6.7795, -7.3410, -7.7936, -8.4831], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9346, -6.0569, -6.9722, -6.9043, -7.8475, -7.8673, -7.6628, -6.8044,
        -6.8074, -6.4072, -8.4467, -9.0326, -6.3025, -6.6557, -7.7923, -6.5719,
        -6.7642, -7.8614, -7.8178, -8.5332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0231, -8.0923, -8.6084, -5.8146, -7.0315, -7.4708, -7.4674, -7.0627,
        -8.1920, -9.2565, -5.9860, -6.6657, -6.8640, -7.5305, -6.9119, -7.8285,
        -8.5436, -5.6278, -6.5132, -6.5679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5858, -6.7858, -6.4256, -7.1286, -6.2193, -8.2094, -7.1908, -6.4529,
        -7.0675, -6.8926, -7.3474, -8.2861, -8.9239, -5.8245, -6.7634, -7.2010,
        -7.7531, -7.0059, -8.1204, -8.6466], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4470, -7.4686, -7.7975, -7.1877, -7.9862, -8.4999, -5.6230, -6.2159,
        -7.0578, -6.8494, -8.2001, -8.6205, -6.7049, -6.1893, -7.5065, -6.8365,
        -8.2945, -6.7647, -8.2027, -8.5801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0147, -7.4637, -7.3163, -7.1218, -8.2644, -8.8650, -6.0423, -6.2134,
        -6.3570, -7.1583, -7.0392, -8.1320, -8.5775, -6.0033, -5.9467, -6.4453,
        -6.7037, -6.9782, -7.8869, -8.5411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9060, -8.8840, -6.2852, -6.2960, -7.3328, -6.9506, -6.5561, -8.3971,
        -7.6418, -6.3929, -6.2551, -5.5905, -7.4810, -8.4886, -8.4071, -7.3698,
        -7.1448, -6.9085, -7.8511, -8.2830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5561, -7.3246, -6.7774, -7.4570, -7.5975, -7.4588, -7.6855, -7.6063,
        -7.1071, -5.9970, -8.3633, -7.1457, -7.8229, -8.9023, -6.4375, -6.8582,
        -6.8919, -7.2641, -8.6785, -7.7748], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1341, -6.2605, -6.2532, -7.0235, -7.4730, -7.8651, -8.4772, -5.5090,
        -6.3281, -7.0256, -6.3464, -8.3497, -9.4936, -6.2222, -6.6025, -6.9225,
        -7.7757, -7.1006, -7.7666, -9.1885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.7640, -5.8435, -7.1173, -6.7798, -5.6975, -8.0999, -8.6856, -6.9408,
        -7.1699, -7.2009, -7.0275, -6.1946, -6.8352, -7.8033, -6.3957, -7.9750,
        -9.2130, -6.5601, -7.1768, -6.5878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3686, -7.1736, -8.6020, -7.5723, -6.6615, -7.4000, -8.1083, -6.7972,
        -7.8928, -6.7357, -8.3029, -9.3976, -5.8510, -6.5948, -6.6131, -6.3751,
        -7.9804, -8.8430, -6.4996, -6.1506], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7444, -5.8453, -6.3056, -6.9280, -7.1103, -6.3858, -8.0222, -9.6463,
        -6.4061, -6.2207, -6.6219, -7.6143, -7.2022, -8.2271, -8.7787, -5.6820,
        -6.4748, -6.4983, -6.5357, -8.2811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3140, -6.3859, -7.5094, -7.7815, -7.3820, -7.9124, -8.8146, -6.5279,
        -6.4291, -6.6459, -6.8220, -8.3941, -9.3174, -6.4357, -6.6215, -6.5575,
        -7.4853, -7.9028, -8.6302, -8.5329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5637, -7.3806, -6.0388, -7.9475, -8.4873, -7.9048, -6.9459, -8.5684,
        -7.9223, -5.8667, -7.0392, -6.7064, -6.6867, -8.4897, -8.4355, -6.6716,
        -6.1875, -6.2598, -7.3161, -8.0813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2436, -6.2293, -6.6961, -8.5216, -7.5560, -8.6082, -8.8040, -6.3411,
        -6.6602, -7.2845, -7.5837, -8.6103, -9.2061, -6.9794, -6.7037, -6.1999,
        -6.4332, -7.2588, -8.1351, -8.1092], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6618, -7.0959, -7.4126, -7.6230, -8.3794, -6.2009, -6.1793, -6.9438,
        -6.3068, -7.5672, -8.6189, -9.2635, -6.0933, -6.2868, -6.8714, -6.8078,
        -7.4842, -7.6002, -8.5814, -5.9443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9845, -5.7788, -7.2206, -6.9535, -7.1899, -7.8191, -7.2353, -8.1584,
        -8.9298, -6.3918, -6.3713, -7.0848, -7.2248, -7.5295, -7.9296, -8.3488,
        -5.8012, -6.2426, -6.9512, -7.4539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2776, -8.9232, -7.7430, -6.6294, -6.3053, -6.9080, -7.6267, -7.7588,
        -7.7381, -6.3710, -8.0692, -9.3081, -6.2588, -6.8467, -7.2240, -6.7735,
        -6.1997, -8.2157, -7.9714, -6.2218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7459, -7.6808, -8.1652, -8.6436, -6.9817, -6.7240, -7.1713, -7.8538,
        -8.0379, -7.8959, -6.8617, -8.3692, -9.2248, -6.1669, -6.9815, -7.0684,
        -6.1564, -8.0149, -8.6967, -6.8347], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1336, -6.4116, -6.6666, -7.5072, -7.2306, -7.6467, -8.1822, -6.1859,
        -6.6738, -6.9817, -6.2859, -8.3564, -9.2634, -6.2200, -6.9502, -6.9463,
        -7.1688, -7.3116, -8.0432, -9.1131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9649, -6.0664, -7.1072, -6.4358, -8.4143, -6.5390, -6.9337, -8.3806,
        -9.1471, -6.3426, -6.7000, -7.3114, -7.0666, -8.4362, -8.6964, -6.5802,
        -6.8194, -7.1963, -7.3524, -7.0336], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4374, -7.7082, -7.9297, -8.6244, -6.2150, -6.0727, -7.2643, -7.3027,
        -7.5513, -8.3834, -6.5529, -6.7898, -6.8995, -6.8363, -8.1650, -9.2606,
        -6.1172, -6.3141, -6.5785, -7.2658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8821, -8.9762, -6.0410, -6.7531, -6.7834, -7.4824, -7.0669, -7.6804,
        -8.3981, -5.9452, -6.2675, -6.5574, -6.9360, -8.2001, -9.2547, -5.9471,
        -6.3096, -6.6798, -6.1822, -7.6468], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9975, -7.0251, -6.8287, -8.0264, -8.6056, -6.0257, -6.7770, -6.7231,
        -6.5848, -8.2886, -8.8961, -6.0843, -5.9205, -6.9699, -6.2762, -8.8417,
        -7.0085, -8.8159, -9.8426, -6.0481], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1705, -7.3509, -8.3016, -7.5743, -5.6587, -6.8989, -6.2815, -7.0490,
        -7.8654, -8.4057, -6.4321, -5.8518, -6.2217, -6.9452, -6.6780, -8.0389,
        -8.6466, -5.3137, -6.8088, -6.5046], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2470, -5.8797, -7.1201, -6.9571, -6.5307, -7.8796, -8.3771, -7.5412,
        -6.0000, -6.8563, -7.2982, -7.2702, -8.3257, -6.7920, -8.4562, -9.7073,
        -6.0289, -6.7834, -6.9667, -6.1866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2614, -6.4879, -7.9865, -8.2257, -8.2452, -7.1758, -7.1005, -6.5127,
        -7.1324, -7.9803, -8.3719, -6.4734, -6.5491, -6.5855, -7.5870, -8.3923,
        -8.9239, -6.2161, -6.4868, -6.5730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0455, -7.2330, -7.9522, -8.2895, -6.1542, -6.6137, -6.3808, -6.8680,
        -9.0759, -8.1516, -6.5624, -6.3958, -7.4830, -7.5474, -6.5120, -7.4628,
        -8.7921, -5.6211, -6.8640, -6.5688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1787, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4112, -6.1016, -7.1671, -6.6990, -6.8667, -7.6092, -9.1124, -6.3202,
        -6.4730, -7.1184, -7.4327, -6.9591, -8.1164, -8.6972, -5.9637, -7.1462,
        -6.5551, -6.5179, -8.1342, -8.4623], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1547, -7.6199, -8.3033, -8.6667, -6.8691, -6.3283, -7.0135, -6.6001,
        -7.0533, -8.0856, -6.1175, -6.0383, -6.6224, -7.7620, -6.7530, -7.6107,
        -8.6617, -6.4559, -6.1621, -7.0984], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1488, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3401, -6.0943, -7.0963, -8.2627, -8.4435, -5.9904, -6.8037, -6.5833,
        -6.9519, -8.0177, -8.9549, -6.1142, -6.2479, -7.6785, -7.9350, -8.3424,
        -8.3661, -7.6140, -6.5511, -7.4763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2449, -7.6910, -8.6325, -6.0675, -6.2685, -6.8122, -6.5123, -6.5637,
        -8.3743, -7.9528, -5.9711, -6.2430, -7.3857, -7.0901, -7.5569, -8.4267,
        -6.3209, -6.3751, -6.5913, -6.9096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.7509, -5.7890, -6.4839, -6.9994, -7.1524, -7.8468, -8.1241, -8.2755,
        -5.9970, -5.8174, -6.9959, -7.5563, -7.9351, -8.4679, -6.3624, -6.1275,
        -6.7450, -7.4457, -6.7314, -7.9438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7681, -6.8073, -6.9045, -8.3652, -9.1042, -5.9577, -7.2157, -6.3548,
        -6.5312, -8.7837, -7.6998, -6.4401, -6.8710, -6.9096, -6.9837, -7.7286,
        -8.6020, -6.3303, -6.3398, -6.4103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8767, -7.3379, -7.1486, -8.6461, -8.6105, -6.8674, -6.9381, -6.4561,
        -6.2904, -7.4867, -8.2990, -8.3885, -5.3956, -6.9121, -6.2939, -6.5980,
        -8.9435, -9.3729, -6.3216, -6.5238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5098, -6.6155, -7.5737, -7.1679, -7.0370, -7.7282, -8.4693, -6.0316,
        -6.0100, -6.3595, -7.2602, -7.2553, -7.7395, -8.6397, -6.1105, -6.0133,
        -7.1549, -7.3262, -7.9165, -8.1163], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4133, -6.4186, -6.5753, -7.2489, -7.0990, -7.7695, -8.8019, -5.8525,
        -6.2653, -6.5569, -6.0878, -7.8720, -9.1040, -6.3056, -6.1614, -6.7684,
        -6.4369, -6.2578, -7.6238, -8.8772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0719, -8.9252, -6.4956, -6.3218, -6.0751, -6.5957, -7.4135, -8.1365,
        -7.4240, -8.1838, -8.5751, -6.2370, -7.1201, -6.9237, -7.1714, -7.0671,
        -7.4426, -9.5312, -6.5705, -7.1089], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0663, -6.4621, -6.5770, -7.7339, -6.7411, -7.2085, -7.8631, -8.3636,
        -6.0397, -6.5454, -7.0412, -6.8454, -8.1172, -8.5348, -6.3761, -6.0850,
        -7.3342, -6.3455, -7.6604, -6.9598], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0778, -7.1642, -7.0823, -8.5008, -5.9391, -7.0853, -6.6579, -6.4274,
        -6.9200, -8.5435, -7.9525, -5.4293, -6.2261, -6.7711, -7.4051, -8.0266,
        -8.3192, -6.4171, -6.7462, -6.5181], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2281, -6.4608, -6.7107, -8.3338, -8.4591, -6.5335, -7.1122, -6.2241,
        -8.7432, -8.9274, -6.6933, -5.8325, -7.2449, -6.1745, -6.0109, -6.4195,
        -7.5773, -7.3207, -8.1234, -6.9022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6530, -7.6340, -7.1492, -7.4410, -7.1590, -6.2429, -7.6556, -7.8161,
        -6.4777, -6.8511, -6.2892, -7.3034, -8.1386, -8.1158, -6.1517, -6.5213,
        -6.6962, -7.0946, -7.9909, -8.4315], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4182, -6.2024, -6.5485, -7.2256, -8.2610, -8.6656, -6.2113, -7.2901,
        -7.1729, -7.2333, -8.6920, -8.8907, -6.2409, -7.0915, -6.5290, -6.3348,
        -8.5367, -8.4928, -6.8333, -6.9196], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9414, -6.9195, -6.5140, -8.3145, -9.3573, -6.3537, -6.4925, -6.6494,
        -6.8594, -7.8146, -9.0585, -5.8726, -6.3668, -6.9740, -6.9874, -7.1583,
        -7.8508, -8.6394, -5.8508, -6.2499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8270, -7.1839, -7.9580, -8.6789, -6.2388, -6.7122, -6.9725, -7.9041,
        -8.4605, -8.9656, -6.5348, -7.0935, -7.3403, -7.0154, -6.8534, -7.3603,
        -6.6151, -8.4265, -7.6619, -8.6007], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1761, -6.4319, -6.5380, -6.1855, -6.4409, -7.5988, -7.9029, -8.2178,
        -6.8790, -8.6899, -8.1954, -6.1075, -7.1529, -6.7986, -6.4885, -8.4918,
        -7.7023, -6.7271, -6.5603, -6.4985], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7175, -6.8665, -7.9527, -7.8034, -6.7447, -7.6116, -8.1991, -6.0551,
        -7.0784, -6.6745, -6.9366, -8.7566, -8.5181, -6.7796, -6.4893, -6.5814,
        -7.1867, -6.1409, -7.8946, -7.7830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0774, -6.5760, -7.0670, -6.9409, -7.0771, -7.8002, -8.8650, -5.1067,
        -6.7553, -6.6179, -6.6537, -8.6172, -8.5275, -6.2871, -7.0648, -6.8407,
        -7.6282, -6.5334, -7.6778, -8.8184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2351, -6.7624, -6.4297, -6.2327, -8.0251, -9.2646, -6.1711, -6.6926,
        -6.4253, -7.5825, -8.5452, -8.3854, -7.0224, -6.3221, -6.9335, -7.4843,
        -7.8143, -7.9530, -6.8006, -8.6207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6356, -6.5781, -8.1440, -9.6986, -5.9836, -6.5469, -7.0468, -6.4967,
        -6.4946, -7.6693, -9.2141, -6.4323, -6.8163, -5.9583, -7.6851, -8.8469,
        -7.8155, -6.1171, -6.1960, -7.3048], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1628, -6.0062, -6.8792, -6.8410, -7.4680, -9.1153, -7.5071, -6.6677,
        -6.8071, -6.7225, -7.2859, -7.6786, -7.5057, -6.6182, -6.6927, -8.4227,
        -9.4376, -6.1063, -6.6197, -6.9389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1493, -7.1834, -7.9329, -8.4995, -6.4981, -6.9453, -6.4224, -7.5802,
        -8.3429, -8.8787, -6.2850, -6.3958, -6.4487, -6.6448, -7.1161, -8.1047,
        -8.7568, -5.3855, -6.0810, -6.7994], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.8179, -6.1616, -7.1084, -7.1090, -6.8308, -8.5463, -7.4770, -7.5182,
        -7.7847, -6.7889, -7.6176, -7.2885, -7.9725, -9.1420, -5.4026, -6.8619,
        -6.3938, -6.5762, -7.5554, -8.6264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4290, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8297, -7.7862, -9.0877, -6.0301, -6.2662, -7.2010, -7.0872, -8.4387,
        -6.8125, -7.8915, -9.2977, -6.4629, -6.7209, -7.6227, -7.5008, -6.5219,
        -8.0691, -8.8023, -6.1102, -6.5165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3908, -8.2399, -7.9067, -6.2158, -6.5547, -6.8622, -6.9548, -8.1165,
        -9.1440, -6.5239, -6.5826, -6.9584, -7.3132, -7.1888, -7.0599, -8.0457,
        -8.1864, -6.8504, -7.1656, -7.5417], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2601, -6.6214, -6.9003, -8.1237, -9.1771, -6.3146, -7.1168, -7.2232,
        -7.0338, -7.1243, -6.7965, -7.8031, -8.4085, -5.5560, -6.3386, -6.5897,
        -6.4968, -6.9848, -7.8621, -8.5181], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3093, -6.5590, -8.7600, -7.9075, -6.5700, -6.9332, -6.6769, -6.7572,
        -8.3123, -9.4878, -6.4725, -6.5791, -6.2696, -7.5767, -7.7521, -8.0715,
        -6.9974, -8.2255, -7.5472, -5.5607], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2354, -6.8684, -6.5549, -7.7850, -9.2363, -5.6543, -6.8673, -6.3296,
        -7.3128, -8.9674, -7.4712, -6.0135, -5.9753, -6.1267, -5.5797, -7.4310,
        -7.5212, -8.1900, -6.9357, -8.4835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0770, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9562, -6.9472, -6.7223, -6.3516, -8.3248, -7.7261, -6.8002, -6.6624,
        -6.7700, -7.1471, -8.3631, -8.2060, -5.8577, -6.9102, -6.6438, -7.3022,
        -8.6654, -7.6283, -6.6461, -6.8729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4653, -6.1098, -7.7960, -8.7165, -6.8733, -6.8488, -6.5072, -7.1076,
        -7.7949, -8.5536, -6.0839, -6.3932, -7.3165, -6.7136, -8.4512, -9.1647,
        -5.9273, -6.7006, -6.8841, -7.1962], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1062, -8.3077, -8.0553, -6.4735, -6.0222, -6.4679, -6.7730, -7.8333,
        -8.8137, -5.2467, -6.5705, -6.0029, -6.8764, -9.0401, -7.6057, -6.5924,
        -6.3800, -5.8105, -6.5157, -6.2786], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4509, -9.1254, -6.1554, -6.5360, -6.3218, -7.1759, -6.4414, -7.8298,
        -7.7214, -8.5374, -9.1543, -6.1963, -6.9585, -7.1876, -7.1930, -8.6762,
        -7.3814, -6.6551, -6.2697, -7.0520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7855, -6.3483, -6.2344, -6.6978, -7.3883, -7.1761, -8.3535, -9.1761,
        -5.8854, -7.2444, -7.4250, -7.7396, -6.6651, -7.5594, -8.9772, -6.1363,
        -6.4914, -7.0062, -6.8836, -8.4165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2557, -7.5669, -6.7777, -8.1461, -6.4920, -6.8841, -8.5338, -9.2467,
        -5.8648, -6.8765, -7.0873, -7.1061, -8.3438, -8.9604, -7.2729, -6.4291,
        -6.4231, -7.3054, -7.4094, -7.6682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5578, -7.9724, -8.9134, -6.3957, -5.9298, -5.8286, -6.2472, -6.3019,
        -6.8084, -7.5441, -7.5790, -7.0712, -6.5765, -7.0076, -7.2097, -8.6610,
        -8.4909, -6.7099, -6.0572, -6.4077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6667, -6.0345, -7.3141, -7.7132, -7.2073, -7.8424, -8.5734, -6.7435,
        -6.0871, -6.5304, -6.4320, -7.6467, -8.0705, -6.0249, -6.7353, -6.8663,
        -6.2306, -8.2647, -8.8812, -6.4607], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1808, -7.0879, -8.8893, -7.5799, -6.2919, -6.1285, -6.4041, -7.4625,
        -6.8517, -7.9771, -6.7522, -8.6494, -9.2792, -5.6235, -6.8642, -6.3379,
        -7.2546, -8.3479, -7.0761, -7.1588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6325, -6.5203, -6.7872, -6.8042, -8.0571, -8.6950, -6.5721, -6.3608,
        -7.1423, -7.7359, -6.4328, -8.0355, -8.5650, -6.3915, -6.7534, -6.9361,
        -6.4929, -8.5427, -8.4101, -7.1064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7806, -9.4927, -6.1662, -7.2692, -6.9347, -6.8947, -8.5187, -8.5296,
        -6.5480, -6.1061, -6.9978, -7.7102, -6.5361, -7.7772, -8.2228, -6.7786,
        -6.6374, -6.4252, -6.6072, -8.3759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1445, -6.1950, -6.9606, -7.3157, -7.2537, -7.7595, -6.9591, -8.1048,
        -9.3011, -5.8625, -6.2725, -6.3481, -7.0638, -8.1333, -8.6536, -5.8846,
        -6.2166, -7.2028, -7.4409, -7.0947], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1584, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7088, -6.8592, -6.8650, -7.0597, -8.1622, -8.2407, -8.5354, -5.8680,
        -6.5178, -6.6737, -6.3979, -7.3444, -8.9490, -6.2818, -6.7515, -6.8755,
        -6.6322, -6.9866, -7.5103, -9.2482], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3652, -7.4345, -7.9212, -6.4062, -6.5160, -8.0154, -9.0899, -5.8261,
        -6.2243, -6.7294, -6.4596, -8.1221, -8.4754, -6.6125, -6.5927, -6.5881,
        -6.4347, -7.6654, -7.7588, -8.2656], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1329, -9.6927, -6.1307, -7.1543, -6.8868, -7.5443, -7.2245, -8.0879,
        -8.4192, -6.2817, -6.1635, -6.8555, -7.3906, -7.1738, -7.3485, -8.2133,
        -6.3398, -6.0518, -6.7225, -7.4051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0708, -6.6455, -8.1842, -9.0249, -6.1815, -6.8767, -7.3379, -7.1486,
        -8.6461, -8.6105, -6.8674, -6.9381, -6.4561, -6.2904, -7.4867, -8.2990,
        -8.3885, -5.3956, -6.9121, -6.2939], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6574, -8.5897, -9.3826, -6.5531, -6.9731, -7.7214, -7.6662, -7.3353,
        -7.7641, -8.8602, -5.7534, -6.1074, -7.4083, -7.0359, -6.8794, -7.9898,
        -8.5044, -5.4858, -6.3382, -6.7629], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6134, -7.2975, -8.2834, -9.0394, -5.8766, -6.5063, -7.0310, -6.9758,
        -7.1910, -8.4188, -9.2455, -6.1451, -6.4007, -7.0757, -6.7678, -7.1685,
        -8.1586, -8.8874, -5.9021, -6.0722], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6608, -7.2947, -8.3128, -6.8202, -8.5555, -7.9521, -5.7639, -6.6844,
        -6.4086, -7.2754, -8.6351, -8.6056, -6.0856, -6.3965, -6.4401, -6.5962,
        -6.8035, -7.9157, -8.8074, -5.7057], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0649, -6.8737, -6.4354, -6.9119, -8.1143, -9.2116, -6.4901, -6.1834,
        -6.7995, -7.2728, -7.3896, -7.9299, -8.5387, -6.5263, -6.0225, -6.9214,
        -6.6351, -7.3451, -7.7777, -8.9496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5300, -6.3136, -8.4943, -8.1553, -6.7283, -6.8624, -6.9011, -6.4050,
        -8.2873, -8.7819, -6.2498, -6.8308, -7.0500, -7.0915, -7.8377, -8.8943,
        -5.7426, -6.6091, -6.4481, -6.6160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1415, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2994, -6.4291, -7.6226, -7.0181, -7.9448, -8.2955, -5.2525, -6.6279,
        -6.7081, -6.4091, -8.2194, -8.8469, -6.6079, -6.4152, -7.0726, -6.9968,
        -7.0171, -8.0488, -8.6654, -6.3802], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7783, -8.1787, -6.9933, -8.5629, -7.8597, -6.6901, -7.1185, -6.9254,
        -7.6551, -8.6222, -7.4369, -6.0836, -6.3471, -6.2460, -6.8339, -7.9213,
        -8.5923, -6.6091, -6.4794, -6.9988], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2966, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7069, -6.4319, -7.4197, -6.9319, -7.7482, -9.3564, -5.9762, -6.3299,
        -6.6957, -7.2439, -7.1285, -7.5716, -8.4959, -6.0505, -6.2957, -6.4501,
        -6.8587, -7.5931, -8.4083, -6.8220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0266, -5.8502, -6.5605, -7.3892, -7.1781, -7.2752, -8.0399, -8.8848,
        -6.7777, -6.3688, -6.5355, -6.8298, -7.4849, -8.4152, -6.0400, -6.7805,
        -7.3095, -7.2911, -7.3547, -7.9075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9461, -7.4441, -7.8351, -8.3457, -6.7248, -6.9247, -6.7445, -7.7919,
        -8.3148, -8.7810, -6.3367, -6.5171, -6.6810, -7.2223, -7.9693, -8.3367,
        -6.3214, -6.8055, -7.6340, -6.8245], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9130, -6.8575, -6.7549, -7.1288, -8.6089, -8.1379, -6.5477, -6.3517,
        -6.6838, -6.9168, -7.9547, -8.0849, -6.5846, -5.9945, -6.3169, -6.7338,
        -6.9588, -8.0327, -8.7937, -6.8799], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3345, -8.5165, -6.9948, -6.4398, -7.3094, -7.1132, -7.5804, -7.5453,
        -6.9087, -8.6869, -6.2974, -8.5134, -7.8859, -6.0897, -6.7360, -6.2500,
        -7.2028, -6.7531, -8.1163, -9.3012], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9616, -7.5848, -8.5651, -5.7259, -6.5403, -6.4397, -6.3738, -8.7893,
        -8.9796, -6.2265, -6.4106, -6.7026, -7.4211, -7.5160, -7.8444, -8.2746,
        -5.9014, -6.3068, -6.7907, -6.7171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0417, -6.2158, -9.1988, -8.9246, -5.9738, -6.9650, -7.4177, -6.8639,
        -8.2951, -8.5126, -6.5151, -6.8253, -6.4783, -6.7277, -8.5625, -8.7311,
        -6.0393, -7.0822, -7.1156, -7.7020], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4665, -9.3862, -5.7815, -7.3461, -6.7297, -6.6437, -7.8076, -7.7783,
        -7.9205, -7.4447, -7.6436, -7.8817, -6.2921, -8.0154, -7.2396, -7.9605,
        -8.0153, -6.1290, -7.0211, -6.8341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0526, -6.8662, -6.5271, -6.5638, -6.6906, -8.1943, -7.5626, -7.3206,
        -6.8088, -6.9198, -6.6198, -8.4464, -8.6093, -6.5961, -6.2588, -6.7529,
        -7.5224, -7.9600, -8.3754, -6.0630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9343, -6.2522, -6.9185, -7.8118, -8.6744, -5.7673, -6.3201, -6.4902,
        -7.3448, -7.0667, -8.2643, -9.1400, -5.9029, -6.2586, -6.8258, -6.7602,
        -7.6699, -8.2358, -6.1351, -6.3413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0557, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1533, -7.0811, -7.3469, -8.5964, -8.2237, -6.4583, -6.7400, -6.7312,
        -6.5033, -8.0029, -6.8563, -8.2402, -9.1244, -6.0442, -7.2349, -6.4441,
        -6.8417, -8.6608, -7.5073, -6.8688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9953, -9.0615, -6.1825, -6.5298, -6.6069, -6.9730, -7.4614, -8.0169,
        -7.9408, -8.7256, -9.8348, -5.9872, -7.1455, -6.5901, -7.6163, -8.7454,
        -7.2094, -7.8757, -7.1546, -8.3393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3875, -7.3498, -6.2121, -6.8626, -7.8001, -8.8843, -6.2401, -6.7335,
        -6.9215, -7.2712, -8.1286, -8.6323, -6.6389, -5.9360, -6.3530, -7.2992,
        -7.1345, -7.6705, -8.3820, -6.1329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2057, -7.0248, -7.3514, -8.4184, -8.6799, -6.8224, -6.1335, -6.5266,
        -6.6741, -7.8130, -9.1613, -5.7968, -6.9400, -6.8521, -7.3746, -8.0096,
        -8.3236, -6.8025, -6.4122, -7.0790], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0363, -6.2197, -6.8617, -7.6157, -6.6556, -7.8270, -8.5193, -6.0256,
        -7.0378, -6.7819, -5.6294, -7.6485, -9.0738, -5.8632, -7.0034, -6.9715,
        -6.9026, -5.9682, -8.3552, -7.3939], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8254, -8.3803, -7.3060, -6.3714, -7.4464, -5.9910, -6.5009, -7.5593,
        -6.8150, -8.0759, -7.1047, -8.7827, -9.8188, -6.0028, -6.9470, -7.0335,
        -7.0668, -6.8604, -7.2221, -8.0195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6471, -6.3311, -6.6335, -7.7639, -8.1961, -8.5292, -6.0299, -6.1890,
        -7.1038, -6.6425, -7.6397, -8.7908, -5.9273, -6.8464, -7.0131, -5.9694,
        -8.2767, -8.7689, -6.6132, -6.6304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4395, -7.0359, -8.4893, -6.9303, -8.5618, -9.8031, -5.2609, -7.3644,
        -6.7701, -6.2737, -8.2272, -7.1137, -8.7735, -6.7269, -6.1995, -7.7624,
        -5.8050, -7.5447, -6.7093, -8.1802], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5499, -6.3599, -7.5747, -6.8450, -7.5765, -8.4882, -5.6213, -6.3216,
        -7.0798, -7.2900, -8.7467, -8.9587, -6.5749, -6.4510, -6.4436, -7.5660,
        -6.6811, -8.2179, -8.4539, -5.8933], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2602, -8.4738, -7.6533, -6.4606, -6.5597, -6.8281, -7.1753, -7.4605,
        -7.3984, -6.7788, -8.5258, -8.7363, -6.0902, -6.6165, -7.1705, -6.7045,
        -7.5676, -8.0523, -6.9712, -6.0921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1429, -6.7113, -6.5812, -7.1194, -7.6219, -7.2160, -7.9555, -8.4974,
        -6.2859, -5.9428, -7.0465, -7.2853, -7.8661, -8.5744, -6.4859, -6.3757,
        -7.1296, -7.4817, -7.9782, -8.4839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3603, -5.9504, -6.1293, -7.8217, -7.8431, -6.3547, -6.3377, -6.3831,
        -6.2524, -8.3092, -8.7657, -5.7118, -6.3095, -6.2994, -6.8367, -8.3872,
        -7.6264, -6.5919, -6.1105, -6.7620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7944, -7.5032, -7.1691, -6.6837, -6.1582, -5.5249, -6.9219, -6.9205,
        -7.0804, -6.4274, -7.5391, -7.4092, -6.1827, -6.1513, -7.2142, -5.9473,
        -7.4609, -6.1513, -8.1464, -7.7022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4478, -8.6333, -7.7193, -6.2667, -6.8680, -6.3695, -6.7914, -8.6555,
        -7.4729, -6.4982, -6.6255, -6.5441, -6.9775, -7.1419, -7.7911, -8.2841,
        -5.9588, -6.2632, -5.9289, -6.1428], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0651, -7.6632, -7.1269, -8.1749, -6.7860, -8.4800, -8.3614, -6.0019,
        -6.8348, -6.5975, -6.8135, -7.1677, -8.4750, -7.7074, -5.9426, -6.1367,
        -5.9894, -7.0676, -6.6033, -7.5508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8904, -7.0037, -8.2914, -8.7280, -6.4872, -6.2584, -6.3008, -7.7615,
        -7.0152, -8.0301, -6.6435, -8.4877, -9.1396, -5.6461, -6.6742, -6.2013,
        -7.5616, -8.3503, -7.2760, -6.6440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1316, -5.9817, -6.0769, -7.2232, -8.3898, -7.5451, -6.2146, -7.5805,
        -6.4167, -6.6879, -8.7332, -8.1480, -6.1268, -6.2002, -6.3397, -6.3377,
        -7.4306, -8.2330, -5.6946, -7.2300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7448, -6.6560, -7.3302, -6.6325, -7.9574, -8.5621, -5.6580, -6.4559,
        -6.2325, -6.7069, -8.7659, -7.2806, -6.8915, -6.0732, -7.3989, -7.0833,
        -7.7257, -8.1283, -6.5816, -8.5642], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3598, -6.2734, -6.2714, -7.3576, -7.9327, -6.1317, -6.0960, -6.6461,
        -7.2300, -7.1151, -7.5198, -8.2424, -5.4436, -6.7939, -6.2524, -6.5968,
        -6.8215, -8.1600, -8.3184, -6.8999], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9187, -6.1365, -7.7997, -7.2582, -7.0182, -7.1740, -7.7362, -8.0188,
        -6.2325, -6.2029, -6.9365, -7.4139, -8.0152, -8.3750, -6.4022, -6.4220,
        -5.8024, -6.6597, -7.5251, -8.1373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8594, -7.2693, -6.0767, -5.7226, -6.9005, -6.7932, -7.9955, -7.8525,
        -6.9544, -8.2842, -9.2196, -5.5347, -7.0334, -6.4580, -6.8494, -8.5679,
        -8.2419, -6.5740, -6.2081, -6.5294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7393, -8.1328, -8.4141, -6.0291, -6.8343, -6.7657, -6.5378, -6.4318,
        -7.8803, -8.2297, -6.2635, -6.2265, -6.2264, -6.2800, -7.6597, -8.2741,
        -6.1709, -6.2341, -6.1649, -7.0072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7461, -6.8825, -7.7573, -8.1746, -5.8538, -5.9590, -6.0355, -6.3649,
        -8.1918, -8.4536, -6.3762, -6.1234, -7.1219, -7.9972, -8.3607, -8.1983,
        -6.6747, -6.7237, -5.9834, -7.3499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2499, -7.1140, -7.2748, -8.1743, -6.2911, -8.6459, -9.1764, -6.0358,
        -6.9447, -6.6012, -7.4585, -8.5073, -7.8525, -6.5903, -5.9893, -6.2434,
        -6.1315, -7.8172, -7.3565, -5.6010], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8334, -6.9894, -6.2838, -6.6180, -8.7459, -7.2566, -6.9852, -7.0770,
        -6.4081, -7.6958, -7.6686, -7.4751, -6.7333, -8.6390, -9.2006, -5.9049,
        -6.8207, -6.2232, -6.3107, -8.7741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8911, -6.8775, -6.7810, -8.0433, -6.7856, -8.5426, -9.2034, -5.7676,
        -6.6286, -7.0255, -7.0581, -7.5542, -8.3531, -6.5114, -5.6662, -5.9590,
        -7.4940, -7.1155, -7.8616, -7.5518], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0935, -8.0328, -6.1553, -5.9134, -6.7938, -6.3268, -7.4128, -8.7732,
        -5.7842, -6.6850, -6.2452, -6.7813, -6.4241, -8.4158, -7.4099, -6.2223,
        -6.3481, -6.7326, -6.0175, -7.3017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9187, -8.6924, -7.5483, -6.5288, -5.6900, -7.4178, -6.6603, -8.0873,
        -6.2651, -8.3295, -8.4418, -5.4680, -7.0738, -5.7712, -7.6285, -8.6681,
        -7.3654, -6.9446, -6.8497, -8.2574], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2303, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2635, -8.2427, -6.4360, -6.3001, -6.4706, -6.7021, -7.5098, -8.1642,
        -6.3802, -6.3435, -6.5438, -6.8018, -7.1449, -7.9494, -8.4451, -6.4042,
        -6.3736, -6.3214, -6.8396, -8.7222], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6746, -8.0841, -8.3000, -6.6018, -6.3290, -7.0857, -7.7758, -7.4998,
        -7.7922, -6.1761, -8.0029, -8.9898, -6.2149, -6.7067, -6.3085, -6.0049,
        -8.1411, -8.4390, -6.3092, -6.5220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7693, -6.2804, -8.7460, -8.4014, -6.0432, -6.7122, -6.3329, -6.6906,
        -7.4956, -8.0529, -5.9062, -6.7371, -6.4858, -6.0943, -8.3264, -8.3012,
        -6.2103, -5.9754, -7.1800, -7.4512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4148, -6.8543, -8.1257, -6.5625, -8.7584, -9.0232, -6.1478, -7.2224,
        -5.9907, -6.6740, -8.5631, -6.8983, -6.7781, -6.4960, -7.6794, -6.7395,
        -8.1881, -6.8380, -7.8786, -6.2413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1355, -7.3462, -6.9610, -5.4911, -6.8492, -6.5699, -7.5154, -6.8653,
        -7.7666, -6.6680, -8.2277, -8.7211, -6.0256, -6.7457, -6.7206, -6.8995,
        -7.7589, -8.1317, -5.9809, -6.7861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0975, -6.3606, -6.2415, -6.0688, -7.8526, -7.7559, -6.5455, -6.4717,
        -6.7416, -7.5111, -6.8066, -8.1836, -8.7140, -6.5040, -6.5536, -5.9907,
        -6.9975, -8.3068, -8.2178, -5.9688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6091, -7.1697, -6.3595, -5.8159, -6.1108, -6.4799, -7.8405, -8.1649,
        -6.5375, -6.0877, -6.4786, -6.9931, -7.2361, -8.0929, -8.2038, -5.9579,
        -6.4869, -6.5106, -6.8621, -8.8347], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1866, -6.4687, -6.3059, -7.2792, -8.2655, -7.9895, -6.1432, -6.8105,
        -5.8936, -6.6935, -7.8830, -7.6681, -6.7308, -6.2047, -6.8960, -7.4864,
        -8.1883, -7.5950, -6.8242, -6.3437], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4086, -6.0171, -6.4394, -6.1482, -6.0192, -8.4050, -7.0798, -7.2963,
        -7.9747, -7.1201, -5.6852, -7.5623, -6.9145, -7.7840, -8.2151, -7.4131,
        -6.0915, -7.6652, -7.1956, -7.7740], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2921, -6.0122, -6.2780, -8.5572, -7.4199, -5.8984, -6.1292, -6.3704,
        -6.1991, -7.9952, -8.9702, -5.6683, -6.8224, -6.8197, -6.9185, -6.7451,
        -8.0405, -7.2364, -6.3625, -6.0748], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5286, -7.3118, -6.2615, -5.8859, -6.1908, -6.8579, -7.1830, -7.6971,
        -8.2581, -5.3323, -6.4596, -6.3439, -7.0015, -8.1663, -7.4242, -6.3303,
        -7.2862, -6.4671, -7.3581, -6.7794], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5660, -7.9044, -6.9301, -8.4650, -7.7606, -5.6203, -7.2301, -6.7914,
        -6.8540, -8.6118, -7.4506, -6.6468, -5.8672, -6.0246, -6.7486, -7.1174,
        -8.2576, -8.1019, -5.3532, -6.4510], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2406, -7.8527, -7.2030, -7.5462, -6.4458, -4.9963, -6.2792, -6.2408,
        -7.6949, -6.7764, -7.9866, -6.3532, -7.4252, -8.1614, -6.0673, -6.6905,
        -6.1355, -7.8682, -8.0197, -7.1434], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3950, -6.3092, -6.8947, -6.2296, -7.7997, -8.2266, -7.9340, -6.8939,
        -6.4515, -5.9181, -6.5396, -7.8435, -7.6720, -7.1406, -7.6129, -6.0002,
        -7.2293, -8.7085, -7.3741, -6.6258], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8448, -6.3052, -6.0324, -6.3519, -6.9088, -7.1813, -8.0473, -8.0297,
        -6.8597, -6.0670, -6.3397, -6.2832, -7.4378, -8.0231, -5.7801, -6.3369,
        -6.3509, -6.3211, -8.0392, -8.1477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7713, -6.0603, -6.1922, -6.6773, -6.6994, -7.7539, -7.7347, -6.5755,
        -5.7949, -6.8267, -7.4032, -5.9772, -8.1046, -6.5848, -8.4835, -9.2648,
        -5.6697, -6.8707, -6.3374, -6.8132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2554, -7.4742, -7.3973, -8.1729, -5.9096, -6.1159, -6.5036, -6.6506,
        -8.0580, -8.7013, -5.9133, -6.2746, -6.6803, -7.4808, -7.4167, -7.4251,
        -8.3112, -5.8185, -6.1739, -6.2736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5580, -7.4527, -8.7347, -7.1815, -6.6344, -6.8358, -7.2957, -7.1676,
        -7.6993, -7.3370, -6.6888, -7.9837, -8.9214, -7.2414, -6.4556, -6.1826,
        -6.0507, -7.7994, -7.9458, -5.5297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7100, -7.2680, -7.7204, -8.2122, -6.8324, -8.4726, -7.6176, -5.6354,
        -6.4187, -6.3228, -6.9035, -8.1735, -7.6780, -6.5280, -5.9653, -6.4612,
        -7.2577, -6.9386, -7.9248, -7.8041], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4989, -8.1198, -7.5027, -6.8335, -6.4827, -6.2629, -6.1509, -7.4275,
        -6.9036, -7.3015, -6.9799, -6.7947, -8.2614, -7.1159, -4.9847, -6.2917,
        -6.4596, -8.8795, -7.7005, -6.6235], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7053, -7.9524, -6.9240, -8.1815, -8.7285, -5.8016, -6.9391, -6.3752,
        -7.4202, -8.6465, -7.8295, -6.6737, -6.4582, -6.8148, -7.1458, -7.7153,
        -7.7525, -6.9440, -8.2083, -6.5902], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2600, -6.1894, -6.2875, -6.2699, -6.8085, -8.0575, -8.1342, -6.6452,
        -6.1734, -6.6413, -6.5135, -7.0849, -8.0467, -7.9878, -6.3086, -5.8785,
        -6.6298, -7.4296, -6.5607, -8.0302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5947, -6.4863, -7.6704, -8.4303, -5.6417, -6.4086, -6.2393, -5.8881,
        -8.1096, -8.6107, -5.8986, -6.1288, -6.3398, -7.2253, -7.1787, -8.0044,
        -8.3007, -5.9707, -6.2717, -6.4473], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1672, -6.8226, -8.1822, -8.4605, -6.4943, -6.0681, -7.1356, -6.5069,
        -6.9533, -7.1138, -8.1192, -6.2120, -6.8621, -6.9114, -7.0909, -7.1013,
        -7.6200, -8.4055, -5.4618, -6.0850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3506, -6.3708, -6.3977, -7.1355, -8.4935, -7.9411, -6.9977, -6.1027,
        -6.1657, -7.7119, -8.0338, -7.6516, -7.2026, -5.9290, -6.0155, -6.5842,
        -7.4689, -7.7984, -8.0182, -5.8339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0736, -7.3163, -7.7999, -8.5852, -5.7200, -5.9853, -6.0740, -7.4458,
        -8.4790, -8.2686, -6.8036, -6.5681, -6.0144, -6.6924, -7.5050, -7.8785,
        -7.9801, -6.1330, -5.8292, -5.9131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1741, -7.2128, -7.7045, -8.3406, -6.1772, -6.0671, -6.2066, -6.5431,
        -7.2793, -8.1361, -6.0312, -6.5880, -5.8854, -5.9608, -6.6606, -8.5084,
        -7.2549, -6.3593, -6.6944, -6.8223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7567, -7.7605, -6.3377, -6.8176, -6.9563, -7.1665, -8.4654, -7.9543,
        -5.7315, -6.5040, -6.4778, -6.4752, -8.6099, -7.9009, -6.7249, -5.8558,
        -8.0383, -6.5987, -8.0214, -6.5158], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1335, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1393, -6.5275, -7.0038, -7.7015, -8.5708, -5.7656, -6.4582, -5.6709,
        -7.0505, -8.6141, -7.2118, -6.5838, -5.8250, -8.0057, -5.6535, -8.1220,
        -6.5874, -8.3196, -9.1109, -6.6821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1508, -8.0397, -8.6097, -6.0704, -6.3143, -6.5118, -6.2696, -7.8037,
        -8.1376, -6.7008, -5.9168, -6.6150, -6.9555, -6.7693, -7.8534, -8.5892,
        -6.4494, -6.5500, -6.5022, -5.9178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8899, -7.4559, -6.0364, -5.9215, -6.6993, -6.5073, -8.4105, -6.7358,
        -8.3214, -9.0595, -5.8896, -6.5459, -7.4831, -6.6338, -6.1609, -7.5262,
        -8.5772, -6.6526, -6.0138, -7.1372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3129, -6.9478, -7.7196, -8.1873, -6.4588, -6.0903, -6.4473, -6.6800,
        -7.7068, -8.1579, -6.0769, -6.2377, -6.7391, -5.8434, -7.1594, -7.5411,
        -8.2984, -5.9890, -6.3131, -7.6740], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5642, -6.2751, -6.5632, -7.4357, -8.1824, -8.3545, -6.7425, -6.3822,
        -5.9629, -6.3497, -7.2226, -8.0369, -8.2365, -5.6080, -6.1477, -6.4387,
        -6.4828, -8.1015, -8.1560, -6.0288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5967, -8.2172, -8.0316, -7.1897, -6.5203, -6.9581, -7.7288, -6.7716,
        -7.7932, -8.3671, -5.7573, -6.3876, -9.3345, -5.5744, -7.4094, -6.6363,
        -8.0295, -9.4297, -5.9571, -6.7870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5171, -6.3331, -6.3622, -6.5292, -6.6423, -8.5057, -8.2549, -6.4941,
        -5.9693, -6.8716, -6.8722, -8.0584, -7.4833, -6.9688, -7.3711, -8.3175,
        -8.5288, -5.7921, -6.4568, -6.7606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7489, -6.8024, -8.8246, -9.2308, -5.6825, -6.1870, -6.4952, -6.8935,
        -6.6465, -7.9003, -8.8779, -5.9581, -6.2812, -6.1198, -7.0282, -8.6584,
        -8.6030, -6.0569, -6.0249, -6.4878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1687, -6.5966, -6.7544, -8.3898, -8.3725, -6.0303, -5.9965, -6.3986,
        -6.3162, -7.7591, -8.0666, -6.6798, -5.7833, -6.0379, -6.3804, -7.0710,
        -8.2515, -8.0248, -5.7781, -5.7782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6837, -6.0782, -8.5284, -6.2823, -8.2104, -9.0016, -6.3557, -6.9085,
        -6.5784, -7.0709, -8.3428, -7.9613, -6.7049, -5.9406, -7.3693, -5.9482,
        -7.6239, -6.4444, -7.7448, -6.2397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3013, -5.9279, -5.7924, -6.3968, -7.2503, -8.2296, -7.9721, -7.0538,
        -6.0502, -6.7035, -7.0157, -7.1818, -7.7117, -8.0001, -6.0725, -5.9331,
        -6.6673, -6.4666, -7.9070, -8.9189], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1300, -6.4121, -6.6248, -6.9301, -8.3503, -6.6762, -7.7347, -6.1693,
        -8.3983, -8.8306, -6.4187, -7.0082, -6.8860, -6.5464, -6.6567, -8.5873,
        -7.7127, -6.9945, -6.5395, -6.1188], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1512, -6.9819, -6.7275, -8.2153, -7.2733, -6.1432, -6.3074, -6.1564,
        -7.0197, -8.3330, -8.6494, -6.2973, -6.1054, -6.5604, -6.6816, -7.4092,
        -8.0946, -5.4631, -6.4476, -6.1300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3688, -7.4713, -8.0757, -6.7325, -5.9332, -6.2640, -6.4923, -6.7489,
        -7.9462, -8.1921, -6.0712, -5.8683, -6.0963, -7.3455, -7.6611, -7.9170,
        -5.6341, -6.4710, -6.3892, -6.7818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5756, -7.9491, -7.2328, -7.4157, -6.8168, -6.6823, -8.3294, -7.9542,
        -6.5871, -6.7330, -6.5583, -6.6093, -8.0853, -6.5084, -7.2901, -8.7092,
        -6.2758, -6.6632, -7.2342, -7.0574], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5383, -8.1720, -5.9309, -5.9832, -6.7552, -7.1343, -7.7897, -8.6327,
        -6.6573, -6.7170, -6.3163, -7.5402, -7.4713, -8.0651, -6.7167, -8.1899,
        -9.0749, -5.7927, -6.6754, -6.2983], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6649, -8.4139, -6.1443, -6.1000, -6.1897, -7.1074, -7.8728, -7.6813,
        -6.3715, -6.7880, -6.1695, -7.4343, -7.9778, -8.4737, -6.0838, -6.0218,
        -6.0597, -6.2899, -7.5182, -8.0517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0408, -5.8954, -5.8830, -6.2793, -6.5781, -5.9077, -7.4879, -8.5613,
        -5.4842, -6.4482, -6.1907, -6.8641, -8.5449, -7.1826, -6.9999, -6.9146,
        -6.5674, -7.4110, -6.9673, -7.5684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3967, -7.9387, -6.0405, -7.9946, -9.2003, -5.9270, -6.9689, -5.8890,
        -7.2827, -8.3022, -7.4721, -6.7562, -5.9982, -7.0332, -6.4019, -7.3540,
        -7.0400, -6.2159, -6.5002, -7.8410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0777, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7466, -8.0509, -6.1128, -6.6726, -6.0865, -6.1936, -8.2376, -8.2857,
        -6.3317, -6.7850, -6.8587, -7.4561, -7.8420, -7.9665, -5.7797, -6.6051,
        -6.0303, -6.5662, -8.0122, -8.1918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5003, -6.4816, -8.1413, -8.1083, -6.5285, -6.2378, -6.2772, -7.0695,
        -7.4716, -8.5115, -5.3371, -6.3354, -6.2061, -5.9853, -8.6586, -7.5414,
        -6.5687, -6.6674, -7.9030, -6.4897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1805, -8.9040, -5.9094, -6.4778, -6.2931, -7.3894, -7.8359, -7.8691,
        -6.4774, -6.6383, -6.3847, -6.6367, -7.9327, -8.1985, -6.0886, -6.1171,
        -6.6591, -6.1223, -7.7811, -8.1466], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7592, -8.6556, -5.9416, -6.1881, -6.7450, -7.1345, -6.6217, -7.4437,
        -8.6285, -6.6124, -6.0728, -6.3623, -6.8438, -7.6928, -8.3088, -5.9397,
        -5.9588, -6.6610, -6.5458, -8.2061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8814, -6.2476, -6.2808, -7.1212, -8.6287, -7.8611, -6.4359, -6.0722,
        -6.2184, -6.2940, -7.4983, -8.1207, -5.5627, -6.4759, -5.9256, -6.6659,
        -8.6461, -7.3451, -5.7223, -6.4407], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5068, -7.9118, -6.1251, -5.7922, -6.3367, -6.6498, -6.8993, -7.8269,
        -8.5587, -5.6094, -6.5679, -6.3906, -6.5692, -8.3480, -8.6000, -6.1221,
        -6.7285, -6.2964, -7.2269, -8.5184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6442, -5.8565, -6.5216, -7.0816, -7.8460, -8.1271, -6.6940, -6.1634,
        -6.3401, -6.7859, -7.1563, -7.6470, -7.7985, -5.1908, -6.3695, -6.8082,
        -8.4045, -7.3599, -6.3855, -5.7657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9355, -5.5609, -6.5029, -6.1992, -7.0143, -8.4686, -7.7474, -6.9582,
        -6.7968, -5.9228, -6.0936, -6.9379, -7.8818, -7.9301, -7.2314, -6.6486,
        -6.1175, -6.7143, -6.9271, -7.8764], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2071, -6.6535, -8.2340, -8.1934, -6.0169, -6.5932, -6.2078, -5.7921,
        -8.2410, -8.4784, -6.8410, -6.3758, -6.7477, -7.3769, -8.1763, -8.2706,
        -6.4760, -6.8771, -6.3234, -6.5250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0304, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4042, -6.2207, -6.5029, -6.5230, -6.4125, -7.7496, -7.9499, -6.6150,
        -7.0843, -7.0268, -6.6159, -7.3304, -7.7668, -6.3844, -6.3494, -6.4570,
        -6.4375, -7.9753, -8.0752, -5.9714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2571, -8.3200, -6.5877, -6.2922, -6.3966, -6.5656, -8.4248, -7.5136,
        -6.4649, -6.2954, -6.5295, -7.1353, -7.1599, -7.4730, -7.8655, -6.3495,
        -5.8580, -6.4991, -7.5241, -6.9860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0298, -8.2649, -7.8772, -6.1576, -6.6775, -6.0988, -7.4392, -8.6050,
        -8.5544, -6.2033, -6.1304, -5.8252, -6.6844, -7.3508, -8.1965, -8.0629,
        -5.8334, -6.4185, -6.5521, -5.5847], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5419, -6.1560, -6.0125, -6.0515, -6.8110, -7.9984, -8.2261, -6.3639,
        -5.8038, -6.6999, -7.7462, -8.0375, -8.1169, -6.3652, -6.1727, -6.4344,
        -6.3063, -6.9990, -7.8170, -8.3643], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7716, -8.2061, -5.1310, -6.4518, -6.3346, -6.8597, -8.6921, -7.5230,
        -6.6342, -7.4630, -6.5731, -7.2268, -8.0453, -8.3093, -6.2605, -6.1968,
        -6.3900, -7.0037, -7.9920, -7.8293], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0551, -7.4841, -7.0779, -8.5603, -6.6456, -8.3943, -8.0536, -5.7695,
        -6.3704, -6.7572, -6.5299, -8.0745, -8.0668, -6.4566, -6.0706, -6.2852,
        -7.1361, -7.4549, -8.1429, -7.1544], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0747, -7.1010, -7.8872, -7.9966, -5.7579, -6.8679, -5.9431, -6.5259,
        -7.6873, -8.1998, -5.9067, -6.1723, -6.4009, -6.9299, -8.2774, -7.9534,
        -6.3907, -6.4703, -6.0519, -6.6966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1012, -6.3133, -5.8724, -7.4878, -6.1935, -8.4349, -6.7064, -7.6787,
        -8.5737, -6.4551, -6.6184, -6.3161, -6.4708, -7.5830, -8.0543, -8.3711,
        -5.9482, -6.0892, -6.3299, -6.2550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8714, -6.5448, -6.3057, -6.8852, -8.1124, -7.9791, -5.9815, -6.4710,
        -6.0728, -7.1228, -8.1787, -8.3228, -6.4045, -6.5601, -6.8159, -7.7616,
        -8.4142, -7.7613, -6.0516, -6.4117], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4308, -6.9677, -8.1379, -7.8723, -5.1601, -6.2743, -6.6311, -6.3046,
        -6.7591, -8.0772, -7.4684, -6.2756, -5.8824, -6.8266, -6.6824, -7.1527,
        -7.5170, -7.9821, -6.6127, -6.4610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0656, -6.4565, -6.6761, -8.4427, -8.2352, -6.6811, -5.6026, -6.7622,
        -7.3411, -7.5666, -8.0092, -6.4830, -8.5885, -9.0848, -6.2350, -6.9440,
        -6.3221, -6.7008, -8.7403, -7.4881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0428, -6.2206, -7.0721, -7.1372, -8.1897, -8.0107, -5.5585, -6.9844,
        -6.3508, -7.0565, -8.8161, -7.6299, -6.1369, -6.1301, -6.3068, -7.3078,
        -7.5641, -8.1889, -5.8225, -6.5663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7965, -6.5602, -6.3114, -7.0310, -8.5494, -7.6769, -7.1115, -7.0363,
        -7.9920, -5.9958, -7.8780, -6.8037, -8.3680, -8.7697, -5.8659, -6.5681,
        -6.5234, -6.3512, -6.9276, -8.2632], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4322, -5.5184, -6.6329, -5.6765, -6.7553, -7.1812, -8.3332, -8.2990,
        -6.0261, -6.2528, -6.5019, -7.1575, -7.1164, -7.5259, -8.5627, -5.7120,
        -6.3896, -6.3937, -6.4315, -8.6497], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4160, -6.0519, -7.1409, -6.2434, -8.3367, -6.6242, -8.7678, -7.7559,
        -5.7408, -6.9806, -6.7906, -6.4888, -8.0820, -7.6685, -6.7240, -6.0592,
        -7.5039, -7.0714, -7.3938, -6.6763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8792, -8.0795, -8.3218, -5.6987, -6.3265, -6.5977, -7.1584, -6.8506,
        -7.8568, -8.4727, -5.8714, -6.1845, -6.5849, -7.0718, -7.5885, -7.8814,
        -6.8856, -6.1438, -6.3539, -5.9780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3878, -5.9420, -6.0311, -6.7889, -7.6440, -8.0833, -7.9106, -6.6413,
        -6.0851, -6.4286, -6.3927, -6.7281, -7.9705, -7.5294, -5.8149, -5.9623,
        -6.3125, -6.3115, -6.8805, -7.3803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0606, -6.9258, -7.1932, -7.5739, -7.8609, -6.6710, -6.5910, -5.8138,
        -6.7206, -7.5655, -8.0734, -6.3196, -6.2271, -7.0928, -6.9848, -6.8539,
        -7.8155, -8.3222, -5.8994, -5.9283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2495, -6.0272, -6.1772, -7.1044, -6.3837, -6.9933, -7.9839, -8.6063,
        -5.9197, -6.4641, -6.1793, -6.5103, -7.6884, -8.0746, -6.9515, -6.7769,
        -6.4956, -6.4684, -7.3164, -8.3097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2940, -6.7903, -6.4929, -6.4323, -6.6655, -8.3707, -7.1915, -6.1689,
        -6.3054, -6.0470, -6.3757, -8.3684, -8.2824, -5.8653, -6.8685, -6.9971,
        -6.5765, -6.9737, -6.6918, -8.0887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7142, -6.2650, -6.7121, -6.7864, -8.0371, -8.5968, -6.5714, -6.4348,
        -6.2644, -6.7490, -7.7392, -8.0111, -6.2479, -6.0765, -6.4588, -6.8960,
        -7.6631, -8.1394, -5.9375, -6.5983], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9449, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9176, -7.4303, -8.4836, -8.7628, -5.8320, -6.1098, -7.0194, -6.5749,
        -6.7121, -7.6952, -8.0510, -6.2876, -6.8053, -7.1313, -7.2226, -6.8519,
        -7.8804, -8.6508, -5.7962, -6.5295], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2293, -7.7751, -7.9470, -5.8137, -6.0956, -5.9786, -6.6152, -8.4337,
        -8.4096, -6.2369, -6.3360, -5.9993, -7.0310, -7.7897, -8.2364, -6.9825,
        -5.8434, -6.2794, -6.9845, -7.2719], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0623, -5.9601, -6.1658, -6.7080, -6.7486, -7.0977, -7.8535, -8.1213,
        -6.2691, -6.5958, -6.3306, -6.4758, -8.6148, -7.6333, -6.5362, -6.0884,
        -5.9840, -7.2345, -6.8191, -7.8546], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4203, -6.6221, -8.6222, -7.4053, -6.1238, -5.9406, -7.1349, -6.9709,
        -7.5212, -8.0560, -6.7293, -8.1210, -9.0589, -6.3205, -6.2441, -6.1197,
        -6.4532, -8.7715, -8.2776, -5.7331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9093, -6.1088, -6.4841, -6.7863, -7.6301, -8.2088, -6.0989, -5.9197,
        -5.9677, -7.4775, -7.7191, -8.1532, -6.7898, -8.2633, -7.7896, -5.6833,
        -6.8492, -6.1597, -6.7098, -6.6708], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7641, -6.4271, -6.6216, -7.0973, -8.4525, -7.2374, -6.6495, -5.7375,
        -6.7007, -6.2596, -7.6149, -6.8633, -7.3168, -7.0923, -6.7578, -8.6947,
        -7.6564, -6.0468, -6.2002, -6.3060], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3214, -7.4319, -7.4503, -7.8340, -7.6898, -7.5355, -7.6412, -6.9661,
        -6.7653, -7.3094, -7.2222, -7.0950, -7.3716, -6.9479, -7.5576, -7.5509,
        -7.2886, -7.4014, -7.3951, -7.3846], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9224, -8.6487, -5.9943, -6.3214, -6.5441, -6.5450, -8.1080, -8.4056,
        -5.9998, -5.8256, -6.3720, -6.6522, -7.2969, -8.1363, -5.9978, -6.6519,
        -6.6168, -6.9437, -6.8633, -7.9828], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1822, -6.4794, -7.9264, -7.9675, -6.2964, -6.4341, -5.5571, -6.5541,
        -7.4990, -8.3142, -6.5135, -8.2167, -7.3112, -6.2342, -7.0124, -6.4105,
        -6.4812, -8.5939, -7.3863, -5.6965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9495, -6.4006, -6.0400, -8.9717, -7.3246, -9.0955, -9.4064, -6.5130,
        -6.6364, -6.5878, -7.3643, -8.7139, -7.3833, -6.5583, -6.7883, -7.0512,
        -7.2677, -7.2860, -6.8523, -7.2225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4524, -8.1604, -6.1064, -6.4774, -6.0909, -6.4725, -7.5515, -7.9235,
        -6.5176, -6.1268, -6.2100, -6.5548, -7.2357, -7.7729, -7.9783, -6.0540,
        -7.0402, -6.3799, -6.8804, -7.9918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2319, -5.6068, -5.8552, -6.5779, -7.6743, -8.1757, -6.3639, -5.9800,
        -6.6850, -6.3764, -6.1521, -7.7637, -8.4862, -5.4739, -6.5992, -6.1297,
        -6.3756, -8.7321, -7.3882, -6.7182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2773, -8.6723, -6.2023, -6.0538, -6.7215, -6.7182, -7.2814, -7.5402,
        -8.2416, -6.7280, -5.9679, -7.5656, -6.1195, -8.1969, -6.8257, -8.1897,
        -9.2043, -6.2851, -7.0503, -6.4618], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7180, -6.8766, -7.6679, -8.2249, -6.2299, -5.7883, -6.6342, -7.3001,
        -6.9062, -7.6125, -8.2882, -5.8118, -6.4601, -6.5542, -6.9342, -7.9881,
        -8.7520, -5.7015, -6.1615, -5.8955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1998, -6.4589, -6.6659, -6.2978, -6.8731, -7.0993, -7.5048, -7.0076,
        -8.5249, -8.9092, -6.6498, -6.9024, -6.5653, -7.0750, -8.7012, -7.9200,
        -6.4829, -6.3320, -7.2148, -7.5455], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2538, -6.6776, -6.3462, -7.4455, -6.7745, -7.5289, -8.1929, -8.5544,
        -6.5458, -6.3163, -6.5694, -7.2180, -7.2717, -8.0396, -6.1456, -6.7226,
        -6.7609, -7.1496, -8.3172, -8.5495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3852, -5.8601, -7.1293, -6.9256, -6.8583, -7.8246, -9.0494, -6.0299,
        -7.1185, -6.3876, -7.3753, -7.8728, -8.0985, -7.1192, -5.5872, -5.3081,
        -5.9219, -7.4116, -8.2444, -7.9501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6927, -6.0649, -6.5384, -6.8623, -7.0871, -8.3008, -8.0769, -6.9881,
        -6.3994, -6.9630, -6.9013, -7.4132, -8.5668, -4.7233, -6.7574, -6.5866,
        -6.4615, -7.4668, -8.4071, -8.7155], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5131, -6.3271, -6.1677, -6.5723, -7.8965, -8.0937, -5.8329, -6.2838,
        -6.9129, -5.9925, -7.9786, -8.8980, -6.4184, -6.3206, -6.5484, -6.4372,
        -7.4960, -8.6083, -5.6800, -6.3614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6720, -8.3762, -7.1965, -6.1332, -6.1990, -6.8059, -6.8264, -7.7272,
        -8.5008, -5.4254, -6.2337, -6.3447, -6.5238, -8.5400, -8.3533, -6.7206,
        -6.7355, -6.6501, -7.0247, -8.2833], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8105, -6.4460, -7.4483, -7.3775, -8.2068, -8.3501, -6.2175, -5.8572,
        -6.6575, -6.8996, -7.8860, -8.6003, -5.7764, -6.4188, -6.4372, -7.2507,
        -7.0351, -8.1779, -8.5675, -6.0096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0998, -6.4342, -7.0751, -8.6362, -7.4210, -6.9381, -6.1875, -5.9711,
        -6.9885, -7.3773, -8.1750, -8.1348, -6.2994, -6.5375, -5.8275, -6.9445,
        -7.5276, -8.1257, -5.7045, -6.3539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4682, -6.2210, -7.2982, -7.2492, -7.8177, -8.6542, -5.3922, -6.1387,
        -6.6964, -6.3565, -8.4578, -8.1540, -6.4832, -5.9719, -6.7086, -7.0693,
        -6.9237, -7.7424, -8.1153, -6.2599], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0319, -6.7702, -6.9318, -8.1657, -7.8050, -5.6217, -6.5630, -6.3905,
        -6.9119, -7.6788, -7.8342, -6.8598, -6.1157, -5.9426, -6.4244, -6.9943,
        -7.6900, -7.6961, -5.5747, -6.4858], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8351, -8.4126, -8.7031, -6.4037, -6.2366, -6.1540, -6.7335, -7.7084,
        -8.0421, -5.4227, -5.9670, -6.3190, -6.2046, -7.4096, -8.2277, -5.8638,
        -6.6996, -6.5507, -7.0272, -8.4436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5027, -6.8335, -6.4827, -6.2629, -6.1509, -7.4275, -6.9036, -7.3015,
        -6.9799, -6.7947, -8.2614, -7.1159, -4.9847, -6.2917, -6.4596, -8.8795,
        -7.7005, -6.6235, -6.6314, -5.8053], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4900, -6.0880, -6.7249, -6.3437, -6.7976, -7.2918, -8.0707, -7.8411,
        -5.0435, -6.4198, -6.1083, -6.3350, -8.7259, -7.1886, -6.0816, -6.1305,
        -6.7287, -7.0756, -8.3084, -8.1136], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4640, -7.6235, -8.3560, -5.7834, -6.4971, -6.0104, -6.5700, -7.7200,
        -8.1652, -6.7967, -6.6225, -6.7253, -6.8395, -7.1704, -7.6560, -8.2547,
        -5.9644, -5.8947, -6.1376, -6.6979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3931, -7.0868, -7.9794, -8.5243, -5.8309, -6.0961, -6.3235, -6.3525,
        -8.1543, -8.4524, -6.2328, -6.2605, -7.0644, -7.0003, -7.1757, -7.1673,
        -8.3815, -6.7791, -6.1119, -6.8224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9634, -6.0692, -5.8206, -6.4614, -8.2379, -8.2699, -6.1702, -5.9914,
        -8.1605, -6.2692, -7.8040, -6.8037, -8.1704, -8.9197, -5.7992, -6.2793,
        -6.5361, -6.4514, -8.6171, -8.1453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2496, -8.4990, -6.0770, -6.2542, -6.8377, -6.8569, -6.8878, -7.6337,
        -8.5979, -6.2622, -6.4007, -6.7659, -6.8320, -8.3961, -7.7990, -7.1188,
        -6.5467, -5.9880, -6.2634, -6.9278], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6593, -8.4417, -5.9075, -5.9275, -5.9459, -6.3275, -7.9829, -8.5346,
        -6.1478, -6.4646, -6.4201, -6.5622, -7.1083, -7.8026, -8.2118, -5.2049,
        -6.2991, -6.2504, -7.5619, -8.6344], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0150, -6.4096, -5.9842, -6.1791, -6.6946, -7.6914, -7.9294, -5.4807,
        -6.3680, -6.4334, -5.9587, -8.2657, -8.5061, -6.4161, -6.2046, -6.4060,
        -7.3276, -6.8453, -7.6472, -8.2087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6700, -8.1564, -6.6066, -6.0127, -6.0667, -6.5601, -6.9560, -7.5336,
        -8.3495, -5.8609, -6.2714, -6.5153, -6.5534, -7.9460, -8.3337, -6.6806,
        -5.5455, -6.2307, -6.6731, -8.0319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3273, -5.8819, -6.0486, -6.3783, -7.0603, -7.8388, -8.0680, -5.5287,
        -5.8573, -5.9739, -7.0085, -8.2039, -8.1588, -5.5420, -6.4293, -5.6535,
        -6.9760, -8.2636, -7.1558, -6.5329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3298, -6.5947, -5.3983, -5.8662, -7.5192, -8.2427, -8.0349, -5.6337,
        -6.0061, -5.7076, -6.4702, -8.7803, -8.2316, -6.0959, -5.8903, -5.7841,
        -7.1780, -6.4770, -7.2240, -8.0042], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0625, -7.1201, -7.9946, -7.8907, -6.3074, -5.4160, -5.6268, -7.2456,
        -8.0710, -7.9605, -6.7569, -5.7625, -5.9001, -5.9677, -7.6766, -8.1245,
        -6.0537, -6.2816, -5.9759, -6.4539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5657, -6.8779, -7.9984, -8.3495, -5.9375, -6.0330, -6.1027, -6.0412,
        -7.6856, -7.7975, -6.6550, -5.6334, -6.1815, -6.6291, -6.7061, -7.7264,
        -8.2801, -6.4104, -6.3688, -6.2853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9170, -7.3497, -7.2979, -7.5916, -6.6522, -7.2507, -6.9986, -7.6962,
        -7.5087, -7.7664, -7.5177, -7.1613, -8.0501, -7.0773, -7.7066, -7.3144,
        -7.4328, -7.7603, -7.8563, -7.5788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7634, -5.8442, -5.9932, -6.9701, -6.8038, -7.8983, -7.8461, -5.3023,
        -6.0659, -5.8140, -6.5957, -8.6594, -7.1813, -5.9467, -5.9042, -5.9414,
        -6.7459, -6.9588, -7.6273, -8.0025], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2752, -6.0637, -5.8147, -6.9003, -8.4713, -7.2878, -6.3294, -5.6607,
        -5.8302, -6.0690, -7.8570, -8.0509, -6.9042, -6.7893, -7.2186, -6.8222,
        -6.7954, -7.6278, -8.4471, -5.2044], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7710, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1641, -6.3772, -7.8229, -8.1984, -6.0820, -6.5141, -6.0596, -6.5882,
        -7.1084, -7.8301, -7.9582, -6.0231, -5.7429, -6.4213, -6.4552, -7.2084,
        -7.8616, -8.2486, -5.9776, -5.8215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5216, -5.8700, -5.5134, -6.9599, -7.7891, -7.6836, -6.2889, -5.7210,
        -5.7866, -6.1286, -7.7154, -8.2066, -7.4728, -6.3110, -5.8836, -5.9590,
        -6.0277, -7.9605, -7.8669, -6.0434], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6377, -7.2949, -6.1598, -5.7730, -6.0778, -7.5458, -8.2925, -8.0135,
        -6.7581, -8.3144, -7.4078, -6.1376, -6.0328, -5.6727, -7.1020, -7.7474,
        -7.5890, -6.7059, -5.8247, -5.6160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9524, -8.0460, -6.1918, -7.7310, -8.7605, -6.1735, -6.9216, -5.9511,
        -7.6462, -7.7651, -8.1139, -7.1636, -5.4873, -7.2893, -6.8483, -7.9233,
        -6.5940, -8.2647, -7.3553, -8.7825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6910, -5.7864, -5.6620, -5.9894, -6.5683, -7.6405, -6.7602, -8.6716,
        -7.4911, -5.6731, -6.6576, -5.9000, -6.8788, -6.6865, -7.5057, -7.5150,
        -8.4614, -7.3501, -5.5186, -7.0509], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7729, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4419, -7.7260, -6.5228, -6.1471, -5.7448, -6.3723, -7.9691, -8.1264,
        -6.2134, -6.2870, -6.6137, -7.1008, -6.8066, -7.6215, -7.9516, -6.2888,
        -6.1563, -5.7390, -7.0963, -6.5170], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3498, -6.4625, -8.2860, -7.6184, -6.7381, -6.0683, -5.9813, -6.6639,
        -7.1613, -7.8551, -7.8976, -6.2620, -5.7096, -5.6870, -6.3915, -7.1966,
        -8.1927, -6.0512, -6.2925, -6.0776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1284, -8.1276, -8.0592, -5.8207, -6.8306, -5.8015, -6.6434, -8.7115,
        -7.4864, -6.3718, -5.7570, -6.5082, -6.0208, -8.6609, -6.5303, -8.0625,
        -8.7342, -6.3768, -6.4194, -5.5184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5169, -8.7054, -5.6346, -5.8978, -5.9344, -5.4887, -7.4603, -7.7933,
        -6.4274, -6.0448, -6.5215, -7.2890, -8.1124, -7.2228, -6.6122, -6.2960,
        -5.4319, -6.3152, -8.5194, -7.3675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3521, -6.2721, -5.6593, -6.5248, -6.5339, -8.2233, -7.4194, -6.3571,
        -5.3947, -5.6357, -6.6601, -7.0368, -7.6683, -7.9789, -5.6357, -6.0592,
        -5.8225, -6.0548, -7.3146, -7.8652], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1281, -5.5600, -6.6450, -8.3882, -7.6567, -5.8370, -5.8777, -5.7309,
        -6.6750, -7.1342, -7.4260, -8.0623, -5.7125, -6.1243, -5.5847, -6.2865,
        -8.1774, -7.3878, -6.0668, -6.7600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3168, -7.9729, -6.2965, -6.2756, -6.3410, -6.5182, -7.0248, -7.5477,
        -8.1478, -6.3030, -5.7387, -5.5468, -6.4558, -7.4917, -7.2827, -6.3076,
        -6.6276, -6.3366, -6.3199, -8.2294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5013, -6.6433, -7.3115, -8.0051, -5.5464, -6.0142, -6.3092, -6.2948,
        -7.9494, -7.7560, -5.4170, -6.2374, -5.7296, -6.3397, -8.1617, -7.2486,
        -6.6822, -5.7074, -6.2674, -6.6126], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1495, -6.2039, -7.1585, -7.4922, -8.0245, -6.1042, -6.2456, -6.1814,
        -6.8309, -7.0878, -8.1489, -8.3733, -5.6993, -6.3197, -5.7141, -6.3026,
        -8.5932, -7.4880, -6.9721, -7.0745], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8828, -6.2249, -5.6508, -6.2802, -7.0477, -7.8922, -8.2375, -5.7913,
        -6.1762, -6.1926, -7.0058, -8.4462, -7.4816, -6.5662, -5.9353, -6.0923,
        -6.6228, -7.8652, -8.1136, -5.9735], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8252, -7.3799, -7.4343, -7.7285, -6.9057, -6.1435, -5.4669, -7.2698,
        -7.8444, -7.7149, -6.6911, -5.5136, -6.3640, -7.0259, -6.6055, -7.8248,
        -7.7675, -5.1753, -6.1095, -5.7390], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0949, -7.1057, -8.1474, -6.6247, -8.4209, -8.7532, -6.3524, -6.2501,
        -6.2086, -6.7308, -7.8715, -7.6725, -6.6143, -5.9572, -5.7035, -6.5310,
        -6.7864, -7.6557, -8.1948, -6.3892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6875, -8.3656, -6.4207, -8.7740, -8.6412, -5.7842, -6.2038, -5.8102,
        -6.3584, -8.2668, -7.0896, -7.1501, -6.7597, -7.6333, -5.7651, -6.5983,
        -7.1017, -8.4613, -7.4510, -8.2909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4592, -8.4383, -6.2099, -8.7381, -8.7415, -6.1018, -6.7566, -6.0351,
        -6.5619, -8.0835, -7.2691, -6.8213, -5.6800, -5.8988, -6.4331, -7.7702,
        -7.7605, -6.1679, -6.3353, -5.9436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9805, -7.9969, -5.9487, -5.9687, -6.9427, -6.6797, -7.0440, -7.8849,
        -7.8660, -5.6095, -6.2224, -6.1423, -6.5302, -7.9846, -7.5412, -6.1344,
        -6.5859, -6.1621, -7.2752, -7.9704], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5019, -8.1120, -6.5728, -8.8397, -7.7357, -5.6180, -6.3941, -5.9971,
        -6.3115, -8.5292, -7.3216, -6.5557, -7.2394, -6.9944, -6.1694, -7.8700,
        -7.2260, -7.4817, -6.4371, -7.8009], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8208, -6.3706, -8.1179, -7.4262, -6.5036, -6.2948, -6.1686, -6.8113,
        -7.1162, -7.9158, -6.0392, -7.2245, -7.8241, -6.3150, -6.6619, -6.0232,
        -7.0428, -8.7310, -7.1434, -6.6774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2820, -5.7683, -5.9051, -7.5401, -8.1317, -7.4495, -6.5316, -6.3819,
        -5.6491, -6.6263, -7.6761, -7.6427, -6.1924, -5.7089, -5.6918, -6.3619,
        -8.2461, -8.0912, -6.5512, -6.1043], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7949, -8.3903, -8.2071, -6.3368, -6.7502, -6.3284, -6.2015, -7.1146,
        -7.5016, -8.0477, -6.4673, -5.8705, -5.9347, -6.1414, -8.0371, -8.4430,
        -6.0796, -5.9841, -6.0866, -7.5018], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3450, -6.2331, -6.2103, -6.8653, -7.4285, -7.7250, -7.9843, -5.8381,
        -6.0304, -5.6457, -6.0773, -7.8334, -8.2063, -6.7204, -6.0862, -6.5467,
        -7.3215, -7.5615, -7.3454, -6.7134], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1501, -7.5910, -8.5120, -6.3863, -6.5795, -6.2541, -7.3596, -5.9491,
        -7.9817, -7.5110, -8.5643, -8.2247, -6.0553, -6.0670, -5.9701, -6.7768,
        -7.1882, -7.4110, -7.8607, -6.0623], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0176, -6.3906, -6.2626, -5.7471, -7.5611, -8.2151, -6.3409, -5.8566,
        -5.9913, -5.9655, -7.9905, -8.1128, -6.1987, -6.4045, -5.8450, -6.4236,
        -7.0925, -7.6168, -8.1651, -4.7487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1780, -6.3136, -6.0288, -6.1364, -7.0002, -8.2990, -7.9885, -6.9049,
        -6.0640, -5.8832, -5.9823, -7.7020, -8.2123, -8.1282, -5.6522, -5.5654,
        -5.7290, -6.0756, -7.4778, -7.9215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8236, -6.7683, -8.0241, -7.7225, -6.8346, -5.4892, -5.8624, -7.6807,
        -8.2625, -7.8298, -6.0556, -6.0410, -5.9453, -6.6488, -8.3358, -8.1789,
        -6.1337, -5.8235, -5.7830, -6.4237], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0288, -5.1462, -6.1832, -6.1246, -5.8496, -7.9365, -8.1049, -6.3265,
        -6.1749, -6.3485, -6.4616, -6.4739, -7.8301, -7.8135, -5.8528, -5.7116,
        -6.5557, -5.3510, -6.9244, -7.9725], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1090, -6.5316, -6.0541, -9.0855, -8.6733, -6.6027, -5.7341, -6.7410,
        -6.6639, -7.4663, -6.6772, -8.4200, -6.3582, -8.3478, -8.8829, -5.6167,
        -6.3629, -5.7143, -6.5640, -8.4550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3511, -5.9494, -6.6110, -6.5667, -7.7362, -8.2447, -5.8023, -5.9964,
        -6.4535, -6.2932, -7.8874, -8.2574, -6.0422, -6.9860, -6.3639, -5.8428,
        -6.8724, -7.7618, -7.9832, -6.9291], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7911, -5.8898, -5.6318, -6.9320, -7.8271, -7.5640, -5.9299, -5.8290,
        -6.2373, -6.7453, -6.7492, -7.8528, -8.3811, -5.8988, -6.2035, -6.1094,
        -6.6923, -6.5804, -7.4158, -8.3300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8445, -6.1700, -5.7707, -6.8677, -8.1082, -7.7849, -5.9371, -5.8085,
        -5.8973, -6.8306, -6.8870, -7.5934, -7.8760, -5.8976, -5.9435, -5.9872,
        -6.6876, -8.2316, -7.7784, -6.6953], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3322, -5.6785, -6.2133, -6.2401, -6.5323, -7.8153, -7.3730, -6.2366,
        -6.0664, -5.8867, -7.2500, -7.7768, -7.7667, -5.5686, -5.8049, -5.8014,
        -6.3051, -8.4288, -7.8843, -5.8851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0382, -6.1376, -8.2290, -7.9592, -5.8232, -6.5385, -5.8620, -6.7185,
        -7.2240, -7.2310, -6.5857, -6.1461, -5.7431, -6.9293, -6.6137, -7.5648,
        -8.2915, -5.4820, -6.0299, -5.9329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0168, -7.7416, -7.9408, -7.0579, -6.7220, -6.9644, -7.2613, -6.2567,
        -7.8744, -6.4435, -8.2207, -8.9083, -5.6599, -6.6952, -5.9130, -6.3897,
        -8.3597, -7.3635, -6.0428, -7.7597], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8217, -7.6335, -7.5436, -6.4379, -6.3346, -5.4229, -7.2006, -7.9973,
        -7.6967, -6.5060, -6.5157, -7.2602, -7.5568, -7.2653, -8.0294, -6.5722,
        -8.4337, -8.8170, -6.1362, -6.2425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8066, -6.3068, -5.4912, -5.4696, -7.2641, -8.0465, -7.0398, -6.6815,
        -7.1508, -6.1219, -7.1076, -8.1140, -8.1802, -6.0787, -7.1393, -6.7379,
        -5.2897, -6.0847, -7.2906, -7.0890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0571, -6.1496, -6.5792, -6.2301, -6.5964, -7.8204, -8.2182, -6.3491,
        -5.9477, -6.3469, -5.9552, -6.9611, -7.7594, -8.5785, -5.4697, -6.4385,
        -6.0315, -5.7865, -7.3761, -7.7647], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8555, -8.1111, -8.2067, -5.3630, -6.0804, -5.6933, -6.1257, -6.9431,
        -8.1196, -8.1475, -5.7573, -5.8690, -5.9865, -6.2332, -7.3613, -7.9944,
        -5.6823, -6.1533, -6.1186, -6.5143], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3104, -6.6090, -6.0149, -6.3720, -6.7232, -7.0890, -7.7178, -7.8785,
        -6.2669, -5.5184, -5.9699, -6.8000, -7.6327, -7.8585, -6.2210, -5.9017,
        -6.1671, -7.0015, -7.7033, -7.8513], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0057, -6.1108, -6.1633, -6.8748, -7.6778, -8.3060, -6.3876, -5.6651,
        -6.3739, -6.6596, -8.1726, -8.0211, -5.7725, -6.0927, -5.7524, -6.1908,
        -8.1769, -7.9007, -6.0330, -6.1123], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0005, -7.1040, -7.6055, -8.2379, -5.6765, -6.0344, -5.8453, -6.1886,
        -6.7079, -7.4498, -8.2119, -7.0498, -6.4026, -6.0444, -6.9098, -7.0505,
        -7.2617, -8.2569, -5.7478, -6.3739], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3144, -6.6081, -5.6495, -6.3755, -7.2760, -7.5454, -8.1285, -5.9050,
        -6.2075, -6.0698, -7.2728, -8.2552, -8.1913, -5.9591, -6.2205, -6.5662,
        -5.4873, -7.4272, -7.6878, -7.0828], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8115, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4814, -6.7304, -8.3936, -7.2227, -6.7961, -5.4347, -6.5463, -6.9216,
        -7.0361, -8.2441, -6.3187, -8.7699, -8.7030, -6.3041, -6.8323, -5.3584,
        -7.5284, -8.0082, -7.2322, -7.2389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8109, -6.1714, -5.5437, -5.9870, -5.6037, -7.3584, -8.3694, -6.0301,
        -6.3481, -7.0498, -5.5853, -7.4616, -7.1138, -8.1111, -6.1679, -8.4443,
        -7.5628, -6.2346, -6.2640, -5.5253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6215, -8.0241, -5.9399, -6.5177, -8.1655, -5.9526, -6.8705, -6.2829,
        -6.3719, -5.8065, -7.8535, -8.2146, -6.8567, -7.6302, -6.6488, -9.7681,
        -5.4533, -7.6813, -6.7006, -7.6501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1122, -8.5088, -7.7293, -6.6103, -7.0748, -5.1238, -6.0082, -6.2911,
        -6.6582, -6.3576, -6.2215, -8.7348, -7.5538, -6.9156, -7.0347, -6.6354,
        -5.8715, -6.5212, -7.0080, -7.8386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8200, -6.2972, -6.7388, -7.0325, -7.8555, -8.2822, -5.8728, -6.6425,
        -6.3765, -6.6717, -7.0545, -7.5591, -8.1543, -5.5822, -6.0470, -6.5155,
        -6.1946, -7.1046, -7.5585, -8.1268], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8743, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9302, -6.4118, -7.2705, -7.3140, -7.5020, -8.1233, -5.1948, -6.5290,
        -5.8381, -6.1185, -8.6499, -7.4014, -5.9165, -6.6142, -5.7353, -5.9963,
        -6.8479, -7.5281, -7.9742, -5.7411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6926, -7.6904, -7.8712, -7.4691, -6.5665, -8.0155, -8.2380, -5.8533,
        -6.2523, -5.7923, -6.4330, -8.3450, -7.5753, -6.6592, -5.7256, -6.8606,
        -7.5753, -7.5310, -7.8595, -6.2750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1140, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1288, -8.6483, -8.0233, -6.0968, -6.1644, -5.6992, -6.7691, -8.2535,
        -7.7167, -6.6356, -5.9616, -5.7105, -6.1422, -7.3657, -7.8762, -5.6523,
        -5.9548, -5.8705, -6.2110, -7.7550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8141, -5.6708, -6.1364, -6.9766, -5.7142, -6.6631, -7.8284, -8.4957,
        -6.0846, -6.4436, -6.7683, -6.6113, -7.7320, -7.9454, -6.1398, -5.9146,
        -5.6681, -5.8101, -7.6302, -8.3696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1859, -6.3840, -6.1637, -6.0863, -5.9553, -7.0532, -7.6495, -8.1439,
        -5.3057, -6.1774, -5.6834, -6.9425, -8.5660, -6.9766, -6.5818, -7.2270,
        -9.3105, -6.3189, -7.4182, -7.2460], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2927, -7.9893, -7.4995, -4.9776, -6.2090, -6.6036, -7.6403, -7.1842,
        -7.3253, -4.8881, -4.9224, -6.9785, -6.8772, -6.8580, -6.2618, -6.0828,
        -7.7504, -8.2601, -5.4920, -6.3460], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1044, -7.0348, -7.8310, -7.8094, -6.8019, -6.3931, -6.6014, -7.2477,
        -8.0192, -7.9941, -6.2342, -8.3525, -8.7216, -5.9753, -6.2546, -5.9403,
        -5.8334, -7.9677, -8.0022, -6.7123], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4447, -7.0021, -5.6589, -6.4296, -7.5899, -7.4878, -7.9390, -7.7218,
        -8.3113, -7.5845, -6.2163, -5.9775, -6.9210, -6.2936, -7.0989, -7.5748,
        -8.0667, -5.7551, -5.7767, -6.6954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5861, -5.5537, -6.3605, -6.0469, -6.9060, -7.9559, -7.7021, -5.7825,
        -6.8364, -6.2116, -6.1645, -8.0866, -7.6445, -6.1541, -5.9456, -6.3266,
        -7.1548, -7.9226, -7.9632, -6.8491], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9257, -6.0198, -7.6145, -6.7797, -7.4861, -8.4531, -6.8353, -6.4901,
        -6.3087, -6.7695, -6.1831, -7.5756, -7.8743, -6.8454, -5.9551, -5.9089,
        -7.4426, -8.1164, -7.6164, -6.5690], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3662, -7.8588, -6.2521, -5.6215, -6.3769, -7.1535, -7.8793, -7.9130,
        -5.9628, -6.2900, -6.2380, -7.0182, -8.5975, -6.8461, -6.3550, -5.5242,
        -6.3448, -7.7088, -7.7661, -7.8233], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9854, -7.4831, -8.6820, -6.9337, -6.3048, -5.7839, -8.0622, -6.0033,
        -6.3291, -6.6323, -7.0151, -6.4280, -8.1106, -5.8591, -8.6421, -8.5541,
        -5.9225, -6.1926, -6.2672, -6.9425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9637, -7.4204, -7.8771, -6.3936, -5.3600, -7.0770, -6.3263, -8.3412,
        -6.9427, -8.2621, -8.6555, -6.6347, -7.5956, -6.7834, -5.5118, -6.8422,
        -7.0436, -8.2003, -8.5602, -5.9277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1655, -5.7267, -6.0459, -7.3903, -6.4531, -8.0889, -6.2846, -8.3556,
        -7.1857, -6.0518, -6.6045, -5.6738, -5.7133, -7.8699, -7.7677, -6.1528,
        -6.0662, -5.9165, -6.6725, -6.8959], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6541, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3731, -8.3060, -5.8107, -8.8301, -8.3959, -5.9434, -6.6713, -5.7373,
        -6.1948, -7.3671, -7.5187, -8.1832, -5.6371, -6.0873, -6.1465, -6.2753,
        -8.1487, -8.7709, -6.7371, -6.0614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9187, -6.6371, -5.6632, -6.0759, -7.2636, -7.0017, -7.3811, -8.1020,
        -5.2547, -5.8156, -5.9656, -5.8586, -8.1033, -8.0662, -5.8955, -6.1152,
        -6.0181, -6.0199, -8.4430, -7.5498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2015, -6.1184, -6.8310, -7.7706, -7.4256, -6.6039, -6.6302, -6.6887,
        -5.8223, -6.9987, -8.0801, -7.8479, -5.6343, -6.1383, -5.8932, -6.5286,
        -8.6942, -8.0937, -6.2747, -5.7056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9086, -8.5117, -7.3629, -8.8497, -8.9227, -5.5680, -6.4088, -6.0191,
        -7.1354, -6.7155, -7.6984, -7.8970, -6.6787, -5.9825, -6.2280, -6.9217,
        -7.0619, -8.0019, -8.1528, -5.8871], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0698, -6.5236, -7.7729, -8.0499, -6.1724, -5.7541, -6.7197, -6.7608,
        -7.5455, -8.2574, -7.8278, -6.5985, -6.3868, -5.9995, -6.7520, -7.4823,
        -7.3944, -6.5155, -6.6617, -6.6245], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0452, -5.2828, -7.3502, -7.8245, -7.5052, -6.2162, -6.5562, -5.6517,
        -7.1362, -8.2641, -8.4831, -6.2165, -5.4602, -5.6521, -6.4840, -7.5617,
        -7.7833, -5.3244, -6.1682, -6.0668], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3394, -6.3463, -6.1753, -7.1321, -6.5752, -7.5173, -7.7639, -5.8019,
        -6.0450, -5.7496, -5.4799, -8.0777, -7.4846, -6.5350, -5.8784, -5.7966,
        -6.2523, -7.6198, -7.6584, -6.6943], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3337, -5.5658, -5.8121, -6.6938, -7.2156, -7.9501, -8.0251, -6.1504,
        -5.9158, -5.5611, -6.3507, -8.4155, -7.4698, -6.0690, -6.2401, -6.8630,
        -6.9829, -7.2497, -7.6438, -7.9694], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3768, -6.3546, -6.8021, -8.4229, -7.8787, -6.9153, -6.9218, -5.5338,
        -5.8329, -7.2595, -8.0518, -7.9976, -5.1657, -6.2602, -5.7399, -6.2131,
        -8.7016, -8.2953, -6.0113, -6.1217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5424, -7.0987, -7.5906, -7.7772, -6.0076, -5.5716, -7.0714, -7.8007,
        -8.1996, -5.2461, -6.0859, -5.9476, -6.7355, -8.0963, -7.9154, -5.6646,
        -5.9362, -5.8236, -6.5796, -8.3490], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4925, -7.5610, -8.1237, -7.2844, -6.7631, -6.4471, -5.5746, -6.9663,
        -8.5900, -7.1924, -6.4117, -5.9042, -7.2548, -6.4737, -7.1073, -6.5377,
        -6.5068, -7.7399, -7.8804, -5.6970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9254, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3885, -6.8323, -7.9847, -7.6059, -6.9050, -6.4890, -5.6642, -6.6811,
        -7.9917, -7.2128, -7.9221, -6.0938, -9.2386, -7.8881, -7.9951, -7.7073,
        -6.2106, -8.0323, -8.4892, -5.6216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1168, -7.3760, -8.0262, -5.9807, -5.7505, -5.6279, -5.6558, -7.5423,
        -7.6747, -7.3067, -5.8638, -5.8544, -7.0862, -8.0367, -7.9140, -7.0341,
        -6.4318, -6.1180, -6.6217, -8.8148], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8917, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0749, -5.4174, -6.5529, -5.6605, -7.7806, -8.0230, -7.0737, -6.6781,
        -5.3598, -7.2250, -6.4477, -7.7384, -6.7905, -7.6173, -6.7861, -6.8141,
        -8.6864, -8.1283, -6.0115, -6.2098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9180, -6.0143, -5.8914, -5.8792, -6.2513, -7.8641, -8.5732, -5.4868,
        -6.2917, -6.8979, -6.2348, -6.9788, -7.7617, -8.2859, -6.6282, -6.1564,
        -5.5008, -7.6291, -7.8280, -8.5179], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0186, -6.0339, -6.2952, -6.3995, -6.3129, -7.6810, -7.8003, -6.0701,
        -6.7142, -6.8171, -6.3852, -6.2555, -7.4392, -7.8765, -5.1690, -6.1089,
        -5.7786, -5.9053, -8.0568, -7.9873], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5223, -5.9978, -6.9823, -8.0531, -7.0365, -7.1110, -6.9262, -6.9074,
        -6.2841, -7.6248, -6.4527, -7.3185, -8.7907, -6.4023, -6.5426, -6.4947,
        -6.8096, -6.8988, -5.8554, -7.8475], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7555, -8.3562, -8.9655, -5.9537, -6.4616, -6.0524, -6.3876, -7.8365,
        -7.6825, -6.2501, -6.7384, -6.4065, -7.3922, -8.3743, -8.2441, -6.6542,
        -6.8022, -6.2482, -6.7798, -8.1025], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0335, -7.6852, -8.1566, -7.9112, -5.8602, -6.2198, -5.6280, -6.3797,
        -8.3780, -7.3240, -6.2560, -5.4069, -5.6297, -6.9202, -6.9368, -7.6858,
        -8.2187, -5.7825, -5.9080, -6.3122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9695, -7.9405, -5.4553, -6.2668, -6.3039, -6.6087, -7.9524, -7.3644,
        -6.5288, -6.0154, -6.6187, -6.7709, -6.9945, -8.2553, -7.4429, -8.6566,
        -7.9797, -5.2066, -6.3703, -6.4024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3556, -7.0304, -6.3795, -8.0165, -8.8158, -6.6847, -6.3162, -6.3822,
        -6.7338, -7.7899, -8.2410, -5.2807, -6.1679, -5.7645, -5.4893, -8.1895,
        -8.8353, -5.6433, -6.1367, -6.3408], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5888, -8.4814, -7.2108, -6.4119, -5.6847, -6.6616, -7.1328, -7.0926,
        -7.7914, -6.5902, -8.2034, -8.8719, -6.2676, -6.3936, -5.4691, -6.1805,
        -8.3839, -7.3864, -6.8668, -5.9345], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5161, -6.7702, -7.8198, -7.3602, -6.5990, -6.4937, -6.0198, -7.3369,
        -7.2394, -6.8329, -7.2318, -5.3931, -6.2126, -5.9392, -7.0382, -7.9353,
        -7.8422, -5.6722, -6.1788, -5.9758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7286, -6.6054, -7.3323, -7.7867, -6.8625, -6.6687, -6.4457, -6.7611,
        -8.3731, -7.2350, -7.1462, -6.6962, -7.4324, -7.5990, -7.9583, -7.4600,
        -5.7978, -7.5650, -8.0334, -5.6682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1373, -6.3690, -5.8699, -5.9161, -6.4251, -7.4997, -8.2947, -6.2298,
        -6.3857, -7.2789, -6.9062, -7.5181, -6.4715, -8.3613, -8.3508, -6.2239,
        -6.3039, -5.9169, -5.6341, -6.5327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6624, -7.4018, -5.7169, -5.9501, -6.3988, -6.4007, -7.0793, -7.6773,
        -5.5342, -6.2288, -6.1051, -6.5257, -6.5088, -8.3027, -8.4946, -6.2495,
        -6.1647, -6.5971, -6.3399, -7.0837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5340, -7.8233, -6.0978, -6.0200, -6.1223, -6.7045, -7.0007, -7.7633,
        -8.0819, -6.3113, -5.8421, -5.5558, -6.9074, -7.4018, -7.9464, -5.7094,
        -5.9367, -5.4831, -6.5451, -8.4098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3699, -6.4306, -6.3255, -6.3607, -6.9219, -6.0427, -8.0202, -7.8342,
        -5.4300, -5.8312, -5.8047, -6.7892, -8.4830, -7.5008, -6.9799, -7.9341,
        -6.2693, -6.8159, -7.2027, -8.1290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8340, -7.6513, -7.9312, -7.5124, -6.3456, -6.5556, -6.0575, -6.1883,
        -7.2231, -8.0640, -6.9626, -6.1871, -6.7868, -6.7938, -7.6918, -8.0836,
        -6.6933, -8.1233, -8.3472, -6.2616], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3784, -7.3310, -7.2601, -7.8026, -5.6381, -5.7117, -5.7071, -6.4786,
        -7.6858, -8.0990, -6.0669, -5.8037, -6.0904, -6.4809, -7.1694, -7.6942,
        -8.2876, -6.2872, -6.1021, -5.9094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4842, -6.1628, -6.4726, -7.4433, -8.0179, -5.5026, -6.0446, -6.1645,
        -7.0075, -7.2785, -7.5714, -8.2063, -5.9070, -6.0825, -6.1293, -7.2552,
        -7.6917, -8.0739, -5.9865, -5.8201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4838, -6.2309, -6.4065, -7.6346, -7.9241, -6.0177, -5.7322, -6.1835,
        -6.5979, -7.7386, -7.6944, -5.9091, -6.3296, -5.8873, -6.3419, -7.9783,
        -7.7180, -5.9510, -6.0029, -6.5736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6980, -6.5995, -6.5534, -8.0554, -7.7813, -8.6989, -7.9175, -5.4695,
        -5.8411, -5.7936, -7.9075, -6.7680, -8.3789, -7.2174, -8.1584, -6.6424,
        -6.3110, -5.3064, -6.0191, -6.5876], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6752, -8.9232, -6.1408, -7.7979, -6.1658, -8.0622, -8.4064, -5.8113,
        -6.6033, -6.8115, -6.8049, -6.7670, -7.5145, -8.5853, -5.1177, -6.3809,
        -5.9215, -6.5589, -8.6003, -7.1292], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3754, -7.3822, -5.4998, -6.3513, -6.2370, -7.9094, -7.8227, -6.4528,
        -5.8466, -5.6464, -6.5066, -6.7695, -7.3769, -8.0793, -6.0954, -6.0335,
        -5.7639, -6.6696, -8.6949, -7.4868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0121, -7.6582, -7.7037, -5.4227, -6.1679, -6.3402, -6.3445, -7.6768,
        -8.6262, -5.8019, -5.8266, -5.8181, -6.4581, -7.0200, -7.3648, -7.8542,
        -5.9421, -5.6959, -5.8465, -6.5470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4950, -8.0868, -8.0007, -6.8126, -5.6952, -7.1257, -6.2762, -6.9247,
        -7.0930, -7.8797, -5.8457, -6.3438, -6.6141, -7.0547, -8.0216, -7.8967,
        -5.0997, -6.5658, -5.9256, -6.3484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4833, -8.4369, -7.2612, -6.1040, -5.8206, -5.8894, -7.0771, -7.3116,
        -7.5493, -7.9364, -5.4012, -6.4893, -5.8044, -6.7115, -8.6033, -7.9318,
        -6.6809, -5.5939, -6.8145, -6.3854], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0127, -7.8969, -8.1179, -6.5269, -6.0196, -6.3392, -6.4607, -6.8348,
        -8.3399, -8.2280, -5.7785, -5.8958, -6.1912, -6.1162, -7.9745, -8.2539,
        -6.3368, -6.5555, -6.4009, -6.3591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2553, -7.5795, -6.8994, -8.2698, -6.5118, -8.5803, -9.0760, -5.4446,
        -6.7722, -6.1667, -5.8036, -6.6821, -8.3285, -6.8204, -6.8781, -7.0084,
        -7.5429, -6.1891, -8.3840, -6.6981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9534, -7.5525, -6.0726, -6.7612, -6.7977, -6.7533, -8.0352, -7.6144,
        -6.2764, -5.9665, -5.9684, -7.4074, -7.7898, -8.3514, -6.0273, -5.6444,
        -6.1121, -7.3114, -7.2543, -8.2454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3301, -5.5139, -6.5585, -6.8760, -7.9248, -7.9760, -4.9903, -6.2732,
        -6.0185, -6.8981, -8.0973, -6.9088, -6.1150, -5.9728, -5.6156, -6.8587,
        -7.1220, -7.4814, -7.9315, -5.4085], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3181, -6.0886, -6.8235, -6.5564, -7.0133, -7.8277, -7.9399, -5.8153,
        -6.3001, -6.2497, -6.0835, -7.0314, -7.3718, -7.7750, -6.3328, -5.9549,
        -6.8095, -7.3189, -7.0058, -7.7831], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6630, -7.7932, -6.2809, -8.2785, -8.8854, -5.8495, -6.4711, -5.4636,
        -6.4642, -7.9176, -7.5259, -5.9743, -6.0187, -6.3323, -6.4738, -7.0110,
        -7.4070, -7.9046, -6.4776, -5.5832], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9304, -8.2058, -7.7871, -6.3917, -6.9637, -5.7845, -5.3785, -6.8974,
        -6.8396, -7.6455, -7.1713, -8.5842, -8.5211, -6.2244, -6.5418, -5.8037,
        -6.2761, -7.1652, -8.4960, -7.3501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7744, -5.6452, -5.7983, -5.9397, -7.2421, -8.4748, -6.8651, -8.7260,
        -8.5974, -6.1051, -6.7310, -6.1175, -6.3314, -7.0469, -7.7818, -7.8905,
        -6.6970, -6.5279, -6.1352, -6.7201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9074, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8921, -6.1172, -5.9157, -5.8991, -7.0402, -8.3737, -8.4697, -7.3062,
        -6.3756, -6.1861, -6.3584, -6.6120, -7.9660, -7.6239, -6.1761, -5.8892,
        -5.5755, -6.9169, -8.5748, -7.7869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3165, -7.7135, -6.4793, -8.1640, -8.6419, -6.1555, -6.2901, -5.5892,
        -5.2183, -7.7961, -7.8690, -6.3289, -6.1542, -7.7640, -6.2475, -7.2268,
        -7.8707, -7.0252, -8.5357, -8.6637], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7758, -7.1579, -6.5809, -8.3954, -6.3957, -7.6926, -7.1533, -7.9601,
        -6.2583, -6.5916, -8.2486, -8.0532, -5.3482, -6.2173, -5.8685, -6.6585,
        -8.1162, -7.3953, -6.4199, -6.9163], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4826, -7.3636, -7.6186, -8.2295, -6.3803, -8.4862, -7.3886, -5.7665,
        -6.5523, -6.0452, -6.8892, -6.5925, -7.8260, -7.3756, -7.1605, -5.9877,
        -6.3199, -6.6774, -5.9470, -7.9866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7428, -7.3153, -8.2212, -6.2101, -8.3400, -6.3814, -8.3947, -8.7319,
        -5.5876, -6.1768, -5.8918, -8.0818, -6.5650, -8.4535, -6.8352, -8.1082,
        -7.5111, -5.5501, -6.0445, -6.8920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3814, -6.5981, -5.5278, -6.1432, -7.0037, -7.1276, -7.3373, -8.0657,
        -5.6688, -6.0425, -5.7770, -6.4640, -8.6381, -7.2370, -6.4286, -5.6338,
        -5.5894, -6.3480, -6.7998, -7.8222], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3188, -6.1438, -6.1477, -6.1747, -6.1683, -6.7598, -7.8525, -8.2086,
        -5.9598, -5.8997, -5.7944, -6.1848, -7.9412, -7.3517, -6.4270, -5.8430,
        -6.0813, -6.6046, -6.4667, -7.7264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7522, -5.6651, -7.5123, -8.0976, -7.4294, -6.3177, -5.9821, -6.0936,
        -6.4049, -7.6803, -7.4551, -6.6546, -6.0545, -5.9593, -7.1649, -7.9649,
        -7.6687, -5.6199, -6.4799, -5.6375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0889, -7.7829, -5.9977, -5.8772, -6.0675, -6.6803, -7.3774, -8.0621,
        -7.8014, -5.4042, -6.1214, -5.7555, -6.3864, -8.2997, -7.2296, -6.5731,
        -5.9584, -8.7660, -6.3170, -7.9703], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3723, -7.9995, -7.7587, -6.4570, -5.7826, -5.7187, -6.2837, -7.7753,
        -8.0238, -5.7710, -6.5657, -5.8361, -6.5009, -7.7132, -7.7253, -6.3792,
        -5.9847, -5.9176, -5.9406, -7.8251], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6878, -5.7771, -6.4232, -8.3621, -7.7215, -6.4083, -6.4536, -6.6210,
        -6.4405, -6.9860, -7.9482, -7.5858, -5.6092, -6.0773, -6.1944, -6.6845,
        -7.7576, -7.6846, -5.5697, -6.2041], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5692, -6.1421, -5.3741, -5.7452, -6.4633, -6.9861, -7.3875, -7.7089,
        -5.8558, -5.7330, -5.1916, -5.9223, -7.8893, -7.5857, -6.3998, -5.9076,
        -5.6615, -6.4714, -6.7298, -8.2192], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8081, -5.7957, -6.8418, -6.9705, -7.9191, -5.4557, -5.5779, -5.7771,
        -6.1183, -8.0058, -7.6865, -6.3276, -5.6376, -5.9279, -7.1293, -7.3414,
        -7.8577, -8.0367, -6.1104, -5.1285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4137, -8.5076, -5.2740, -6.1370, -5.4215, -7.1918, -8.2529, -6.9214,
        -6.6017, -5.1449, -6.7626, -6.1799, -7.8443, -6.2888, -8.3402, -5.8774,
        -9.0358, -8.1171, -5.7728, -6.5507], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8374, -4.9960, -7.4011, -7.6003, -7.5014, -6.5377, -6.4941, -5.5219,
        -5.6213, -7.6076, -7.6666, -7.7667, -5.2315, -5.4576, -5.2940, -6.6650,
        -8.0718, -7.6029, -6.6518, -6.0524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0030, -7.5594, -7.7653, -5.6006, -7.4019, -7.9820, -5.5929, -6.9665,
        -5.1828, -5.6297, -6.2255, -7.9671, -7.0916, -6.5996, -6.2680, -8.9403,
        -6.3545, -8.1074, -6.3889, -8.4399], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1386, -5.4260, -5.3953, -6.5224, -7.7937, -7.5639, -6.1093, -5.6973,
        -5.6935, -6.2680, -6.7284, -7.5450, -7.7503, -5.9682, -5.5127, -5.3343,
        -5.9893, -7.4948, -7.3278, -6.2805], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8901, -5.6706, -5.8149, -5.6164, -6.0600, -7.3774, -7.3395, -5.3771,
        -6.1109, -5.9340, -6.7102, -7.8789, -7.9052, -6.8847, -5.4385, -6.2922,
        -5.9885, -6.8476, -7.5205, -7.9830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7488, -8.4336, -6.3974, -5.8621, -7.7992, -8.8819, -7.9749, -7.0567,
        -5.8682, -4.9701, -7.3650, -6.5840, -7.6211, -7.7975, -5.6390, -6.4670,
        -6.9037, -5.8216, -6.5372, -8.1408], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5600, -6.6076, -6.0614, -5.5261, -7.3018, -8.3513, -7.6809, -6.1419,
        -6.2662, -5.0042, -6.1138, -6.2118, -7.3306, -7.9396, -6.0476, -6.1202,
        -5.3747, -6.1036, -8.0286, -7.9414], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1003, -6.8009, -7.0392, -7.5208, -7.7154, -5.6800, -6.2731, -5.8446,
        -5.9183, -8.5336, -7.0087, -6.6284, -7.1349, -7.8757, -6.8442, -6.6358,
        -7.8881, -6.7085, -8.5270, -8.2135], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3971, -7.0106, -6.2415, -6.2456, -5.4361, -6.4482, -6.0492, -7.4839,
        -7.8328, -6.8372, -6.8451, -5.7011, -7.3105, -7.8687, -6.8198, -6.6102,
        -6.6454, -5.8744, -6.4381, -7.1859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7947, -7.7751, -5.0385, -5.3443, -5.8633, -5.9942, -7.7269, -7.8147,
        -6.3940, -5.3290, -6.1974, -6.6709, -6.7186, -7.5491, -7.5881, -6.2155,
        -5.4950, -5.5043, -6.0145, -7.4831], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5854, -5.9395, -6.0984, -5.6633, -6.3000, -6.1212, -8.0533, -6.9710,
        -7.2510, -5.4982, -4.6320, -6.6540, -7.7972, -8.0426, -7.5748, -6.3019,
        -5.8208, -7.2306, -7.5828, -7.8723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7746, -7.6062, -5.9442, -5.5790, -5.3881, -6.8256, -7.4903, -7.5098,
        -5.5935, -5.3787, -6.2671, -6.3057, -6.4512, -7.4497, -7.3585, -6.5988,
        -5.5246, -5.5472, -5.5151, -7.6955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1428, -7.6947, -8.0192, -6.2094, -6.0964, -5.4636, -6.5549, -8.3895,
        -7.2312, -6.3158, -5.8518, -7.0484, -6.4339, -7.1301, -7.8938, -7.7174,
        -6.3172, -5.8713, -6.3155, -6.0255], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7861, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8742, -7.3429, -8.3077, -6.2321, -8.4030, -7.2413, -5.8786, -5.9564,
        -5.4673, -6.3369, -7.6828, -7.1239, -6.2533, -6.5679, -5.2390, -6.3703,
        -6.9449, -7.7502, -7.6494, -5.8149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7692, -6.9743, -7.3077, -7.7238, -6.3751, -5.7060, -6.3434, -6.3509,
        -6.7542, -7.8043, -7.8366, -6.1946, -5.7091, -5.2763, -6.0761, -6.8947,
        -7.8115, -7.7324, -7.7263, -6.7106], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6172, -6.5145, -5.4340, -6.8917, -7.9781, -7.7295, -6.8384, -7.9980,
        -6.4367, -5.9743, -6.6975, -6.9764, -7.9320, -6.0915, -8.1346, -8.2410,
        -6.2530, -6.5599, -5.9314, -5.9448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5443, -5.7673, -7.6357, -7.5171, -6.8660, -6.5585, -5.4615, -6.9349,
        -6.2881, -7.4134, -8.2403, -5.7853, -5.9348, -6.1004, -6.1502, -6.7768,
        -7.2492, -7.6835, -6.4437, -5.6772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5211, -8.0924, -8.2723, -5.8613, -5.8774, -5.9710, -6.3102, -8.1544,
        -7.2959, -5.8714, -6.2129, -6.3949, -6.1681, -6.6337, -8.2023, -7.1536,
        -6.3251, -5.5224, -6.8291, -6.8816], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4995, -6.7735, -7.6243, -7.1661, -6.2797, -6.3701, -7.9780, -8.2120,
        -7.1912, -5.4978, -6.3884, -6.5153, -7.1730, -7.9890, -7.4234, -6.1957,
        -5.8138, -5.7775, -6.0133, -7.3810], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6108, -6.2104, -6.2674, -5.1752, -7.0173, -6.6779, -8.0360, -5.8534,
        -8.8354, -8.0624, -5.7790, -6.0607, -5.3585, -6.0127, -7.8386, -8.1812,
        -6.6425, -6.4227, -6.1642, -5.7162], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3605, -6.2774, -7.6942, -7.7183, -5.8447, -6.0795, -6.0484, -6.2872,
        -7.5823, -7.8933, -7.5574, -6.0876, -5.8356, -5.2916, -6.3140, -7.9361,
        -7.4084, -5.7497, -5.7488, -5.9094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5312, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2293, -7.9090, -5.9453, -6.4761, -5.7932, -6.0524, -6.5773, -7.9009,
        -7.8569, -8.0074, -6.6777, -5.6590, -6.9215, -8.4885, -7.1694, -6.3663,
        -6.2830, -8.0206, -6.1755, -7.7615], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5488, -7.5086, -5.5725, -6.2381, -5.6071, -6.4913, -8.5569, -7.2142,
        -6.1473, -7.3101, -7.1972, -7.0539, -7.6251, -6.8959, -6.4173, -8.0454,
        -7.9416, -6.3454, -5.9908, -5.2971], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9806, -8.3276, -7.2359, -6.3015, -6.3706, -6.0558, -6.2845, -7.1092,
        -7.3920, -7.5527, -6.0278, -5.8498, -5.2124, -5.6934, -7.8227, -7.9264,
        -5.9990, -5.7361, -6.1996, -7.0243], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8893, -5.7263, -5.6272, -6.4532, -8.5144, -7.9685, -5.5884, -6.2156,
        -5.5062, -5.4218, -7.9135, -7.5761, -5.6424, -5.9725, -6.3511, -6.5323,
        -7.4249, -8.1905, -7.6486, -6.8419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7503, -6.5377, -8.3089, -5.6951, -9.3521, -7.4780, -5.4834, -6.8787,
        -5.6635, -6.2993, -7.0913, -7.8423, -7.0431, -5.4922, -5.6605, -5.5749,
        -5.7094, -8.2786, -7.1415, -6.3090], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7795, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9810, -8.2907, -7.3880, -6.3824, -5.7115, -5.7216, -6.4837, -7.8542,
        -7.4656, -6.5141, -5.8083, -5.6971, -8.1718, -7.2714, -8.2270, -6.5019,
        -8.9191, -7.6378, -5.5764, -6.3484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6033, -8.6549, -5.7266, -6.3781, -6.1573, -6.1690, -6.3868, -7.5724,
        -7.7165, -5.5077, -5.6771, -5.1267, -7.2159, -7.6322, -7.5138, -6.8006,
        -6.3600, -5.3151, -6.1600, -7.4341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3634, -5.1822, -7.4014, -7.8480, -7.5393, -5.6396, -5.5910, -5.3027,
        -6.3437, -7.5230, -7.4791, -5.9340, -5.5150, -5.3713, -6.8405, -7.0172,
        -7.7380, -8.0146, -5.1473, -5.8820], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7871, -5.9535, -5.9900, -5.8646, -6.6476, -7.5153, -7.2120, -6.4148,
        -5.3668, -5.2736, -6.2935, -8.0196, -7.7814, -6.2800, -6.2316, -5.1175,
        -6.7307, -8.5365, -7.0935, -6.2463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0644, -7.0781, -7.5275, -5.9036, -5.1747, -5.2035, -6.3633, -7.4800,
        -8.3191, -6.3957, -5.6691, -5.9518, -5.9427, -6.2484, -7.4375, -7.7545,
        -6.4644, -5.8014, -5.6968, -6.0808], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9936, -6.6813, -6.4616, -6.7694, -5.9704, -6.8513, -8.1061, -7.7879,
        -5.9909, -5.6217, -5.7696, -5.9871, -7.4650, -7.4626, -6.4923, -6.1600,
        -5.3417, -5.9520, -6.8848, -7.3559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6691, -5.6359, -5.9967, -7.7798, -7.8004, -5.8008, -6.1895, -5.3293,
        -6.1040, -8.4611, -7.0294, -6.5801, -8.0781, -6.0163, -5.8114, -6.1730,
        -7.0738, -7.1990, -7.4976, -6.5032], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3524, -7.5190, -6.4280, -8.0969, -7.9000, -6.0624, -5.7842, -5.4903,
        -6.1051, -7.6680, -7.3199, -5.9569, -5.6824, -5.5485, -5.9975, -7.4412,
        -7.3635, -6.1016, -5.5413, -5.5075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7488, -5.9553, -6.4749, -6.7808, -8.0295, -8.0741, -6.2025, -6.2795,
        -6.3245, -6.2449, -6.7353, -7.7367, -7.8939, -4.5428, -5.8241, -5.4389,
        -6.9591, -8.4815, -7.6832, -6.2891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4720, -5.8783, -7.6798, -6.6626, -7.4924, -7.3979, -8.0723, -5.6153,
        -6.4841, -8.1832, -7.3686, -5.9303, -6.0053, -6.3168, -6.6431, -7.6192,
        -7.3582, -6.8039, -5.2964, -4.7004], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5950, -5.9204, -6.4140, -6.9653, -7.7522, -7.9950, -5.8216, -6.4764,
        -6.2098, -6.3389, -6.9548, -7.4440, -7.8808, -5.4720, -5.8724, -6.1586,
        -5.8038, -7.0659, -7.4564, -7.8799], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7216, -6.1344, -6.9479, -7.2518, -7.3868, -7.8697, -5.0238, -6.2600,
        -5.4742, -5.8793, -8.5312, -7.2462, -5.8467, -6.5988, -5.6426, -6.0199,
        -6.8412, -7.5155, -7.9284, -5.4364], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4133, -7.8280, -7.7892, -5.8416, -5.9497, -5.1870, -6.6069, -7.0332,
        -7.6029, -7.8432, -5.8215, -5.8192, -5.8219, -5.1326, -6.5332, -7.0049,
        -7.7193, -6.1342, -5.5871, -5.8515], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4277, -5.5064, -5.3785, -5.5920, -6.3395, -7.2272, -7.4410, -5.4860,
        -6.0844, -5.4702, -6.5976, -8.2965, -7.6097, -6.5033, -7.0735, -6.8790,
        -6.7483, -7.5809, -7.0279, -7.5644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6917, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2618, -6.0859, -5.8857, -5.8061, -6.7402, -7.7853, -7.5255, -5.7585,
        -6.0102, -6.4357, -5.7407, -8.1015, -7.6228, -6.6673, -5.5955, -7.5803,
        -6.4828, -6.2012, -7.5643, -6.5138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3407, -6.7900, -5.6557, -5.9579, -6.4003, -7.6954, -7.4506, -6.0951,
        -6.3510, -5.3382, -6.9709, -7.6209, -7.1657, -6.3830, -6.1250, -5.6170,
        -6.0688, -7.3552, -7.3504, -6.1793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1452, -8.5965, -5.6981, -8.4708, -8.6819, -5.9531, -6.1478, -5.8889,
        -6.4063, -7.1881, -7.4440, -7.9602, -6.5933, -6.1762, -5.5173, -6.3112,
        -6.7435, -7.8662, -7.9315, -5.6435], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1111, -6.1141, -7.7376, -7.7489, -7.1836, -7.5230, -6.2459, -6.3519,
        -6.5130, -7.4279, -8.3127, -5.7046, -5.7393, -5.5868, -5.8670, -8.4512,
        -7.8967, -6.1896, -7.4313, -6.3232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4027, -6.3469, -7.5161, -6.7022, -7.0479, -8.0105, -6.0553, -7.5264,
        -7.3030, -6.5893, -6.1148, -5.5880, -7.0653, -7.8332, -7.4823, -6.2743,
        -6.1813, -6.1180, -7.5356, -6.4227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2215, -6.8253, -7.7753, -7.2060, -7.4630, -7.5825, -7.0574, -8.0464,
        -7.5709, -5.5984, -6.4085, -6.5317, -5.9163, -7.0425, -7.6473, -7.7749,
        -5.7866, -6.0898, -6.1027, -6.1991], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7822, -6.1042, -6.9821, -8.0031, -8.3346, -6.5790, -5.5198, -5.1754,
        -6.1605, -7.8661, -8.5362, -6.0833, -5.8175, -5.5343, -6.0457, -7.9137,
        -7.6854, -6.6177, -5.0819, -9.3859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7850, -5.9679, -6.3092, -7.4320, -7.2723, -6.2651, -5.7175, -6.2908,
        -7.3067, -7.8461, -7.4265, -6.3671, -6.3269, -5.4798, -7.1310, -8.1290,
        -7.1786, -6.6585, -6.3348, -7.9654], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0668, -5.9883, -6.5659, -6.0450, -6.1953, -6.7024, -7.7899, -7.7815,
        -5.9528, -5.7575, -5.7301, -5.6956, -8.0256, -7.5624, -6.7261, -5.8405,
        -6.8728, -6.9421, -7.1283, -8.1098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9005, -8.3304, -8.3918, -6.5443, -6.3910, -5.2970, -8.0738, -8.4303,
        -6.9333, -6.7263, -7.0166, -5.8553, -6.3758, -7.0927, -7.7799, -7.6610,
        -6.2480, -5.9728, -5.0242, -5.8349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9209, -6.1664, -5.8179, -5.6934, -7.1559, -7.2762, -7.8503, -5.5861,
        -6.1802, -5.3346, -6.7651, -7.9673, -7.2921, -6.3567, -6.3920, -5.8787,
        -6.1841, -6.6723, -7.9318, -7.2776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6281, -6.6290, -7.4833, -7.5426, -5.4682, -5.9389, -5.8060, -5.9992,
        -8.6013, -8.0641, -6.0508, -6.1854, -8.6088, -5.7136, -8.0689, -6.4234,
        -8.6726, -8.4778, -6.1066, -6.3202], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7427, -5.9513, -6.7957, -7.7057, -8.0069, -5.7885, -5.6019, -5.5603,
        -6.3557, -7.6795, -7.9384, -6.4903, -6.1997, -5.3121, -6.2492, -8.2829,
        -7.7147, -6.8697, -5.2365, -5.4987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2193, -5.7281, -5.7062, -6.4185, -7.5298, -7.2983, -5.3793, -6.0625,
        -5.5348, -6.4066, -8.0064, -6.7573, -6.4365, -6.5361, -5.6938, -6.7409,
        -7.5289, -7.4283, -6.4111, -5.6777], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8169, -6.2919, -7.3695, -7.5430, -6.1819, -6.6451, -7.7218, -8.0255,
        -5.7379, -5.8265, -5.6410, -6.4640, -7.8308, -7.3384, -6.5764, -5.6138,
        -6.0443, -6.3128, -7.2933, -7.7764], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2615, -5.5746, -6.3871, -8.4557, -7.1682, -6.3570, -6.9021, -7.0834,
        -7.2545, -7.6733, -7.8004, -5.9469, -8.0861, -8.2231, -5.8618, -5.8126,
        -6.9626, -6.3915, -6.7837, -7.4731], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1704, -6.3622, -7.0533, -8.1324, -7.6386, -7.7891, -6.7827, -6.3679,
        -6.1064, -6.3068, -8.1705, -7.3998, -5.6866, -5.9693, -5.6940, -7.9678,
        -6.5280, -7.9882, -6.5999, -8.3755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5207, -5.6500, -5.6679, -7.5835, -7.6085, -6.5086, -6.5601, -6.2501,
        -6.0689, -6.3576, -7.1976, -7.8381, -5.7892, -6.1223, -5.5472, -6.3567,
        -8.0805, -7.0605, -6.5968, -6.8952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6108, -8.5373, -6.1018, -6.4668, -5.2653, -5.5110, -8.3256, -7.0346,
        -6.5760, -7.0603, -6.1386, -6.1683, -6.5041, -7.5115, -7.6971, -7.0000,
        -6.4088, -6.1226, -5.6427, -8.4113], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0631, -5.6840, -6.5288, -5.8749, -7.8524, -7.7152, -6.4121, -5.6105,
        -5.9263, -6.8826, -7.2477, -7.8712, -7.5974, -6.3346, -6.7046, -6.3823,
        -5.8927, -7.7549, -8.7015, -5.7645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7722, -6.8233, -5.6620, -5.7310, -6.5441, -7.5079, -7.5607, -6.3076,
        -5.6115, -5.6712, -6.4087, -8.0304, -8.2154, -6.0297, -8.3502, -7.6708,
        -6.0024, -6.3435, -5.7552, -5.7811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6852, -7.5639, -6.4515, -5.7494, -6.2928, -5.7265, -7.0298, -7.6434,
        -7.7836, -5.2669, -5.7234, -5.5408, -5.9156, -8.0567, -7.2327, -6.0164,
        -5.9595, -5.8084, -5.7423, -7.9217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5555, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9875, -7.5625, -6.6730, -5.9865, -5.3472, -6.3881, -7.1043, -7.7585,
        -7.8476, -5.7722, -5.4489, -5.4415, -5.9637, -7.1011, -7.2180, -7.6001,
        -6.0590, -6.1284, -5.7929, -6.7889], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9429, -6.0840, -7.8170, -7.7533, -6.0802, -5.6982, -5.3034, -6.6832,
        -8.0994, -7.6938, -6.5886, -5.6716, -5.3160, -5.5209, -7.5525, -8.1223,
        -5.5330, -6.1169, -5.5940, -5.5119], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6268, -7.0395, -7.8078, -8.0016, -6.1534, -5.5841, -5.2522, -5.5975,
        -7.6097, -7.8186, -5.9021, -6.0694, -5.2521, -5.9773, -7.7473, -7.5469,
        -5.8758, -5.9919, -7.7068, -6.7788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4544, -6.3675, -6.7253, -6.9723, -7.6735, -6.5471, -7.3572, -6.2234,
        -5.2141, -6.6086, -6.1635, -6.7918, -7.2437, -6.8663, -7.3879, -6.1211,
        -8.1864, -7.6021, -6.0807, -6.3684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7978, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3365, -7.3971, -7.7779, -6.1234, -5.6694, -5.9049, -6.1726, -8.1827,
        -7.6851, -5.7130, -5.4151, -5.8485, -6.6363, -6.4818, -7.4843, -7.9981,
        -5.7579, -5.8257, -5.3527, -6.6526], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3016, -5.9539, -7.6811, -7.0425, -6.3275, -6.6455, -7.7448, -7.4116,
        -6.1396, -5.8636, -6.4635, -7.0022, -7.9391, -7.8930, -6.2371, -5.8095,
        -5.0141, -6.2449, -7.3367, -7.5739], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4948, -5.5778, -6.6562, -7.3125, -7.4477, -5.8405, -5.7337, -5.8504,
        -6.1951, -6.8288, -8.3075, -6.6320, -6.2604, -6.2689, -5.3908, -7.0156,
        -8.5057, -6.9582, -6.4276, -5.6027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3133, -8.0176, -6.9716, -8.8451, -8.1006, -5.8441, -7.1861, -5.2007,
        -5.5177, -6.7856, -8.3408, -7.2138, -5.1794, -5.7080, -5.2973, -6.7043,
        -7.2472, -7.7737, -7.9226, -6.0079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6171,  -7.5115,  -8.4909,  -7.4824,  -5.8798,  -5.8474,  -6.1472,
         -6.4615,  -6.5230,  -8.0833,  -7.8329,  -6.2131,  -5.5353,  -5.6050,
         -6.6499,  -8.3561,  -7.7260,  -6.8050,  -4.9538, -10.7936],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0757, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2088, -6.2868, -5.2543, -6.1285, -8.0821, -7.1839, -6.1349, -6.0407,
        -6.8598, -7.1817, -7.1087, -8.0276, -5.8486, -8.4905, -8.5945, -5.8525,
        -6.6571, -6.0353, -5.3213, -6.5976], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7191, -6.6821, -7.0432, -7.8350, -7.4778, -8.4858, -7.1874, -5.7060,
        -5.9195, -5.2822, -5.8187, -8.2509, -7.5394, -5.8578, -5.4848, -5.7370,
        -6.4426, -6.9110, -7.4354, -8.0902], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9524, -6.2335, -6.3811, -8.2165, -7.9184, -5.7110, -5.8905, -5.6599,
        -6.5511, -8.0618, -8.0549, -7.1799, -5.6384, -5.6249, -6.5089, -7.2799,
        -7.3335, -7.6497, -5.8909, -5.7226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7066, -8.2607, -6.7046, -6.3462, -5.5023, -7.3987, -6.7568, -7.4473,
        -7.5533, -5.8614, -6.4390, -7.9000, -8.1126, -6.0031, -5.9412, -5.5358,
        -6.1212, -7.8603, -7.8104, -6.5965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8123, -5.4054, -5.6201, -5.4581, -6.2426, -7.8175, -7.7929, -5.7768,
        -5.2805, -5.4649, -5.9690, -7.5675, -7.6770, -5.9139, -5.6014, -5.5983,
        -6.1069, -7.7405, -7.5823, -6.3840], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0199, -6.8000, -7.8596, -7.7851, -7.5340, -6.2274, -5.5678, -6.1850,
        -7.0236, -6.9974, -7.7648, -7.9688, -5.3159, -5.8834, -5.2479, -5.5503,
        -8.0897, -7.8817, -6.8532, -5.5796], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0330, -8.2969, -8.0797, -5.9779, -6.2460, -6.3546, -5.5393, -7.1685,
        -8.4446, -6.9657, -6.3185, -6.1379, -6.1839, -6.4484, -7.1139, -7.2578,
        -8.0534, -5.7570, -5.6388, -5.1869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3893, -8.0446, -6.9967, -6.5713, -5.3064, -6.7709, -6.8749, -7.3594,
        -6.5395, -8.0637, -5.8274, -8.3718, -7.8066, -5.4050, -6.0627, -5.5144,
        -5.8399, -5.7665, -7.2514, -7.9718], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1932, -7.9569, -6.4814, -6.2551, -5.8860, -6.2795, -8.3497, -7.5678,
        -6.0853, -6.2039, -6.3542, -6.2403, -6.5949, -7.4700, -7.6578, -6.0443,
        -5.2543, -6.7743, -6.2091, -6.9974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7099, -5.8663, -6.3187, -6.0873, -7.7797, -7.2890, -6.2667, -5.2916,
        -6.5336, -6.8991, -6.7234, -7.1809, -7.8910, -4.9987, -5.4963, -6.2526,
        -6.3936, -8.3201, -7.9729, -6.1678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8538, -6.6608, -7.0381, -7.4298, -7.5062, -6.2371, -5.7287, -5.3689,
        -6.3766, -6.9546, -7.8107, -7.5728, -6.3296, -5.4500, -5.4545, -6.3487,
        -7.6122, -8.2184, -5.2734, -5.9694], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2032, -6.2480, -5.4927, -5.5861, -8.4612, -7.6339, -6.5637, -6.3538,
        -6.0856, -6.9742, -6.9378, -8.1450, -6.0347, -8.3993, -8.5266, -5.6869,
        -6.4492, -5.3441, -6.0706, -7.6370], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2811, -6.7565, -7.1407, -7.7739, -5.8811, -5.4929, -5.4538, -6.3137,
        -7.3816, -7.8524, -5.4648, -5.9170, -5.4144, -6.2936, -8.0511, -8.1876,
        -5.8015, -5.7576, -6.1713, -6.9520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6722, -5.0228, -5.5791, -7.2837, -7.8203, -6.3858, -5.6594, -5.9086,
        -6.3725, -7.5354, -7.9710, -6.2149, -6.4465, -6.6454, -6.0390, -7.1716,
        -7.7648, -7.4240, -5.0897, -5.5783], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8186, -7.7945, -6.1808, -6.0788, -5.5521, -5.9864, -7.8859, -7.5684,
        -6.5483, -5.7430, -5.6724, -6.1127, -6.8318, -7.7719, -7.6632, -6.4622,
        -5.9816, -5.5798, -5.6862, -8.0222], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0552, -7.8220, -8.1358, -5.6129, -5.5235, -5.8207, -6.0588, -7.6089,
        -8.1879, -5.6267, -6.6458, -9.2064, -5.1031, -6.9940, -6.3177, -7.5085,
        -7.4935, -7.7686, -6.6647, -6.9812], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6626, -5.5857, -5.6698, -7.4135, -7.6397, -6.8416, -5.9181, -5.6877,
        -6.2587, -7.3943, -7.5031, -6.2714, -6.3614, -6.8304, -6.6196, -7.1692,
        -7.6687, -7.5800, -5.8788, -6.0269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1948, -6.0046, -8.3286, -7.4116, -5.7907, -6.0311, -5.0200, -6.6839,
        -6.8651, -7.4919, -7.5658, -6.0587, -5.4383, -5.3125, -6.1959, -8.1334,
        -7.5507, -6.0533, -5.1339, -8.1263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5196, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1949, -5.7542, -8.0399, -8.1948, -6.5515, -5.5668, -5.5028, -6.3673,
        -6.7959, -7.9977, -7.8839, -5.3652, -6.1316, -5.3348, -6.4034, -8.1772,
        -7.0730, -6.2631, -5.4401, -5.7649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0142, -5.5117, -5.3161, -6.3730, -7.5401, -7.5897, -5.4393, -5.9563,
        -5.3197, -6.0291, -8.0723, -7.6186, -5.4556, -5.8204, -5.7148, -5.9874,
        -6.4785, -8.1414, -7.6908, -6.3012], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9015,  -7.7261,  -6.1963,  -5.7366, -10.2231,  -5.7685,  -7.7352,
         -6.1264,  -7.9451,  -8.3371,  -5.7248,  -6.2124,  -8.5090,  -5.7889,
         -8.2207,  -6.2084,  -8.1025,  -8.4583,  -7.4114,  -6.1049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9752, -6.5493, -7.6220, -7.9777, -5.0389, -6.0946, -6.6794, -8.7059,
        -7.8449, -6.5275, -6.1530, -6.8177, -7.5141, -7.7976, -8.0499, -5.9831,
        -8.2472, -8.4019, -5.9604, -5.7478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0216, -5.2124, -5.5608, -6.2139, -6.1063, -8.4838, -7.1547, -5.7339,
        -5.7415, -9.7092, -5.4550, -8.1949, -6.1301, -8.0818, -8.3256, -5.8874,
        -5.9667, -6.2141, -6.2189, -7.1357], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7796, -6.4040, -8.7561, -7.3487, -5.9263, -6.8661, -5.7201, -5.6451,
        -6.2110, -7.9424, -7.1115, -6.1302, -6.2593, -5.8261, -7.2116, -7.8276,
        -7.8076, -6.3636, -8.4210, -7.7550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4290, -5.5131, -5.6225, -6.4728, -5.9023, -7.1752, -7.7219, -8.0524,
        -6.2770, -5.8939, -6.0731, -6.5702, -7.7360, -7.8929, -6.3490, -5.4390,
        -6.2516, -6.3408, -7.1329, -7.1083], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7909, -5.4972, -5.7184, -5.5834, -5.9742, -6.7847, -7.3369, -8.0378,
        -5.0607, -5.7112, -6.5666, -5.7278, -6.7723, -7.4253, -8.5508, -6.0441,
        -6.0928, -5.8923, -6.2635, -6.3652], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0819, -7.5580, -7.6318, -7.6142, -7.1050, -7.5381, -7.2213, -8.5434,
        -7.8027, -5.7137, -6.3941, -5.8721, -6.6868, -8.1575, -7.0715, -6.0061,
        -5.8654, -8.7671, -6.0842, -7.7256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8975, -5.5841, -6.9419, -7.3000, -7.3731, -6.6505, -6.0525, -6.8399,
        -6.0905, -7.7154, -7.0919, -7.7378, -7.1273, -8.4926, -8.3955, -5.8875,
        -6.3883, -5.4490, -6.4535, -7.3152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9638, -7.7365, -6.4427, -8.3670, -6.1138, -7.9541, -8.0340, -6.4822,
        -5.8773, -6.2262, -5.6172, -6.1080, -7.8443, -7.0771, -6.2215, -5.0152,
        -7.8391, -7.6766, -6.8234, -8.0771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7637, -7.4219, -6.1894, -6.4235, -5.6801, -6.0192, -8.3454, -7.2265,
        -6.4605, -5.8587, -5.5966, -6.5386, -7.2247, -7.9540, -7.8109, -5.6062,
        -5.7297, -7.1267, -5.3389, -8.0063], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0575, -6.6698, -5.2396, -8.6497, -6.3464, -5.8876, -5.3897, -7.0947,
        -6.6793, -8.0059, -5.9160, -7.9625, -7.4972, -5.8315, -6.1462, -5.9975,
        -5.6944, -8.1499, -7.3080, -5.8830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2153, -8.3025, -7.1601, -6.3326, -5.9683, -6.6121, -7.0250, -7.2933,
        -7.7386, -5.9169, -8.0441, -8.2006, -6.5079, -6.5278, -5.7585, -6.2380,
        -8.2804, -7.3805, -6.4614, -6.9263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4733, -6.1323, -7.4842, -7.3880, -6.7113, -7.7125, -6.4042, -7.8182,
        -8.0597, -6.3227, -5.8162, -5.7241, -6.1578, -8.0992, -7.2073, -6.2317,
        -5.8914, -5.4264, -5.7530, -7.9165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8133, -6.4959, -8.9908, -8.3268, -6.3771, -6.3897, -5.4624, -5.9344,
        -7.1146, -7.5970, -7.8830, -5.3515, -5.8490, -5.3748, -6.2215, -8.4580,
        -6.8578, -5.9955, -5.5516, -5.2794], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6208, -8.1458, -7.5236, -6.4276, -5.4654, -5.3937, -5.7374, -6.8933,
        -7.2463, -7.7986, -5.2686, -5.7086, -6.5299, -6.2536, -7.8906, -7.9228,
        -4.9803, -5.9595, -5.1707, -5.4315], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6478, -5.8729, -6.6429, -7.9461, -7.2351, -5.8228, -5.1686, -5.9204,
        -6.5737, -7.7846, -7.6751, -5.5887, -5.5994, -5.6953, -6.4135, -8.6276,
        -7.4840, -6.2789, -6.5991, -6.0226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5199, -7.4573, -7.7849, -7.4607, -5.3902, -6.2422, -6.4467, -6.3870,
        -7.6353, -7.0545, -7.5299, -6.0975, -6.1732, -6.9670, -6.2571, -7.1663,
        -6.7488, -7.5408, -6.3603, -8.8811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9550, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4595, -7.5597, -6.7001, -5.2701, -5.5913, -5.9153, -8.3145, -7.9358,
        -5.7728, -6.0719, -6.1555, -5.9538, -7.9526, -7.6444, -6.6016, -5.9221,
        -9.0642, -7.4593, -7.6121, -7.2563], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9149, -7.5434, -6.4952, -7.8383, -8.0303, -7.2395, -5.0929, -6.3430,
        -7.9404, -8.3014, -7.5518, -6.4779, -5.5002, -6.1848, -6.9056, -7.6833,
        -7.4184, -5.7828, -5.6865, -5.6526], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0849, -7.1428, -8.1792, -7.4744, -8.3993, -7.3594, -5.8494, -6.4326,
        -6.2681, -5.8146, -5.9736, -8.2173, -6.9466, -6.1558, -6.1506, -5.1895,
        -6.2256, -7.0637, -7.7387, -7.4418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0269, -6.9615, -8.3807, -6.9918, -5.9103, -6.0153, -7.7461, -6.8759,
        -6.0575, -8.1128, -7.5314, -8.7766, -8.7252, -6.7843, -6.5035, -5.4927,
        -5.3541, -8.3054, -6.9411, -5.6893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4407, -7.3033, -7.9612, -5.9426, -6.3905, -5.9755, -6.4876, -8.0223,
        -8.1103, -5.5632, -5.7754, -5.4053, -7.2311, -7.7888, -7.5049, -6.1989,
        -6.0093, -5.3568, -6.6806, -7.7800], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8626, -7.1816, -7.6802, -5.7687, -5.2981, -5.5448, -6.5410, -7.8127,
        -8.0020, -6.2955, -5.3280, -5.2714, -6.5685, -7.7379, -7.6907, -5.7459,
        -5.9225, -5.5568, -6.6264, -8.5045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3489, -5.3766, -4.9874, -5.8173, -7.2049, -7.3602, -6.3789, -5.7326,
        -5.7294, -6.4191, -8.3747, -8.0275, -6.0855, -7.1265, -5.9733, -7.1346,
        -7.8452, -7.6503, -5.9520, -6.2465], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6154, -8.0055, -6.2112, -5.8647, -5.5473, -6.6697, -7.8508, -7.8246,
        -6.3326, -5.6014, -6.5835, -5.8175, -7.2250, -7.8830, -8.0832, -5.9086,
        -5.7780, -6.0674, -5.4041, -7.9591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7116, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3483, -6.7393, -7.8671, -7.6550, -5.6832, -5.8324, -5.8934, -6.6935,
        -6.5073, -7.5282, -8.1038, -5.7671, -5.6609, -5.8750, -5.8014, -7.7093,
        -8.4062, -6.1335, -6.0196, -5.8193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9314, -5.7327, -6.2712, -7.9942, -7.1557, -6.5850, -6.4431, -7.0024,
        -7.6366, -7.1478, -8.0405, -6.2292, -8.5385, -8.3215, -5.7147, -5.9717,
        -5.2529, -6.1199, -8.4219, -7.0280], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1846, -5.8931, -6.5542, -6.8972, -6.3677, -7.6991, -7.8246, -5.9561,
        -5.8274, -6.2807, -6.4703, -7.9526, -7.7517, -5.6890, -5.5132, -5.5716,
        -6.1166, -7.0882, -7.3966, -7.5028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9088, -8.6733, -8.2978, -6.0651, -6.1763, -5.7179, -6.8144, -7.8400,
        -6.8075, -5.7711, -7.0258, -5.7656, -5.9226, -7.3580, -6.8106, -8.0504,
        -6.6087, -8.5575, -8.4623, -5.7738], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7216, -5.2810, -6.0390, -6.4479, -6.7846, -7.4482, -7.6154, -5.4494,
        -5.7763, -5.2290, -6.1217, -8.3961, -7.3252, -5.5830, -6.8188, -6.2618,
        -7.1683, -6.7426, -7.9388, -6.7483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3475, -7.3252, -6.0196, -6.3100, -7.7040, -6.3560, -6.0981, -7.4806,
        -6.1195, -7.6611, -7.7698, -5.3094, -6.9165, -6.2269, -7.1144, -7.7876,
        -6.8437, -7.4821, -5.9557, -4.8994], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4693, -7.6600, -5.9525, -6.1368, -5.5998, -5.8378, -8.1144, -7.4375,
        -6.2941, -6.4327, -6.7991, -6.7681, -6.6510, -6.1898, -7.2755, -8.1646,
        -6.8750, -6.2161, -5.6097, -7.1695], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5430, -7.3472, -6.0687, -6.2578, -5.1495, -6.2742, -7.8454, -7.5808,
        -5.4833, -5.7972, -5.3914, -6.7109, -8.5616, -7.5028, -6.3175, -6.2004,
        -8.6662, -6.1732, -8.1439, -6.0227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7519, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9420, -5.9280, -6.9778, -8.0333, -7.0361, -6.6150, -5.6872, -6.0970,
        -7.0135, -7.6184, -7.2829, -5.9477, -6.1741, -5.5525, -6.4247, -7.7186,
        -7.5412, -6.3167, -5.2209, -5.2374], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5581, -6.0743, -6.9990, -7.4833, -7.9571, -4.6090, -5.5885, -5.6870,
        -6.4356, -7.8165, -7.4809, -6.1182, -5.5125, -6.4859, -7.2950, -6.7345,
        -8.0384, -6.1448, -8.2830, -7.8520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3993, -5.8982, -5.4931, -5.1821, -6.1088, -8.3100, -6.8990, -6.6003,
        -5.6070, -5.2283, -5.6509, -7.5083, -7.4370, -5.6318, -5.2831, -5.8838,
        -6.3696, -6.6741, -7.2793, -7.7006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5588, -5.3554, -5.5140, -5.4935, -6.1002, -6.8223, -7.7769, -5.6452,
        -5.8299, -6.5267, -6.6506, -7.5860, -7.3687, -7.8411, -6.1310, -5.6448,
        -5.2122, -6.0137, -8.1976, -7.3068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5498, -6.5445, -4.9809, -5.2367, -5.8708, -7.2398, -6.9360, -7.3427,
        -4.9311, -5.3923, -5.9944, -5.7836, -6.5796, -7.5006, -7.6915, -5.3302,
        -5.3360, -5.0936, -5.2408, -7.3929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3855, -6.7727, -9.7543, -7.8881, -7.5711, -7.2244, -6.2322, -6.5357,
        -8.3580, -7.5378, -5.8598, -5.6063, -5.2489, -5.1339, -7.9698, -8.0761,
        -5.9441, -5.7891, -5.4825, -5.9321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2702, -5.7379, -5.2349, -6.4343, -8.3419, -7.4383, -6.8445, -5.8646,
        -4.8745, -6.2745, -6.6928, -7.9854, -7.7614, -4.4304, -5.3343, -4.9773,
        -7.1724, -8.2484, -6.4509, -5.9209], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3955, -7.5869, -7.1684, -5.6072, -5.5036, -5.0370, -5.8348, -8.0265,
        -7.8523, -5.4154, -6.4737, -6.1597, -5.4949, -6.7229, -7.4737, -7.4665,
        -5.6843, -5.5050, -6.1797, -5.4440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6558, -5.9719, -6.4554, -5.6915, -5.6352, -6.8959, -7.5164, -7.3142,
        -6.2826, -5.2951, -5.0126, -6.3016, -7.6487, -7.0871, -6.3281, -4.9608,
        -5.1752, -5.6327, -6.7873, -6.9502], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0167, -6.6301, -8.0819, -6.9192, -6.9186, -5.7847, -8.6022, -7.4662,
        -7.0638, -7.1451, -7.9991, -6.8913, -7.3498, -7.6837, -7.0789, -6.5825,
        -5.8468, -7.7044, -8.4644, -6.5723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3418, -7.5008, -7.6809, -5.3695, -5.5108, -5.0463, -6.3919, -8.1949,
        -6.9474, -5.9189, -7.1569, -8.8087, -5.4952, -7.4537, -5.7587, -8.2280,
        -8.0715, -5.5659, -6.0205, -6.2230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7949, -6.0069, -6.0414, -5.5425, -5.8836, -8.2391, -6.8666, -7.0345,
        -6.6243, -5.9580, -6.4382, -7.5100, -6.5587, -8.0691, -6.3618, -7.8977,
        -7.6324, -5.1470, -5.6464, -5.3636], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4892, -6.0471, -5.5281, -6.8262, -7.8985, -7.5220, -5.1239, -5.4621,
        -6.0168, -5.8927, -8.3950, -7.2402, -6.2576, -5.4597, -6.3558, -5.6551,
        -6.4563, -7.5933, -7.9509, -5.5795], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2672, -5.1279, -6.6023, -6.5093, -6.8769, -7.3598, -7.2914, -5.9354,
        -6.0955, -5.5886, -6.0239, -7.6159, -7.8752, -4.9184, -5.7891, -5.3877,
        -6.1485, -8.4368, -7.1874, -6.6285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4471, -8.2403, -7.4040, -6.4632, -5.5962, -4.5196, -5.9629, -7.3769,
        -7.4270, -6.8464, -6.6060, -6.6904, -6.1078, -7.1276, -7.1995, -7.3402,
        -5.1573, -5.7010, -5.2558, -5.6635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1413, -6.7861, -8.4493, -6.8477, -5.9206, -5.3249, -8.7333, -6.6353,
        -7.0058, -7.8017, -5.1303, -7.4961, -7.9685, -5.9743, -6.0949, -6.2664,
        -5.5732, -6.2920, -7.5595, -7.6328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1016, -7.6761, -6.0911, -5.7952, -5.6255, -6.2200, -7.9176, -7.6717,
        -6.0076, -5.5099, -4.9208, -5.4703, -6.0219, -7.2321, -7.8279, -6.1753,
        -5.0893, -5.2857, -6.1297, -7.3051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6726, -5.6488, -5.0873, -6.2721, -6.4355, -7.6355, -7.6925, -6.1933,
        -5.6253, -5.5207, -5.0690, -7.9778, -8.2276, -6.4521, -5.9669, -5.8943,
        -6.1414, -6.9521, -8.2363, -7.9378], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9286, -7.6291, -6.9941, -7.2171, -5.5740, -6.1999, -7.2962, -7.1052,
        -7.6786, -5.9667, -8.8147, -8.3356, -6.1267, -5.5286, -6.0566, -5.9246,
        -6.4134, -8.3141, -7.0385, -5.9036], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9928, -6.5008, -4.9852, -5.8243, -5.8142, -6.7549, -8.1315, -7.5594,
        -6.1866, -5.5809, -5.1818, -6.1656, -8.2239, -7.0427, -6.6193, -5.3109,
        -4.9450, -5.9400, -6.5301, -7.7232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5497, -5.4968, -5.4566, -4.9835, -5.7128, -7.9403, -8.0815, -6.0889,
        -5.4705, -6.8164, -6.1758, -6.7541, -7.6483, -7.8551, -5.4962, -5.8699,
        -5.7122, -5.5761, -7.7383, -7.5602], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2242, -5.1011, -6.2941, -6.4729, -7.8501, -7.9501, -5.3875, -5.6728,
        -5.1009, -6.4175, -8.3187, -7.2136, -6.5951, -6.1427, -5.9549, -6.7689,
        -8.0454, -7.7719, -4.9416, -5.2348], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3077, -5.6068, -5.2744, -5.8393, -6.7692, -7.1093, -7.2828, -7.3083,
        -5.2257, -5.3881, -4.9389, -6.4465, -8.1773, -7.2328, -5.7179, -5.0605,
        -4.9755, -5.5979, -6.7860, -7.7274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6464, -5.8625, -7.5959, -5.7769, -8.5819, -8.0105, -6.2033, -6.3427,
        -5.0563, -5.7306, -8.2187, -7.6180, -6.1391, -7.2870, -5.7191, -5.8783,
        -7.5574, -8.0410, -7.6519, -7.5408], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4362, -5.5031, -6.4513, -7.7155, -7.5529, -6.0556, -5.1775, -5.4353,
        -6.9090, -6.8468, -7.4183, -7.4854, -5.9974, -4.6568, -5.2451, -5.5732,
        -7.2168, -7.4593, -5.8657, -5.7416], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5554, -7.1435, -6.3041, -8.4934, -6.0801, -8.7115, -8.6618, -6.1417,
        -6.8990, -5.3471, -6.6077, -7.9016, -6.7902, -6.2416, -5.5844, -7.3522,
        -7.4533, -6.2478, -8.3109, -6.3131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3185, -6.0673, -7.4870, -7.2811, -6.7720, -5.2967, -6.9009, -6.4866,
        -8.5189, -5.9932, -8.3052, -8.2965, -6.1734, -6.5920, -6.1982, -8.5490,
        -7.8249, -7.2670, -8.4321, -7.8593], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3803, -5.4848, -5.1048, -5.5276, -7.0817, -7.3673, -5.4502, -6.0470,
        -8.5543, -6.6861, -6.8081, -5.9747, -7.4789, -7.8763, -5.6747, -6.0141,
        -5.8197, -4.7372, -6.1989, -7.7609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2267, -7.7640, -5.5365, -7.1144, -5.0813, -6.7883, -7.6334, -7.2182,
        -6.3840, -5.8463, -4.6083, -6.8025, -7.5514, -7.3638, -5.8465, -5.2533,
        -4.7324, -6.0894, -7.2128, -7.6900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5711, -4.9971, -5.9731, -5.1820, -6.6380, -7.8067, -7.3034, -5.6817,
        -5.6080, -4.8334, -6.1173, -8.4067, -7.2878, -5.6848, -4.9792, -7.7965,
        -6.3042, -8.5379, -7.0951, -8.6413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2927, -5.5806, -5.0493, -5.8212, -7.6199, -7.4067, -5.3811, -5.7364,
        -4.9138, -6.7202, -7.8716, -7.7223, -5.4989, -5.4640, -4.8849, -5.3411,
        -8.1140, -8.0832, -7.6207, -6.0640], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9718, -7.2080, -5.1352, -5.7008, -5.6678, -5.3071, -6.3973, -7.8099,
        -6.9442, -6.2172, -5.3332, -6.6749, -6.1581, -7.3926, -8.1961, -7.4971,
        -7.0361, -6.3100, -5.6761, -7.7579], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5696, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0993, -6.9192, -7.0459, -7.2430, -6.1493, -5.6186, -5.9062, -5.7145,
        -6.9610, -7.9786, -7.5803, -5.2016, -5.3485, -5.1290, -5.6958, -7.0898,
        -7.7161, -5.4208, -5.9145, -5.6826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4429, -7.9972, -7.2464, -6.2777, -5.5869, -6.1468, -6.3383, -6.8212,
        -8.0219, -7.8150, -5.4305, -5.9220, -5.9442, -6.0034, -6.9120, -7.7795,
        -7.5938, -6.4786, -5.5336, -5.0585], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0054, -7.8871, -6.2284, -6.2315, -8.6920, -6.1495, -7.0260, -7.1160,
        -7.5452, -5.5249, -5.7931, -6.4605, -5.9469, -6.7396, -7.6832, -7.6847,
        -6.7179, -4.8964, -5.0803, -6.2510], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1166, -6.0538, -5.5858, -5.1151, -5.4075, -7.8951, -7.7960, -5.7081,
        -5.6340, -5.2635, -5.6344, -7.7974, -7.4106, -6.1961, -5.6989, -5.6764,
        -6.1634, -6.4080, -7.3192, -7.7620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3175, -5.3299, -5.3031, -7.8743, -8.4499, -6.2187, -5.6253, -5.3856,
        -5.1591, -7.7252, -7.8203, -6.1740, -5.7797, -5.6167, -5.5384, -7.1536,
        -8.1730, -7.5462, -6.1343, -5.9111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2646, -6.0232, -6.2240, -6.0644, -7.1844, -7.6888, -5.8765, -5.2111,
        -4.8863, -6.5534, -7.7984, -7.3012, -6.2184, -5.3482, -6.3277, -5.8084,
        -7.2031, -7.5955, -7.6439, -5.3411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8218,  -8.2828,  -7.3005,  -6.6988,  -7.0065,  -6.0198,  -6.7939,
         -6.5225,  -7.4332,  -7.2360,  -5.1616,  -5.5759,  -5.5863,  -6.6260,
         -7.9787,  -7.4188,  -6.7389,  -5.8082, -10.7784,  -7.4482],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1379, -8.4939, -6.9784, -6.2649, -6.7221, -9.5019, -6.6481, -7.5320,
        -6.3495, -7.2781, -8.2070, -5.4400, -5.8797, -5.0581, -6.2776, -7.9247,
        -7.2822, -6.3227, -6.0451, -8.2279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9069, -6.3398, -5.9203, -5.1774, -6.8690, -8.1262, -6.6563, -6.4965,
        -5.6425, -5.0069, -5.5724, -7.1897, -7.2203, -5.9279, -5.6581, -5.6583,
        -6.1207, -6.8704, -7.1700, -7.3929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3920, -5.0743, -6.8485, -8.3734, -6.7274, -5.6595, -5.5260, -5.3681,
        -6.0675, -6.8740, -7.3797, -7.3492, -6.2105, -5.3514, -5.2132, -6.3013,
        -7.7205, -7.2806, -5.0724, -5.4446], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0803, -6.2510, -6.8934, -7.6318, -7.5163, -5.8364, -5.0839, -5.7874,
        -7.0933, -7.8163, -7.1882, -7.4491, -6.4600, -4.8584, -6.8082, -7.7276,
        -7.0874, -6.0158, -5.2302, -6.8649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1183, -5.5791, -6.5611, -7.6165, -7.9503, -5.1616, -5.2282, -4.9957,
        -5.4402, -7.3520, -7.6392, -5.3193, -5.8716, -5.3671, -5.0917, -7.6563,
        -8.1778, -5.7266, -5.5817, -5.4068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0555, -7.3408, -5.5926, -5.3927, -5.1330, -6.4022, -8.2824, -6.6637,
        -6.6066, -7.1382, -8.1005, -7.2371, -7.0775, -7.5548, -5.4127, -6.7278,
        -7.6855, -5.9203, -6.4388, -4.9793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2051, -7.6465, -6.3010, -5.4706, -5.3108, -5.8956, -7.7789, -7.7721,
        -5.5557, -5.3421, -6.2957, -6.1199, -6.6821, -8.2327, -7.1598, -6.1631,
        -5.6000, -6.3639, -5.3339, -6.9402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8841, -5.8230, -7.8408, -6.9179, -7.1119, -7.3135, -6.1657, -5.8677,
        -6.4839, -7.3044, -7.6050, -6.3104, -5.3991, -6.1568, -7.0728, -8.0389,
        -7.2382, -5.7426, -5.2424, -5.0015], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4863, -6.7893, -5.2627, -5.7574, -6.5316, -7.1361, -8.3706, -6.0074,
        -8.7399, -7.2852, -5.8208, -6.4305, -4.9554, -5.8226, -8.0974, -6.4086,
        -6.3318, -5.7938, -9.5037, -6.9380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2793, -7.8513, -7.9971, -5.9803, -6.3130, -5.6131, -5.5272, -6.8188,
        -7.5109, -7.5028, -5.9018, -5.2754, -4.9855, -5.7266, -7.4414, -7.2229,
        -5.5152, -5.6908, -5.0756, -6.5735], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3771, -5.6275, -8.2043, -8.4883, -5.7629, -5.9300, -6.2442, -6.2564,
        -6.0480, -7.4437, -7.4517, -6.2461, -5.2585, -5.0933, -5.7907, -7.8096,
        -7.2007, -6.0866, -5.7707, -5.2240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7117, -6.0182, -5.3307, -5.1343, -8.4237, -7.1662, -6.4939, -7.0861,
        -5.5026, -5.6373, -5.9053, -7.0007, -7.9926, -7.4190, -7.8571, -6.1499,
        -6.3763, -6.5191, -7.7766, -7.0274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2205, -6.6606, -6.1309, -7.2859, -7.8794, -6.9052, -7.7607, -6.9578,
        -7.2357, -5.7834, -8.3749, -7.5930, -6.4076, -5.9653, -5.5582, -6.3157,
        -8.3266, -7.3092, -6.0821, -4.9073], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4812, -6.6562, -7.0882, -7.6274, -5.4725, -5.1267, -5.2385, -6.4662,
        -7.6724, -7.5847, -6.1361, -4.9595, -5.3702, -6.3084, -6.6972, -7.1575,
        -7.6602, -5.7854, -5.2015, -4.9305], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8886, -6.4696, -5.6053, -5.7973, -8.0161, -6.8980, -6.5730, -7.8014,
        -8.5842, -7.0533, -6.8826, -7.3554, -5.7787, -7.7157, -8.1244, -6.1552,
        -6.3337, -5.6212, -6.3960, -8.2939], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8672, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0890, -5.8334, -8.3046, -7.1601, -5.5632, -7.2616, -6.3618, -6.4351,
        -6.6470, -6.8910, -8.0081, -6.2488, -8.7358, -7.2549, -5.4466, -6.7282,
        -5.8640, -6.5185, -8.3376, -6.9636], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7772, -8.1244, -5.6104, -7.4258, -6.2452, -8.0435, -6.8229, -7.9803,
        -7.9726, -6.1311, -5.6375, -4.9142, -6.5616, -6.4725, -7.2497, -7.4665,
        -6.0342, -5.2111, -4.7623, -5.9521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8093, -7.5098, -6.1683, -5.6449, -7.8582, -7.4668, -7.4779, -7.8374,
        -5.8262, -8.1715, -8.1401, -5.5231, -5.8003, -5.4456, -4.7394, -8.0562,
        -7.9370, -5.7659, -5.4011, -5.0440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7824, -7.2860, -7.0374, -5.5068, -6.4603, -6.4560, -7.3313, -7.6543,
        -7.7602, -5.9649, -5.6836, -5.6189, -9.0766, -8.0561, -6.7314, -6.0456,
        -9.1340, -6.3489, -7.4397, -6.6967], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7998, -5.8308, -5.6316, -5.1362, -8.2837, -7.2050, -5.9575, -6.5428,
        -5.8870, -8.0973, -7.3662, -8.3394, -6.3533, -8.1704, -7.5269, -6.8615,
        -5.4842, -6.1248, -6.4984, -6.6192], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8161, -6.2024, -6.7459, -7.3796, -7.4878, -5.8235, -5.8238, -4.8891,
        -5.8105, -7.6695, -7.3771, -5.5383, -5.8283, -5.1829, -6.7496, -8.0391,
        -7.2055, -6.0989, -5.6831, -4.8717], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6455, -5.2038, -5.5592, -5.3873, -7.9954, -6.7997, -5.9581, -6.0132,
        -4.9043, -6.5189, -6.3482, -7.1322, -7.3201, -6.8363, -5.1318, -6.2164,
        -6.4476, -6.8447, -7.4584, -7.3369], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2414, -5.3519, -6.9857, -7.7477, -7.5507, -5.6283, -5.3106, -7.0275,
        -5.7528, -6.6483, -7.8081, -7.5531, -5.1952, -5.3465, -4.7984, -6.2404,
        -8.1370, -6.8662, -6.2873, -6.4310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2472, -7.6564, -5.2672, -5.9595, -6.0321, -5.3542, -6.7517, -8.2179,
        -7.0516, -5.3306, -5.1533, -6.6883, -6.2264, -6.6078, -7.3319, -7.4955,
        -5.9243, -6.2341, -4.9842, -6.1897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9394, -5.3520, -5.9207, -7.0665, -7.3280, -7.2875, -5.2174, -5.7428,
        -4.6268, -6.2999, -8.0123, -7.4749, -5.9751, -5.2348, -4.8888, -5.7335,
        -7.2805, -7.5911, -7.5881, -5.4426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3159, -7.3927, -6.3032, -7.4760, -7.7548, -6.0350, -6.9966, -7.3024,
        -7.4053, -7.7749, -5.9033, -8.5878, -6.8302, -6.1707, -6.1698, -4.9723,
        -6.1536, -8.1188, -7.4282, -5.4826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3620, -7.1570, -7.3836, -7.7754, -7.0444, -6.1076, -5.5223, -7.0232,
        -8.2594, -7.4617, -6.0456, -5.8001, -9.4065, -7.5747, -7.6173, -7.8174,
        -5.5872, -8.0665, -8.2953, -6.1149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7340, -5.5282, -7.6486, -7.4954, -7.4736, -7.8582, -5.8072, -7.9716,
        -7.6231, -5.4873, -5.7125, -5.1516, -5.6217, -6.3455, -8.0005, -7.7569,
        -4.4601, -6.0235, -5.2418, -6.9258], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6499, -6.7417, -8.4046, -6.9951, -6.0250, -6.4024, -7.9397, -6.8977,
        -8.5739, -6.2234, -7.4620, -7.1731, -6.8733, -5.3747, -5.9857, -6.5246,
        -6.6380, -7.7454, -7.5915, -5.2712], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6107, -7.2344, -7.6509, -5.6931, -5.4544, -6.4412, -5.6073, -5.8294,
        -7.5634, -6.8095, -6.2634, -5.1708, -6.5349, -5.9955, -6.9068, -7.9385,
        -5.9956, -6.4299, -5.7986, -5.8923], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1482, -5.9300, -7.5493, -7.4492, -5.5348, -5.5581, -5.4207, -5.4855,
        -8.2014, -8.0812, -6.1454, -5.3379, -6.4734, -5.8365, -6.8147, -7.3530,
        -7.4831, -5.5790, -6.0780, -6.2718], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3866, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1727,  -6.5518, -10.2627,  -5.4067,  -7.7551,  -5.8567,  -8.1618,
         -8.4332,  -5.7003,  -6.0606,  -6.1746,  -5.0276,  -5.9742,  -7.6294,
         -7.1018,  -5.6215,  -5.2489,  -5.3836,  -6.2908,  -7.6868],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2525,  -7.4718,  -7.8329,  -7.1896,  -6.0744,  -4.7759,  -7.7759,
         -8.2421,  -6.7595,  -6.0718,  -5.1087, -11.7464,  -6.0679,  -7.5951,
         -5.8747,  -6.3928,  -7.7390,  -6.2030,  -7.1751,  -5.4604],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2778, -8.1449, -5.9168, -7.1457, -6.4969, -6.8049, -7.4644, -7.5278,
        -7.9164, -6.2558, -6.5714, -5.4002, -5.8416, -7.3351, -6.5640, -8.3348,
        -5.6194, -8.3321, -8.3739, -5.6553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1861, -8.2149, -7.1045, -5.8872, -6.9158, -4.9827, -5.8579, -6.8238,
        -7.8861, -7.5537, -5.5150, -5.6453, -5.2329, -6.7032, -7.5142, -7.3767,
        -5.9630, -5.6494, -6.6546, -6.5072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5489, -5.8417, -5.2820, -5.0457, -6.0999, -8.3470, -7.2285, -5.6106,
        -5.2840, -4.9575, -6.5255, -7.7068, -7.1468, -6.4698, -5.2234, -5.2107,
        -6.6900, -7.2363, -7.1965, -5.7837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6984, -6.4609, -5.0942, -6.7448, -8.0485, -7.3681, -6.1070, -5.6779,
        -6.6666, -5.4261, -7.0293, -7.6516, -7.3103, -5.5281, -5.9145, -6.0178,
        -6.9107, -8.2177, -7.3472, -6.2888], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6054, -5.7500, -5.8260, -6.1707, -7.5562, -7.6666, -5.6578, -5.5939,
        -5.7364, -6.1870, -7.0697, -7.4967, -7.3231, -5.4215, -6.0350, -5.0363,
        -6.0163, -7.7168, -8.1694, -6.7518], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6373, -7.6086, -7.8985, -5.3959, -5.3616, -5.3475, -5.6148, -7.8590,
        -7.3078, -6.1984, -5.9123, -4.8148, -5.9285, -7.1914, -7.6150, -7.6430,
        -5.5838, -5.3251, -7.0345, -5.7159], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9848, -4.7426, -6.8452, -8.1141, -6.8079, -7.0216, -6.2243, -8.6178,
        -6.3548, -7.0928, -7.4511, -7.8286, -5.0102, -6.3329, -7.8317, -7.5563,
        -6.3342, -5.2034, -5.0041, -5.9308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8115, -5.5204, -6.7750, -5.3379, -5.8727, -7.5940, -7.9774, -7.9153,
        -6.2608, -4.8802, -5.8197, -5.7046, -6.6189, -7.4008, -7.9142, -5.9496,
        -5.2416, -5.2599, -6.2280, -7.8165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9870, -8.0579, -7.4317, -6.8874, -5.3075, -6.4991, -5.5233, -7.2076,
        -7.4780, -7.4074, -5.5743, -5.5912, -8.8015, -8.2467, -7.1372, -7.6011,
        -5.8113, -8.1453, -7.3696, -5.9180], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2849, -5.2147, -5.1944, -4.8732, -6.4606, -6.9269, -7.3650, -7.2822,
        -5.4241, -5.4786, -5.0986, -6.1105, -8.1404, -7.2282, -5.8916, -5.4793,
        -5.1710, -6.0014, -6.4042, -7.5340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1498,  -5.7496,  -5.7659, -10.1766,  -5.3970,  -8.3923,  -5.7886,
         -8.1082,  -8.3692,  -5.9574,  -6.1372,  -5.0493,  -5.6646,  -7.6175,
         -7.7054,  -5.2443,  -6.0507,  -5.0073,  -6.1788,  -8.1315],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4071, -4.9603, -6.2473, -7.2451, -7.1111, -5.6168, -5.3580, -5.1268,
        -6.4839, -7.9330, -7.5846, -5.7925, -6.1109, -6.5679, -5.9432, -7.0495,
        -7.5697, -7.5357, -6.3355, -5.4888], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4050, -8.3844, -6.7631, -5.8251, -7.3826, -7.6259, -7.7048, -7.5542,
        -6.8999, -6.5934, -8.5596, -8.3378, -6.1634, -6.1325, -4.8824, -7.2065,
        -7.5936, -7.4464, -6.6702, -5.8688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4116, -5.3536, -6.4440, -7.5173, -7.8195, -7.7965, -6.2810, -5.4994,
        -5.3891, -6.5925, -6.9204, -7.9235, -7.4903, -5.1651, -5.4724, -5.2041,
        -5.2972, -8.4333, -7.5591, -5.8319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4007, -6.4803, -6.3370, -7.9093, -8.0086, -4.4947, -5.8409, -5.1354,
        -6.2821, -8.2950, -6.7438, -6.3367, -5.3605, -4.8655, -5.8937, -7.5542,
        -7.6375, -5.5041, -5.4279, -5.6406], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6177, -7.5614, -7.6452, -5.9332, -5.3804, -6.8534, -5.8606, -7.2468,
        -8.0155, -7.6909, -5.8672, -6.0044, -5.2676, -6.3745, -6.3843, -7.2519,
        -7.8519, -5.9291, -5.1646, -5.3648], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4258, -8.3276, -8.5505, -6.3857, -8.7155, -7.6158, -4.8915, -6.6010,
        -6.2912, -5.1116, -5.8995, -6.9433, -7.2907, -6.4540, -5.7454, -9.4863,
        -7.7236, -8.2631, -6.0870, -8.7252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8987, -5.7281, -7.1169, -7.9347, -7.4704, -5.5821, -5.4134, -5.1006,
        -6.4737, -8.1032, -7.1569, -5.9246, -5.6170, -4.8920, -6.3622, -7.6535,
        -7.2027, -6.1239, -5.3424, -5.1308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2614, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0217, -8.0347, -7.9952, -5.6971, -6.1838, -5.6585, -6.2196, -8.3830,
        -7.1522, -7.0595, -5.5239, -8.7006, -6.8543, -7.1214, -7.8480, -5.7487,
        -7.8363, -7.6224, -5.9520, -5.7288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2967, -6.7486, -5.4521, -6.6887, -5.7794, -6.8320, -7.6235, -5.9390,
        -6.4813, -7.3841, -5.8808, -7.4103, -6.5995, -8.1565, -5.8564, -9.5815,
        -8.3673, -5.7191, -6.6322, -5.1524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9216, -5.9597, -8.3073, -8.4813, -5.5046, -6.1910, -5.4368, -6.2585,
        -8.1075, -7.0781, -6.4385, -6.9271, -6.8909, -7.0097, -7.4770, -7.4240,
        -6.3077, -8.1532, -8.2906, -6.0940], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.0130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9142, -6.5616, -6.4725, -7.2497, -7.4665, -6.0342, -5.2111, -4.7623,
        -5.9521, -7.4605, -7.3310, -5.9193, -5.8826, -6.9058, -5.3236, -8.0446,
        -7.8116, -5.9447, -5.7591, -5.3024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9538, -5.0380, -5.8240, -5.5650, -5.0893, -8.2721, -7.4029, -5.9299,
        -5.5898, -5.6451, -6.2616, -8.0072, -7.5702, -5.1333, -6.1862, -5.0531,
        -6.9637, -8.3531, -7.0090, -5.9140], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1868, -7.2571, -6.3643, -5.7754, -5.0163, -5.8162, -7.4000, -7.2662,
        -6.3874, -6.6043, -8.1006, -5.8207, -7.7679, -5.8758, -8.2444, -8.3249,
        -5.4114, -5.8183, -5.1328, -5.7879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5009, -6.2630, -5.8474, -6.2300, -7.4656, -7.9714, -5.4380, -5.2623,
        -4.9723, -6.3935, -8.1755, -6.8475, -5.7828, -5.3659, -4.2802, -6.3351,
        -7.2875, -7.2201, -5.5761, -5.7595], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1781, -6.2033, -7.8072, -7.4599, -5.5342, -5.7586, -5.4391, -5.7207,
        -7.9320, -7.6988, -6.0127, -5.8574, -7.4622, -5.7139, -6.6377, -7.0437,
        -7.2544, -5.2920, -5.2907, -5.0227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2047, -6.5079, -7.5418, -7.4578, -5.9969, -5.3504, -5.2020, -6.9856,
        -7.8173, -7.1021, -5.5843, -4.8622, -5.6343, -5.9500, -7.7574, -7.3934,
        -6.9473, -6.4680, -5.5007, -6.4816], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9249, -6.2720, -6.0941, -7.0218, -7.1492, -8.0962, -6.8483, -7.8750,
        -7.8514, -6.2543, -5.8500, -4.8087, -6.3799, -7.7665, -7.3317, -6.3878,
        -6.1818, -5.4416, -6.8192, -7.9069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3234, -5.4175, -6.7240, -6.8436, -7.4192, -7.6382, -5.5373, -5.4330,
        -4.9019, -7.0287, -8.4271, -6.5554, -6.3753, -6.1356, -6.3309, -6.4788,
        -7.5273, -7.1711, -6.7048, -8.6488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0332, -5.9197, -5.5707, -5.4575, -5.5896, -7.7068, -7.5440, -6.2630,
        -5.6370, -6.3041, -5.6359, -8.3227, -8.0993, -6.5231, -5.6104, -7.9073,
        -6.7551, -6.4610, -7.4654, -7.7532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2897, -7.5101, -4.9656, -5.7723, -5.0088, -6.3107, -8.3390, -6.9602,
        -6.8383, -5.5084, -9.4687, -7.2724, -7.6524, -7.7593, -5.9359, -8.2663,
        -7.7417, -6.3561, -5.5981, -5.0307], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8193, -7.3758, -6.4556, -5.4439, -5.2328, -5.6983, -6.9088, -7.3721,
        -7.6040, -5.1260, -5.2022, -4.8076, -5.2711, -7.6287, -7.5660, -5.8670,
        -5.4706, -5.4996, -6.1583, -6.9309], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3722, -5.4052, -7.0815, -8.2821, -6.8934, -6.3985, -6.2943, -5.1279,
        -6.2804, -7.9556, -8.5314, -6.0755, -8.1936, -8.2229, -5.9284, -6.3303,
        -5.2325, -6.2597, -8.3740, -6.5160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7609, -5.4595, -4.7761, -5.8105, -7.1832, -7.7108, -7.4811, -5.4691,
        -5.3099, -4.7728, -6.9504, -7.8662, -7.1165, -6.8689, -5.6814, -9.9499,
        -7.5939, -7.9365, -7.7910, -5.3205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7362, -5.6894, -5.1505, -6.2948, -5.2465, -6.6259, -7.2409, -7.6610,
        -5.2331, -5.6512, -5.1113, -5.8420, -6.4016, -7.3490, -7.0058, -5.9840,
        -5.1352, -5.4827, -6.3596, -8.3358], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4327, -5.5415, -6.1635, -5.0042, -7.2546, -8.4773, -6.6730, -6.0647,
        -6.3362, -7.0863, -7.7138, -6.9021, -7.5344, -6.0010, -8.1397, -8.0876,
        -5.8315, -5.8466, -5.2276, -6.8271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7073, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6079, -8.4152, -7.2850, -6.6603, -5.9808, -5.9516, -5.7243, -6.0734,
        -8.2052, -6.9944, -6.4507, -5.5250, -5.4713, -6.3585, -7.4091, -7.2725,
        -6.1344, -5.2913, -5.7196, -6.0972], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5419, -7.7326, -5.6696, -8.2544, -8.5023, -5.6344, -5.9158, -4.8561,
        -6.0196, -7.7341, -7.0059, -5.7079, -5.7597, -5.8875, -5.8234, -6.8808,
        -7.1623, -7.3774, -6.2922, -5.0549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5963, -5.1894, -4.9737, -6.0175, -7.1759, -7.4093, -5.6800, -5.6519,
        -4.7335, -5.3965, -7.8324, -7.7357, -6.1579, -5.8607, -5.3978, -6.2403,
        -6.5398, -7.3043, -7.8764, -5.2559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4043,  -5.0933, -10.1879,  -5.4729,  -7.8801,  -6.1178,  -8.4558,
         -8.3986,  -6.1779,  -5.7978,  -6.5551,  -5.2110,  -6.5739,  -7.6859,
         -7.3450,  -5.4194,  -6.2611,  -6.0827,  -6.1093,  -6.5619],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2506,  -6.6732,  -9.5583,  -5.6694,  -7.9352,  -7.9619,  -6.9564,
         -8.5362,  -7.1899,  -5.7853,  -6.4376, -10.6951,  -7.7013,  -8.0142,
         -6.1255,  -8.7603,  -7.0533,  -6.7937,  -7.4795,  -6.0428],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2486, -6.9316, -6.3100, -7.5094, -5.9540, -6.8771, -8.1249, -7.8040,
        -5.5363, -8.2965, -7.7842, -5.4024, -6.1839, -5.1198, -5.8704, -8.0622,
        -7.1183, -6.3612, -6.3309, -5.1674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4704, -6.5433, -5.4131, -6.3034, -8.3411, -7.7147, -6.1958, -5.3496,
        -7.3980, -8.1371, -8.0701, -7.5621, -5.9172, -8.4033, -7.4491, -6.3863,
        -6.7794, -6.0913, -5.6796, -6.8044], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9477, -7.4095, -6.4919, -5.7009, -7.5823, -7.1036, -6.7574, -6.1536,
        -6.2374, -7.1902, -8.0907, -7.4359, -6.5834, -5.2427, -5.0806, -7.4165,
        -6.4076, -7.9138, -6.5276, -8.5505], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0496, -5.8630, -6.1075, -6.3877, -7.7745, -7.3862, -6.0467, -5.1993,
        -4.9761, -6.5220, -8.3829, -7.8665, -6.1153, -5.2554, -5.9122, -6.2998,
        -6.9230, -7.1784, -7.6653, -5.5243], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4955, -6.0530, -5.0497, -6.1071, -8.1411, -7.0660, -6.0081, -6.4189,
        -7.4336, -6.9571, -6.3337, -8.0424, -5.9511, -8.6247, -8.4359, -5.5231,
        -5.8555, -5.5416, -5.8992, -8.2357], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8117, -7.7610, -7.3123, -7.6747, -4.7177, -5.2720, -4.8960, -7.1339,
        -8.4405, -6.6180, -6.5386, -5.5302, -7.3420, -6.3700, -7.8347, -8.1479,
        -5.5881, -6.4494, -8.0762, -7.7240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3307, -5.7461, -7.0034, -7.3414, -7.5386, -5.0959, -5.5015, -5.0584,
        -6.2451, -8.3303, -6.9973, -6.4486, -6.7553, -5.3135, -7.0179, -8.4051,
        -7.7477, -5.8491, -5.2976, -6.0349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9813, -6.3388, -6.4009, -6.5511, -7.8349, -7.6092, -5.6748, -5.3938,
        -5.0651, -5.8072, -8.1108, -7.2797, -5.8568, -6.3456, -5.4875, -5.5320,
        -6.5826, -7.2685, -7.5511, -5.0297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5534, -7.2866, -7.0989, -6.4620, -6.1301, -5.0955, -5.8443, -6.1219,
        -7.2871, -7.0801, -6.5731, -5.4833, -5.3360, -6.2220, -8.1443, -6.8998,
        -6.5376, -4.9938, -6.4215, -6.0177], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1285, -5.6638, -5.2620, -6.8693, -6.0606, -6.6113, -7.3724, -7.6292,
        -5.5095, -5.2981, -6.3959, -5.7775, -6.6381, -7.3871, -7.7546, -5.7601,
        -5.4532, -5.7476, -6.1956, -7.8333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8863, -5.0699, -6.0052, -7.0760, -7.0898, -5.8312, -6.0515, -4.9736,
        -5.6344, -8.1403, -7.5965, -5.9110, -5.3262, -5.1828, -6.3193, -7.1271,
        -7.5443, -5.9931, -5.5881, -5.9227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5957, -8.0786, -7.5028, -6.0670, -5.2377, -6.0655, -6.1442, -5.8040,
        -7.1940, -6.6448, -6.0486, -6.0062, -9.5004, -5.1351, -7.6458, -5.8696,
        -8.3373, -7.9618, -6.1250, -6.1571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9028, -7.3786, -5.0607, -5.5143, -5.5750, -5.8945, -7.9407, -8.3781,
        -5.7110, -5.4255, -5.3109, -5.6790, -6.5122, -7.3892, -7.9100, -5.1997,
        -5.7294, -5.4733, -5.8124, -8.1669], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9090, -6.6033, -5.2826, -5.0205, -6.3988, -7.3466, -7.5411, -6.0733,
        -5.4025, -6.5601, -5.0777, -7.7649, -7.9384, -5.9482, -6.0137, -5.4189,
        -7.0502, -7.7541, -7.1859, -6.3598], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0424, -8.4510, -9.5306, -6.3964, -6.5425, -8.0829, -5.8525, -8.1164,
        -5.9654, -6.5564, -7.5664, -5.5219, -6.5656, -5.7846, -5.7252, -6.6065,
        -7.6375, -7.4434, -5.9185, -5.6098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3340, -5.2411, -5.2313, -6.4069, -7.7151, -6.8361, -6.5108, -5.0669,
        -5.0343, -6.4033, -7.7621, -7.7823, -5.4517, -5.6038, -5.2708, -6.1625,
        -8.1825, -7.3188, -6.0715, -5.8932], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0008, -7.9049, -7.6602, -5.3111, -8.0584, -7.8072, -6.0233, -6.0610,
        -6.5405, -5.7956, -6.0449, -7.5861, -7.4327, -6.4085, -5.6390, -6.3141,
        -7.5971, -6.2276, -7.8518, -5.6955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5345, -7.0206, -5.1105, -5.4276, -5.6562, -5.3215, -7.0981, -7.5722,
        -7.1369, -5.7434, -5.4465, -5.4378, -6.1245, -6.9058, -6.9846, -7.0321,
        -5.5332, -5.7574, -5.8810, -5.9706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7516,  -6.2599,  -6.9942,  -6.8001,  -8.0611,  -5.8162,  -8.5662,
         -6.8513,  -5.6141,  -6.2535,  -5.5528,  -5.8267,  -8.1028,  -6.4057,
         -6.9073,  -5.2463, -11.6188,  -6.7940,  -7.7425,  -5.7560],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9702, -6.1287, -5.7743, -4.5113, -5.8251, -6.4524, -7.1148, -7.3024,
        -5.7552, -5.2078, -4.7047, -5.3593, -7.6561, -7.2544, -6.0858, -5.9982,
        -5.6903, -4.6543, -6.6911, -7.1327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5560, -5.8728, -7.2900, -7.2636, -5.1696, -5.8277, -4.8381, -5.9539,
        -8.5540, -6.8186, -6.1782, -6.6806, -8.8477, -5.9250, -7.8832, -5.2439,
        -8.1014, -7.2801, -5.9325, -6.2530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5438, -5.8723, -6.6937, -7.1927, -7.3275, -4.8201, -5.7896, -6.1875,
        -4.9322, -7.7755, -7.4488, -6.0837, -4.6592, -6.2822, -5.5516, -7.0388,
        -7.6332, -7.4078, -5.2983, -5.5770], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5535, -6.1175, -5.7568, -5.5372, -5.8113, -6.5769, -6.5780, -7.7466,
        -6.7202, -8.0621, -7.7839, -6.3636, -5.8756, -6.1994, -7.2736, -7.6735,
        -6.8561, -5.6409, -5.4312, -6.0304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8753, -4.9591, -5.0555, -6.0744, -7.7851, -7.4270, -6.0572, -5.7932,
        -5.4617, -5.9985, -7.3638, -7.4246, -5.5349, -6.4254, -5.5147, -5.8152,
        -6.1156, -7.3152, -7.2637, -5.5914], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3966, -5.5919, -5.4330, -4.8423, -6.5265, -8.1481, -7.2396, -5.2261,
        -4.8245, -5.2877, -5.6180, -7.2588, -7.2789, -5.5092, -5.2007, -5.1543,
        -5.2476, -8.2587, -8.0220, -5.5719], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0248,  -5.6655,  -4.8232,  -5.7199,  -6.1845,  -7.1238,  -7.3912,
         -7.2224,  -5.2063,  -5.9281,  -5.2585,  -6.4399,  -7.8837,  -6.9266,
         -7.1042,  -5.3076, -10.0519,  -7.5874,  -7.4489,  -7.0412],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5638, -5.7864, -6.8094, -7.6155, -7.6327, -5.8780, -5.7606, -5.7067,
        -5.2311, -7.4923, -7.5797, -6.3512, -5.4979, -5.9831, -5.1921, -7.6786,
        -7.3077, -5.3461, -6.0655, -8.9092], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4694, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6889, -7.5735, -7.2214, -5.9387, -6.1046, -5.4589, -6.5526, -8.2921,
        -6.8542, -6.8309, -5.0252, -9.1857, -5.8946, -7.3426, -6.6190, -8.1454,
        -6.4429, -8.7872, -8.4099, -5.3963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5439, -6.7823, -5.3174, -4.5027, -5.8295, -8.1245, -6.8167, -6.2212,
        -5.2212, -5.7927, -5.9248, -7.0930, -7.2340, -7.2042, -5.8867, -6.6116,
        -6.1885, -5.4676, -6.5837, -7.1549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0067, -5.4161, -5.4226, -5.4276, -7.9715, -6.9968, -5.8563, -5.2077,
        -5.5815, -6.2983, -7.8001, -6.8734, -6.2694, -5.9431, -4.1650, -5.5506,
        -7.6844, -7.2267, -5.7209, -5.5367], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0978, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9236,  -9.7641,  -6.0218,  -7.2993,  -6.8298,  -6.8102,  -8.8870,
         -7.2999,  -6.7574,  -7.8544,  -8.0541,  -6.5504,  -5.9400,  -4.9358,
         -7.0579,  -8.3765,  -7.2921,  -6.9575,  -4.9814, -10.7854],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8137, -6.3621, -6.4531, -7.5049, -7.3451, -5.2097, -5.0145, -4.5195,
        -6.4858, -7.9257, -6.4707, -5.6417, -7.2630, -8.8165, -7.1059, -7.2544,
        -7.8357, -7.0128, -7.6304, -7.3709], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9767, -6.3059, -5.0877, -5.1558, -8.0539, -7.1803, -6.1412, -6.0351,
        -8.9061, -8.6212, -6.2883, -7.6243, -5.6552, -6.5228, -7.8859, -7.2175,
        -5.7805, -5.4990, -5.1194, -5.5275], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1900, -6.8869, -8.1041, -7.1164, -5.8151, -5.3691, -5.1157, -6.6861,
        -7.7820, -6.9390, -6.0366, -5.4191, -5.2451, -5.6839, -6.5923, -7.6032,
        -7.0602, -5.6002, -5.3376, -5.4499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2998, -8.3312, -5.5949, -5.5810, -4.6043, -5.8290, -8.2722, -7.0439,
        -5.6177, -5.9489, -5.3068, -6.0783, -6.7037, -7.4277, -7.2186, -6.3178,
        -4.6983, -4.7852, -6.0468, -6.9231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7076, -7.4990, -7.4858, -7.0942, -5.0368, -4.4676, -6.5117, -7.7977,
        -7.6416, -6.8188, -6.3127, -5.4605, -4.9362, -7.2044, -7.8665, -7.2838,
        -6.0229, -4.8511, -5.5501, -5.9626], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9483, -5.5599, -5.4090, -5.9114, -7.5629, -7.6855, -6.9711, -5.9850,
        -5.3665, -5.7764, -7.5355, -6.8687, -6.1635, -5.4807, -5.5690, -6.2539,
        -6.5574, -7.3744, -7.5267, -5.2605], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7075, -6.3981, -6.0805, -5.4528, -8.0607, -7.3944, -5.3884, -6.4949,
        -6.8172, -4.9600, -5.7766, -7.5512, -7.4869, -5.5971, -5.3747, -6.0628,
        -5.9630, -6.4821, -7.2882, -6.5343], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7220, -7.7816, -6.4248, -6.1392, -7.3952, -6.3901, -7.3296, -7.5768,
        -7.5418, -5.4681, -7.3865, -6.6910, -5.6150, -5.8343, -4.6230, -5.7776,
        -8.3346, -6.8629, -6.6661, -4.9604], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2736, -5.7491, -8.0480, -7.0715, -5.6691, -5.4873, -6.9119, -6.6949,
        -6.0520, -7.6131, -5.0862, -9.6244, -8.1676, -4.9114, -6.3123, -4.5175,
        -7.9927, -7.8762, -6.7729, -6.1972], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0739, -7.1845, -5.7848, -5.8802, -7.4984, -7.3883, -6.2317, -5.2356,
        -4.4239, -5.7815, -7.1741, -6.7817, -5.3788, -5.5601, -5.1237, -5.4536,
        -8.2449, -6.9415, -6.3174, -5.6579], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8004, -8.2881, -5.6994, -8.2701, -8.0407, -5.6352, -5.3524, -4.6617,
        -7.1280, -7.3555, -6.9286, -5.5344, -6.6069, -4.5852, -4.9182, -7.4660,
        -7.4256, -5.7541, -6.1120, -5.8692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8192, -7.8194, -4.9526, -5.1223, -5.6152, -5.1867, -7.0770, -7.2983,
        -7.5405, -5.3468, -5.3047, -4.7813, -8.8363, -7.8290, -6.4208, -6.0176,
        -8.6383, -6.4413, -7.6517, -6.7624], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2208, -4.6817, -6.2722, -7.4025, -6.9618, -6.8110, -5.1922, -6.4288,
        -6.3132, -5.6872, -6.2787, -7.8074, -6.0529, -8.0542, -6.0371, -8.3481,
        -7.5189, -5.1781, -5.8827, -5.7541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4922, -5.7408, -5.0600, -4.5224, -6.5451, -7.5510, -7.4233, -6.2018,
        -4.9789, -4.6572, -6.5509, -8.3173, -7.3492, -5.1570, -5.0445, -5.4345,
        -6.0086, -6.7032, -7.5825, -7.3774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3188, -5.4073, -5.1563, -5.6385, -5.7576, -8.2202, -7.3153, -5.5970,
        -5.3587, -4.5742, -5.5378, -8.0909, -6.8422, -5.4912, -4.9895, -5.7316,
        -6.2582, -6.4701, -7.3385, -7.2346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5475, -7.6849, -7.3315, -5.0503, -5.5028, -5.0522, -4.8029, -8.2558,
        -7.4449, -5.0688, -5.3124, -5.1615, -5.6309, -6.3800, -7.7913, -7.4852,
        -6.1746, -5.6795, -5.7595, -5.1519], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0851, -5.8912, -7.4404, -7.5925, -5.9035, -5.6366, -5.5488, -5.5931,
        -7.3375, -7.2668, -6.1544, -5.5531, -5.4343, -5.3607, -6.4414, -7.0578,
        -7.2029, -5.6068, -4.9476, -4.4910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0675, -5.0438, -5.9413, -7.6943, -7.1873, -6.0290, -5.3658, -5.4261,
        -5.9158, -6.4839, -7.1866, -7.0982, -5.4271, -5.8281, -4.5596, -5.7399,
        -8.1865, -6.7551, -6.3573, -4.7827], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4609, -7.5965, -7.7496, -5.4337, -6.1073, -7.7973, -7.6621, -5.3902,
        -5.3899, -6.1449, -4.8709, -7.9876, -7.5337, -5.5090, -5.2910, -6.0469,
        -5.5325, -7.0074, -6.6139, -6.9894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4557, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7063, -5.2413, -7.6843, -7.7162, -5.9149, -5.2739, -5.4461, -5.5113,
        -6.7974, -7.2738, -7.0385, -5.7540, -5.1609, -4.3661, -5.9072, -7.6043,
        -7.0347, -6.7550, -6.0362, -4.3112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7698,  -7.2669,  -5.8820,  -4.7241,  -5.0821,  -6.2767,  -6.8490,
         -7.4526,  -7.0982,  -5.2614,  -5.3917,  -5.0309,  -6.6542,  -7.9859,
         -6.6303,  -6.2401,  -6.3425, -11.1255,  -6.5011,  -7.5941],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5252,  -8.0172,  -5.0538,  -6.8796,  -7.8052,  -6.0376,  -6.3167,
         -4.3947,  -6.1977,  -8.0506,  -6.7475,  -7.0514,  -4.5707, -11.8041,
         -5.8752,  -6.5494,  -8.2065,  -6.5292,  -7.6657,  -5.1453],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0866, -4.9626, -6.0304, -7.9050, -7.4647, -6.7685, -7.6549, -4.9212,
        -6.5839, -6.2493, -7.1745, -7.6379, -4.8851, -5.1554, -4.4209, -6.5780,
        -8.0542, -7.6203, -5.3685, -4.6598], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7025, -5.8654, -5.1157, -5.1732, -5.3555, -6.9931, -7.4146, -7.1512,
        -5.9923, -6.0491, -5.6561, -4.7259, -6.4973, -7.2781, -7.4098, -6.0014,
        -4.9499, -4.4178, -6.0490, -7.0377], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8606, -4.5576, -6.1552, -8.0636, -6.7944, -6.4077, -5.4047, -7.1137,
        -5.8406, -7.2339, -7.3863, -6.8454, -5.3945, -5.3010, -5.2501, -5.6702,
        -6.5512, -7.4955, -6.0192, -5.8695], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2840, -7.4585, -7.5763, -5.6747, -5.3507, -6.5552, -5.3135, -6.8704,
        -7.5521, -7.2276, -5.3425, -4.9412, -4.8470, -5.5390, -6.9494, -7.6263,
        -6.6025, -5.2296, -5.1962, -5.7544], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1902,  -5.1634, -11.0344,  -6.7833,  -7.6392,  -7.9252,  -5.3774,
         -6.3552,  -7.5823,  -5.5670,  -6.4921,  -4.5670,  -5.0945,  -5.5329,
         -7.4500,  -7.4679,  -5.6751,  -4.8712,  -5.5473,  -5.4036],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3862, -5.8832, -5.4704, -4.9980, -5.8272, -7.6511, -7.0675, -6.4702,
        -5.1198, -5.2075, -5.6159, -6.6804, -7.5490, -7.0565, -6.3385, -5.3336,
        -5.2131, -5.1108, -7.9234, -8.2446], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3074, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1708, -7.4316, -5.1934, -5.2922, -4.6250, -6.2634, -7.6436, -7.2370,
        -6.1748, -6.4055, -5.2249, -6.6531, -7.9450, -7.3959, -5.5604, -5.4363,
        -5.3751, -6.2499, -8.0750, -7.0150], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1118,  -5.9846,  -5.0542,  -5.5097,  -7.3692,  -7.1657,  -5.9769,
         -6.2709,  -4.9032,  -9.6949,  -7.8382,  -5.8690,  -7.2241, -10.0016,
         -6.2313,  -7.7711,  -7.1931,  -7.8312,  -5.2291,  -5.6544],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7060, -7.2015, -5.9188, -8.5731, -8.5655, -7.0742, -6.7848, -5.5266,
        -4.2740, -5.4293, -7.4163, -7.8854, -7.4592, -6.0904, -5.7804, -5.0901,
        -5.4572, -8.2273, -6.6354, -5.0905], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5646, -5.4964, -6.5210, -7.1675, -7.4423, -5.4978, -5.5241, -5.5641,
        -5.3983, -6.3926, -7.5133, -7.6558, -5.4819, -5.2271, -5.7953, -5.2574,
        -6.4591, -7.5512, -6.9468, -6.2586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0343, -5.8882, -6.3790, -7.2760, -7.1830, -6.0647, -4.6878, -4.7616,
        -5.0628, -7.8923, -7.2242, -6.3703, -5.0637, -7.3569, -6.4406, -6.8145,
        -7.7923, -5.7040, -8.4837, -8.0813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3773, -8.9079, -7.7357, -5.8362, -5.4473, -4.2615, -7.6779, -7.8148,
        -6.4405, -6.1486, -7.5636, -4.7337, -5.0684, -7.8686, -7.4295, -5.3692,
        -5.4765, -5.3250, -4.9933, -6.8126], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3144, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3328, -7.4394, -5.1751, -5.5898, -5.7444, -5.1446, -6.7469, -7.0761,
        -7.6099, -5.4149, -5.3242, -4.6486, -6.3647, -8.0404, -6.9281, -5.5061,
        -5.1855, -4.8757, -5.9505, -7.4848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1066, -4.8351, -5.2610, -6.1083, -6.5116, -7.4498, -7.4373, -4.9510,
        -4.9081, -4.8072, -6.6162, -7.9841, -7.1309, -6.4547, -5.4620, -4.4781,
        -6.9276, -7.7885, -7.2577, -5.8079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3985, -5.4288, -5.9523, -8.4827, -6.8465, -6.8543, -7.8169, -5.3683,
        -6.4711, -5.8602, -7.1291, -7.8067, -7.1570, -7.8069, -6.0361, -4.8938,
        -6.5171, -7.7274, -7.3296, -6.0384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0855, -7.9143, -5.6126, -5.6625, -4.7990, -6.1913, -7.6310, -7.1736,
        -5.7701, -5.2743, -5.0698, -5.7356, -6.2381, -6.9296, -7.2691, -5.1018,
        -4.9268, -5.4581, -5.9379, -7.0586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3521, -5.0162, -5.1242, -7.7423, -7.5980, -5.3737, -5.0479, -6.1354,
        -6.6555, -6.6384, -7.3316, -7.4389, -5.8817, -5.1533, -4.3135, -5.7486,
        -7.0936, -7.2255, -6.1661, -5.0247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6758, -7.5102, -5.7250, -5.8295, -6.3645, -6.2165, -6.1014, -7.2653,
        -6.9655, -6.1082, -4.8691, -4.4108, -5.7019, -7.7009, -6.9543, -5.4164,
        -5.6604, -5.5837, -5.6991, -6.4355], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4328, -5.1177, -5.6195, -6.5800, -7.4898, -7.4230, -5.5538, -4.8965,
        -4.9173, -5.2443, -7.4647, -7.5645, -5.1667, -5.1545, -5.4106, -5.2399,
        -7.8894, -7.4068, -5.7316, -5.5143], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2887, -6.5335, -9.0052, -8.0525, -6.3416, -7.5268, -7.1759, -7.3773,
        -8.3285, -6.2997, -6.2734, -5.4467, -5.3259, -6.0274, -8.0045, -7.6689,
        -5.2108, -5.4374, -4.7599, -7.3661], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8786, -7.3557, -5.6632, -4.7970, -5.8286, -5.5842, -8.1168, -7.1498,
        -5.7243, -5.8548, -4.7281, -5.7409, -6.0683, -6.3345, -7.3322, -6.7927,
        -7.7010, -5.2116, -8.3032, -7.5465], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6582, -5.2994, -5.3055, -4.9512, -7.3297, -7.4170, -5.6744, -5.6115,
        -4.8758, -6.0920, -7.4523, -7.4484, -5.1442, -6.0380, -4.3207, -7.1501,
        -8.0171, -6.5764, -5.6330, -5.9194], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7442, -7.1914, -8.3302, -5.9619, -8.6500, -7.1188, -5.5737, -6.0086,
        -5.2230, -5.1169, -6.3854, -8.3440, -6.9643, -6.1384, -5.6774, -6.0179,
        -5.4644, -6.6501, -7.8670, -6.8258], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5837, -5.2777, -5.1326, -6.9359, -7.8296, -6.7690, -5.9778, -5.6483,
        -4.1158, -6.6524, -7.8075, -7.4191, -6.1136, -4.6871, -4.5774, -6.7616,
        -8.0784, -6.8486, -6.2402, -7.1224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6659, -6.9698, -7.3686, -6.9800, -5.8832, -5.0617, -4.1841, -5.6803,
        -7.2265, -6.8581, -5.8688, -4.9651, -5.2673, -4.8587, -6.9721, -6.9039,
        -7.1275, -5.6046, -4.9000, -4.9028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1821, -4.9926, -5.4140, -5.2431, -6.7268, -7.3672, -7.3522, -5.1086,
        -5.2710, -5.0234, -5.4905, -8.3809, -6.7193, -5.8268, -5.0521, -6.9481,
        -5.7503, -6.8654, -7.8945, -6.8612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3846, -7.2107, -7.6171, -7.2592, -6.3482, -4.6710, -4.5104, -6.5902,
        -6.7402, -7.5697, -7.4441, -4.6677, -5.4983, -5.0932, -6.0547, -7.5223,
        -7.1165, -5.3015, -6.3780, -5.2591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9322, -5.5333, -4.5404, -6.5798, -8.4081, -6.5135, -6.2250, -4.9280,
        -8.4239, -7.2281, -7.0231, -7.8702, -5.6693, -8.5723, -8.2451, -5.1494,
        -5.7941, -4.8476, -6.9654, -8.1757], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5554, -6.1909, -8.5467, -7.6844, -5.5479, -5.6165, -4.8528, -5.3495,
        -6.6691, -7.6678, -7.3810, -4.9061, -5.4859, -4.3220, -6.4226, -8.1313,
        -7.3158, -5.3490, -6.3830, -4.4829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6268, -7.5145, -5.4910, -4.9372, -4.4587, -5.8419, -8.1303, -6.8591,
        -6.4786, -5.7595, -5.3527, -6.4880, -6.8942, -7.5244, -7.6302, -5.6427,
        -4.8673, -4.6147, -6.2559, -7.5561], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5011, -10.1194,  -8.2328,  -5.6231,  -6.7369,  -6.8007,  -7.2700,
         -5.7614,  -6.6070,  -7.2054,  -6.2094,  -7.6171,  -5.9610,  -8.5442,
         -8.0949,  -5.7016,  -6.0796,  -4.5753,  -5.8103,  -7.8890],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8480, -7.1736, -7.1821, -5.8721, -5.7529, -6.2989, -5.1365, -6.3636,
        -7.9595, -7.1220, -5.2956, -5.0145, -4.5657, -5.2292, -7.6150, -7.3287,
        -5.9875, -6.0568, -4.9987, -5.4320], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7588, -4.6414, -6.0794, -6.8449, -7.8139, -7.2156, -6.7029, -6.1940,
        -5.7485, -5.9929, -6.7107, -7.5155, -7.1249, -5.2509, -5.2988, -4.4802,
        -5.7016, -8.0214, -6.9446, -6.1282], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7289, -7.9014, -6.9142, -5.4651, -5.0993, -6.1171, -5.9738, -6.1705,
        -7.5087, -7.1963, -5.3802, -5.1925, -6.7964, -5.0000, -7.0819, -7.3641,
        -7.3016, -5.3154, -5.6298, -6.4011], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6985, -7.1002, -5.7450, -5.0230, -4.3889, -5.5454, -7.8002, -6.4174,
        -5.4361, -5.1831, -4.5791, -5.3946, -8.0186, -7.5554, -5.8693, -5.2448,
        -5.1063, -4.7012, -7.5826, -7.4730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8427, -7.4826, -4.7332, -5.6117, -4.6731, -6.3523, -8.3352, -6.9321,
        -5.9699, -5.0690, -5.2157, -6.6865, -7.6497, -7.1444, -6.0688, -5.9017,
        -5.0248, -6.4616, -7.5043, -7.2190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5066, -7.5015, -7.3711, -5.2965, -5.6140, -4.5761, -6.0776, -8.2189,
        -6.5637, -5.9054, -6.6794, -5.8410, -5.6457, -7.3123, -8.0889, -7.3130,
        -6.3554, -5.6754, -4.5269, -6.0040], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9149, -7.9699, -5.5178, -8.1979, -8.0718, -6.8878, -6.2010, -4.7647,
        -5.7990, -8.5851, -7.0503, -6.7482, -7.6859, -9.8718, -7.3200, -6.9987,
        -7.5623, -5.2399, -6.8436, -7.9208], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9182, -5.6542, -4.6906, -6.0149, -7.7252, -7.1737, -6.0563, -5.0821,
        -5.1622, -5.9772, -7.9447, -6.9762, -6.8330, -4.9589, -7.5473, -6.0004,
        -6.9364, -7.2542, -7.2682, -5.8871], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2778, -6.3586, -5.3100, -7.6696, -7.4399, -5.6879, -5.4498, -4.9716,
        -5.7130, -6.7966, -7.0892, -7.6637, -5.6431, -5.2092, -5.1721, -6.1312,
        -8.1591, -7.0227, -5.4718, -5.0900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4255, -5.9421, -6.4255, -7.2024, -7.3936, -5.2764, -5.1918, -5.4456,
        -5.5781, -7.9951, -7.5978, -5.3134, -6.2469, -6.5521, -7.1646, -8.8013,
        -6.0371, -7.8800, -8.2029, -5.8339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3700, -8.1627, -6.5836, -5.2351, -5.1204, -4.9193, -5.3463, -7.7423,
        -7.2259, -6.0986, -5.5902, -6.2229, -4.8862, -6.7833, -7.6582, -7.4880,
        -5.6336, -6.4019, -5.7656, -6.0047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4056, -7.2681, -7.2054, -5.0992, -5.3801, -6.1724, -4.9630, -6.4429,
        -7.3987, -7.4723, -5.7283, -4.9559, -5.3079, -5.6540, -6.7168, -7.6117,
        -7.5991, -5.8574, -5.4205, -5.9487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7465, -5.8876, -5.0063, -6.8691, -6.1815, -7.7169, -7.5001, -4.7238,
        -5.8307, -4.7763, -6.3223, -8.3194, -7.0302, -6.2671, -5.3806, -5.8423,
        -6.7170, -6.2854, -6.8563, -7.4063], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4541, -5.9308, -4.2674, -7.0622, -8.1691, -6.7438, -7.0528, -6.4628,
        -5.0631, -5.2459, -6.2783, -7.6931, -7.8490, -7.7469, -5.2119, -8.8068,
        -7.0792, -5.7822, -6.1257, -5.0065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5030,  -5.5813,  -8.3789,  -7.6242,  -5.9499,  -5.4405,  -4.6368,
         -7.7193,  -8.1190,  -5.8714,  -5.7090,  -9.3939, -10.0017,  -6.6299,
         -6.8479,  -7.6523,  -6.9237,  -6.9287,  -7.7289,  -6.4237],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1217, -5.2707, -5.7689, -7.5540, -6.7692, -5.6275, -5.2932, -6.8344,
        -5.3379, -6.3575, -7.6213, -7.4288, -5.9061, -5.4872, -5.5471, -6.0446,
        -6.4202, -7.4333, -7.5008, -5.3304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8528,  -7.5807,  -6.2788,  -5.7813, -10.5896,  -7.4962,  -7.7577,
         -7.3527,  -8.4959,  -7.2999,  -5.9577,  -5.5842,  -6.2289,  -5.6281,
         -7.1145,  -7.1362,  -7.1513,  -5.3615,  -5.4430,  -5.1133],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1917, -7.1647, -5.6396, -5.0362, -4.4326, -5.2509, -6.5402, -7.1671,
        -7.6101, -5.6936, -4.7132, -4.4480, -5.7541, -7.5746, -7.0578, -5.6088,
        -5.3548, -5.7935, -5.4102, -6.5695], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4722, -5.5300, -6.2294, -6.7720, -7.4556, -7.1714, -5.1842, -5.4527,
        -5.0308, -6.2205, -7.6220, -6.6325, -6.0537, -4.6428, -8.3848, -6.1060,
        -7.2044, -6.7981, -7.9068, -5.4806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7072, -7.3134, -5.5079, -6.2336, -5.6827, -5.1495, -7.2207, -6.9859,
        -7.1271, -5.1592, -5.0654, -4.4653, -5.5824, -7.9162, -6.9110, -6.3850,
        -6.7367, -5.0457, -4.8568, -6.8899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1971, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1912, -5.5344, -4.8732, -6.3203, -7.6469, -6.7582, -6.6745, -5.5074,
        -6.1029, -6.1389, -6.9250, -7.6761, -5.2576, -5.6263, -6.1601, -5.9084,
        -6.2584, -7.3572, -7.4164, -5.8549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.5701, -6.6806, -7.4859, -7.7337, -5.1380, -8.4940, -7.7765, -5.8745,
        -6.1530, -4.1039, -7.4710, -7.9633, -6.6007, -6.3824, -6.5182, -7.5155,
        -6.7454, -7.2346, -6.9957, -7.2699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7577, -4.8620, -4.5321, -5.5756, -7.5378, -7.3275, -4.9876, -5.2038,
        -4.6619, -5.4990, -8.1046, -7.9145, -5.8931, -5.6577, -5.0582, -6.8517,
        -8.3721, -7.1606, -6.6119, -5.5673], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3123, -6.0416, -7.6397, -7.1486, -5.0885, -5.0530, -4.7382, -5.9960,
        -7.7721, -7.2736, -5.7460, -5.1095, -5.1510, -5.6945, -8.5319, -6.9062,
        -6.5919, -6.1590, -7.9064, -7.7283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6899, -5.7320, -5.2046, -5.7016, -5.0798, -7.0951, -7.3647, -6.5445,
        -5.7901, -4.9063, -4.6128, -5.4404, -7.3894, -7.2156, -5.7978, -5.2148,
        -4.9961, -6.3810, -8.5152, -7.4120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7275, -7.6753, -7.0930, -5.8951, -5.3218, -4.8864, -6.0062, -8.1324,
        -7.0675, -6.6855, -6.2153, -4.2470, -5.3406, -7.1034, -7.2164, -6.8804,
        -6.6317, -6.1973, -5.6765, -6.3223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7394, -4.9759, -5.1068, -5.2638, -7.5372, -7.5474, -5.2209, -4.8605,
        -4.6426, -6.9939, -8.1824, -7.0510, -6.7436, -5.2723, -4.8366, -5.3383,
        -8.5022, -7.6867, -5.8903, -5.2224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1480, -6.5732, -5.7555, -6.8090, -9.9033, -5.4098, -7.8885, -5.9099,
        -8.6022, -8.3070, -5.3852, -5.9731, -5.1299, -5.5842, -7.8712, -7.1078,
        -6.1960, -5.8374, -5.3212, -5.8275], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6770, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3070, -5.0553, -5.1176, -5.3813, -7.8749, -7.6162, -6.8224, -6.9304,
        -5.1294, -7.2890, -7.7658, -7.6134, -8.3528, -5.3343, -3.9759, -6.2122,
        -7.4980, -7.4471, -5.9555, -6.6241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5848, -6.4119, -5.0461, -4.5615, -5.6779, -7.6171, -6.9199, -5.8943,
        -5.0547, -5.8143, -5.8173, -6.4351, -7.2887, -7.6154, -4.8392, -5.1556,
        -5.1537, -7.1169, -8.0265, -7.0620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0771, -5.0723, -5.7401, -5.3425, -6.5045, -7.3054, -7.6138, -6.6233,
        -6.0408, -6.2062, -5.1319, -7.0156, -7.1538, -7.6273, -5.7225, -5.3932,
        -5.5150, -5.1994, -6.2216, -7.6899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2194, -5.1585, -5.2365, -7.5731, -7.2338, -6.1706, -5.2586, -6.5614,
        -5.9693, -6.3390, -7.5813, -7.3046, -6.0319, -4.8843, -4.9396, -6.2426,
        -7.5493, -7.1994, -5.5265, -5.5120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2734, -7.2157, -7.0947, -7.0139, -4.8401, -5.3636, -5.8565, -6.9115,
        -7.2877, -7.5514, -4.9334, -5.2628, -4.7801, -5.7415, -7.9688, -6.9768,
        -5.6825, -5.2438, -6.5575, -5.0628], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9604, -7.5395, -7.0706, -6.0419, -5.6590, -6.7170, -8.9321, -6.6286,
        -5.6447, -5.9725, -6.3606, -6.8162, -8.0489, -7.2902, -7.9188, -6.5174,
        -5.3277, -5.6538, -5.4593, -7.8347], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2262, -5.9184, -6.3408, -6.3931, -5.5603, -6.0589, -7.0967, -7.1203,
        -5.7708, -4.9907, -6.2540, -5.9684, -6.0852, -7.0006, -7.1885, -5.2884,
        -5.2573, -4.8520, -6.3980, -8.1350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7256, -7.3828, -5.8385, -6.2398, -5.3008, -5.2427, -7.1180, -7.7322,
        -7.4625, -5.8405, -5.0569, -4.6480, -6.4043, -7.4483, -6.7193, -5.5235,
        -4.7208, -4.3668, -6.0073, -8.2685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8682, -4.8925, -5.4752, -4.5499, -5.6885, -7.6650, -6.9968, -5.2336,
        -5.7708, -4.8090, -5.3974, -8.1967, -7.4103, -5.7076, -5.4643, -5.8055,
        -6.5750, -7.8719, -7.3181, -6.5004], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4642, -4.6088, -6.5721, -8.0964, -6.9672, -5.6303, -5.7573, -5.1524,
        -5.9207, -7.8684, -8.6112, -7.8172, -7.1779, -5.2760, -6.5175, -8.2737,
        -7.0826, -6.2595, -5.6905, -3.9814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5551, -4.7956, -5.3338, -4.6479, -5.5888, -7.9303, -7.4705, -5.6997,
        -6.1571, -6.0420, -6.2000, -5.9965, -7.5744, -7.6039, -6.0553, -4.8463,
        -4.5360, -5.5361, -7.5485, -6.9294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6651, -7.2185, -7.8366, -7.7004, -5.0075, -5.2643, -6.3711, -5.1437,
        -7.8572, -6.9072, -5.9049, -6.6592, -5.1512, -5.4686, -6.5715, -6.9415,
        -7.1169, -5.5458, -5.4548, -4.6254], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8152, -7.7523, -7.3565, -6.5904, -5.7623, -4.5701, -5.4526, -8.2105,
        -7.5792, -5.8518, -5.1032, -4.5547, -5.8140, -8.3167, -7.0308, -5.7487,
        -6.4442, -5.8798, -5.3657, -7.0556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0828, -6.2317, -4.4275, -4.7622, -5.8522, -7.5266, -7.2271, -5.9017,
        -6.0953, -6.3857, -5.9769, -8.4114, -6.7888, -5.1636, -5.0612, -5.3484,
        -6.3360, -6.8772, -7.1018, -7.0912], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4365, -7.3220, -7.4188, -6.0839, -5.9810, -4.8946, -5.6958, -7.6845,
        -7.5305, -5.9787, -5.3214, -5.6212, -5.6273, -6.7426, -7.0513, -7.4824,
        -5.6595, -5.2641, -4.2617, -6.5864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2344, -8.4210, -7.7953, -5.4228, -6.1951, -5.0831, -5.5942, -8.2728,
        -7.0240, -6.1453, -7.2549, -5.8109, -6.6269, -7.0243, -8.0316, -6.8429,
        -8.5888, -8.0507, -6.7208, -5.7520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7946, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0694, -5.6946, -7.5736, -7.1603, -7.9715, -5.4671, -8.1412, -7.8052,
        -5.6347, -6.0186, -4.5157, -5.0962, -8.2873, -6.9264, -6.1975, -5.2623,
        -5.7063, -6.0078, -6.2185, -7.3359], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5223, -7.4951, -5.7312, -5.2929, -5.4023, -4.5135, -7.1044, -7.5179,
        -6.0872, -5.7197, -4.8001, -5.7704, -7.1814, -7.4858, -7.3119, -4.8845,
        -5.7473, -4.4677, -6.2712, -7.9339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0203, -6.2633, -5.0685, -6.5933, -6.3586, -6.4945, -7.8380, -7.3878,
        -4.5517, -5.4205, -4.7902, -5.0952, -7.7980, -7.5284, -5.7267, -4.6630,
        -4.6666, -6.3723, -7.7092, -7.4766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2411, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4424, -5.9414, -7.0459, -7.3677, -5.7521, -5.5858, -5.2760, -6.8281,
        -8.0849, -7.1226, -5.9252, -4.9872, -4.4533, -6.7891, -7.5850, -6.9942,
        -5.4260, -4.6338, -5.1311, -6.7240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7242, -5.3697, -4.4603, -6.2405, -6.8028, -7.3840, -7.1705, -5.3006,
        -5.5535, -4.9480, -5.4878, -7.4534, -7.8488, -5.0090, -5.6428, -4.8303,
        -5.9384, -8.3390, -6.7387, -6.5394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.1306,  -5.7560,  -8.0521,  -5.5191,  -6.9827,  -7.5177,  -5.6513,
         -5.8744,  -6.8904,  -6.9463,  -8.4347,  -6.3428,  -8.4606,  -7.9046,
         -5.7083,  -6.7246,  -4.8714,  -5.6993,  -6.3832,  -6.5161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5068, -5.7162, -7.2321, -6.1377, -7.8012, -6.8564, -8.6133, -6.8271,
        -5.3530, -6.0112, -4.8163, -5.7485, -8.1063, -7.4444, -6.0563, -4.7913,
        -9.7594, -6.5066, -8.8924, -5.7747], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0089, -7.4830, -6.7998, -5.4583, -6.4646, -5.3273, -7.6320, -7.7430,
        -6.4552, -6.3031, -6.1034, -5.6388, -6.3714, -6.8060, -7.3283, -7.5871,
        -5.0107, -5.0214, -4.8450, -5.4174], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2224,  -5.2063,  -5.9281,  -5.2585,  -6.4399,  -7.8837,  -6.9266,
         -7.1042,  -5.3076, -10.0519,  -7.5874,  -7.4489,  -7.0412,  -5.3760,
         -8.8112,  -7.4795,  -5.2463,  -5.7735,  -5.3484,  -5.3492],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0301, -7.7401, -7.3593, -5.4308, -5.5137, -4.8972, -5.7637, -7.9063,
        -6.7467, -5.2822, -7.3486, -5.0498, -5.7452, -6.7385, -7.7906, -7.1501,
        -6.3194, -5.5478, -4.7844, -6.7087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7351, -7.3180, -7.4134, -5.7381, -5.2201, -4.3565, -6.2244, -7.5940,
        -6.9052, -6.2666, -6.9419, -4.8040, -5.7365, -7.0387, -7.1673, -7.3761,
        -5.6956, -4.6516, -4.4007, -5.4396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1512, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0547, -5.5697, -4.9122, -5.2706, -6.4157, -7.4288, -6.9447, -5.6914,
        -4.6308, -5.1342, -5.2627, -7.9917, -7.8277, -6.1749, -5.3884, -4.5814,
        -5.6167, -8.3069, -6.9925, -6.3567], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2457, -6.0760, -6.8143, -7.8216, -7.1254, -7.8354, -8.1109, -4.5809,
        -5.6947, -7.5462, -6.6377, -6.9945, -5.0585, -6.3588, -5.9076, -6.9375,
        -7.0165, -7.3882, -5.6592, -5.1091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1720, -5.4116, -7.1286, -5.2750, -6.4470, -7.4191, -7.8402, -7.5804,
        -6.5224, -5.0905, -5.1228, -5.4284, -7.6571, -7.2614, -6.3536, -5.4725,
        -4.3905, -4.9519, -8.1130, -7.2397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3168, -5.5693, -4.9055, -5.1834, -7.6049, -7.5114, -5.8508, -5.3598,
        -6.2520, -4.4708, -6.6793, -7.3670, -7.5236, -5.7412, -5.8384, -5.5019,
        -5.8183, -5.9133, -7.0623, -7.6217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8925, -7.5763, -7.7927, -6.0164, -6.2827, -6.7177, -6.4629, -7.0711,
        -7.4253, -7.5002, -6.6618, -5.3922, -4.7348, -9.4135, -7.9349, -6.6484,
        -5.4012, -5.4752, -7.7089, -5.8160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0202, -7.0980, -6.7385, -7.9448, -5.1492, -8.1046, -7.1618, -5.3781,
        -5.6209, -6.9409, -5.1249, -6.3502, -7.3617, -7.5092, -5.7732, -5.3807,
        -5.8152, -7.1306, -7.8545, -7.4349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5946, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2996, -7.6538, -6.3380, -5.5276, -3.9037, -7.8892, -7.9868, -6.2250,
        -6.6053, -6.2828, -9.8629, -6.8815, -7.1576, -7.4535, -6.3644, -6.4995,
        -7.7194, -6.9994, -5.7685, -5.2864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6949, -4.9383, -8.4596, -7.0778, -6.9499, -7.0945, -5.4037, -7.7910,
        -7.5220, -5.1800, -5.4685, -5.8412, -4.9217, -6.6738, -7.0230, -6.9983,
        -6.1330, -6.1017, -5.0947, -5.5835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0224, -6.6329, -6.1599, -4.7681, -4.4499, -5.7867, -7.3388, -6.7406,
        -5.6855, -4.5282, -4.6649, -5.9705, -7.9756, -7.0645, -5.4219, -4.7582,
        -4.8490, -5.4543, -8.2029, -6.9420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0708, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7233, -6.7504, -5.5361, -5.4687, -3.9301, -5.5077, -6.8003, -6.4321,
        -5.5277, -5.3646, -5.1301, -5.6310, -6.3333, -7.0733, -7.2077, -5.2845,
        -4.8479, -5.1896, -5.2828, -7.9803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8681, -5.8982, -8.1399, -6.8053, -7.2558, -5.0337, -5.4254, -5.4140,
        -6.4979, -6.7911, -7.0727, -5.8082, -5.1912, -4.6180, -4.7276, -7.5636,
        -7.2159, -5.3193, -5.3049, -5.6983], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1222, -6.3187, -6.8545, -5.2390, -5.5069, -4.4235, -5.1547, -7.8423,
        -6.9751, -6.6427, -6.0473, -8.2116, -7.9483, -7.4871, -7.7685, -5.1373,
        -8.0928, -6.4763, -5.5991, -5.1012], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9377, -6.5656, -6.3624, -6.5257, -5.9330, -5.9199, -7.2418, -6.9496,
        -6.4071, -4.7791, -4.4054, -4.7542, -7.2459, -7.2661, -7.1962, -5.4520,
        -4.5901, -3.8802, -6.0186, -8.0942], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2113, -7.1861, -6.7552, -5.7128, -5.2922, -5.6961, -4.7380, -7.3541,
        -6.9536, -5.8075, -5.2580, -4.3051, -5.2193, -5.8868, -7.6778, -6.4007,
        -7.1239, -4.9277, -3.4890, -5.9951], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3461, -5.4276, -8.1963, -6.6521, -5.4483, -4.9587, -4.1879, -5.1062,
        -7.3765, -6.6802, -6.0782, -4.9903, -4.7303, -5.1913, -5.9691, -7.2036,
        -7.0034, -5.9229, -4.2859, -6.4004], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5664, -8.0159, -6.9257, -6.2133, -4.9289, -6.5238, -5.7320, -6.5520,
        -6.7311, -6.8091, -5.9228, -4.7845, -4.4448, -5.2933, -7.5901, -7.3789,
        -5.8216, -5.2113, -4.0810, -4.6374], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0793, -10.3453,  -6.5991,  -7.1994,  -8.0268,  -6.4426,  -7.6733,
         -5.2328,  -8.0515,  -7.8088,  -5.7643,  -5.1838,  -4.3871,  -5.6813,
         -7.7704,  -6.8276,  -6.0590,  -4.9364,  -5.3908,  -4.9820],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9192, -5.7243, -4.9362, -6.2965, -4.6487, -6.5762, -7.4750, -7.0101,
        -5.4086, -5.7355, -6.5872, -4.7811, -6.4971, -7.3208, -6.7970, -5.7538,
        -5.2490, -4.0127, -6.4544, -7.6076], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1474, -6.6514, -5.9980, -7.4118, -4.9599, -6.5649, -7.6083, -7.4082,
        -4.8316, -4.8893, -5.1630, -5.5080, -7.4407, -7.0433, -5.5890, -5.2749,
        -4.7590, -5.4843, -7.5686, -7.3850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1776, -7.4779, -6.1142, -5.2980, -4.5169, -5.7453, -8.1068, -6.7312,
        -6.3911, -4.6649, -7.3033, -5.9596, -5.6114, -7.1808, -6.8974, -6.1051,
        -4.6639, -4.4353, -5.3414, -8.0385], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2119, -7.5717, -4.6459, -7.9794, -8.1437, -6.0390, -4.8573, -4.2631,
        -7.0908, -8.0149, -6.2919, -5.7460, -6.2058, -9.3221, -6.0560, -7.4936,
        -6.9115, -7.9443, -5.3909, -8.2451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8298, -6.6238, -7.3588, -5.7882, -6.0461, -4.2085, -8.3594, -8.1767,
        -6.4889, -5.9773, -8.2571, -7.6193, -6.9509, -7.0603, -8.1599, -5.2193,
        -8.8664, -7.9789, -5.2832, -5.3281], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7290, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7608,  -5.0208,  -6.7297,  -6.8998,  -7.3350,  -5.0789,  -4.8926,
         -4.1437,  -5.6463,  -7.7744,  -6.4804,  -5.6898,  -5.8626, -11.8517,
         -6.2793,  -8.1103,  -4.9637,  -8.3337,  -7.1049,  -6.2117],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5229, -8.0026, -7.0341, -5.9048, -4.9702, -5.8146, -5.5423, -6.1482,
        -7.2183, -6.9612, -5.3935, -4.8322, -5.1901, -5.4861, -8.1299, -6.5726,
        -6.3172, -7.8038, -4.5050, -5.9145], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1676,  -6.3145,  -6.6862,  -5.1715, -10.3687,  -6.3264,  -7.4672,
         -7.9085,  -6.3473,  -8.5977,  -7.9214,  -6.4407,  -5.4179,  -4.5545,
         -4.6624,  -6.7132,  -6.7854,  -7.0568,  -4.8078,  -4.7343],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8133, -7.3941, -5.1702, -4.6693, -5.5564, -5.2654, -7.8043, -7.6446,
        -5.3408, -6.0797, -8.6385, -7.6576, -6.9956, -7.8786, -4.8155, -8.2487,
        -6.8689, -5.7135, -5.0666, -6.8596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2415, -7.7387, -7.1422, -5.4711, -6.1529, -6.1367, -5.6928, -7.2019,
        -7.5679, -7.5349, -4.2973, -4.9787, -4.6815, -6.3142, -7.3993, -6.9099,
        -5.4954, -4.5514, -4.2395, -5.8244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2490, -4.7745, -5.5836, -5.0809, -6.9626, -7.3890, -7.0755, -5.8122,
        -5.2081, -4.6844, -4.6549, -7.8607, -7.5064, -5.4162, -5.0612, -4.2094,
        -5.9031, -8.1820, -6.6851, -6.2963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5194, -6.5863, -7.6490, -6.8404, -5.3993, -5.1618, -5.6350, -5.5745,
        -6.7421, -7.0702, -7.5611, -4.8836, -4.8657, -4.1945, -5.9353, -8.3037,
        -6.7646, -5.3634, -4.6787, -5.5397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9899, -5.1960, -4.3535, -5.3362, -7.8572, -7.1113, -5.6112, -5.4728,
        -5.7457, -5.4477, -5.8124, -7.4854, -6.8097, -5.7692, -5.2559, -3.9995,
        -6.7634, -7.6832, -6.8705, -5.5346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4150, -5.0898, -5.2398, -4.9406, -7.8683, -6.9810, -6.1339, -4.7813,
        -5.3528, -5.1937, -7.8722, -6.5195, -5.1235, -4.8298, -4.1491, -4.8315,
        -8.0185, -7.2154, -5.5468, -5.6000], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9531, -10.4505,  -7.5161,  -6.8636,  -7.8627,  -5.1882,  -8.4295,
         -7.9633,  -6.1753,  -6.0236,  -3.9614,  -7.7505,  -7.9369,  -6.2680,
         -6.1026,  -6.3892,  -9.3469,  -8.2669,  -7.5381,  -7.4251],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3586, -5.0368, -6.7378, -7.0423, -6.2369, -7.8179, -7.0415, -7.5522,
        -7.0250, -5.5661, -6.8290, -5.4684, -7.7550, -6.8423, -6.2663, -5.9184,
        -6.1206, -6.0055, -7.0877, -6.7170], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0358, -6.2928, -7.5242, -7.2488, -5.4495, -4.8968, -4.9506, -6.6629,
        -7.8953, -6.7524, -5.9131, -5.2710, -4.6535, -5.7791, -7.3484, -6.9195,
        -5.6224, -4.3681, -4.1111, -5.4814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1995, -9.5623, -8.0894, -6.7362, -7.6181, -5.1807, -8.2406, -7.7996,
        -5.2899, -5.0915, -4.9656, -4.4181, -8.1330, -7.2912, -6.1024, -5.0772,
        -4.4785, -4.7853, -6.5371, -6.7760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5020, -4.7614, -4.1799, -5.8091, -8.0082, -7.2086, -5.3616, -4.8714,
        -4.6398, -5.4574, -7.3366, -6.6078, -7.1252, -8.0542, -4.5330, -5.4111,
        -7.7193, -6.9042, -6.9453, -5.6503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1043, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6985, -5.7263, -5.2023, -9.7381, -8.0708, -5.5655, -5.6199, -6.3829,
        -7.9207, -6.5046, -6.7398, -7.5379, -5.3484, -5.7272, -7.5340, -7.1164,
        -5.6639, -4.6897, -4.6604, -5.3457], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6315, -6.4754, -8.3495, -7.6849, -4.8472, -6.0590, -5.2037, -6.9366,
        -5.9904, -7.7200, -6.4853, -7.9869, -7.6346, -5.9391, -5.5484, -6.7903,
        -6.1124, -7.7962, -4.9565, -7.7285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5829, -7.9331, -6.3262, -6.3244, -8.3058, -6.6360, -5.7127, -6.0883,
        -9.0104, -6.4875, -8.2529, -4.9459, -7.7303, -7.6367, -6.3223, -5.9512,
        -5.4078, -5.0264, -5.5751, -7.3207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2793, -6.7394, -5.9780, -4.9657, -3.9437, -5.8261, -6.8239, -7.7777,
        -7.0665, -7.4088, -5.0000, -4.3540, -6.5502, -7.2475, -7.2890, -5.6553,
        -5.2702, -3.9242, -5.1760, -7.5343], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2604, -5.8734, -5.9213, -5.0727, -6.7831, -8.0753, -7.3804, -6.6457,
        -4.8180, -5.2550, -4.9732, -7.2385, -6.8806, -5.4538, -5.1410, -4.0689,
        -5.6097, -7.9964, -7.2876, -5.8384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8275, -7.2018, -5.4547, -4.4633, -5.8252, -5.4257, -8.0694, -6.8465,
        -5.5349, -5.7140, -4.5153, -5.5205, -5.9817, -6.2583, -7.2238, -6.7159,
        -7.7336, -5.0101, -8.4461, -7.4319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0801, -6.8385, -5.3542, -5.3055, -7.0144, -7.8016, -7.2690, -5.5276,
        -5.0051, -4.6697, -6.3270, -8.0681, -6.2706, -6.2298, -7.8306, -4.4347,
        -5.7789, -6.7869, -7.3360, -7.2655], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0851, -6.6704, -8.0534, -5.0787, -8.3116, -7.0023, -5.5596, -4.7680,
        -4.8898, -5.1963, -7.8289, -7.0177, -5.2320, -4.8229, -5.3126, -5.4675,
        -6.4464, -7.1297, -7.0653, -5.5302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9303, -4.7604, -5.2012, -7.3990, -7.1955, -6.4102, -5.0391, -4.8154,
        -6.4236, -7.3323, -6.6634, -6.0619, -5.0849, -5.7307, -5.0485, -8.1275,
        -6.9690, -6.0244, -7.0292, -6.9309], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1051,  -3.8104,  -8.4205,  -7.7777,  -6.2744,  -6.7329,  -6.0785,
        -10.7803,  -6.2116,  -6.8811,  -6.4411,  -7.8674,  -7.6359,  -5.4230,
         -8.4549,  -7.9933,  -5.6530,  -5.3327,  -5.3235,  -5.6747],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0889, -6.8008, -5.7102, -5.2209, -4.7443, -5.7822, -6.1970, -7.2754,
        -6.9891, -6.5932, -4.4312, -6.1329, -4.6300, -7.4316, -7.7253, -6.2447,
        -6.5393, -4.7916, -4.6738, -6.3384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2556, -8.3900, -7.8656, -5.0976, -5.0295, -4.2194, -5.5358, -7.5791,
        -6.7556, -6.0868, -4.2718, -8.5503, -7.2445, -7.8692, -7.9106, -4.9184,
        -8.7468, -7.4156, -5.8462, -5.9751], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2151,  -4.8402,  -6.5822,  -7.7646,  -6.4990,  -5.6296,  -4.9403,
         -4.9510,  -4.6864,  -7.8058,  -7.4233,  -5.5300,  -7.1076, -14.1378,
         -4.4562,  -7.3794,  -5.1789,  -7.9407,  -7.3052,  -5.3590],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9475, -5.7034, -7.0638, -7.9062, -7.4411, -6.4614, -4.7532, -6.1072,
        -4.9067, -6.9415, -6.8740, -5.7111, -4.6994, -4.7404, -5.9903, -7.7681,
        -8.1445, -7.8445, -6.8497, -4.5138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5307, -5.4190, -4.9041, -3.8284, -5.7464, -8.0025, -6.8457, -5.4015,
        -4.4871, -4.1723, -5.2880, -7.9003, -6.9893, -5.8489, -6.3996, -6.9786,
        -5.8271, -5.7863, -7.2913, -7.0618], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1773, -6.7629, -5.4660, -5.4492, -7.2093, -5.0440, -6.6462, -7.2308,
        -6.9227, -5.7064, -4.8406, -4.0971, -6.2327, -7.0483, -6.5595, -5.1918,
        -4.5297, -4.2923, -5.8127, -7.8646], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3748, -7.6468, -5.5054, -5.3486, -4.5652, -6.2196, -7.2724, -6.9372,
        -6.4304, -6.1073, -4.9814, -7.1918, -7.5156, -8.2931, -4.9837, -9.8191,
        -7.4259, -6.1273, -6.6537, -5.9152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6985, -5.7263, -5.2023, -9.7381, -8.0708, -5.5655, -5.6199, -6.3829,
        -7.9207, -6.5046, -6.7398, -7.5379, -5.3484, -5.7272, -7.5340, -7.1164,
        -5.6639, -4.6897, -4.6604, -5.3457], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1171, -4.7899, -4.7578, -4.0314, -6.3831, -7.6867, -6.9763, -5.5127,
        -4.9502, -4.2558, -4.8487, -7.4141, -7.2187, -5.0809, -4.8326, -4.1103,
        -5.8450, -7.3932, -7.0769, -5.9021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4523, -5.2286, -5.5659, -6.8600, -7.1577, -7.3287, -4.7088, -4.9371,
        -4.4902, -5.4953, -7.8941, -6.7401, -5.5230, -4.9207, -6.5972, -4.8434,
        -6.9439, -7.9573, -6.5135, -5.8916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5529, -7.3603, -7.8169, -5.3083, -8.0007, -7.1780, -5.1990, -5.3617,
        -4.5797, -5.0379, -6.1275, -7.9008, -7.4411, -3.9249, -5.4577, -5.1207,
        -7.0942, -8.1668, -6.5331, -6.0264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8094, -8.0816, -7.7129, -5.7897, -5.4456, -5.1060, -5.1492, -6.2657,
        -7.3797, -7.4235, -4.4293, -4.7057, -4.2388, -5.9183, -8.0136, -6.8210,
        -6.2971, -6.4241, -4.7214, -5.7694], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1316, -5.9392, -7.4953, -6.6976, -7.7385, -7.5893, -6.2866, -6.0331,
        -5.0457, -5.6558, -5.4441, -7.3477, -7.0629, -6.3449, -4.8543, -4.4127,
        -6.7417, -7.7567, -6.9019, -5.9483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2231, -6.5427, -5.8363, -4.6771, -5.9935, -6.1451, -7.6029, -7.1607,
        -5.6459, -4.8678, -4.6292, -6.9582, -8.0193, -5.9949, -6.1120, -4.6071,
        -7.8168, -7.8970, -7.5728, -7.6711], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8878, -7.0384, -6.6915, -4.4845, -4.0573, -6.2420, -7.3597, -7.1952,
        -5.2258, -4.4592, -4.6970, -5.1744, -7.5180, -7.3038, -6.0500, -5.2865,
        -6.8926, -9.5946, -6.7424, -5.5087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2705, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2350, -5.9480, -4.6447, -4.6595, -6.6291, -7.6769, -7.0309, -5.1719,
        -4.3764, -4.9564, -5.3554, -7.2845, -7.2789, -5.4315, -5.1578, -4.8925,
        -5.5560, -6.0707, -7.2601, -6.9334], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2964,  -6.9213,  -6.6668, -14.9904,  -6.5980,  -7.0238,  -7.3972,
         -6.6560,  -7.7947,  -5.4145,  -8.6905,  -8.0775,  -5.0055,  -5.2219,
         -7.1147,  -4.2341,  -6.2511,  -7.7542,  -6.0601,  -6.1445],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7157, -7.1128, -5.7414, -5.3354, -4.1574, -6.4511, -7.9339, -6.7167,
        -5.7468, -6.9162, -6.4904, -6.2685, -6.3968, -7.5626, -7.2917, -4.4978,
        -5.3424, -4.4579, -5.6179, -7.2767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9815,  -5.3723,  -3.8387,  -7.2084,  -8.3509,  -6.7318,  -5.6373,
         -6.5334, -11.8442,  -7.6327,  -7.8329,  -5.3436,  -8.0814,  -7.5954,
         -5.8873,  -4.9337,  -5.1664,  -4.1315,  -8.2218,  -7.0606],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5807, -7.8748, -6.5579, -6.1371, -4.7147, -6.8976, -5.7643, -6.1966,
        -7.1946, -7.3841, -5.2719, -5.1360, -4.8479, -5.2124, -7.5506, -7.7371,
        -7.4623, -6.5093, -4.4793, -5.5129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6933, -7.4202, -5.6993, -5.1213, -4.3786, -6.6234, -7.1905, -6.7283,
        -6.1874, -5.3409, -8.3668, -5.9048, -7.6651, -6.9415, -7.6013, -6.3979,
        -8.4833, -7.7363, -5.5816, -5.7586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4641, -5.9242, -6.9216, -7.2997, -7.0604, -6.7540, -7.9244, -5.6504,
        -8.3395, -7.0380, -5.6313, -5.8285, -5.6025, -4.4888, -6.9392, -7.3469,
        -7.3227, -5.3533, -5.1451, -4.5261], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5352, -4.8092, -6.2387, -7.3988, -7.5369, -4.7344, -4.5163, -4.4133,
        -4.8098, -7.2420, -7.1658, -5.0279, -5.1844, -5.0046, -4.8328, -7.5952,
        -7.7515, -5.4681, -5.1516, -5.0134], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4057, -6.7093, -5.9448, -5.8852, -5.2430, -6.2278, -6.8854, -7.6370,
        -7.5527, -5.7453, -5.2988, -8.1314, -5.2967, -6.7582, -7.8486, -6.9868,
        -6.6606, -5.6613, -9.9224, -6.9129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2007, -7.4538, -7.3292, -5.2940, -4.7623, -6.7954, -5.2060, -6.5638,
        -7.1087, -6.9528, -5.5775, -5.3087, -6.2610, -5.4632, -6.2183, -7.8705,
        -7.0827, -5.6103, -4.9221, -4.4322], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4725, -5.6122, -4.4579, -4.3164, -4.9316, -7.1879, -7.0097, -5.9408,
        -4.5916, -5.2011, -5.2687, -6.8617, -7.2218, -7.2550, -5.2214, -4.8675,
        -4.1429, -5.1817, -7.5236, -7.5467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1811,  -5.5114,  -6.7348, -13.5941,  -5.7664,  -7.3261,  -5.4584,
         -7.4039,  -6.6164,  -5.3956,  -5.1485,  -5.1055,  -4.7548,  -6.1720,
         -7.6514,  -7.0540,  -6.6190,  -5.6003,  -5.6713,  -5.4332],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3662, -4.7636, -4.0651, -5.5183, -8.0869, -7.7709, -5.9869, -5.5150,
        -4.2863, -5.5566, -5.8578, -7.6933, -7.3951, -5.3857, -5.0330, -4.3128,
        -6.8243, -8.0328, -7.4692, -5.7989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4064, -5.4700, -7.7713, -7.6460, -7.0793, -5.8351, -4.7959, -7.2913,
        -7.9760, -6.7837, -6.4493, -6.3311, -8.5445, -6.5384, -6.8944, -7.9281,
        -5.9413, -8.2459, -7.5592, -6.0915], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.8789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0581, -6.8374, -6.6708, -5.5684, -6.2261, -4.7437, -6.1405, -7.1758,
        -7.0846, -5.1921, -4.8257, -3.9111, -5.8500, -8.2525, -6.5943, -5.9793,
        -4.4598, -5.1148, -5.8226, -7.5286], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3800, -6.3713, -8.1484, -6.1212, -8.8735, -7.8239, -6.7676, -5.5472,
        -4.4466, -5.6902, -8.1046, -6.9943, -6.4807, -5.9619, -4.5124, -5.1482,
        -7.1970, -7.7052, -6.9107, -5.0095], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3508, -6.4048, -7.4472, -7.2527, -6.1396, -5.0099, -8.6656, -5.0030,
        -7.8436, -6.9952, -5.1842, -6.5102, -5.2527, -6.2584, -7.2345, -7.9052,
        -7.4848, -6.5449, -5.5864, -4.6280], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3497, -7.6477, -5.5972, -4.1492, -4.9914, -6.8633, -7.0091, -6.3915,
        -4.2925, -5.0876, -6.1207, -7.6272, -7.1635, -5.4398, -5.0845, -5.7275,
        -6.0916, -7.6580, -7.1090, -6.1008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6212, -5.9790, -6.3632, -7.3771, -6.9131, -5.3704, -4.7232, -4.4245,
        -4.9997, -7.8461, -6.9026, -6.1929, -4.8163, -5.7295, -5.3188, -8.0267,
        -6.8640, -5.4786, -4.9795, -4.3093], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3614, -6.8818, -4.7077, -3.9310, -5.9171, -7.4175, -6.8593, -6.3477,
        -4.5535, -4.4830, -5.2185, -7.1709, -7.5543, -7.4010, -5.2154, -4.5012,
        -5.4762, -4.9219, -7.9121, -7.1706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0711, -7.9624, -5.8661, -7.9380, -6.6489, -5.9489, -4.8342, -5.5127,
        -5.5679, -5.5519, -7.1381, -7.5953, -5.0481, -4.9962, -4.3439, -9.1685,
        -7.9701, -5.3472, -5.9529, -6.7392], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0386, -4.5210, -4.9137, -4.9568, -5.5826, -7.4584, -7.1276, -6.4685,
        -4.9497, -4.9402, -6.0686, -7.8554, -6.5310, -4.8251, -5.8537, -4.6586,
        -6.9722, -7.6728, -6.7145, -6.1607], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2876, -8.1124, -7.5156, -4.3098, -5.1238, -5.6383, -6.8137, -7.7131,
        -6.8705, -5.4561, -4.5617, -6.5303, -6.3804, -8.7189, -6.5701, -8.3359,
        -7.2289, -5.6235, -5.3216, -5.1570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1372,  -6.3685,  -8.3442,  -4.9462,  -8.3316,  -7.3104,  -6.7237,
         -5.3795,  -4.8171,  -6.1751,  -7.7856,  -7.0284,  -6.0713,  -6.2333,
         -6.8543,  -5.1378,  -5.8874,  -7.1808,  -7.2035,  -6.1072],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9254, -6.5379, -7.2498, -6.7851, -5.9218, -4.9703, -6.8372, -5.4895,
        -6.5600, -6.9367, -6.6370, -5.6396, -5.4600, -4.8664, -4.5181, -6.2002,
        -7.1563, -6.2939, -5.9571, -4.8798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1683, -7.4724, -5.5319, -5.6037, -4.9072, -6.1490, -7.8605, -7.1986,
        -4.9475, -5.4490, -3.7857, -7.0198, -8.1834, -6.5696, -5.1826, -4.7295,
        -5.2034, -6.0139, -7.6911, -6.8397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7762, -7.6239, -6.8467, -6.5331, -5.1899, -4.0987, -5.4505, -7.2454,
        -6.9702, -5.7435, -4.5020, -4.4378, -5.6349, -7.6213, -6.2733, -6.2867,
        -4.8078, -4.3725, -5.9129, -7.2190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2049, -5.2960, -7.5686, -6.5358, -5.8794, -4.7387, -5.7251, -5.0140,
        -6.7481, -7.2443, -7.1585, -5.2230, -4.8152, -4.0369, -5.0618, -7.6258,
        -6.8414, -6.2638, -6.1327, -8.8887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7283, -7.0993, -7.2620, -4.7221, -5.6579, -5.9894, -5.6184, -8.2436,
        -6.6216, -5.7510, -5.1932, -5.7946, -5.0576, -6.1620, -6.9114, -7.3481,
        -5.4613, -5.5410, -4.4941, -6.2516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5893, -8.5447, -7.6817, -5.6072, -5.6979, -5.0084, -5.2319, -6.2259,
        -7.5322, -6.6951, -5.1384, -5.3176, -5.3051, -5.2490, -7.6024, -7.2822,
        -5.9545, -5.7122, -4.8219, -5.9160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0051, -6.7823, -7.0039, -6.8739, -5.2864, -5.3991, -4.1018, -5.0462,
        -7.8658, -6.7760, -6.0699, -4.9701, -6.8183, -5.0585, -6.3733, -7.4679,
        -7.2095, -5.1580, -5.0518, -4.6336], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7098, -8.1743, -5.5211, -4.9706, -5.4242, -5.8411, -6.1750, -7.4401,
        -7.0019, -6.3250, -4.3736, -4.5776, -4.8337, -7.9996, -7.3673, -6.5561,
        -4.9172, -5.4647, -5.0882, -6.5388], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9332, -5.7197, -5.6124, -4.3282, -4.9059, -6.9029, -7.8561, -7.3588,
        -5.4167, -4.5233, -5.7141, -4.8318, -6.6470, -6.9742, -5.5965, -6.0524,
        -5.6018, -5.3199, -6.6076, -7.6578], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3568, -6.6660, -5.8712, -4.6234, -7.4561, -6.7766, -6.4292, -7.5554,
        -7.5281, -4.7105, -4.6621, -4.2098, -4.9935, -7.7941, -6.6120, -7.2587,
        -4.4088, -4.5890, -5.5557, -6.8927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3342, -7.2331, -6.7342, -5.9055, -4.9426, -4.3308, -5.5554, -7.5909,
        -6.8489, -5.7681, -5.1590, -4.6252, -6.2003, -7.7710, -6.6200, -5.6082,
        -5.5408, -4.3884, -5.2596, -7.2790], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0938, -4.4969, -5.9022, -7.5934, -6.9378, -6.6214, -6.4367, -4.7029,
        -5.7852, -7.8580, -7.5654, -5.9369, -5.7502, -4.9413, -6.2525, -8.0205,
        -6.5383, -6.3394, -5.3138, -5.2638], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3264, -6.9291, -5.7674, -5.6833, -4.1377, -5.1237, -7.3385, -7.1895,
        -6.0467, -5.1086, -5.6687, -4.9718, -6.7148, -7.0928, -7.0747, -5.2569,
        -4.5043, -4.9860, -5.0503, -7.3586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4068, -5.4979, -5.7990, -8.2177, -6.6779, -7.4417, -6.6895, -7.7441,
        -7.7443, -4.6585, -6.3321, -4.5248, -7.8103, -6.4998, -8.3003, -6.2254,
        -8.5370, -6.8365, -5.7095, -5.7887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0118, -6.7816, -5.3785, -4.9918, -6.3360, -5.5318, -6.3119, -6.9967,
        -7.1187, -6.7308, -5.1935, -6.5986, -4.8109, -6.3909, -7.1728, -5.5546,
        -6.3965, -4.2270, -7.0875, -6.2793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4931, -5.3865, -8.4730, -7.0217, -5.7247, -5.4323, -4.9677, -5.8176,
        -6.2421, -7.6783, -7.3244, -6.3535, -4.9172, -4.7015, -5.5718, -7.4491,
        -7.0649, -5.1312, -5.0755, -5.5776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6409, -5.0059, -4.9580, -4.0151, -5.0110, -7.5845, -6.9877, -5.1297,
        -5.0756, -4.7005, -6.4230, -7.8836, -7.0741, -5.3063, -5.1252, -5.4520,
        -4.9989, -6.9740, -7.0198, -7.1861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2883, -6.8749, -5.8618, -4.8371, -4.9466, -5.9775, -7.0786, -6.8246,
        -5.3870, -5.2565, -5.1104, -6.0648, -6.1850, -7.5271, -6.5213, -4.3100,
        -5.0570, -4.7961, -5.3986, -8.1793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3201, -6.8794, -4.9515, -6.6265, -6.4139, -7.2623, -5.5551, -5.9037,
        -4.5629, -4.4108, -8.6267, -7.3199, -6.1191, -7.4573, -5.7850, -4.9150,
        -6.1813, -7.2108, -6.4918, -7.2852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3842, -5.9510, -5.8601, -6.1024, -7.2849, -6.8155, -5.9806, -4.5024,
        -5.5472, -4.6192, -6.3496, -7.0470, -7.2951, -5.3827, -4.7845, -5.2743,
        -6.2583, -6.4631, -7.1557, -7.1056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8577, -4.9171, -4.8076, -6.4199, -7.8379, -6.6850, -5.4259, -5.5468,
        -4.1223, -5.5287, -7.5029, -6.6277, -5.9316, -5.0583, -6.4855, -6.5081,
        -7.8245, -6.7633, -6.0043, -5.7407], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5028, -7.5520, -7.1976, -6.0866, -4.7579, -4.4223, -5.1651, -6.7890,
        -6.7240, -7.3145, -5.5299, -4.6364, -7.1121, -4.9850, -7.0507, -7.1008,
        -7.1405, -6.7588, -5.5461, -5.1095], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7146, -5.5272, -4.7899, -4.1885, -6.0758, -8.0125, -6.1883, -5.9471,
        -7.3182, -9.2422, -6.3175, -6.3872, -7.1378, -6.4219, -6.9523, -8.0857,
        -5.9972, -6.4647, -6.4163, -6.6917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9230, -4.7864, -4.5640, -6.4075, -7.7039, -6.6852, -5.8635, -4.7190,
        -7.1147, -5.2213, -6.7476, -7.3788, -6.7037, -6.3057, -4.6616, -4.5116,
        -5.0881, -6.8885, -7.4905, -7.1478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3791,  -8.2469,  -6.5034,  -6.4103,  -5.2366, -10.9824,  -6.5993,
         -8.2754,  -4.9991,  -8.2830,  -7.7190,  -5.7435,  -6.1724,  -5.1105,
         -4.9785,  -5.4389,  -7.5407,  -7.2668,  -5.5735,  -4.9126],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6697, -7.3491, -5.6606, -5.1315, -4.7936, -5.2410, -7.5584, -7.0173,
        -6.0794, -5.5497, -5.4035, -5.1558, -6.5577, -7.1699, -7.0909, -5.4850,
        -5.5837, -4.4370, -6.4838, -7.6113], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0231, -5.5717, -4.7523, -5.5192, -4.6660, -8.2557, -6.7293, -5.8861,
        -4.7799, -5.9565, -5.8159, -8.3722, -7.3376, -7.1155, -7.4941, -4.9344,
        -6.1544, -8.2589, -6.7645, -5.1653], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4534,  -5.4717,  -8.3639,  -7.8793,  -5.1039,  -5.9709,  -4.1838,
         -7.9615,  -8.1495,  -6.4059,  -6.0662,  -5.9983, -11.0201,  -6.5837,
         -5.7304,  -7.4548,  -6.7110,  -7.6776,  -4.4100,  -8.2246],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9220, -7.0395, -5.9579, -5.6008, -5.8593, -4.1656, -8.0419, -7.3617,
        -6.4309, -5.9639, -7.0199, -5.3851, -4.7115, -6.5555, -5.8410, -7.3299,
        -7.0206, -9.2939, -6.4501, -6.0840], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5017, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9194,  -5.2451, -10.7454,  -5.4726,  -7.6461,  -5.3663,  -7.8937,
         -7.4285,  -6.0893,  -5.5380,  -5.2518,  -5.1200,  -6.8403,  -7.1455,
         -6.9746,  -5.5992,  -4.4429,  -4.0273,  -5.1740,  -7.5796],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4279, -5.6254, -4.4400, -6.2110, -7.6818, -7.0631, -4.9748, -5.1020,
        -4.1824, -6.4965, -7.3088, -7.1425, -5.7138, -4.4533, -4.8521, -6.9650,
        -7.6941, -7.0837, -6.9277, -5.3467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0346, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1322, -5.0174, -7.8708, -7.3860, -5.5266, -5.2374, -5.1030, -6.1283,
        -7.8111, -7.7960, -7.4696, -4.7928, -5.1692, -5.8423, -6.2762, -7.7459,
        -6.8924, -5.8913, -5.3985, -6.2766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8394,  -6.2472,  -7.9427,  -5.1792,  -6.2029,  -7.6389,  -5.3557,
         -6.6432,  -4.8303,  -5.8265,  -4.6492,  -7.7885,  -7.0480,  -5.9102,
         -7.5665,  -6.4830, -13.2739,  -5.1731,  -7.8634,  -6.0683],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0414, -4.8626, -7.8343, -7.1806, -7.0526, -7.6581, -5.1688, -7.6553,
        -6.7875, -5.5402, -5.1061, -7.5202, -5.2756, -6.5875, -7.2633, -7.7042,
        -5.4969, -4.8882, -6.0343, -7.7693], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5376, -5.9254, -7.8609, -5.6901, -5.8083, -6.0293, -7.1939, -7.4329,
        -5.2946, -5.7839, -6.1464, -4.7443, -6.7256, -7.2104, -7.4291, -6.2514,
        -4.9883, -4.7244, -6.6725, -8.2783], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2481, -6.8474, -5.7056, -5.2579, -4.2924, -5.2427, -7.3328, -6.6403,
        -5.3899, -5.1584, -4.4254, -5.6024, -7.6327, -6.9866, -5.3793, -4.7383,
        -4.2285, -7.5915, -8.0603, -6.3301], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8057, -5.4250, -6.6396, -5.0762, -6.6338, -7.4343, -6.9967, -4.5856,
        -4.9114, -4.0691, -5.8156, -7.3043, -7.0550, -4.9411, -4.8936, -5.1930,
        -6.7589, -8.0846, -6.9716, -6.3333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9468, -5.7744, -5.1478, -4.7314, -4.8255, -8.0846, -7.1611, -5.7885,
        -4.8133, -4.4569, -6.1521, -7.4699, -6.6751, -5.9600, -4.7440, -5.2168,
        -5.3676, -6.5242, -7.1082, -7.7326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2411, -6.9037, -5.3905, -5.2923, -5.9637, -5.3013, -5.6993, -7.2780,
        -7.4191, -5.9648, -5.0446, -6.4065, -4.4736, -6.9634, -7.1015, -7.1081,
        -6.0534, -4.5256, -6.0474, -5.0450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0944, -4.7155, -5.8624, -7.5273, -6.7640, -6.0120, -4.4217, -5.0262,
        -5.1379, -6.0813, -6.9104, -5.0084, -6.1804, -4.2316, -4.9204, -6.3617,
        -7.4250, -6.4956, -5.8012, -4.6398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8154, -5.7570, -7.9179, -7.4513, -5.4975, -4.7108, -8.0601, -6.8957,
        -9.1078, -4.9558, -8.9320, -6.7654, -5.9755, -6.3417, -4.4622, -5.8586,
        -8.0840, -6.6681, -6.6127, -5.7070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7033, -6.6080, -8.1201, -4.9608, -6.8787, -7.0293, -5.1619, -5.7328,
        -4.4467, -5.2631, -7.3771, -7.8289, -7.5299, -6.0722, -4.5119, -4.0183,
        -5.3654, -7.0142, -6.4750, -5.8941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2747, -8.4429, -6.7254, -5.6509, -5.8880, -4.6702, -5.8504, -7.3467,
        -7.8329, -6.9063, -7.3521, -5.5214, -7.2436, -5.0298, -6.9614, -7.0669,
        -6.8465, -5.0144, -5.0392, -4.9268], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9557,  -6.5390,  -7.9565,  -5.2426,  -8.1617,  -7.9831,  -5.5462,
         -5.4114,  -4.5036,  -6.4193,  -7.2826,  -7.0032,  -5.5161,  -4.3551,
         -4.4236,  -5.5024,  -7.7791,  -7.0763,  -5.7868,  -4.7659],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7447,  -5.2942,  -7.5424, -15.3269,  -5.3005,  -7.1305,  -5.2522,
         -6.4405,  -6.9582,  -6.1438,  -6.1169,  -4.3670,  -7.3339,  -7.6912,
         -8.0626,  -4.8326,  -9.0488,  -7.5861,  -5.7796,  -5.5702],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0858, -6.6289, -6.1009, -5.9007, -5.1338, -5.5539, -6.2336, -6.8012,
        -7.7999, -7.4694, -7.1750, -8.0358, -4.2585, -4.6950, -6.8048, -7.0078,
        -7.0387, -5.4794, -4.5009, -4.9179], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5434,  -7.1148,  -5.7490,  -5.3089,  -4.6703,  -5.4360,  -8.2747,
         -7.3186,  -6.6363,  -5.1991,  -6.1701,  -4.8480,  -6.3926,  -7.6851,
         -6.5290,  -6.0881,  -5.4284, -15.9194,  -4.6945,  -7.5760],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7885, -4.6154, -4.4619, -6.2448, -7.5395, -7.0527, -5.5098, -4.9635,
        -4.2895, -6.4657, -8.0169, -7.1659, -5.8746, -4.5441, -6.5213, -4.8100,
        -6.8539, -7.2465, -7.3507, -5.3610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6709, -5.9572, -7.7584, -7.5713, -7.7866, -7.3578, -5.7017, -5.5597,
        -8.3361, -6.5618, -5.5152, -5.6502, -4.7433, -5.6427, -7.9101, -6.8569,
        -6.7555, -5.3794, -5.8151, -4.9841], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3326, -7.1073, -6.9361, -4.8001, -4.7718, -6.1885, -5.2424, -7.7989,
        -6.7444, -5.3623, -7.1670, -6.3075, -5.6444, -5.0519, -7.0533, -7.1167,
        -7.4725, -5.3429, -4.8439, -4.7257], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5698, -6.0256, -7.0000, -5.9632, -7.6213, -7.0084, -5.8887, -5.8898,
        -6.0118, -8.1143, -7.9846, -6.8062, -6.5258, -4.8813, -4.4951, -6.9799,
        -7.4638, -6.4714, -5.9086, -4.4497], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7408,  -5.2084, -10.1226,  -6.6767,  -9.0270,  -4.8339,  -8.3799,
         -7.5559,  -6.7736,  -5.6532,  -6.0711,  -4.8844,  -5.4596,  -7.7784,
         -6.6980,  -5.5347,  -6.0980,  -5.3584,  -4.2325,  -6.4167],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8269, -7.6681, -6.9131, -6.2693, -4.5288, -3.3964, -5.5263, -6.9837,
        -6.5132, -6.3667, -6.5776, -3.8403, -5.9039, -6.5378, -7.7341, -7.8019,
        -5.3356, -5.1325, -6.6660, -5.4194], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0971, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3243,  -6.8099, -15.2097,  -6.4601,  -7.6286,  -4.4307,  -8.0372,
         -7.8379,  -6.6126,  -5.6427,  -4.8785,  -4.7014,  -6.2925,  -7.2224,
         -6.9803,  -4.6199,  -4.3241,  -3.7991,  -6.3308,  -8.0524],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9736,  -7.9540,  -7.8535,  -7.1238,  -5.4907,  -4.2662,  -6.8645,
         -7.8905,  -6.8290,  -6.9574,  -6.5775, -13.5769,  -7.4911,  -7.5492,
         -7.3317,  -6.7019,  -6.4428,  -7.9839,  -6.5806,  -5.4305],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0906, -6.5380, -7.2020, -7.1070, -5.1793, -4.2654, -3.8150, -6.4619,
        -6.9768, -6.8016, -5.4941, -4.2286, -5.4653, -5.1873, -6.7761, -6.9490,
        -6.9945, -5.4643, -4.3635, -4.9650], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2968,  -4.3400,  -4.6921,  -7.4832,  -7.2875,  -5.2412,  -4.4653,
         -4.2173,  -5.5435,  -8.2213,  -6.6215,  -5.9114,  -7.3231, -16.9153,
         -5.3375,  -7.4520,  -5.7950,  -8.6488,  -7.1437,  -5.4046],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7140, -5.4522, -4.5081, -5.0367, -4.4761, -6.8772, -7.5566, -6.8771,
        -5.3430, -4.3635, -4.3514, -5.2411, -7.8014, -6.8455, -5.6009, -5.5313,
        -4.9546, -5.3226, -6.1156, -7.4600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4851, -7.3733, -6.6608, -4.7612, -3.9877, -6.4373, -7.8656, -6.4765,
        -6.2109, -5.2177, -5.3438, -6.5832, -7.6886, -6.9944, -5.5437, -5.2445,
        -4.6456, -4.7657, -7.4572, -6.7787], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6126,  -5.5674,  -4.9672, -10.0552,  -7.7128,  -7.4644,  -7.6741,
         -4.5809,  -7.9857,  -7.2882,  -6.1798,  -5.3595,  -3.9024,  -5.8622,
         -7.5619,  -6.2365,  -5.7090,  -5.4788,  -5.3824,  -5.1820],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0956, -4.8685, -8.1090, -7.3714, -5.8761, -4.5575, -4.1954, -4.2660,
        -7.3994, -7.3917, -5.0324, -4.4048, -3.8242, -5.5697, -7.5212, -7.0808,
        -4.6779, -4.3613, -5.1398, -5.1685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2538, -3.9196, -5.9661, -7.1988, -6.5402, -5.6418, -5.3854, -4.6463,
        -4.2727, -6.3053, -6.7200, -7.3771, -6.1173, -3.9922, -3.7729, -6.4195,
        -7.8680, -5.9182, -5.7696, -6.8746], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9168,  -7.3808,  -6.1343,  -4.3534,  -5.0758, -13.7271,  -5.3786,
         -5.6827,  -6.4543,  -7.3068,  -9.5594,  -5.5423,  -5.9415,  -7.5460,
         -6.7803,  -8.2151,  -6.1223,  -5.7541,  -7.3429,  -6.7087],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6947, -7.3537, -7.4361, -4.9250, -7.6647, -6.9602, -4.9565, -5.9194,
        -5.1775, -4.5084, -5.7804, -7.1528, -6.5114, -5.9256, -4.7605, -4.2286,
        -4.8980, -7.0593, -7.0256, -7.0906], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8770, -4.2455, -3.7961, -5.9834, -7.8276, -7.3832, -5.7749, -4.1021,
        -3.8268, -6.6982, -7.8699, -7.1710, -5.2345, -3.9827, -7.6038, -7.6692,
        -7.8120, -7.9060, -4.8618, -8.8043], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0741, -6.7158, -5.4222, -4.8246, -6.0566, -6.8251, -6.4672, -8.1204,
        -5.9998, -8.5423, -7.6118, -5.5308, -5.3750, -7.7688, -4.6166, -5.2502,
        -6.9737, -7.2527, -6.6318, -7.8843], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9329, -5.6209, -5.5304, -5.0525, -6.2135, -7.3125, -7.0603, -5.1808,
        -4.5206, -4.3561, -4.9713, -7.8520, -7.0142, -6.3293, -4.9493, -4.0227,
        -4.9388, -7.7135, -7.3112, -6.0734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8978, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1580, -5.6029, -6.7977, -6.3553, -6.5888, -4.0593, -8.6870, -4.4534,
        -6.8131, -7.9653, -7.0381, -7.2711, -8.1294, -7.9846, -9.6263, -7.9188,
        -8.2524, -6.0801, -7.4689, -6.5735], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.9412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7401, -4.5295, -5.7898, -4.5551, -6.1180, -7.8647, -7.1153, -4.8209,
        -4.6333, -3.4787, -6.1627, -6.9116, -6.2367, -6.1113, -5.1717, -6.0986,
        -5.4863, -7.0548, -7.4341, -7.2767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8795, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2501, -5.7914, -7.2920, -6.9836, -4.6341, -4.8864, -3.7622, -6.2547,
        -7.7439, -6.7217, -5.9187, -6.1300, -5.8329, -7.1525, -6.4616, -8.0619,
        -4.5806, -8.5202, -6.7490, -4.8383], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1605, -5.8606, -4.8562, -6.6018, -6.9481, -6.6649, -5.6302, -4.4396,
        -4.5096, -4.8255, -6.4670, -7.1144, -6.9084, -5.8576, -5.8009, -5.2489,
        -5.1760, -6.3038, -7.1546, -6.5510], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5078, -4.5877, -5.5058, -7.7746, -7.0475, -5.1003, -5.8607, -4.6334,
        -5.2308, -6.9807, -7.4919, -6.8385, -4.7515, -4.4602, -3.6084, -5.3336,
        -7.7011, -6.7923, -6.0305, -4.2859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3762, -7.4116, -7.4491, -7.1317, -4.4686, -4.6077, -8.3801, -4.9602,
        -7.4048, -7.8387, -6.3976, -4.6493, -4.0519, -5.6297, -7.9550, -7.1272,
        -5.6213, -5.3528, -7.2647, -5.4676], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0826, -5.2080, -4.0534, -3.7998, -6.5474, -7.0765, -6.9032, -5.8528,
        -4.6439, -3.5045, -5.7315, -7.2039, -6.6221, -5.2143, -4.2053, -4.1883,
        -6.7195, -7.9925, -6.7163, -5.6144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7332, -4.1029, -4.5795, -7.3508, -6.9318, -5.7878, -4.5120, -5.1110,
        -4.1077, -6.7555, -7.0001, -6.7388, -5.6730, -4.4051, -4.0650, -5.5417,
        -7.6661, -6.9301, -5.4465, -4.6373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9134, -4.4592, -4.3179, -4.5581, -5.3801, -8.0223, -7.5134, -4.9341,
        -4.6816, -4.6085, -4.8128, -7.7877, -7.6245, -5.3216, -5.1793, -4.8932,
        -4.4917, -8.3126, -7.2607, -5.3748], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3022, -7.5998, -6.9638, -5.8369, -5.7069, -5.1536, -4.4290, -7.1133,
        -6.5614, -5.4131, -5.1200, -4.0004, -5.8469, -7.3406, -6.5875, -5.0032,
        -4.1362, -3.7984, -6.1467, -6.9930], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3397, -5.7159, -4.9771, -4.4592, -4.7579, -6.4206, -6.9676, -6.9795,
        -4.8069, -5.0341, -3.5821, -4.2985, -6.5000, -6.5266, -6.5656, -4.4196,
        -4.1837, -4.8052, -4.8144, -7.5725], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5397,  -7.4601,  -5.3028,  -5.6662,  -4.5482,  -5.2286,  -7.8887,
         -6.3055,  -6.3687,  -5.5895, -10.4583,  -6.7230,  -7.9500,  -4.7488,
         -5.4295,  -7.4503,  -6.7781,  -5.2517,  -4.4068,  -4.9421],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6162, -6.7432, -7.7636, -6.2735, -5.6228, -6.3825, -6.2656, -6.2332,
        -7.4492, -7.9775, -6.2166, -8.0815, -7.9517, -5.4475, -5.2380, -4.0493,
        -8.2971, -7.6624, -6.0342, -5.9862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1672, -8.1876, -5.9544, -5.9972, -3.5910, -4.8462, -8.2557, -6.3110,
        -5.2388, -5.7066, -9.8081, -6.9663, -6.7614, -7.9589, -4.9466, -7.6769,
        -6.9022, -5.1147, -5.0124, -4.5120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6940, -8.6898, -5.6279, -5.5013, -8.0167, -6.7455, -5.7015, -7.2332,
        -7.1289, -5.2149, -4.9419, -7.6554, -6.9187, -5.4249, -4.9718, -4.9159,
        -5.3318, -7.9025, -6.2961, -5.7270], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3799, -6.1475, -5.7391, -4.3953, -6.1093, -7.5694, -6.3370, -5.5155,
        -4.2749, -6.2254, -5.4860, -6.9305, -7.2956, -6.2098, -5.8670, -4.6070,
        -3.3660, -6.0981, -8.0811, -6.0670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2260, -6.9677, -6.9405, -5.0579, -5.2062, -5.4816, -6.3348, -5.3771,
        -7.4877, -5.7886, -7.4711, -5.3332, -7.8765, -6.6875, -6.5238, -6.3443,
        -4.1748, -4.6628, -5.8263, -8.0760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6865, -7.8423, -6.9040, -5.2968, -4.9277, -5.5835, -5.1811, -6.6614,
        -6.9450, -6.6505, -4.8902, -4.4062, -3.7882, -6.5366, -7.9158, -6.4506,
        -7.4000, -6.0711, -6.7844, -4.9337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7692, -6.1423, -5.0660, -5.7056, -6.9894, -6.7355, -5.8325, -4.3958,
        -3.6700, -5.1991, -7.1997, -6.7348, -4.8578, -4.3406, -3.8350, -4.9829,
        -7.0790, -6.6711, -5.7890, -4.2238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9191, -6.6911, -5.2030, -4.8396, -4.1042, -5.5767, -7.8401, -6.8296,
        -4.9127, -5.2352, -4.1554, -5.4745, -6.3564, -6.9650, -6.7631, -5.8690,
        -5.5362, -4.1549, -4.4084, -7.6873], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1022, -4.8559, -7.5685, -6.8928, -5.6534, -5.3240, -4.0456, -5.6500,
        -8.2712, -6.1865, -5.5874, -4.8120, -4.7226, -5.6478, -7.7368, -7.2918,
        -5.5182, -5.9131, -4.9355, -5.9635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8219, -6.9892, -6.0830, -4.7046, -4.1871, -5.1235, -7.6486, -6.6671,
        -5.3291, -4.6217, -4.7464, -4.1788, -7.0874, -7.6179, -7.1395, -4.7190,
        -4.5390, -3.8531, -6.9387, -7.8279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4574, -7.5443, -5.4700, -4.8398, -3.9060, -7.6349, -8.3044, -6.0167,
        -5.8719, -6.2601, -6.9379, -4.9480, -7.6232, -7.0639, -7.5973, -4.1136,
        -8.6886, -6.5437, -5.0327, -5.3661], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8022, -4.7987, -4.3008, -3.6261, -5.1544, -7.6228, -6.5440, -5.3957,
        -4.5226, -3.8751, -4.2527, -6.7782, -7.0346, -5.4591, -6.0739, -4.9045,
        -4.7383, -6.5684, -7.7987, -6.7954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4023, -9.0120, -6.9575, -4.6741, -5.1915, -4.6525, -5.1331, -7.3854,
        -6.4556, -6.7789, -6.7358, -4.9036, -5.1545, -7.7317, -6.7147, -5.8591,
        -4.8438, -4.7988, -5.7777, -7.7359], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1915,  -5.9533,  -7.9611,  -6.8487,  -6.2917,  -4.6101,  -5.8493,
         -4.9962,  -6.4177,  -7.3215,  -7.2798,  -5.9893,  -5.4997, -11.9939,
         -5.2204,  -7.5376,  -5.3333,  -8.3718,  -7.2980,  -6.1939],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7050, -7.3506, -5.0850, -6.3461, -7.3978, -7.1552, -5.2657, -4.3991,
        -4.0424, -5.2595, -7.5127, -7.4799, -4.9645, -4.2655, -3.9188, -4.8349,
        -6.9907, -6.8661, -5.8259, -4.5840], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6760,  -7.8876,  -5.3343,  -8.0821,  -7.5301,  -4.8701,  -5.0640,
         -4.4122,  -6.3823,  -7.2918,  -6.4736,  -5.4064,  -4.5998,  -4.1244,
         -5.6315,  -7.6847,  -6.7208,  -6.0721,  -5.4481, -13.5859],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5760,  -5.6543,  -7.0334,  -6.2463,  -5.7824,  -4.4660,  -6.1184,
         -5.2113,  -6.9910,  -6.8861,  -6.6467,  -5.6136,  -4.2294,  -4.0536,
         -4.8283,  -7.1806,  -6.8789,  -5.1202,  -4.8688, -11.2302],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7427,  -6.2791,  -8.3350,  -5.7374,  -8.1753,  -5.8103,  -7.5166,
         -8.4694,  -7.2544,  -5.1226,  -6.4683,  -4.5783,  -6.9661,  -7.0307,
         -5.6821,  -6.8848,  -8.2731,  -7.7685, -10.6003,  -8.3228],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8706, -6.7076, -7.4260, -7.9957, -7.4045, -7.3706, -7.0106, -7.8536,
        -7.1819, -7.3508, -7.3088, -7.0736, -7.4335, -7.2424, -6.9918, -7.1365,
        -7.0990, -7.0468, -7.5242, -6.9190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2474, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4345,  -5.2249,  -4.0459,  -5.5542,  -7.8494,  -6.5899,  -5.4365,
         -6.4627, -12.1574,  -7.1702,  -7.6875,  -7.7713,  -4.3532,  -8.1020,
         -7.3342,  -6.4581,  -5.0749,  -3.9934,  -5.1249,  -7.5222],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9000, -7.2992, -6.5392, -5.0552, -4.4598, -5.8342, -3.9990, -6.9676,
        -6.9376, -6.1221, -5.3140, -5.4268, -4.5385, -6.2492, -7.0305, -7.3422,
        -4.8871, -4.1358, -3.6760, -6.6284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3647, -6.5792, -7.0715, -6.7972, -5.7931, -5.4029, -3.9753, -4.3277,
        -7.7904, -7.0787, -5.0515, -6.8320, -7.2096, -7.9646, -8.4008, -5.4781,
        -4.7591, -7.0932, -6.0546, -6.5340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6736,  -5.4684,  -7.4666,  -6.7198,  -5.7453,  -6.0218,  -4.1568,
         -5.4074,  -6.9560,  -7.4042,  -4.1606,  -4.5949,  -4.9378,  -5.1833,
         -8.1863,  -6.5002,  -6.5066,  -6.0229, -11.7843,  -5.1257],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9647, -5.9440, -4.5992, -7.7193, -7.5126, -6.7317, -7.9888, -4.8450,
        -8.6399, -7.4219, -5.0310, -5.0352, -4.0925, -6.0721, -7.3554, -6.4830,
        -6.9118, -5.0264, -3.9125, -5.1176], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2997, -6.8588, -5.3926, -4.5737, -3.8369, -5.2167, -7.6428, -6.5542,
        -5.2052, -4.8425, -4.5493, -5.6159, -7.3282, -6.7126, -5.4164, -4.3528,
        -4.5675, -5.3775, -7.8811, -7.3251], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4873, -7.8571, -6.4169, -5.9695, -5.8047, -7.9597, -5.0368, -6.4789,
        -7.1638, -6.5445, -4.4734, -4.6072, -4.1584, -4.5606, -7.1638, -7.2719,
        -7.0571, -4.4522, -3.9594, -4.8938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9158, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5730, -4.1796, -5.5996, -8.1838, -6.7167, -5.5174, -6.7926, -7.8393,
        -6.3736, -7.0007, -8.2195, -4.5323, -8.5749, -7.0583, -5.3330, -4.7247,
        -5.0957, -4.3008, -6.7919, -7.3697], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1721,  -5.9476,  -6.3776, -10.8056,  -6.3055,  -8.4666,  -4.9161,
         -7.8228,  -7.0717,  -6.0135,  -4.9907,  -4.4188,  -4.8446,  -8.2984,
         -6.7581,  -5.9592,  -4.5814,  -3.8673,  -4.1568,  -6.7254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2397, -5.3670, -8.4246, -7.2361, -5.9214, -6.3726, -7.3966, -4.8920,
        -5.8277, -8.0751, -7.3526, -6.0036, -4.2942, -4.0564, -5.4948, -7.5529,
        -7.1635, -5.1947, -6.5619, -4.2761], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8426, -5.7719, -4.8800, -5.0196, -4.3085, -8.1654, -6.7275, -5.4233,
        -6.7706, -8.5278, -7.1708, -6.4579, -7.5740, -5.4758, -5.9928, -7.8182,
        -6.7689, -4.9613, -4.7266, -4.2706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3367, -5.7586, -4.2212, -7.1879, -6.9887, -5.6166, -5.4724, -4.6611,
        -4.9392, -6.0957, -7.7068, -6.8451, -4.6728, -4.9003, -6.2904, -5.3230,
        -7.5036, -6.2783, -6.7870, -7.4594], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7599, -5.0902, -5.0512, -7.5787, -6.3926, -5.8626, -5.4532, -5.3430,
        -8.6564, -8.1375, -7.5840, -5.0389, -8.5721, -6.9109, -5.2914, -6.1185,
        -4.0798, -5.3714, -7.6655, -6.3151], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6691,  -6.8712,  -7.9553,  -7.3437,  -5.2571,  -5.4966,  -3.6827,
        -11.5061,  -7.9878,  -5.8911,  -6.0446, -15.0529,  -6.0772,  -5.5853,
         -7.1980,  -7.9284,  -6.8324,  -8.3391,  -4.7328,  -8.3676],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6342, -7.2136, -6.6733, -5.6972, -5.1275, -4.9211, -5.0452, -6.8829,
        -6.8478, -7.0083, -5.1475, -4.6016, -4.3747, -5.5198, -6.7282, -6.7792,
        -6.7298, -5.6663, -4.1413, -4.8294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0826, -6.6477, -5.2649, -4.5247, -4.0110, -4.8121, -8.0798, -6.7670,
        -5.7858, -4.3149, -3.7876, -5.1503, -7.4709, -7.0432, -4.6806, -4.4269,
        -3.9239, -5.1234, -7.7783, -6.4889], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1465,  -5.0076,  -5.2284,  -6.1360,  -5.4465,  -6.4622,  -7.0947,
         -7.0542,  -7.3737,  -8.8808,  -7.3675,  -4.8535,  -6.0288, -10.4828,
         -8.0736,  -7.5503,  -7.8593,  -8.6107,  -6.4722,  -5.3897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8394, -5.7425, -5.6988, -4.4713, -4.1594, -8.1676, -7.4504, -6.5753,
        -6.5110, -9.7876, -7.1305, -7.2270, -7.6122, -4.5908, -6.0094, -7.9444,
        -6.9164, -5.4428, -4.4485, -4.4467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9262, -4.6760, -4.8310, -6.5839, -7.4756, -7.4516, -5.3179, -5.3858,
        -7.8425, -5.7410, -6.5515, -6.9467, -6.6251, -5.0218, -4.5189, -4.1816,
        -5.7882, -7.9017, -7.0966, -5.8711], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0239, -6.9414, -6.6549, -6.0820, -4.2617, -4.0672, -4.7392, -7.2730,
        -6.8781, -5.1674, -4.5570, -5.2146, -4.2699, -6.1513, -6.7662, -7.0910,
        -6.3011, -4.5986, -3.6056, -6.0159], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1986, -4.9002, -7.1979, -6.7298, -5.5060, -4.1713, -4.2298, -5.0187,
        -8.0678, -6.5112, -6.5192, -6.0139, -4.3895, -4.9855, -7.0464, -6.7983,
        -5.0964, -5.1162, -3.9153, -5.4970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3138, -5.3447, -6.1140, -9.2390, -9.0017, -6.1520, -6.3107, -6.2334,
        -8.1781, -6.9152, -7.3386, -4.5852, -8.7126, -7.1328, -4.8441, -5.2538,
        -5.1831, -6.3519, -7.6757, -6.2929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0531, -6.0702, -6.5233, -4.7992, -6.8497, -7.4251, -7.2369, -5.0579,
        -4.8657, -3.9078, -4.4517, -7.5551, -7.0592, -5.5842, -4.6908, -3.9208,
        -5.2107, -8.1201, -6.9513, -6.0195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9479, -4.0908, -4.1443, -5.8080, -6.9481, -6.7711, -5.0891, -4.8190,
        -5.7455, -4.9218, -6.0157, -7.2700, -7.2788, -5.4328, -6.6198, -3.9645,
        -4.8130, -5.9229, -7.2355, -7.0901], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5801, -4.8039, -4.9781, -4.2752, -5.2930, -6.3157, -7.1255, -6.7474,
        -5.3797, -4.0982, -5.6291, -4.5477, -7.4081, -6.9740, -5.4524, -4.8110,
        -5.2925, -3.8920, -5.9595, -7.5654], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8911, -4.9800, -8.0139, -7.0549, -5.9635, -4.9283, -6.6920, -4.1691,
        -6.6389, -7.0245, -6.3978, -5.6837, -4.8518, -4.9704, -5.2272, -5.7402,
        -7.2438, -7.1589, -4.2010, -4.9226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8134,  -7.9517,  -6.4278,  -5.9885,  -7.3466, -10.5990,  -6.0057,
         -7.2246,  -7.2134,  -5.2167,  -5.8893,  -7.8697,  -6.5361,  -5.5458,
         -4.7322,  -4.9248,  -5.6240,  -6.3444,  -6.9661,  -6.5476],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5384, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7864, -7.6102, -6.0317, -9.0253, -7.9883, -6.1568, -5.1943, -4.5568,
        -4.7353, -6.8875, -7.1950, -6.5065, -5.2163, -4.4016, -4.4964, -4.8443,
        -7.2783, -6.5365, -5.4489, -5.0184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2317, -7.5488, -6.7976, -5.3405, -5.0768, -3.6481, -6.8002, -7.0091,
        -6.7652, -5.3828, -4.3870, -4.4854, -4.4069, -7.3167, -6.9258, -5.0513,
        -5.0456, -4.5041, -4.6623, -6.6168], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7098, -5.9309, -5.7256, -6.9645, -6.9097, -5.1012, -4.0597, -4.0268,
        -6.3226, -7.6154, -6.8093, -5.4359, -5.2921, -6.8573, -5.5467, -6.3838,
        -7.0686, -6.9158, -5.8586, -5.1401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9837, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8944, -6.3909, -6.0260, -4.8100, -7.7195, -5.8477, -6.2777, -6.7693,
        -7.2021, -5.8194, -4.5728, -3.4146, -5.1756, -7.9266, -6.6679, -6.2175,
        -4.8492, -5.9299, -5.6530, -5.7036], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8020, -4.8117, -4.9736, -4.8758, -7.5320, -7.7566, -6.0452, -4.7091,
        -5.3535, -5.0108, -6.7156, -7.2946, -6.4708, -5.1312, -4.1506, -5.2893,
        -5.7338, -6.8580, -6.8043, -6.2821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1725, -6.2379, -6.1424, -7.7565, -7.5209, -6.8731, -5.1277, -4.2654,
        -5.6096, -7.8700, -6.5420, -5.8230, -4.6944, -4.4829, -5.2944, -5.7646,
        -6.9068, -6.8516, -5.4470, -4.5419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8354,  -6.7960,  -7.5411,  -5.8810,  -8.9948,  -8.1566,  -6.0050,
         -6.3315,  -4.7463,  -4.6691,  -7.7670,  -6.7821,  -6.2718,  -7.2552,
        -12.1700,  -5.8700,  -7.4938,  -5.1013,  -7.3848,  -7.6306],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9801, -7.1386, -5.9299, -4.7491, -4.1647, -5.7670, -8.2137, -6.4924,
        -5.9046, -4.9035, -5.7114, -4.9985, -6.8680, -7.2351, -6.9451, -5.9208,
        -4.6890, -3.8100, -5.2091, -6.8872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7288, -5.6120, -3.8830, -4.5250, -7.9052, -6.5993, -5.3625, -4.9409,
        -8.5501, -6.2122, -7.8421, -5.6328, -8.5532, -7.3363, -5.3009, -5.4710,
        -3.9111, -5.7546, -7.8199, -6.5031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2577,  -4.4136,  -7.1284, -15.1713,  -7.1472,  -7.8367,  -5.9391,
         -7.2428,  -6.6063,  -6.4425,  -4.8214,  -4.9729,  -6.9207,  -7.9805,
         -6.7926,  -5.9921,  -4.2607,  -4.7301,  -6.2237,  -7.9544],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8225, -5.3408, -6.2393, -6.9466, -7.0070, -5.3255, -4.5013, -4.8825,
        -4.9871, -7.9677, -7.2143, -5.0443, -6.0593, -7.1998, -7.4708, -8.7471,
        -5.5426, -7.8674, -7.7729, -5.4630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9148, -8.0205, -6.2541, -4.8381, -4.6080, -4.5732, -4.6994, -7.6652,
        -6.7706, -5.7855, -5.2414, -6.1114, -4.3495, -6.5480, -7.5246, -6.8617,
        -5.3607, -6.2222, -5.4031, -5.6129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0292,  -6.9970,  -6.9306, -11.8347,  -6.0059,  -7.4696,  -7.1767,
         -7.8417,  -5.1148,  -6.0760,  -7.8555,  -6.6823,  -4.8648,  -4.6361,
         -3.9855,  -5.7568,  -7.5701,  -6.6540,  -5.8258,  -7.1709],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5712, -5.3378, -6.9398, -7.4302, -6.1920, -5.5786, -5.1948, -6.0303,
        -6.3170, -8.1040, -7.0813, -6.0028, -4.6032, -4.5038, -5.3314, -7.4083,
        -6.6502, -5.2381, -5.2613, -3.8822], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9043, -7.4858, -6.0375, -6.8397, -7.8289, -3.4191, -4.9552, -7.8105,
        -7.1018, -6.6938, -5.2618, -3.6675, -6.2032, -7.2451, -7.0813, -4.8685,
        -4.0041, -4.6984, -6.1496, -7.3111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0704, -6.5603, -5.4512, -4.8355, -6.6136, -7.6818, -6.6579, -7.0455,
        -4.9647, -6.5592, -7.9097, -6.9772, -6.2439, -6.8111, -7.0835, -7.5170,
        -4.7314, -4.8427, -5.1212, -8.1368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9566, -7.5717, -6.3836, -6.1782, -6.9250, -5.4120, -6.4711, -6.7985,
        -7.7949, -7.2522, -5.1793, -5.3540, -3.6394, -5.0680, -7.1052, -6.6012,
        -4.6424, -4.8067, -6.1182, -4.7244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4669,  -8.2242,  -6.1894,  -5.6624,  -5.5027, -10.3397,  -7.8310,
         -5.8077,  -7.9386,  -5.9263,  -8.6155,  -7.2628,  -6.1857,  -5.5977,
         -3.9030,  -5.2688,  -8.4288,  -6.2251,  -6.4603,  -6.9742],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9490, -7.9511, -6.9627, -6.2256, -5.0161, -4.9293, -5.5235, -5.9054,
        -7.3812, -7.0687, -5.6499, -4.4820, -4.7679, -6.3501, -7.7574, -6.5363,
        -6.2119, -5.1624, -4.4730, -4.4906], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5655, -6.5607, -7.1115, -6.8915, -5.5660, -5.4090, -8.2000, -6.2972,
        -6.2923, -7.1716, -7.2176, -4.4750, -4.5084, -4.4410, -5.6849, -7.4842,
        -6.4289, -6.3280, -4.3499, -6.4964], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0740, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8065,  -6.2960,  -4.6710,  -4.0401,  -6.7548,  -8.0000,  -6.3107,
         -5.7966,  -6.5241, -10.8766,  -7.2941,  -8.4718,  -4.6975,  -4.7823,
         -6.8610,  -6.2856,  -6.0105,  -4.8816,  -5.6745,  -4.5369],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4995, -6.9905, -5.3619, -4.7918, -4.3131, -5.3674, -6.9417, -6.7287,
        -5.1944, -5.3289, -7.8356, -4.4385, -6.1256, -7.3696, -6.2881, -5.9029,
        -6.1144, -4.2878, -5.5488, -6.9023], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7529, -6.1595, -7.1268, -8.2500, -4.8171, -9.4843, -6.8027, -4.8584,
        -5.9072, -3.9460, -4.5489, -8.1380, -6.5737, -6.4741, -5.4358, -5.4723,
        -4.9419, -6.8849, -7.5827, -7.1388], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6636, -7.1684, -4.8130, -5.1431, -5.3568, -5.7022, -5.5963, -6.9534,
        -6.8098, -5.2808, -5.3542, -4.8582, -5.8239, -5.4877, -7.1349, -6.9415,
        -5.5359, -4.4508, -3.8971, -5.1250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2569, -7.0633, -5.8261, -4.6693, -4.1880, -5.3113, -8.0822, -6.9061,
        -4.9113, -4.4319, -3.4795, -4.3448, -6.9305, -6.8448, -5.5396, -4.8209,
        -5.1566, -4.9268, -6.9282, -7.0958], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8067, -7.0909, -5.4834, -4.6584, -4.1511, -6.7192, -7.4645, -7.2948,
        -5.3575, -4.8644, -5.6328, -5.2626, -6.5428, -7.5065, -6.6327, -6.9343,
        -4.9644, -3.6924, -7.2558, -8.1242], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7088, -6.0583, -7.9249, -6.2443, -5.8907, -5.9852, -9.4868, -6.9467,
        -7.1894, -6.9057, -5.4283, -8.1700, -6.9952, -6.0996, -5.9685, -4.2933,
        -5.1551, -8.1430, -6.3945, -7.1384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1834,  -7.5312,  -7.4025,  -5.0640,  -8.0458,  -7.5608,  -5.6185,
         -6.3888,  -4.4148,  -4.9842,  -8.4898,  -6.5809,  -6.2994,  -4.7673,
        -11.8429,  -6.0781,  -8.1662,  -4.4724,  -8.2816,  -7.5826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2827, -10.0548,  -7.3746,  -6.7413,  -7.6583,  -4.9205,  -8.0020,
         -7.6268,  -5.9641,  -4.4184,  -4.1763,  -6.8276,  -7.8561,  -6.1759,
         -6.1338,  -8.1974,  -9.9588,  -6.7422,  -7.5411,  -6.7006],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6145, -4.7070, -4.1500, -3.6806, -5.2495, -6.8942, -6.7402, -5.6913,
        -4.2754, -3.9934, -4.6948, -6.8293, -6.7866, -5.4465, -4.8836, -4.9742,
        -4.3511, -7.7318, -7.0152, -5.8297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2696, -7.3420, -7.1428, -4.8994, -5.1685, -3.7029, -6.0703, -8.0579,
        -6.6341, -5.4626, -4.7322, -7.4592, -4.7404, -6.4605, -7.5980, -7.4081,
        -4.4168, -3.9126, -5.1172, -4.8736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3772, -5.1909, -6.3778, -5.1732, -5.6107, -7.4306, -7.1950, -7.9418,
        -5.1861, -8.2030, -6.8490, -4.4760, -5.7815, -4.0241, -4.3020, -7.8236,
        -6.8865, -5.8251, -5.6757, -4.7940], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1062, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1498,  -6.3890,  -7.6892,  -7.6626,  -9.5521,  -7.5191,  -4.3067,
         -6.1508,  -5.1950,  -4.9209,  -7.4147,  -7.1609,  -6.2588,  -5.2212,
         -4.5235,  -4.7427,  -5.5464,  -7.4412,  -7.2874,  -6.2820],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6936, -6.8675, -6.6251, -5.4124, -4.6160, -4.6365, -4.2799, -8.0406,
        -7.5569, -5.9694, -4.5799, -3.7181, -5.1334, -6.8559, -7.1328, -6.9155,
        -4.9668, -5.0088, -6.5659, -4.7744], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1655, -4.7533, -4.7507, -5.7338, -7.2142, -6.7788, -5.4821, -5.8935,
        -6.5752, -5.7237, -7.6546, -7.0363, -5.1799, -5.4633, -3.7354, -5.5308,
        -7.4028, -6.5247, -6.4319, -5.1902], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1119, -4.8278, -5.0360, -4.6779, -6.9871, -7.7782, -6.3873, -6.6888,
        -6.9089, -9.1592, -6.3319, -6.8457, -7.2139, -7.4801, -8.1173, -5.0633,
        -8.3944, -7.5526, -5.9042, -5.6006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8868, -3.7833, -5.6301, -7.1154, -5.8420, -6.1731, -4.1534, -4.3375,
        -6.2152, -6.3540, -6.8782, -7.1398, -4.5082, -3.5746, -4.4129, -5.2274,
        -7.5135, -7.5132, -5.8875, -4.8351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1333, -4.5832, -4.4883, -3.8905, -5.4270, -7.2404, -6.5311, -4.6389,
        -4.1658, -3.7711, -6.7283, -7.3393, -6.5426, -5.5594, -5.2716, -7.4331,
        -5.7389, -7.4571, -7.0493, -4.6259], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6917, -6.6592, -7.0552, -7.6946, -5.3630, -7.3350, -7.1239, -6.1486,
        -5.5633, -6.1039, -7.3043, -5.7394, -7.5207, -6.7759, -8.4259, -7.1022,
        -5.1905, -4.6837, -4.5745, -4.7861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0857, -6.7729, -7.1610, -6.9884, -5.5194, -4.2804, -3.8723, -6.4460,
        -7.5538, -6.9122, -5.5025, -4.0647, -3.7176, -6.0542, -7.0597, -6.9864,
        -5.0625, -3.8144, -3.6342, -4.7074], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9805, -3.9086, -5.5684, -7.3994, -6.3917, -5.7597, -5.0450, -4.0722,
        -5.1411, -7.3590, -7.0750, -7.1404, -6.1060, -5.4919, -5.0292, -5.0467,
        -7.1417, -6.8600, -5.3062, -4.7990], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0015, -7.0899, -6.8152, -5.4861, -4.6592, -5.4274, -4.7463, -7.2596,
        -7.1147, -5.8657, -4.6084, -4.1732, -4.8667, -7.8971, -6.2698, -5.9520,
        -6.3755, -4.7369, -4.6784, -6.5193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2196, -6.0388, -7.3941, -6.8857, -5.7927, -5.3233, -6.7686, -5.0260,
        -6.3115, -7.1249, -6.9839, -5.4882, -4.1834, -4.3317, -4.8645, -8.2202,
        -7.1490, -5.5276, -5.1132, -6.1559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2784, -8.4784, -7.3715, -6.7657, -5.0642, -3.9227, -5.2426, -8.2713,
        -6.3273, -5.8472, -4.4427, -5.6020, -4.6474, -8.0772, -7.0163, -5.9073,
        -4.4499, -4.2241, -5.3086, -6.6227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6670, -8.8881, -8.4281, -5.6563, -6.5907, -8.0476, -6.5164, -6.7929,
        -5.5259, -8.2856, -7.0257, -5.5391, -5.2481, -5.1453, -5.5916, -7.1213,
        -7.2826, -5.4730, -4.8405, -5.8758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5916, -5.1141, -4.6706, -6.6281, -5.3314, -6.4773, -7.2803, -6.3892,
        -4.9193, -4.6706, -5.0336, -4.7713, -7.9424, -6.9757, -6.0525, -4.7785,
        -6.9768, -4.9817, -5.9727, -7.4431], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7515, -4.3284, -3.4521, -6.7500, -7.8459, -6.2365, -5.8651, -4.4773,
        -9.1699, -6.4591, -7.1141, -7.2320, -6.8125, -5.0388, -5.3106, -5.2472,
        -4.8317, -6.1507, -7.3053, -7.1498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7157, -6.2821, -7.9699, -7.6505, -6.7729, -6.0494, -5.1011, -4.6567,
        -6.1692, -7.6095, -6.8643, -5.2245, -4.5118, -3.4463, -6.1222, -7.9377,
        -6.4564, -5.8468, -4.6529, -3.8543], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7357, -6.2739, -7.6642, -6.1082, -8.0271, -7.1193, -5.1646, -5.2486,
        -9.0269, -5.7917, -5.7628, -6.3528, -6.7253, -6.9906, -6.2635, -5.4536,
        -5.7806, -5.5563, -8.4362, -8.1783], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5808, -7.9610, -5.6601, -7.4102, -7.5071, -5.4445, -4.5732, -4.3837,
        -6.0791, -7.2977, -6.3717, -5.6422, -4.4969, -4.5161, -4.3238, -8.0272,
        -7.4366, -5.9837, -4.3345, -5.7434], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5175,  -4.9744,  -8.6008,  -7.5923,  -6.6493,  -6.2709,  -5.5046,
        -13.6222,  -7.6032,  -8.2517,  -6.4484, -12.7414,  -7.1518,  -6.1718,
         -7.6983,  -6.3706,  -6.6504,  -6.6771,  -8.5084,  -6.6165],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0346, -9.3228, -6.9817, -5.1029, -5.6939, -4.8204, -5.2284, -7.1257,
        -7.2362, -6.9138, -5.5031, -3.9420, -4.0387, -5.4656, -7.1461, -6.5543,
        -4.7985, -4.6070, -4.3255, -5.2325], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7037, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3737, -8.3129, -6.2890, -5.4060, -5.6696, -4.1507, -5.3859, -7.1224,
        -7.6764, -6.5509, -7.0901, -5.1674, -7.9159, -4.3765, -6.7913, -6.9100,
        -6.3868, -4.8543, -4.4593, -4.4870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4550,  -5.8633, -19.6549,  -5.9737,  -7.6899,  -4.5624,  -8.2025,
         -7.0878,  -4.9546,  -5.0451,  -8.0123,  -4.6922,  -5.1872,  -7.0877,
         -6.9129,  -6.4558,  -4.6425,  -4.1694,  -5.1548,  -6.6577],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6607, -4.7946, -6.0618, -5.8125, -7.1185, -6.4837, -5.7932, -3.7794,
        -3.6798, -5.6804, -7.6240, -6.8060, -4.6883, -4.3657, -5.7026, -4.9676,
        -6.0127, -7.0523, -7.1233, -7.2190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9787,  -6.9774,  -5.0665,  -8.8209,  -7.5825,  -6.0063,  -5.4799,
         -5.3785,  -5.7011,  -7.8001,  -6.5651,  -4.6745,  -5.0076,  -4.7016,
         -7.7320,  -7.1743,  -6.1205,  -6.0778,  -5.7397, -10.5407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0079, -4.5388, -4.5968, -7.9335, -7.7964, -6.1075, -6.4455, -6.3374,
        -3.8945, -4.7618, -6.6577, -7.0429, -5.1284, -4.8704, -7.6102, -3.8960,
        -5.7645, -7.0566, -6.0887, -6.0393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4469, -6.7080, -6.7123, -5.1951, -3.8221, -4.1124, -4.4972, -7.1375,
        -6.2834, -5.3294, -5.1894, -4.6697, -4.2855, -6.5655, -7.0779, -6.6780,
        -5.3891, -3.9106, -3.8069, -4.6961], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7127, -6.9287, -6.0634, -3.8754, -4.3898, -5.0394, -8.0372, -6.4041,
        -5.3722, -4.2491, -6.6406, -5.5551, -5.9435, -7.6325, -6.6772, -4.4098,
        -5.3585, -3.2811, -5.7080, -7.3973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6415,  -6.5667,  -6.5917,  -5.1578,  -5.4958,  -7.2468,  -4.9226,
         -5.3794,  -7.7538,  -5.9502,  -5.1662,  -5.3421, -16.5339,  -4.9590,
         -7.8817,  -4.8918,  -8.4788,  -7.3115,  -6.2953,  -5.0646],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4664, -7.3898, -6.8722, -7.3882, -6.4769, -8.0134, -4.4209, -8.6575,
        -6.7670, -4.7631, -5.6192, -3.5328, -4.0358, -5.7238, -6.5392, -6.7864,
        -6.6789, -5.2324, -4.6097, -6.3994], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0341,  -7.6663,  -4.9755,  -5.5864,  -3.1099,  -5.0267,  -8.0808,
         -6.3354,  -5.0408,  -4.3115, -11.6837,  -6.1234,  -7.3040,  -8.2277,
         -5.9178,  -7.3584,  -6.3839,  -5.2539,  -4.6872,  -4.5669],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3712, -5.1552, -4.1463, -5.0207, -6.1939, -6.2134, -6.5572, -5.2022,
        -6.4929, -4.5819, -3.2656, -6.4844, -7.3556, -6.7505, -5.4739, -3.7373,
        -3.9938, -4.6716, -7.7243, -6.0412], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0250, -4.5596, -5.0343, -7.3207, -6.7233, -4.9757, -6.1575, -9.9377,
        -5.8062, -8.0940, -5.1815, -8.0846, -6.9522, -6.1683, -4.4802, -3.9071,
        -6.9652, -7.8507, -6.3146, -6.2046], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1298,  -9.0746,  -8.2801,  -6.9118,  -8.0627,  -4.3793,  -8.2707,
         -7.3204,  -5.6147,  -5.3659,  -3.0249,  -5.4984,  -8.1576,  -5.8892,
         -6.0257,  -6.9690, -10.9085,  -6.4249,  -7.5951,  -7.5088],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0832, -4.7994, -3.5208, -7.8551, -7.6368, -6.2063, -5.7906, -6.6716,
        -9.6328, -7.1918, -6.9891, -7.4070, -5.1103, -8.2688, -7.0674, -5.7174,
        -3.8115, -3.6387, -5.8169, -7.5542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9534, -7.4384, -4.8389, -8.0705, -6.9400, -5.4859, -4.6544, -6.4548,
        -4.0484, -6.3792, -7.3380, -6.4435, -5.4566, -5.2275, -3.7112, -4.8637,
        -7.4403, -7.0453, -6.2040, -4.1899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0863, -7.0338, -6.6273, -5.4119, -8.1575, -6.0834, -7.3447, -5.5209,
        -7.6806, -5.1266, -8.8168, -7.1230, -5.5094, -6.2402, -3.0745, -4.8997,
        -7.6298, -6.4160, -5.5352, -6.1024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0886,  -5.3306, -13.7319,  -4.0117,  -7.4901,  -5.3016,  -9.1390,
         -7.0488,  -5.9870,  -6.7010,  -9.7357, -10.7182,  -6.6118,  -7.0431,
         -7.7685,  -5.2478,  -8.6507,  -7.2454,  -5.7661,  -4.6086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0187,  -7.6251,  -6.0685,  -6.5299,  -4.2576,  -5.4723,  -4.7101,
         -6.9456,  -7.2721,  -7.0216,  -6.6826,  -6.5995,  -8.5758,  -4.5654,
         -7.8641,  -6.0363,  -4.9124,  -6.4130, -14.0478,  -8.2465],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6376, -7.1314, -6.8578, -4.7618, -4.0123, -3.9121, -4.5347, -7.8155,
        -5.9337, -6.1599, -4.2513, -4.4265, -4.8835, -6.3930, -7.0911, -6.6853,
        -5.3127, -5.9608, -7.3695, -4.0601], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4523,  -7.5450,  -6.0762,  -5.8523,  -6.4987, -12.3847, -11.3131,
         -7.0738,  -6.7314,  -6.5249,  -5.0705,  -7.9655,  -7.0896,  -5.2262,
         -4.1415,  -4.0555,  -8.3812,  -7.9685,  -6.3048,  -6.1873],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5231, -17.9373,  -5.5513,  -7.3732,  -4.7080,  -6.1164,  -7.9531,
         -6.4430,  -5.5070,  -5.6545,  -5.3308,  -4.1568,  -6.3069,  -6.7096,
         -6.5337,  -5.4096,  -4.0864,  -3.9858,  -5.3195,  -7.4312],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0560, -5.0359, -5.1459, -6.8175, -5.3085, -4.8925, -6.7843, -5.9568,
        -5.1049, -5.6361, -5.4654, -5.6398, -5.9134, -7.2297, -6.7958, -5.1769,
        -4.3047, -4.0991, -4.7743, -7.0612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4335, -5.4571, -8.3652, -6.2093, -6.1267, -5.4142, -8.3060, -8.1718,
        -5.4309, -7.3545, -5.4024, -8.0693, -7.0226, -5.4124, -5.1196, -4.6155,
        -9.6502, -6.8175, -5.0675, -6.3609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4404, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2036,  -7.0657,  -6.1604,  -5.1234,  -4.9133, -17.8102,  -7.3382,
         -8.2161,  -3.8657,  -8.6633,  -7.5512,  -5.8576,  -4.8547,  -5.0203,
         -4.3553,  -6.1839,  -6.9957,  -6.4217,  -4.8524,  -3.7325],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6507, -5.9574, -3.8622, -3.6771, -6.3876, -6.9302, -6.5573, -4.5794,
        -4.0515, -4.3308, -5.4094, -8.0467, -6.2849, -5.8566, -5.9116, -7.8250,
        -5.0283, -6.0703, -7.1756, -6.7054], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7697, -3.9001, -3.7731, -5.5958, -7.8827, -6.1854, -6.0691, -5.5647,
        -4.4098, -5.0236, -5.9688, -6.8897, -6.2706, -5.1430, -4.0683, -4.3572,
        -6.1199, -6.8719, -6.9331, -6.2772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6037, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4552,  -7.9756,  -6.7052,  -4.9791,  -4.3869,  -3.4837,  -6.1665,
         -7.6400,  -6.5673,  -6.1776,  -4.5362, -18.4882,  -6.5256,  -8.0402,
         -5.8170,  -6.2341,  -7.6824,  -6.2031,  -5.3153,  -4.9576],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3471,  -5.9595,  -6.6918,  -6.3212,  -5.1468,  -4.6612,  -3.6020,
         -4.2741,  -7.4127,  -6.6404,  -5.1106,  -4.4717,  -5.3116,  -7.7795,
         -7.9847,  -5.9679,  -5.8590,  -7.5976, -11.6505,  -5.9221],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0974, -7.0915, -6.5513, -5.1624, -4.3587, -5.9054, -5.0924, -6.5233,
        -7.3622, -6.5020, -5.4146, -4.4052, -3.9165, -4.6141, -6.9588, -7.1204,
        -6.4150, -4.7089, -4.0362, -3.8060], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0826,  -5.0230,  -3.3593,  -5.1934,  -7.1914,  -6.3275,  -4.9610,
         -4.8487,  -3.3964,  -5.7935,  -7.9069,  -5.8060,  -6.2064,  -6.8705,
        -11.9182,  -6.4782,  -6.9324,  -7.3610,  -6.4398,  -6.8723],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0054, -5.1230, -8.0340, -7.2848, -4.9540, -4.7548, -3.9029, -5.8482,
        -7.6745, -6.5024, -4.9991, -4.7592, -4.3363, -5.0637, -7.5605, -5.9642,
        -4.8756, -4.5045, -4.2863, -3.9930], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5563,  -6.4011,  -6.8414,  -6.2312,  -5.0923,  -4.7945,  -3.3905,
         -6.4663,  -7.6156,  -6.9131,  -6.6038,  -3.9209,  -5.3541,  -4.7876,
         -6.5050,  -7.1509,  -5.9387,  -5.9035,  -4.8857, -12.8431],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9293,  -5.3190,  -3.6895,  -3.1329,  -4.6164,  -7.0138,  -6.0512,
         -5.9974,  -4.4548,  -5.1963,  -3.7029,  -7.9745,  -6.1619,  -5.4970,
         -6.2142, -19.4516,  -6.1764,  -8.2189,  -4.0082,  -7.4921],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8988, -5.2942, -6.5308, -7.5923, -5.1976, -5.4957, -6.4862, -6.9925,
        -6.8693, -5.8408, -4.1379, -3.9937, -4.3190, -7.6565, -6.9987, -5.9964,
        -4.2531, -3.6985, -3.7346, -7.7565], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6486, -5.1178, -7.7632, -5.8375, -5.7195, -6.0411, -8.0350, -6.2287,
        -8.5229, -5.1570, -8.5530, -7.2273, -4.9659, -6.0360, -5.1732, -8.4888,
        -6.9866, -8.1806, -4.0164, -9.1871], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.5443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1702, -5.1020, -5.1645, -5.0627, -5.3680, -7.1707, -6.7632, -6.2697,
        -4.1402, -5.0814, -5.6843, -7.4964, -6.6578, -6.0862, -4.7313, -4.4663,
        -5.8041, -7.5914, -6.4562, -5.9919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5018, -4.9279, -9.8149, -7.4393, -7.9634, -7.3793, -4.5614, -7.3934,
        -7.0667, -4.5140, -4.7575, -3.4288, -7.7906, -7.8563, -5.8283, -6.5580,
        -6.4844, -9.5578, -6.6816, -6.6200], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.6063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3745, -7.4415, -8.0419, -6.6214, -5.3756, -3.9758, -4.9586, -5.2181,
        -6.3274, -7.3077, -6.7715, -4.8259, -4.4801, -5.0813, -4.2391, -5.5587,
        -7.0542, -6.6819, -5.6544, -4.8274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8149, -6.0404, -7.0622, -7.6617, -7.3860, -6.2638, -4.1157, -3.9267,
        -6.0260, -8.3855, -5.5989, -7.4624, -7.5275, -7.3203, -6.2904, -5.4275,
        -6.4128, -6.3923, -6.8686, -6.2136], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3971, -7.5882, -6.1421, -6.0999, -5.3099, -9.9880, -6.9339, -6.9892,
        -7.8987, -5.4278, -7.5298, -6.7393, -6.2259, -4.5460, -6.6938, -4.6919,
        -6.0171, -7.1569, -6.8615, -5.7413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5672, -6.3288, -9.5070, -7.4436, -6.6537, -7.8366, -4.7351, -8.1138,
        -6.6972, -6.3076, -4.4468, -3.6167, -5.0944, -7.1659, -6.3312, -5.1471,
        -3.7911, -3.2444, -3.9168, -7.1491], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9949, -4.9401, -8.3885, -7.6699, -6.1296, -5.2289, -3.8388, -3.8552,
        -7.2549, -7.0467, -6.4982, -4.8690, -3.8076, -5.3576, -3.9021, -7.7126,
        -7.1117, -5.1406, -6.9075, -7.7889], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4697, -5.7471, -7.8579, -6.8062, -4.3359, -3.8226, -3.3919, -5.3179,
        -7.3951, -6.8400, -5.4440, -3.8094, -3.4206, -5.5035, -7.8328, -6.3806,
        -6.5336, -4.6108, -3.7837, -4.7853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5812,  -4.6555,  -6.6639,  -7.9688,  -7.3357,  -8.1707,  -3.5765,
         -9.3574,  -7.4647,  -6.5622,  -5.7760,  -4.3274,  -6.8496,  -7.3159,
         -6.4192,  -7.3582,  -5.8590, -18.4690,  -6.4989,  -7.3585],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4942, -4.5141, -4.3926, -3.5706, -7.8749, -7.0269, -5.0416, -4.2915,
        -5.1554, -5.9223, -7.9478, -6.7152, -5.5635, -4.5282, -6.0776, -4.7759,
        -6.8216, -6.7690, -5.1292, -4.0576], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5835, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4698, -5.3911, -4.3329, -5.6605, -7.3695, -6.5899, -5.9506, -3.8116,
        -8.0021, -5.1473, -7.1749, -7.2215, -6.3994, -4.4985, -4.2439, -6.4918,
        -4.2762, -6.3506, -7.2613, -6.5114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6754, -5.4705, -7.8645, -6.1903, -6.1411, -4.5240, -5.7218, -4.2851,
        -6.4247, -7.0009, -6.6572, -4.9955, -4.3363, -3.9579, -4.2773, -8.1569,
        -6.6618, -5.4080, -4.4716, -4.1653], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2712, -4.6155, -7.7141, -6.8721, -5.0261, -4.5845, -3.8091, -5.2772,
        -6.9831, -6.7352, -6.8452, -4.7916, -3.9249, -4.4037, -3.7867, -7.4941,
        -6.6487, -6.0699, -4.1053, -4.0590], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4263, -4.8099, -3.8953, -8.0290, -6.5204, -6.5469, -6.0096, -8.8244,
        -8.0915, -6.2639, -7.3361, -4.5171, -7.4871, -6.8905, -5.2502, -5.3886,
        -3.8300, -6.3916, -8.3929, -6.0872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5939,  -5.5895,  -6.6754,  -5.4935, -14.9897,  -7.1100,  -6.8723,
         -7.8404,  -4.7843,  -8.3921,  -6.3799,  -5.1776,  -5.6923,  -3.5562,
         -3.9874,  -8.0849,  -6.7262,  -5.1625,  -6.3831,  -6.5881],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3302, -4.7261, -5.6950, -7.1601, -6.6076, -5.5416, -6.7116, -5.9055,
        -5.6796, -5.5684, -7.1141, -6.6894, -5.2125, -4.4101, -3.9548, -7.5558,
        -8.0046, -6.1421, -6.2018, -4.4031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8480,  -6.1003,  -4.9396,  -4.8511,  -7.8292,  -7.6161,  -7.4748,
         -7.7286,  -4.5871,  -8.6013,  -6.8687,  -5.0546,  -5.2392,  -4.1147,
         -8.0224,  -7.8249,  -5.7394,  -6.1785,  -4.6874, -11.6702],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6488, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5859, -5.0256, -3.8736, -3.8662, -6.0754, -7.2027, -7.0088, -4.9450,
        -3.7174, -4.9345, -4.0238, -7.3344, -6.1574, -5.3471, -5.1176, -4.7300,
        -4.6187, -6.3017, -7.6551, -6.5405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4228, -6.5962, -6.0500, -3.3263, -3.7080, -4.7941, -7.4728, -7.0060,
        -4.7001, -4.3970, -5.2744, -5.6106, -7.5222, -6.0592, -4.8168, -5.2288,
        -2.8434, -7.6661, -7.9673, -6.1909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8029, -5.0330, -4.3516, -4.5588, -9.7425, -6.9681, -5.2730, -5.2652,
        -9.9006, -6.4629, -6.9474, -7.8071, -4.5124, -7.0097, -7.0627, -4.5168,
        -5.8540, -3.8511, -5.5363, -8.0850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0620, -4.7647, -7.8912, -7.6005, -5.4766, -5.6680, -5.5775, -4.1106,
        -7.1684, -7.7174, -6.0108, -6.0851, -4.1153, -5.9712, -4.2164, -6.6230,
        -7.2185, -6.7856, -4.7638, -3.9844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1276,  -4.3530,  -7.1843,  -7.3561,  -6.8263,  -5.2669,  -4.2750,
         -3.7882,  -6.4791,  -7.7079,  -6.0920,  -4.9352,  -5.6386, -11.1752,
         -5.4302,  -7.2824,  -5.9247,  -7.8806,  -5.8209,  -8.0651],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5735,  -5.8296,  -5.8381,  -7.6043, -13.5869,  -5.6090,  -7.1869,
         -4.9197,  -7.7569,  -6.7004,  -5.7287,  -5.2623,  -4.9591,  -5.2008,
         -5.6700,  -7.0567,  -6.8607,  -5.4615,  -4.4483,  -5.0302],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9501, -7.9391, -6.7255, -5.6839, -3.7557, -5.5067, -4.7753, -7.8869,
        -6.7239, -5.2518, -4.1045, -5.1056, -6.0366, -7.3377, -6.8060, -5.8042,
        -4.9541, -4.0879, -4.7733, -6.4011], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5192, -4.8061, -6.7683, -3.5714, -3.8476, -5.9547, -7.6229, -6.0366,
        -5.0627, -6.0995, -3.4864, -5.2618, -7.2218, -7.5798, -6.9021, -7.4061,
        -5.0373, -4.7237, -6.1666, -7.6952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2615, -7.3404, -4.8318, -5.9015, -7.4958, -6.9987, -6.6745, -4.2442,
        -4.0903, -4.8601, -8.0760, -6.1871, -5.8822, -6.7275, -8.9555, -6.2620,
        -6.7087, -7.6702, -4.5237, -8.5549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3000, -4.7283, -4.7061, -6.0424, -6.8314, -7.0537, -4.5359, -3.7866,
        -3.6025, -4.8558, -7.8847, -6.3333, -5.0486, -6.3569, -4.1500, -4.8758,
        -6.4283, -7.4722, -6.7924, -6.0240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4789, -5.8071, -6.8001, -6.4944, -4.4198, -3.9355, -6.3398, -5.2159,
        -5.8373, -7.5766, -6.4257, -5.6478, -5.0995, -5.3079, -5.1787, -6.5654,
        -6.9003, -6.8019, -4.7215, -4.2724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4783, -5.3022, -4.3614, -4.9083, -4.9041, -6.2231, -7.5373, -7.0358,
        -6.1852, -6.1907, -9.1519, -6.2042, -5.7626, -7.2493, -6.7242, -4.2386,
        -4.4861, -3.8838, -4.4959, -7.2635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2107, -7.0399, -7.2117, -5.0507, -4.3012, -3.1942, -5.7567, -7.1023,
        -6.5356, -7.4914, -4.8362, -3.1700, -5.6412, -7.7194, -7.0155, -5.3370,
        -4.0512, -5.5454, -5.2997, -7.8219], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9785,  -5.6302,  -7.0822,  -7.0008,  -5.1059,  -5.8670,  -8.0607,
         -7.4041,  -5.2478,  -4.6405, -11.9794,  -7.0069,  -8.7841,  -4.3235,
         -9.5953,  -7.2255,  -5.5020,  -6.4043,  -4.7137,  -6.2887],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6775, -4.3826, -4.1677, -5.1089, -7.9870, -6.3914, -4.9395, -5.9257,
        -8.3114, -4.6143, -5.9977, -7.4570, -6.5663, -4.9113, -4.6571, -5.1570,
        -4.2627, -5.8778, -6.9117, -7.2769], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6030, -7.7580, -6.8910, -5.5289, -5.2288, -3.4160, -4.8203, -7.5659,
        -6.3439, -6.1066, -5.7033, -4.7078, -6.7289, -7.3176, -6.5384, -5.7260,
        -5.9157, -7.0982, -6.2349, -6.6722], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2420,  -6.6443,  -5.3429,  -5.0327,  -3.9824,  -6.5022,  -7.9671,
         -5.6489,  -5.8498,  -6.3814, -13.4681,  -7.3682,  -6.1887,  -7.5462,
         -4.7622,  -8.3535,  -7.6149,  -5.6265,  -4.3605,  -4.7006],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8528, -5.6233, -4.9515, -3.6319, -5.9014, -7.3315, -6.2956, -5.6221,
        -3.9736, -3.9260, -4.9613, -6.1546, -6.9805, -6.8518, -5.7830, -3.6874,
        -3.1578, -4.6413, -7.0739, -6.9734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4816,  -7.5544,  -7.8462,  -6.2501,  -6.0264,  -4.7246, -14.7693,
         -7.3877,  -7.7881,  -6.8033,  -5.5027,  -8.2750,  -6.9408,  -4.6868,
         -5.2093,  -4.3921,  -8.2982,  -8.0450,  -6.1570,  -5.4291],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4990, -7.1308, -6.7121, -5.3022, -5.3564, -6.8558, -5.4752, -5.5418,
        -7.1608, -6.4609, -5.4982, -4.2441, -5.5830, -5.3854, -7.7131, -6.6819,
        -4.7906, -4.1567, -4.8932, -4.4281], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4636, -5.2427, -7.1952, -6.4412, -4.9265, -4.8163, -3.7516, -4.6712,
        -7.6607, -7.3077, -4.4842, -4.4099, -3.6424, -5.9932, -7.5755, -6.5610,
        -5.3336, -4.6984, -4.0731, -5.6479], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0326,  -3.9656,  -3.8632,  -5.5296,  -7.9511,  -5.8643,  -5.2087,
         -8.6776, -11.5252,  -6.1867,  -8.2905,  -3.6526,  -6.0299,  -7.4974,
         -5.6979,  -6.2148,  -4.2725,  -3.2784,  -6.1269,  -7.6021],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9675, -6.8982, -4.1739, -4.2604, -3.4639, -5.8614, -7.8872, -6.0702,
        -5.2446, -4.4877, -4.9890, -7.1456, -7.6601, -6.4573, -4.1837, -3.9192,
        -3.4914, -6.0493, -7.6579, -5.6846], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5941, -7.7582, -6.6664, -6.3324, -4.0288, -3.3793, -5.4605, -7.9948,
        -6.0292, -6.3568, -6.8717, -3.6284, -5.5760, -7.0541, -6.5464, -4.2862,
        -4.4591, -5.8561, -4.1902, -6.7185], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5988,  -4.3044,  -7.4152,  -6.6528,  -5.2466,  -4.3251,  -4.0799,
         -4.5512,  -7.8549,  -6.4762,  -5.3161,  -5.6629, -10.4365,  -6.5846,
         -7.9913,  -4.7639,  -5.3544,  -7.4391,  -6.1445,  -6.3160],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3577, -5.4187, -7.5963, -6.2979, -5.7070, -4.9250, -5.0520, -4.7250,
        -6.3944, -6.9163, -6.1065, -5.0536, -4.0768, -3.5962, -6.4099, -7.4455,
        -6.4874, -4.6418, -4.1389, -3.2117], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7081, -6.5094, -7.4578, -7.1397, -4.9702, -3.9995, -5.2197, -5.2478,
        -6.6740, -6.6854, -4.3270, -4.2328, -4.1962, -4.6736, -7.8037, -7.4126,
        -4.2513, -4.8418, -5.1541, -4.4736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5748, -5.6913, -5.8330, -6.6734, -7.4392, -6.9822, -6.1334, -5.0567,
        -3.3802, -4.0999, -6.9812, -6.6998, -6.2121, -4.7131, -6.1859, -4.7697,
        -7.0124, -7.0051, -5.7704, -4.5401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5952,  -6.3780,  -6.4679,  -6.0229,  -5.2516,  -4.0362,  -5.3725,
         -5.9357,  -7.0190,  -6.4029,  -4.5692,  -4.5674,  -4.2382,  -3.7450,
         -8.0356,  -6.8380,  -5.4025,  -4.0514, -11.4798,  -7.6386],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1435, -7.6346, -6.9932, -7.5466, -4.4318, -6.3307, -6.4142, -7.5445,
        -6.6416, -5.5434, -6.1067, -6.0904, -4.2935, -6.5811, -6.9416, -6.5542,
        -4.7408, -4.3318, -3.6868, -3.9165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9727, -5.4621, -4.8774, -6.9143, -7.5254, -5.4258, -7.1011, -5.3987,
        -8.4504, -7.5583, -5.4345, -5.3474, -4.7252, -5.9638, -7.6007, -6.3441,
        -5.5005, -5.2881, -5.0579, -5.9579], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5173, -4.0121, -3.3462, -6.9389, -7.7464, -6.6499, -6.0269, -4.6686,
        -3.5556, -6.2904, -7.7221, -6.8213, -4.9566, -3.8645, -4.9314, -5.1614,
        -7.7520, -6.3746, -5.8602, -4.7157], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6425,  -4.1436,  -8.1509,  -7.3258,  -6.4929,  -4.8410,  -3.4063,
         -7.8090,  -7.8420,  -5.7023,  -5.9763,  -5.3835, -13.1189,  -7.4501,
         -6.4956,  -7.4675,  -3.6283,  -7.2146,  -7.2395,  -5.7462],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5468, -6.4108, -6.3997, -6.8818, -4.7438, -3.8838, -5.5314, -3.9291,
        -7.3345, -6.9321, -6.1244, -5.0773, -4.0191, -4.6989, -8.1462, -6.5970,
        -6.0916, -4.1828, -3.9341, -5.0167], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5845, -4.0387, -3.2863, -4.9061, -7.6014, -6.3218, -5.8400, -4.9150,
        -3.9103, -4.4439, -6.6147, -7.5159, -6.2222, -5.3474, -5.1749, -4.8117,
        -5.4269, -5.6132, -7.1861, -6.6469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5835, -4.9950, -6.2082, -3.9186, -5.2299, -7.3031, -6.3504, -5.5185,
        -4.2360, -5.4109, -3.7304, -6.4720, -6.8796, -6.7099, -5.4340, -4.1118,
        -3.3198, -6.5539, -7.4121, -6.7070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8607,  -4.7459, -14.6912,  -7.6974,  -6.8941,  -7.5435,  -3.2372,
        -10.1161,  -7.2895,  -5.2419,  -5.8926,  -3.7674,  -4.7419,  -7.1368,
         -7.1649,  -6.0448,  -5.6558,  -4.8690,  -3.5906,  -5.4953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3674, -4.8451, -4.4052, -7.9466, -7.3808, -6.3168, -4.7505, -3.6906,
        -4.1977, -7.8902, -6.3097, -6.2402, -6.7699, -9.6616, -7.2994, -8.4787,
        -4.5798, -8.7643, -7.3872, -5.5451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4598, -7.0550, -4.6131, -8.6717, -7.2660, -4.7821, -4.8547, -4.6836,
        -6.7197, -7.7895, -6.1237, -5.3403, -8.1994, -4.2950, -5.1237, -6.0505,
        -7.5593, -6.6533, -6.8677, -6.0506], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6919, -4.6666, -3.9242, -4.5131, -4.8725, -6.7196, -7.0500, -6.6758,
        -6.8926, -3.3250, -6.6432, -9.1076, -5.9744, -4.5135, -6.9246, -6.3773,
        -7.0774, -6.2345, -7.0782, -4.7487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4928, -6.4219, -7.0551, -6.4972, -5.6843, -6.4992, -5.1307, -5.4345,
        -5.8233, -6.9774, -6.8437, -5.7858, -4.6729, -9.7835, -5.9042, -6.7691,
        -7.0266, -6.3061, -5.2095, -5.1440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4405, -5.8906, -5.7043, -6.0597, -7.6246, -6.5335, -5.1321, -5.5385,
        -3.4672, -4.8041, -7.1011, -7.0453, -6.9453, -4.5752, -4.2556, -2.8082,
        -5.2514, -7.0212, -6.5376, -4.4539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2100,  -5.6532,  -4.7340,  -7.0096,  -7.5940,  -6.1261,  -6.2022,
         -4.5176, -15.5044,  -7.6397,  -7.7243,  -7.4990,  -4.6621,  -8.3481,
         -7.0257,  -7.4485,  -4.0283,  -3.7440,  -6.3318,  -8.4618],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.4564,  -5.7200,  -7.6440,  -4.9347,  -8.1074,  -6.6088,  -6.5199,
         -4.4713,  -4.5203,  -4.4828,  -6.6552,  -7.5922,  -6.9538,  -4.7128,
         -4.4320,  -2.8613,  -5.6382,  -7.0623,  -6.2003,  -4.8017],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2188, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6723, -6.8747, -7.6585, -7.0302, -4.6521, -5.0347, -4.0479, -4.6926,
        -7.3089, -5.9045, -6.0272, -3.7923, -3.4795, -6.3340, -6.8633, -6.5354,
        -5.8622, -4.1983, -4.7779, -4.6160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5181, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4765, -3.9415, -3.8519, -4.5882, -7.4590, -6.5635, -4.6670, -4.5034,
        -4.7441, -4.4752, -6.0295, -7.1005, -5.2705, -5.9437, -3.7171, -4.3101,
        -5.2541, -7.4627, -7.2878, -5.7862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2918, -5.9626, -5.1855, -4.1215, -3.6066, -5.3048, -7.9668, -6.0747,
        -5.6526, -3.7757, -7.1986, -5.5095, -6.3528, -7.1125, -6.8231, -4.3734,
        -4.5490, -6.1031, -5.4609, -5.4956], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9512, -4.9418, -4.0091, -3.4665, -4.7976, -8.0497, -6.5709, -5.3077,
        -4.2156, -4.5701, -5.5279, -6.5346, -6.8230, -6.6949, -4.3778, -4.4796,
        -3.1904, -3.5897, -7.3501, -6.5560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0816,  -5.1192,  -6.9306,  -6.5574,  -4.5070,  -3.9219,  -4.7333,
         -9.3350,  -7.3571,  -5.6108,  -5.8345, -15.3944,  -6.3639,  -6.5435,
         -6.6935,  -7.6137,  -6.2735,  -8.0308,  -5.4692,  -8.1277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9979,  -6.3277,  -4.3978,  -3.6169,  -8.0489,  -7.6126,  -6.1587,
         -6.5441,  -6.0563, -12.3766,  -6.9849,  -7.7588,  -6.6304,  -7.2502,
         -4.5175,  -9.2194,  -7.1245,  -6.3725,  -5.6197,  -3.4070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2059,  -5.9476,  -3.9637,  -4.0531,  -4.9826,  -7.6361,  -6.2986,
         -4.1535,  -4.4617,  -5.5497,  -4.5692,  -7.7167,  -6.3392,  -5.2839,
         -4.6772, -13.8545,  -4.9224,  -8.2631,  -4.8290,  -8.2702],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0497, -6.0876, -7.1282, -6.5447, -5.4499, -4.1921, -3.7395, -5.0096,
        -7.3234, -6.3931, -5.3417, -4.2906, -3.5408, -4.9940, -7.8606, -6.6144,
        -5.1569, -4.3999, -3.4249, -5.0978], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1312, -4.2390, -3.6600, -4.9940, -7.8013, -6.1125, -6.1228, -6.2668,
        -4.1487, -4.6586, -6.3107, -7.5359, -6.9337, -5.1077, -4.6302, -3.3956,
        -5.3901, -7.3038, -6.3477, -6.3922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0327, -6.1471, -4.7755, -6.6492, -6.8542, -6.3804, -4.9320, -4.0700,
        -3.4879, -4.5374, -7.3169, -6.9842, -5.1679, -4.7961, -6.2480, -4.5248,
        -6.1486, -7.0009, -6.5375, -5.5953], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1579, -7.6789, -3.3593, -8.9535, -7.2439, -5.4796, -5.7922, -3.5360,
        -3.6524, -6.1516, -7.2270, -6.2675, -5.7635, -4.7110, -5.0133, -5.1028,
        -5.7673, -6.7374, -6.3123, -6.5649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5549,  -4.0580,  -5.6490,  -6.9371,  -7.3558,  -7.3308,  -4.2964,
         -3.9834,  -4.0892,  -6.7811,  -7.9100,  -6.1928,  -5.3500,  -7.5393,
        -17.0318,  -5.7845,  -7.7287,  -4.1229,  -7.5997,  -7.1954],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6030, -5.0098, -4.3866, -3.6998, -5.8584, -7.8596, -6.0382, -5.7448,
        -6.3707, -3.7821, -4.0787, -7.5031, -6.3776, -4.8336, -4.5775, -4.5636,
        -5.7989, -7.7422, -6.6114, -5.2341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4076, -5.2744, -3.6766, -5.1162, -6.0962, -7.2111, -6.9212, -5.0197,
        -5.0951, -3.1573, -6.0387, -7.3023, -6.6648, -6.1932, -4.9028, -4.8438,
        -5.3731, -6.4695, -7.4679, -7.2181], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6335, -6.4866, -4.8059, -4.4018, -4.2145, -7.5471, -6.4609, -6.3312,
        -6.9707, -7.9751, -5.8897, -5.2120, -7.7670, -6.2088, -8.8932, -7.9970,
        -5.8839, -5.5391, -4.9703, -4.2956], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2242, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1117, -7.6188, -5.5478, -8.1995, -7.2375, -5.4650, -5.0541, -4.8568,
        -5.1635, -5.6635, -7.3495, -6.9232, -4.4598, -4.3518, -4.3591, -3.7941,
        -7.4395, -6.6087, -5.4156, -5.7866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3796, -5.1863, -7.7265, -6.3502, -5.3510, -4.7281, -6.8734, -5.5915,
        -6.5013, -7.2291, -6.5073, -5.3847, -4.5283, -4.3694, -5.4109, -5.9209,
        -7.8396, -6.8937, -5.1208, -4.4517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7672, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2696, -6.0966, -5.2680, -6.2204, -7.0201, -6.9885, -4.3845, -4.4602,
        -4.2150, -4.8083, -7.3411, -7.2647, -6.2142, -4.9666, -4.7220, -5.8246,
        -7.9429, -6.6484, -6.2178, -6.3731], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4266,  -6.4760,  -7.7981,  -6.1256,  -5.5383,  -8.7348, -10.8919,
         -6.4411,  -8.4252,  -4.9939,  -8.1170,  -6.8228,  -5.1295,  -5.0698,
         -5.5433,  -4.1905,  -5.5185,  -7.6315,  -6.2649,  -5.5067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0553, -8.1432, -6.4343, -5.3548, -6.3521, -8.6607, -6.1902, -8.7686,
        -4.3384, -6.0583, -7.1874, -5.0037, -5.4345, -6.3302, -4.7180, -5.3040,
        -7.6584, -7.3418, -8.1345, -4.9521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8570, -6.3022, -6.8898, -6.8776, -4.9527, -4.1961, -3.0122, -4.7418,
        -7.5941, -6.3858, -5.7180, -4.1131, -3.8221, -5.0944, -7.7067, -7.3950,
        -6.8440, -4.9279, -3.7344, -3.6634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8584, -7.8789, -6.4440, -5.1963, -5.0869, -6.1102, -8.2874, -8.0503,
        -7.5912, -4.3761, -8.6821, -6.4099, -5.1847, -4.7575, -4.4841, -4.3024,
        -8.2507, -6.0013, -5.0111, -6.8441], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2404, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2499, -9.0407, -7.2669, -6.8414, -5.6770, -7.2086, -9.3084, -7.2523,
        -5.4648, -4.9958, -8.1119, -7.0654, -6.2646, -7.9817, -4.7461, -8.4120,
        -7.4041, -5.9626, -5.5873, -3.2533], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5023,  -6.3940,  -5.0621,  -4.6990,  -6.1740,  -8.3406,  -6.7626,
         -5.5786,  -4.3244, -15.3447,  -7.5857,  -7.4494,  -7.6542,  -3.2277,
         -7.7164,  -7.3161,  -5.8132,  -4.8386,  -3.7366,  -6.0188],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3344,  -7.9892,  -3.8794,  -9.5258,  -6.4441,  -5.4150,  -5.2857,
         -5.2152,  -4.1476,  -6.5435,  -7.7816,  -6.5212,  -5.1467,  -4.3703,
        -18.5636,  -5.4573,  -7.9990,  -4.9156,  -8.7126,  -7.1789],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7679, -8.7636, -7.6772, -6.5690, -7.6263, -5.7333, -8.0247, -7.0793,
        -4.8833, -4.9772, -5.4859, -5.4221, -7.4995, -6.5601, -5.3889, -5.8335,
        -4.9634, -5.5871, -6.4768, -6.9365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6321, -7.5677, -6.7070, -5.6413, -7.0547, -6.2747, -4.5568, -6.2595,
        -7.0638, -5.9012, -4.8454, -4.4910, -6.0537, -4.1694, -6.4867, -7.6560,
        -5.8104, -6.0526, -6.6822, -5.7988], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4751, -5.3909, -6.3071, -6.9822, -7.1796, -4.3850, -4.4646, -6.3325,
        -4.8753, -6.7477, -7.2104, -6.6985, -6.1153, -4.1257, -3.4425, -6.6951,
        -7.9701, -6.2743, -5.9935, -3.6891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8976, -7.1895, -6.7744, -5.4404, -5.4787, -4.5412, -4.1511, -7.4047,
        -6.5868, -5.0209, -6.8610, -7.4742, -4.4254, -4.8729, -6.9662, -6.3849,
        -4.1632, -4.3734, -3.9582, -3.8052], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1871, -6.2948, -4.6511, -4.1769, -4.9435, -8.1169, -6.5073, -5.0417,
        -4.7197, -6.5037, -7.0087, -7.6745, -6.3288, -5.6969, -8.1452, -5.7662,
        -4.5692, -6.2775, -7.0921, -7.5014], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.9654,  -6.5326,  -8.3492,  -5.8328,  -7.8965,  -4.3867,  -8.0620,
         -6.9838,  -6.2092,  -4.4883,  -4.1548,  -5.3410,  -7.9185,  -6.5461,
         -5.5329,  -4.5543,  -4.4541,  -6.0637,  -8.0402,  -6.8637],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6588, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4508, -3.9774, -9.2473, -7.9895, -5.6746, -6.1859, -4.1831, -4.4322,
        -4.8468, -7.6388, -6.9309, -5.6300, -4.3045, -5.4305, -5.4433, -7.8788,
        -6.4105, -5.3629, -4.6168, -4.3235], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6904, -3.9789, -4.9719, -7.8240, -6.4232, -4.3656, -4.0789, -6.5656,
        -4.1361, -6.4679, -6.8855, -6.2336, -5.0765, -5.5627, -5.4856, -4.5266,
        -6.4449, -6.9983, -6.8027, -4.9462], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5057,  -5.9898,  -4.4564,  -4.5585,  -2.9277,  -6.2350,  -7.6043,
         -5.9313,  -5.5296,  -3.7853, -12.7324,  -7.8563,  -7.5031,  -7.0435,
         -4.5147,  -8.1080,  -6.7740,  -5.6029,  -3.8700,  -4.1177],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1246, -5.0215, -4.4056, -8.0462, -4.6407, -5.9746, -6.7875, -6.3682,
        -6.2581, -4.3009, -6.1707, -7.1274, -8.0263, -5.5217, -4.6710, -7.7843,
        -7.3659, -6.6285, -4.8366, -7.5677], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3658, -6.2260, -5.1205, -6.0022, -4.9377, -5.2728, -7.8887, -5.9168,
        -4.0077, -5.5468, -3.2945, -4.9345, -7.2055, -6.6770, -5.3098, -3.5309,
        -3.5388, -4.0378, -6.8237, -7.3665], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9322, -4.5766, -3.5751, -5.3701, -6.5485, -6.9333, -6.5080, -4.7125,
        -4.6532, -3.9058, -4.3587, -7.3072, -7.0256, -4.1283, -4.5747, -3.5097,
        -5.8377, -7.9532, -6.0243, -6.1469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1280, -6.6253, -3.6677, -3.8342, -4.6972, -5.7063, -7.7125, -5.8666,
        -4.8967, -3.7573, -3.6921, -4.5803, -7.1566, -7.5311, -6.3863, -4.1748,
        -4.4642, -6.2718, -5.6091, -6.6016], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3364,  -5.3436,  -7.2914,  -5.9782,  -5.6724,  -7.1240, -10.2891,
         -6.8032,  -8.5151,  -4.3203,  -8.4771,  -7.2990,  -6.7628,  -5.6324,
         -3.6502,  -4.0461,  -8.9149,  -6.6531,  -5.3765,  -4.0207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9843, -8.2234, -3.7434, -6.9637, -7.3582, -6.0652, -5.0151, -4.8059,
        -6.2446, -4.1573, -5.6417, -7.0030, -6.1263, -5.3553, -3.5248, -2.8586,
        -5.0918, -6.6310, -6.2405, -4.8063], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5527, -5.4360, -7.2921, -6.9057, -5.1223, -3.3376, -4.7129, -4.3167,
        -6.5280, -6.4395, -4.8564, -4.4440, -3.5630, -4.6497, -7.1161, -6.9175,
        -6.6150, -4.0778, -4.0243, -2.9294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5257, -3.0575, -4.3877, -7.0937, -6.4535, -4.7859, -5.0631, -5.6178,
        -3.8002, -5.9927, -6.8465, -5.7661, -5.2288, -4.3007, -3.7010, -5.4530,
        -6.2082, -6.8668, -6.4820, -4.3606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6379, -4.7129, -3.4714, -4.4226, -7.4881, -6.1308, -3.8331, -4.4265,
        -2.9140, -4.8596, -7.2406, -6.1661, -4.9787, -4.6302, -5.4161, -4.4940,
        -6.4149, -7.0332, -6.0376, -4.9408], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0359, -3.2789, -5.9581, -7.3690, -5.5848, -5.5689, -3.3432, -5.4345,
        -5.4650, -7.2340, -6.6676, -4.5065, -4.1197, -2.9749, -6.0252, -7.8787,
        -6.0755, -5.3945, -5.3453, -6.9180], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1837, -3.0558, -4.5038, -6.8684, -7.3321, -7.0224, -4.1846, -3.9922,
        -4.1864, -4.3438, -6.6475, -6.2789, -5.4081, -5.5193, -4.5660, -4.8547,
        -6.7471, -7.0661, -6.5615, -4.5537], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0998, -6.1992, -8.3177, -8.2882, -7.1887, -7.7023, -7.8104, -4.1782,
        -8.9707, -6.6850, -4.9307, -4.9439, -3.0093, -6.2927, -7.9349, -5.6390,
        -6.4301, -5.2650, -6.7023, -5.3497], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2355, -6.2396, -6.8179, -6.5656, -4.3095, -4.6005, -2.7713, -3.5373,
        -6.4102, -6.2761, -6.0658, -4.2368, -3.5435, -4.4517, -4.2981, -7.4622,
        -6.7184, -4.4444, -4.3290, -7.0182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0886,  -5.5330,  -7.6281,  -5.8143,  -6.5930,  -5.0252,  -2.9516,
         -6.9152,  -7.6851,  -8.7798, -12.3387,  -7.9929,  -8.9222,  -8.3219,
         -7.9523,  -8.5179,  -8.0281,  -7.9036,  -8.2861,  -8.3354],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3801,  -6.7370,  -4.8986,  -7.3601,  -5.7532,  -6.4170,  -7.7106,
        -11.8411,  -7.1224,  -6.5188,  -7.1058,  -4.6372,  -8.8958,  -6.5297,
         -5.3907,  -4.2874,  -3.1008,  -5.0232,  -7.5185,  -6.2948],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9812, -7.6464, -6.0974, -5.1863, -7.4101, -4.8221, -4.8313, -5.7332,
        -6.9628, -6.5173, -5.0263, -3.6734, -6.7993, -4.1874, -6.8871, -6.9423,
        -6.0950, -4.4015, -3.9524, -3.3475], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7452, -5.5577, -4.1005, -7.3844, -4.1287, -5.9815, -7.7029, -6.7210,
        -4.5857, -7.3983, -5.9085, -4.6401, -5.8571, -7.1143, -6.5238, -4.4919,
        -3.5612, -3.1097, -4.7962, -7.7107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3266, -8.1550, -6.3523, -6.5535, -4.2027, -3.5095, -4.1012, -7.7332,
        -6.3329, -6.0333, -4.8329, -5.8951, -5.1400, -5.6590, -7.2712, -6.8767,
        -3.2632, -4.2801, -6.4623, -9.6335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3434, -8.1468, -4.1128, -5.6923, -6.5891, -7.7131, -6.9694, -6.4745,
        -5.8442, -3.5398, -7.0045, -6.6608, -8.6016, -4.2559, -8.8840, -6.1713,
        -6.2695, -5.6300, -2.8924, -3.6560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1220, -5.6213, -3.6748, -5.3932, -5.8769, -6.5028, -7.3082, -6.5146,
        -4.3160, -4.0455, -3.7122, -4.3691, -7.6328, -7.1903, -5.2686, -4.7139,
        -7.0487, -4.0772, -5.4885, -6.7376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4312, -6.6787, -4.9625, -3.9454, -3.8073, -4.4721, -7.1056, -7.5783,
        -6.7188, -5.0240, -3.5851, -3.1138, -5.7005, -6.8421, -6.4099, -5.4566,
        -3.9577, -3.6170, -6.4406, -7.4927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7045, -6.4339, -7.2238, -6.3555, -5.1396, -4.3278, -6.7397, -3.8195,
        -6.2386, -7.4285, -6.7291, -4.7453, -4.1399, -2.7079, -5.3423, -7.8373,
        -6.0655, -5.7988, -5.5673, -5.9702], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8416, -4.8480, -4.5054, -3.0859, -4.9071, -8.0164, -5.8187, -5.7347,
        -4.4839, -6.0154, -2.9664, -6.2469, -6.7853, -6.1614, -5.3014, -4.6124,
        -2.9063, -4.6552, -7.9522, -6.8259], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3835, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7382, -4.5542, -7.2179, -6.5531, -4.6788, -4.1122, -3.6127, -5.7264,
        -7.8528, -5.7679, -6.2738, -3.9870, -2.9197, -4.2932, -7.9394, -6.4974,
        -4.6362, -4.2036, -4.5264, -6.2685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5168, -3.0654, -4.4248, -6.6303, -7.0895, -6.7810, -3.5469, -4.1617,
        -3.0781, -4.1866, -7.9816, -6.5526, -4.7379, -4.0808, -4.8671, -4.8732,
        -7.7147, -6.1202, -5.5787, -7.2788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3046, -3.7569, -3.3415, -6.4299, -7.6330, -6.0472, -6.0220, -3.6450,
        -5.0262, -5.8970, -7.1764, -6.0653, -6.7917, -4.2632, -5.1693, -4.4891,
        -6.8097, -6.8654, -6.1486, -5.0921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2444,  -7.7660,  -4.5798,  -8.3445,  -6.4079,  -5.6205,  -4.9883,
         -3.2036,  -5.5390,  -7.9909,  -6.1064,  -5.2564,  -7.7796, -20.3584,
        -12.0061,  -7.5454,  -6.3859,  -5.3127,  -7.4331,  -6.2040],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9365,  -2.7854,  -5.3031,  -7.5170,  -6.1110,  -4.4675,  -3.6583,
        -11.1518,  -7.7988,  -7.6250,  -7.9883,  -4.8877,  -8.3195,  -7.2604,
         -5.3138,  -5.0934,  -6.8145,  -4.6574,  -6.3465,  -7.1788],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9142, -5.9833, -7.0464, -5.8520, -4.4922, -3.7527, -2.6975, -4.1977,
        -7.8230, -6.4738, -4.9874, -4.5006, -6.3317, -6.8821, -7.8846, -6.4037,
        -5.8723, -4.2711, -4.6422, -6.5887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3206,  -6.2645,  -5.1626,  -4.5036,  -6.5816,  -7.3806,  -6.4048,
         -5.0177,  -4.2409,  -4.8162,  -6.4171,  -7.9964,  -5.8896,  -5.1327,
         -5.6867, -15.6663,  -9.9946,  -6.7344,  -7.0644,  -4.6049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5016, -5.5816, -7.5965, -6.4336, -4.5118, -5.3860, -7.3315, -4.9880,
        -5.9850, -6.8807, -6.4909, -4.7892, -4.2560, -4.9703, -6.6935, -7.5817,
        -6.3391, -5.3994, -4.2910, -5.1876], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3806, -5.2661, -5.4899, -6.3999, -6.0462, -5.4288, -4.3239, -5.0448,
        -4.4038, -5.8993, -6.5873, -6.0225, -4.7930, -4.2015, -4.5181, -4.3933,
        -7.4778, -6.5407, -5.5742, -5.9467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2116, -4.8111, -7.2338, -6.3627, -4.9228, -4.3027, -4.0742, -4.1140,
        -6.5098, -7.0811, -6.8232, -3.8455, -3.9366, -4.3851, -3.7464, -6.9983,
        -6.7415, -6.2710, -4.7716, -4.3960], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4865, -6.7276, -3.8876, -3.3436, -4.5302, -6.7615, -6.1517, -4.4636,
        -3.8966, -3.3401, -7.2646, -7.6586, -5.7307, -5.0131, -3.8488, -3.5545,
        -4.5603, -7.5444, -6.3260, -5.1697], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4641, -4.5586, -7.1503, -6.3396, -4.8988, -4.3591, -5.8135, -4.3486,
        -6.1034, -6.7426, -6.3568, -6.2934, -4.3019, -3.5553, -6.0357, -7.6779,
        -6.4299, -6.1336, -6.2559, -5.1296], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1025,  -6.0893,  -6.1397,  -4.4102,  -4.2327,  -8.4493,  -7.9197,
         -5.6156,  -5.4093,  -7.2904, -10.3228,  -6.7539,  -6.9506,  -7.5220,
         -4.3435,  -8.1761,  -7.3490,  -6.0151,  -4.7346,  -3.8919],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5456, -6.7322, -6.1665, -4.1070, -4.0612, -2.8871, -5.4680, -7.2386,
        -6.1707, -4.2862, -3.9134, -2.8140, -5.0521, -7.7523, -6.0237, -4.4461,
        -3.2503, -2.6443, -5.1971, -7.0481], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6149, -4.1836, -3.4715, -6.5819, -7.5925, -6.3675, -5.4802, -4.2586,
        -3.2922, -5.0713, -7.7662, -6.1379, -5.8443, -5.7099, -6.0894, -5.4077,
        -6.1255, -7.2921, -6.8111, -4.4035], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3975,  -6.9683,  -5.9846,  -4.5317,  -3.8675,  -9.6100,  -7.5063,
         -5.9763,  -5.6091,  -4.1953, -17.0328,  -6.5772,  -7.7375,  -6.3967,
         -7.5341,  -5.2679,  -5.3198,  -7.6425,  -6.3111,  -4.6982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9148, -4.1998, -6.1430, -3.1355, -6.2403, -6.3753, -6.3357, -6.2513,
        -5.9966, -3.5931, -3.8078, -7.0643, -7.3424, -6.4227, -4.4000, -4.1857,
        -3.0242, -5.3601, -7.0027, -6.2882], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7435, -4.8479, -7.5896, -6.4843, -4.9805, -4.8274, -6.9780, -3.0532,
        -5.6773, -7.3313, -5.9836, -4.9240, -4.6889, -7.6390, -5.8910, -6.2383,
        -7.7469, -6.9430, -4.3244, -3.7508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9148, -5.6354, -7.6929, -5.7272, -8.9218, -6.5466, -6.7047, -4.6889,
        -3.1160, -4.5839, -6.5601, -7.3504, -6.6132, -4.2788, -4.5097, -3.5517,
        -5.1832, -7.5770, -6.4780, -3.9421], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0452, -7.1489, -5.5978, -6.7615, -4.2898, -8.7642, -5.6791, -6.4455,
        -4.7154, -7.7188, -6.5644, -5.8637, -4.8705, -4.7884, -4.5429, -6.7257,
        -7.1419, -6.4625, -5.4136, -3.9042], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2822,  -7.6094,  -5.6380,  -8.7307,  -6.8469,  -4.1850,  -5.1438,
         -4.2808,  -5.8471,  -7.2938,  -6.5610,  -4.9477,  -4.4859,  -3.6399,
         -5.7172,  -7.3659,  -6.1524,  -5.1701,  -4.5239, -12.3386],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7075,  -6.8347,  -8.0338,  -6.6102,  -5.8888,  -5.4471,  -7.0941,
         -5.3877,  -5.7595,  -6.4365,  -6.2728,  -7.8936,  -6.0827, -13.6001,
         -6.4598,  -7.3480,  -7.7016,  -6.1776,  -8.0680,  -7.2596],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1677, -7.6485, -6.0238, -5.1067, -4.4774, -3.0073, -5.1714, -7.1520,
        -6.2957, -3.7940, -4.4568, -3.1653, -7.6048, -7.6755, -5.6977, -5.6941,
        -6.7525, -7.9227, -5.0919, -6.8257], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6432, -6.5212, -8.1507, -3.8304, -8.1679, -6.9196, -5.8578, -5.0105,
        -4.3239, -3.7795, -5.7094, -6.9319, -6.5539, -5.6021, -3.8002, -4.6895,
        -4.4235, -7.5906, -6.5266, -5.1645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3351, -5.4359, -3.5538, -5.6720, -7.8784, -5.6804, -6.1808, -6.3731,
        -8.8678, -7.5680, -6.2920, -7.1230, -5.5100, -8.2100, -6.4350, -7.3654,
        -5.6089, -8.0237, -8.0463, -7.9536], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.7057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0398,  -6.9002,  -6.0398,  -5.2675,  -5.2744,  -8.4542,  -5.1883,
         -4.4075,  -7.3608,  -6.3788,  -5.3147,  -5.0722,  -3.5144,  -5.2020,
         -7.9737,  -6.1806,  -6.2437,  -6.6919, -14.8767,  -6.4155],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3398, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7488,  -7.9308,  -4.0798, -10.7147,  -7.0468,  -6.7659,  -5.3549,
         -5.8439,  -4.3252,  -4.9186,  -6.3642,  -7.7559,  -6.9148,  -5.6331,
         -4.4493,  -3.4644,  -5.1787,  -7.8161,  -5.5530,  -5.3190],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4876, -6.4876, -5.6922, -3.5878, -3.2091, -5.2658, -7.2622, -6.0047,
        -5.0663, -3.7336, -3.4675, -4.5316, -6.7225, -5.6591, -5.2233, -4.4251,
        -3.7247, -4.4778, -7.0876, -6.7274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0816, -7.3462, -7.2475, -7.0775, -4.5504, -3.8603, -3.9365, -5.2356,
        -7.7367, -6.2314, -5.5284, -3.4030, -3.6016, -4.4577, -6.5781, -6.8525,
        -6.4417, -4.0817, -3.6551, -2.6860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2795, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4359, -3.8467, -5.5493, -7.4492, -5.8561, -4.5243, -5.5660, -3.0286,
        -5.9330, -7.4810, -5.8319, -6.2397, -4.4776, -3.7214, -4.0349, -7.5394,
        -6.9602, -4.9179, -4.1352, -5.1156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7425, -7.0780, -6.3475, -5.5125, -4.1015, -4.3368, -3.9915, -8.2334,
        -6.0552, -5.6721, -3.9595, -6.4336, -3.4971, -6.7169, -6.8624, -6.0959,
        -4.6450, -4.6132, -3.6779, -5.2968], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5937, -5.2428, -5.7041, -8.1048, -6.1471, -5.4872, -3.9722, -3.0917,
        -4.9686, -6.1283, -7.1759, -6.1807, -5.4212, -3.5928, -3.0428, -4.9451,
        -7.7992, -5.7146, -4.5726, -5.5890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7751, -3.5834, -4.4783, -4.7985, -7.7876, -7.1517, -6.6019, -4.4457,
        -3.3844, -6.2245, -8.1408, -6.1389, -5.8680, -8.7404, -3.5441, -5.5032,
        -6.5275, -7.4091, -7.1210, -7.2910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2526,  -5.0745,  -7.3326,  -6.3485,  -5.8872,  -4.6926,  -4.8187,
         -4.9616,  -7.5297,  -6.4556,  -5.4945,  -5.4085, -15.4394,  -6.3886,
         -7.1192,  -7.6698,  -5.8654,  -7.2580,  -6.0390,  -5.8154],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5400, -5.2485, -5.1876, -4.4062, -4.6515, -5.8596, -7.5524, -6.9095,
        -3.7676, -3.7760, -3.3187, -5.6460, -7.5196, -6.2264, -4.3467, -5.4706,
        -8.6829, -7.1081, -6.8059, -7.3248], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6635, -6.3927, -4.7795, -3.7963, -4.1728, -5.5752, -7.8887, -6.1315,
        -5.2065, -5.2635, -3.7925, -4.6828, -6.5549, -6.7940, -6.7261, -4.0673,
        -3.5834, -2.7852, -6.0763, -7.8221], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1162,  -5.0898,  -5.4745,  -7.6547,  -6.9325,  -4.7119,  -4.8932,
         -3.6605,  -6.8143,  -7.6279,  -5.8873,  -4.1697,  -7.1050, -10.5389,
         -6.0344,  -8.7858,  -4.4277,  -8.3099,  -6.7781,  -5.8416],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5383, -7.5943, -6.5949, -5.9113, -4.0198, -4.3114, -3.8849, -7.1463,
        -5.9391, -3.9972, -4.0273, -5.6122, -4.1580, -5.4589, -7.0509, -6.0448,
        -5.5890, -3.3318, -2.8875, -6.4974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6068, -8.8456, -7.1711, -3.9495, -5.1267, -3.7416, -7.4818, -7.5945,
        -6.2613, -5.8038, -3.7462, -4.2552, -5.7072, -7.4483, -6.1567, -4.7543,
        -3.9304, -3.2161, -5.5663, -7.0287], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6696, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6589, -8.4756, -6.8156, -6.0217, -5.2422, -4.2975, -4.3466, -5.8851,
        -7.0444, -6.5329, -4.5974, -4.1800, -3.1406, -5.1986, -7.7667, -5.8536,
        -4.5065, -3.6460, -5.2626, -4.0463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9326, -4.5005, -5.0483, -3.9891, -6.3685, -7.0131, -6.0243, -4.0462,
        -3.9371, -3.4707, -6.0292, -6.9821, -6.0564, -5.0348, -6.0985, -3.4158,
        -5.1814, -6.9657, -6.4772, -5.7719], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1201, -4.6957, -7.7974, -4.4925, -6.5723, -6.9730, -5.7192, -5.6232,
        -4.2341, -3.2002, -4.3435, -7.9036, -6.6691, -6.1546, -5.1443, -7.4615,
        -5.6137, -6.3269, -7.3297, -6.2123], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9404, -6.8859, -6.0184, -5.9802, -4.6717, -6.1877, -3.0263, -6.3364,
        -6.9008, -6.3109, -5.5547, -4.7601, -3.4539, -4.3161, -7.7488, -6.7921,
        -5.2488, -4.9705, -2.7608, -6.1256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8744, -4.0350, -3.8098, -3.3789, -4.7612, -7.3210, -6.2210, -4.6960,
        -3.6311, -2.9573, -4.3679, -7.1975, -6.4553, -4.3782, -3.5085, -4.1218,
        -4.7855, -7.2216, -5.8739, -5.5604], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8470, -6.5150, -5.4774, -3.5956, -2.8932, -5.7481, -6.8457, -6.5980,
        -4.6697, -4.3767, -5.6669, -4.5866, -7.8601, -6.8150, -5.1232, -4.5476,
        -5.7379, -3.8693, -6.5267, -7.1823], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8708, -7.8366, -4.8197, -6.1272, -7.7167, -6.9053, -7.0675, -4.5496,
        -7.1746, -6.4318, -7.9212, -6.1351, -5.7735, -4.2572, -6.0549, -4.4248,
        -6.1517, -6.5915, -6.3175, -5.1311], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9758,  -4.0711,  -6.8876,  -6.9180,  -6.5282,  -3.8657,  -3.9497,
         -7.8391,  -4.1179,  -7.9011,  -6.0780,  -6.2089,  -5.9996, -11.8852,
        -10.0033,  -7.4488,  -7.5752,  -3.9971,  -8.0698,  -6.2778],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2790, -7.4985, -6.5757, -5.0008, -4.2936, -3.3700, -3.7940, -7.0712,
        -6.5374, -5.3105, -3.6891, -3.5162, -5.7511, -7.5450, -6.6492, -3.5850,
        -4.0390, -5.8321, -9.1411, -6.4171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2000, -7.2493, -7.4476, -4.7943, -8.4641, -6.8580, -4.7512, -4.7954,
        -3.5468, -7.8007, -7.7092, -6.2361, -5.0607, -4.4104, -4.8583, -3.6217,
        -5.7263, -7.2419, -6.2480, -4.0157], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9317, -6.1377, -6.7205, -4.4009, -4.1585, -4.8093, -6.0868, -7.2364,
        -6.6003, -4.6524, -3.4114, -4.8288, -3.8748, -6.5674, -6.0006, -5.1069,
        -5.1052, -3.0551, -5.7103, -7.2338], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5916,  -5.4275,  -5.0096,  -3.3512,  -3.8341,  -6.0566,  -6.9244,
         -6.4256,  -4.4626,  -3.7274,  -5.5368,  -5.2432,  -6.5841,  -5.9949,
         -5.5892,  -5.3382, -17.4453,  -5.8134,  -8.2927,  -4.3751],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6479, -6.9541, -7.4106, -6.8163, -4.1191, -4.5439, -5.9946, -6.0802,
        -6.3824, -7.1635, -7.2602, -4.4446, -7.5588, -6.9501, -6.3373, -4.3685,
        -5.7999, -8.8878, -5.6104, -7.2263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1338, -4.7038, -4.8341, -5.1900, -7.7152, -7.0160, -4.4055, -4.7509,
        -6.6977, -3.4682, -5.8627, -6.9558, -6.2162, -4.2216, -4.4191, -3.4039,
        -4.4320, -7.8097, -7.0660, -4.7355], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4519, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2565, -5.1230, -3.9694, -3.0483, -4.3618, -6.3662, -7.2696, -6.8918,
        -3.9296, -4.5500, -3.8574, -4.7163, -7.2872, -6.5626, -5.6632, -3.6875,
        -2.9748, -4.2103, -6.5498, -6.1841], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7519, -6.4625, -7.4482, -6.9417, -4.7019, -3.8241, -3.1759, -6.0627,
        -7.6110, -6.0190, -5.8668, -5.1802, -5.2888, -3.7154, -6.2175, -6.6911,
        -6.3813, -5.4443, -3.6421, -2.9965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6819, -6.8170, -7.7029, -5.9450, -5.1906, -5.7035, -3.5955, -6.3960,
        -7.2570, -6.3187, -5.0122, -4.3062, -3.2563, -5.2061, -6.9281, -6.0640,
        -5.8377, -4.4560, -3.7174, -5.6192], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7499, -4.8030, -6.3990, -4.5143, -6.5717, -7.1248, -6.7895, -4.1288,
        -4.3503, -2.5670, -5.6669, -7.7346, -6.0841, -6.1517, -6.4742, -2.6523,
        -3.3990, -7.0479, -7.5298, -6.8454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6536,  -5.4385,  -5.9319,  -3.6451,  -6.7883,  -6.7145,  -6.1977,
         -4.9526,  -4.1531,  -2.6199,  -6.1118,  -7.8022,  -5.9421,  -5.1296,
         -3.2919, -21.6478,  -8.1140,  -6.8779,  -6.8059,  -5.8507],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5335, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9867, -6.7082, -7.7653, -4.4490, -3.7630, -5.1871, -6.8705, -6.6504,
        -6.4120, -4.9216, -3.8103, -3.1647, -4.6932, -7.4803, -5.7017, -5.1291,
        -4.2766, -3.0926, -5.0418, -7.7779], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8516, -6.6843, -6.4636, -3.5041, -4.7751, -3.5152, -5.4275, -7.5156,
        -6.0628, -6.1473, -3.8651, -3.6645, -5.9563, -7.3216, -6.3105, -5.1476,
        -6.6173, -5.3218, -4.8806, -6.1129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5073, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3693, -6.9405, -6.0166, -4.2957, -3.4968, -4.7207, -7.4115, -6.2282,
        -4.6235, -5.0146, -6.4096, -4.8806, -6.1150, -6.8879, -6.3241, -5.0585,
        -3.0789, -2.7229, -6.1445, -7.6038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5672, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9934, -5.4204, -8.0070, -5.5912, -4.4842, -4.5965, -5.8364, -6.3981,
        -6.3816, -7.5865, -7.1061, -5.3495, -4.2178, -3.0760, -5.5190, -7.8236,
        -6.5832, -3.9929, -3.5060, -3.0579], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4323,  -8.6480,  -7.3725,  -6.3963,  -5.2539, -11.9743, -11.8145,
         -7.8535,  -7.7214,  -8.9101,  -7.2515,  -4.4333,  -5.1757,  -5.9473,
         -7.0220,  -6.8874,  -6.2018,  -7.3182,  -5.6038,  -8.1376],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3357, -7.9048, -5.9781, -5.0715, -5.7843, -5.2817, -3.8037, -6.7078,
        -6.7682, -6.4982, -5.1456, -6.5647, -3.3317, -4.0570, -7.0733, -6.3064,
        -4.5256, -3.9864, -3.6283, -8.0250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6865, -3.2708, -6.1999, -7.2894, -6.2085, -5.2337, -4.4096, -9.7093,
        -5.2899, -6.3566, -6.6562, -6.6987, -4.3497, -5.2559, -3.6894, -5.4305,
        -7.9520, -6.7856, -6.5959, -4.4634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9352,  -4.0556,  -4.9111,  -5.1259,  -8.0719,  -5.4029,  -5.0076,
         -4.3757, -11.2599,  -5.1011,  -8.1376,  -6.3986,  -7.4900,  -6.0309,
         -8.2052,  -6.1808,  -5.6563,  -5.5510,  -3.6947,  -4.8818],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1025, -6.5578, -3.8925, -7.9510, -5.6287, -5.9265, -5.6862, -5.1273,
        -6.2650, -8.1786, -6.7221, -4.5023, -4.4260, -3.1697, -3.9030, -7.1344,
        -6.5678, -6.4088, -5.3032, -4.8709], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6785, -5.9271, -5.6634, -4.7299, -6.8163, -4.3524, -7.7087, -7.0984,
        -8.5282, -7.4564, -4.9354, -5.3629, -5.3547, -6.5964, -6.9237, -6.2606,
        -7.2896, -4.9766, -8.4423, -6.5946], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0391,  -4.5128,  -5.6866, -11.9619,  -7.8734,  -6.9186,  -7.6516,
         -4.7215,  -8.3276,  -6.3359,  -5.0685,  -6.7153,  -7.1846,  -4.0152,
         -6.0715,  -7.1850,  -5.7599,  -5.8631,  -3.7302,  -4.9259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5901,  -7.6573,  -6.1177,  -5.6497,  -4.5021, -13.8624,  -6.1165,
         -6.6838,  -6.2782,  -7.4266,  -5.0469,  -9.9755,  -6.9651,  -4.9133,
         -4.4727,  -3.1071,  -6.1228,  -7.7152,  -5.8237,  -5.4422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1405, -5.2154, -4.4917, -3.4467, -3.5826, -6.2005, -6.9328, -6.2976,
        -5.0558, -4.6348, -4.4395, -4.6298, -7.2429, -5.7620, -4.7813, -4.0083,
        -3.7787, -3.6985, -6.5522, -6.7072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2417, -6.2646, -7.7492, -6.4723, -6.8622, -6.6313, -5.8555, -4.2694,
        -7.3879, -6.3985, -4.4490, -4.4856, -6.0129, -3.6030, -7.5166, -6.1951,
        -5.8840, -4.4192, -3.4232, -3.0410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4637, -6.3706, -3.6928, -4.2306, -7.6352, -4.3563, -7.6538, -6.6841,
        -6.2088, -6.0038, -4.6050, -4.2403, -6.0557, -6.2614, -7.5644, -7.0326,
        -5.8383, -4.9545, -6.5508, -4.7919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5387, -6.3044, -6.6281, -6.2717, -4.6358, -3.4621, -2.6427, -6.0349,
        -7.6854, -6.5963, -4.9902, -3.3769, -5.4138, -4.8495, -7.2093, -5.8261,
        -4.7675, -8.4000, -3.2900, -5.0345], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5959, -12.3185,  -6.5273,  -5.9351,  -6.8895,  -7.6909,  -4.6863,
         -9.8294,  -7.5209,  -5.9159,  -5.7848,  -3.3131,  -5.9005,  -7.4656,
         -6.3045,  -6.7477,  -4.5529, -11.5504,  -6.0625,  -7.5728],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2770, -8.8730, -7.0772, -4.8509, -4.6760, -3.4131, -6.5813, -7.5847,
        -6.2033, -5.5321, -6.2570, -5.6608, -4.4870, -6.3129, -6.6487, -4.8666,
        -4.9971, -4.0288, -6.4026, -7.8721], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7859, -5.8056, -3.5144, -5.2409, -4.6774, -6.1790, -7.3795, -6.8065,
        -3.9771, -3.9024, -2.8919, -5.6742, -7.6274, -6.2806, -6.0616, -4.2887,
        -5.9563, -4.9649, -7.0403, -7.5455], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5800, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7734, -6.8785, -5.9556, -4.6456, -5.2810, -8.0647, -4.5364, -6.2066,
        -6.9623, -6.0372, -4.6802, -3.9245, -3.9581, -4.6969, -6.9756, -6.4147,
        -4.9790, -4.6972, -4.6268, -5.0575], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5935,  -6.7012,  -3.0032,  -2.7079,  -8.1600,  -7.6160,  -5.5460,
         -7.2109,  -6.4086, -14.5178,  -5.3718,  -7.3583,  -5.4633,  -7.9879,
         -6.3898,  -5.5698,  -4.1843,  -2.6277,  -4.9475,  -7.4924],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3692, -11.6551,  -7.0724,  -6.9933,  -7.7311,  -4.5897,  -8.6533,
         -6.8382,  -5.8770,  -5.1013,  -3.4117,  -8.8536,  -7.8408,  -6.1459,
         -5.6030,  -4.9091,  -9.1685,  -8.2680,  -6.2820,  -7.3704],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0340, -6.2045, -4.8831, -4.3202, -9.0982, -3.8096, -6.3370, -7.3700,
        -6.1367, -5.0953, -4.5079, -6.1418, -4.0771, -7.4068, -6.9201, -5.0086,
        -4.5892, -3.2883, -5.0888, -7.4810], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1881,  -5.0309,  -6.3981,  -7.8041,  -7.6414,  -7.5956, -10.5697,
         -5.1608,  -5.9664,  -8.4821,  -5.6494,  -5.0310,  -6.8883, -13.6333,
         -7.0851,  -7.0241,  -7.0305,  -4.1791,  -9.0624,  -6.5644],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3650, -4.0920, -7.0176, -6.3179, -7.6952, -6.2516, -6.1145, -8.9715,
        -9.9343, -6.8127, -6.7183, -7.9630, -4.3010, -9.1816, -6.8323, -4.7499,
        -4.4252, -3.4183, -5.7742, -8.2814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4495, -5.0436, -5.7399, -7.2773, -6.0701, -5.9099, -4.9047, -5.2977,
        -4.3885, -6.6804, -7.3083, -6.6605, -5.4671, -3.2330, -4.1526, -3.9010,
        -6.8274, -6.6207, -4.2380, -4.6869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5792,  -4.6228, -10.9030,  -7.0263,  -6.5431,  -7.4858,  -4.7152,
         -8.1243,  -7.0450,  -5.7845,  -4.4598,  -2.5501,  -5.0921,  -7.8913,
         -6.0431,  -5.8178,  -4.5896, -27.2701,  -6.7750,  -8.4268],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3373, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9572, -5.9233, -5.7390, -4.2834, -7.2717, -6.9492, -6.2383, -3.9911,
        -3.9573, -2.4484, -5.8555, -7.3209, -5.9716, -6.5998, -4.0644, -3.9578,
        -5.6162, -7.1547, -6.2938, -4.3007], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8885,  -6.8670,  -4.8589,  -4.8183,  -2.9465,  -4.9808,  -7.1176,
         -6.0077,  -4.8250,  -4.6169,  -3.0591,  -5.6096,  -7.8308,  -5.5060,
         -6.0929,  -6.9631, -13.2471,  -6.4382,  -6.8476,  -7.3387],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3167,  -5.9699,  -7.5082,  -6.7044,  -4.9546,  -3.6730,  -4.1127,
         -4.3766,  -6.9419,  -6.7182,  -6.4019,  -6.1607,  -2.9850,  -2.9460,
         -6.2191,  -7.7918,  -5.9083,  -4.8726,  -3.7304, -20.7885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2553, -6.4524, -4.7791, -4.6998, -3.6128, -5.8076, -6.9039, -7.2915,
        -6.7964, -4.6351, -3.9465, -4.0267, -4.6178, -7.3689, -6.1182, -4.6288,
        -3.8137, -3.6271, -5.9324, -7.8199], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5645, -4.0391, -4.8104, -7.9804, -5.6157, -4.7185, -5.3219, -6.7890,
        -7.3724, -6.2861, -8.3065, -3.5381, -8.8585, -7.3121, -6.3143, -5.6533,
        -3.2824, -3.3246, -5.8881, -7.3952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4082, -6.6937, -6.1273, -3.8520, -4.4301, -3.0139, -6.0159, -7.5431,
        -7.3259, -6.7413, -2.5763, -6.8378, -5.2291, -6.6767, -7.1015, -6.8938,
        -7.1641, -6.2056, -4.3464, -3.9606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0451, -4.3102, -6.5790, -6.9521, -5.9488, -5.0711, -3.5808, -5.6277,
        -5.0733, -6.8313, -6.8426, -5.5645, -5.2636, -3.6572, -4.3170, -3.9551,
        -7.3606, -5.9980, -5.8375, -6.1126], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5791, -7.2188, -6.6314, -5.5302, -4.4152, -3.6882, -4.9862, -7.2738,
        -6.7831, -4.1933, -4.0780, -2.5068, -4.7340, -6.9348, -5.9604, -4.9756,
        -3.6247, -4.3896, -4.0012, -7.0935], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3208, -4.3990, -5.4174, -4.5563, -7.6378, -6.0599, -6.0236, -4.2935,
        -6.8343, -4.8831, -6.0898, -6.6428, -6.4463, -4.5618, -4.0166, -2.7278,
        -5.4073, -7.3162, -6.1230, -5.2931], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5525, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2821, -7.6467, -5.9753, -5.4194, -3.9485, -9.4039, -6.9525, -7.4990,
        -8.1465, -4.0509, -8.5726, -5.8696, -5.2893, -4.4126, -4.7618, -4.7903,
        -7.4369, -5.7676, -5.3616, -5.2002], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7532, -4.4014, -7.5912, -6.2809, -4.7987, -6.1457, -8.6670, -7.4064,
        -6.6754, -7.7915, -4.2732, -7.5735, -6.9462, -5.6364, -6.2553, -3.7041,
        -5.7595, -7.7248, -6.2409, -6.4346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9467, -5.8438, -4.4606, -3.9303, -4.3578, -6.0340, -6.9301, -6.5267,
        -3.7616, -3.7025, -2.8319, -5.4030, -7.8579, -5.9664, -4.9182, -3.6540,
        -3.2383, -4.3749, -6.9808, -6.3672], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7268,  -6.1702,  -5.8367,  -5.7966, -16.4782,  -7.7772,  -7.3597,
         -7.2687,  -3.5381,  -8.2587,  -7.0875,  -5.0715,  -3.9097,  -3.2084,
         -6.0818,  -7.5211,  -5.6353,  -5.1658,  -3.7742,  -4.7612],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5081, -6.8221, -4.9718, -4.2652, -4.8247, -3.8180, -8.1232, -5.7498,
        -5.2705, -4.7753, -6.8465, -4.9641, -7.8057, -6.6389, -4.0566, -4.6078,
        -3.0606, -5.5067, -7.6211, -5.4976], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0233, -6.2284, -5.8279, -3.5685, -3.3457, -3.7166, -7.3796, -6.3848,
        -5.5798, -4.2223, -4.6871, -3.5748, -6.5133, -6.9953, -5.9549, -5.2970,
        -3.9727, -4.1850, -4.1058, -7.8385], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8690,  -5.9400,  -5.2135,  -3.5696,  -7.2010,  -4.3107,  -6.5269,
         -7.4290,  -6.4805,  -6.8664,  -3.6587,  -6.0887,  -9.5337,  -5.8205,
         -6.8284,  -6.1036, -18.4813,  -6.5304,  -5.8191,  -7.8346],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7148, -5.9763, -5.0353, -3.9449, -3.6763, -4.9785, -7.5732, -6.8287,
        -6.4458, -5.5578, -3.9552, -4.6322, -6.1341, -7.0138, -6.8129, -5.2142,
        -3.3519, -3.0499, -6.2023, -7.1642], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8825, -6.7915, -6.3936, -7.0104, -3.5658, -3.5850, -4.6254, -6.7426,
        -6.4573, -6.8495, -4.4661, -3.8074, -6.9720, -7.7400, -5.4856, -5.1568,
        -5.8099, -3.6502, -6.6066, -8.0203], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7588,  -6.2183,  -6.3893,  -7.5579,  -3.9526,  -7.7196,  -5.8613,
         -5.9189,  -4.8614,  -3.6284,  -5.2198,  -5.3036,  -7.3845,  -6.5863,
         -4.3961,  -4.0022,  -3.3477,  -4.4575,  -7.0565,  -5.3686],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1463, -7.4252, -4.4004, -8.5765, -6.6072, -4.0812, -5.1364, -3.2222,
        -6.2318, -7.1888, -5.6140, -5.1402, -5.7254, -9.9276, -7.8463, -6.1295,
        -8.0423, -4.4047, -7.9294, -6.5873], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4206, -6.3863, -6.5083, -4.4772, -3.2306, -3.3273, -4.4446, -7.2835,
        -5.7180, -4.4684, -5.3991, -7.8515, -3.1996, -5.8167, -6.7229, -5.7788,
        -4.9846, -4.0409, -4.2022, -3.5782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6496, -6.3267, -7.5477, -3.5179, -9.6656, -6.3739, -5.0855, -4.4310,
        -5.2378, -3.8659, -6.6991, -7.4495, -5.6739, -4.9194, -4.6477, -4.1527,
        -4.1386, -6.2887, -6.6660, -6.5197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2381, -3.5828, -3.0879, -6.1541, -7.6691, -6.2509, -5.3428, -4.7274,
        -5.9072, -5.4683, -8.4841, -6.0827, -3.8362, -4.5943, -4.6768, -3.5864,
        -6.1286, -6.8274, -6.2474, -5.2993], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5227, -6.1419, -7.5468, -6.4411, -5.1558, -3.4868, -2.1792, -6.0268,
        -7.3633, -5.4138, -5.5052, -4.5651, -3.0767, -4.1656, -7.4210, -5.8306,
        -3.6218, -4.1969, -2.5204, -4.6706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6757, -6.3543, -4.3584, -5.1438, -4.0072, -5.1318, -7.8323, -5.7463,
        -5.5194, -3.9730, -2.9634, -3.2414, -6.5813, -6.4644, -6.2310, -4.0576,
        -4.5855, -6.0289, -4.6345, -7.6645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7877,  -6.4124,  -4.1759,  -3.1866,  -5.3595,  -7.7455,  -5.6354,
         -5.2713,  -3.7859, -17.5792,  -7.6682,  -7.1149,  -7.2073,  -3.9735,
         -8.2107,  -6.0072,  -6.1717,  -3.6091,  -3.3957,  -4.9139],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2106, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8423, -4.1187, -4.1069, -8.0523, -3.8366, -6.2989, -6.8198, -6.2705,
        -4.5542, -6.1322, -4.3151, -4.0636, -5.9600, -7.0531, -6.1583, -4.5204,
        -4.3285, -5.1663, -2.7417, -6.4651], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1892,  -6.4325,  -8.2194,  -6.6369,  -6.0748,  -6.3384,  -3.2252,
         -6.6525,  -7.4899,  -5.9152,  -4.8715,  -6.0666, -15.9599,  -6.5298,
         -6.4444,  -7.6951,  -5.6638,  -8.0196,  -7.1010,  -5.0073],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9710,  -6.7920,  -7.5960,  -6.1201,  -5.3150,  -4.8083,  -3.2855,
         -6.8953,  -7.6380,  -5.6679,  -4.4992,  -3.8932, -16.6760,  -5.7824,
         -7.6360,  -5.2019,  -6.1040,  -7.3110,  -6.4233,  -4.7261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1059, -4.5482, -4.2508, -3.0626, -6.7078, -6.7703, -6.0650, -4.6433,
        -5.6431, -4.2455, -4.2399, -6.2345, -7.0399, -5.9827, -5.1717, -3.1364,
        -3.1595, -5.2424, -7.2409, -5.4280], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8480, -2.5545, -4.9340, -7.9902, -5.2958, -6.2487, -5.8180, -3.0524,
        -3.8527, -7.5347, -6.1345, -4.3699, -3.7793, -4.1470, -3.4670, -7.6635,
        -5.8164, -5.2249, -5.4671, -4.6267], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4926, -5.5660, -7.1166, -5.9282, -4.9219, -4.1144, -4.5773, -4.4974,
        -6.7306, -5.2627, -4.9907, -3.6303, -2.7248, -5.0207, -7.8161, -5.8546,
        -4.9624, -4.3473, -9.9137, -6.4540], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7853, -5.3068, -3.9930, -3.6664, -6.7860, -7.0461, -6.4688, -3.7864,
        -3.7921, -3.4797, -4.8255, -6.7473, -5.7146, -5.2335, -4.1142, -3.2733,
        -3.0085, -7.2330, -7.0401, -6.4298], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2647, -6.4411, -3.9640, -6.7393, -7.2639, -6.3734, -3.9322, -4.2207,
        -5.1614, -3.1957, -6.6507, -7.1209, -6.1290, -5.1278, -3.4131, -5.6630,
        -4.6363, -6.3071, -7.0014, -5.5966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3535, -4.0014, -4.8493, -5.2938, -6.8897, -5.8754, -4.8347, -4.8206,
        -6.1128, -4.3895, -5.6018, -6.9323, -6.2706, -3.8966, -4.0180, -4.2582,
        -5.4091, -7.6922, -6.0856, -5.4797], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6931, -7.0132, -9.1764, -4.4135, -7.6751, -5.5279, -6.1770, -3.4285,
        -4.8159, -3.1020, -7.3820, -6.1106, -7.4597, -4.8508, -2.8840, -3.6721,
        -8.3904, -6.1042, -4.0480, -6.3788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5339, -2.2709, -6.0413, -7.5979, -5.4905, -5.3267, -3.9404, -3.0166,
        -4.8910, -7.0486, -5.9155, -4.9367, -3.1915, -2.3982, -5.6729, -7.6961,
        -6.7453, -4.7432, -3.5185, -2.7503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9754, -6.0280, -6.9111, -2.9032, -3.7700, -7.3647, -6.3383, -4.8938,
        -4.9919, -5.2173, -5.8218, -6.4003, -6.9326, -6.3204, -6.5562, -6.6282,
        -9.4699, -4.5151, -7.0192, -6.4472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5909, -6.5601, -4.6382, -4.4253, -7.1556, -4.3361, -6.4502, -6.7606,
        -5.8436, -5.5083, -4.3832, -3.9319, -3.9845, -7.1544, -6.9828, -6.4490,
        -3.7047, -3.7843, -4.8579, -4.6283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4565, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4158, -2.9131, -2.4102, -5.6824, -7.5235, -5.9588, -5.4215, -5.4878,
        -2.7302, -3.8198, -7.3254, -5.9672, -4.5823, -3.9102, -5.1956, -2.9346,
        -6.5958, -6.6141, -6.0883, -4.0878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9881, -4.6949, -7.9129, -6.5044, -4.7698, -3.7089, -2.5037, -4.4113,
        -7.2290, -5.8074, -5.3071, -3.5375, -3.3595, -3.8901, -7.1955, -6.0881,
        -5.1565, -3.7375, -4.3428, -2.6014], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8373, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1747, -4.6219, -7.1562, -5.3393, -4.9890, -3.5612, -2.9590, -4.9438,
        -7.8271, -5.3726, -5.2778, -3.1727, -7.2126, -4.9777, -6.1527, -6.9520,
        -6.2363, -3.9992, -4.0955, -5.8096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2415, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3579, -4.9381, -6.8764, -6.9347, -8.3332, -5.5431, -8.8222, -6.9498,
        -6.2120, -4.3230, -3.3251, -5.1389, -7.3916, -5.8403, -6.8614, -4.2003,
        -2.8299, -4.5447, -7.3873, -7.3325], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6829, -6.6966, -6.6710, -3.6971, -6.0945, -6.6445, -5.4616, -4.8244,
        -4.3975, -2.5341, -3.6930, -7.9008, -5.7770, -5.0405, -4.7652, -3.2603,
        -4.7833, -6.0398, -7.2244, -6.8925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8319,  -5.6061,  -4.8498,  -7.2994,  -2.5254,  -4.3506,  -7.4788,
         -6.5888,  -5.5637,  -4.1452,  -3.6575,  -6.2945,  -7.8147,  -5.5129,
         -5.4062,  -9.2129, -11.0060,  -5.3520,  -5.4227,  -7.5775],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2468, -5.3910, -5.7389, -4.7936, -5.6456, -3.8470, -6.0730, -6.8929,
        -5.7834, -4.0854, -3.5266, -3.0653, -5.2583, -7.7627, -5.4824, -4.6541,
        -3.8495, -2.9753, -5.0384, -7.0321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2071, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7794,  -6.2613,  -6.0632,  -4.1690,  -3.2707,  -5.5673,  -7.9581,
         -5.5490,  -5.0048,  -6.6513, -14.0690,  -6.4938,  -8.4996,  -3.5711,
         -9.1284,  -6.5391,  -4.7991,  -5.4039,  -7.6781,  -6.5242],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8118, -5.4535, -4.6721, -7.2728, -3.0792, -4.3281, -7.3779, -6.6138,
        -3.6569, -3.7274, -4.1106, -4.1140, -5.9930, -6.4369, -6.2728, -6.0212,
        -3.1896, -4.0806, -7.9558, -7.3109], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7469, -5.1642, -4.7742, -3.4685, -4.3666, -6.2876, -7.5164, -6.6446,
        -4.0989, -4.6407, -2.7112, -6.2444, -7.4096, -5.5793, -6.4732, -3.3277,
        -2.5832, -5.1230, -7.1814, -5.2565], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4716, -3.9223, -2.9099, -5.6331, -7.0476, -5.9594, -3.7108, -6.4827,
        -3.8010, -4.9417, -5.5892, -6.4207, -5.1918, -6.0596, -2.5907, -5.2205,
        -5.5372, -6.6336, -5.8755, -5.7002], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1311, -6.0752, -6.0970, -3.6862, -3.2293, -4.3094, -6.4428, -7.4731,
        -5.8240, -4.4310, -3.5325, -3.9678, -3.2605, -7.1903, -5.5741, -6.4040,
        -4.3562, -3.2246, -4.8739, -7.2981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7970, -4.7772, -3.9226, -3.5366, -4.4802, -7.4867, -6.3647, -5.3677,
        -4.6732, -5.0399, -3.5632, -6.5176, -6.8082, -5.5694, -5.1233, -4.0599,
        -4.1627, -3.9751, -7.9351, -6.4567], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1256, -6.2297, -3.6715, -4.4682, -3.8565, -6.2628, -7.1055, -5.8375,
        -5.0047, -4.4317, -5.1232, -3.8588, -5.7146, -7.0791, -4.6730, -6.1114,
        -2.9373, -3.6195, -4.4604, -7.6928], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2717,  -3.9380,  -5.1348,  -4.9806,  -7.3574,  -6.0401,  -4.4121,
         -5.9128,  -4.8928,  -4.6723,  -6.0217,  -6.5239,  -6.0232,  -4.4317,
         -6.4369, -19.7136,  -5.8108,  -8.5338,  -4.0278,  -8.5449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6318,  -3.6293,  -5.5030,  -7.7861,  -6.4907,  -2.9669,  -3.8655,
         -3.7172,  -7.9538,  -7.7690,  -5.6653,  -5.1210,  -4.2891, -11.8832,
         -5.3708,  -5.8843,  -4.8284,  -6.0288,  -6.4893,  -7.6673],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9770, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3480, -5.7039, -7.0164, -5.4171, -7.2402, -7.4806, -7.0160, -6.4827,
        -4.8190, -3.8117, -6.3128, -6.8908, -5.7453, -6.7451, -5.8472, -8.6543,
        -6.1629, -5.1104, -5.1072, -4.8906], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9399, -5.6440, -5.6029, -6.6109, -6.4100, -4.8668, -5.0954, -4.1818,
        -4.0759, -7.9002, -6.5083, -3.5985, -3.9212, -2.6253, -4.8972, -7.0586,
        -5.9445, -5.6202, -6.3504, -4.8729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7457,  -6.1406,  -4.1462,  -4.6592,  -3.3508,  -7.6565,  -7.8350,
         -5.8980,  -4.7703,  -6.3033, -13.6777,  -6.0299,  -6.6609,  -6.2196,
         -7.4890,  -3.0420,  -9.6007,  -7.6001,  -6.2728,  -5.1997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6420,  -5.3475,  -4.5596, -13.4805,  -7.8268,  -5.6325,  -7.7391,
         -4.2634,  -8.4555,  -5.8912,  -5.5973,  -3.9766,  -4.5621,  -5.8069,
         -7.6450,  -5.7504,  -5.3161,  -5.5784,  -3.7734,  -5.2508],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2685, -6.0285, -6.7264, -3.8857, -1.9426, -7.4582, -7.6030, -5.9768,
        -4.2704, -4.3191, -4.3924, -5.4690, -6.8229, -7.4976, -6.5896, -8.2272,
        -4.3288, -2.8332, -5.2827, -6.5917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0892,  -8.6456,  -7.6102,  -5.2205,  -5.2960,  -4.4301, -10.8111,
         -8.2134,  -6.6824,  -7.6190,  -4.1231,  -7.9476,  -6.4321,  -6.0357,
         -4.1998,  -4.9559,  -3.6407,  -6.0848,  -6.9267,  -6.0836],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1986,  -6.2340,  -7.5532,  -5.6664,  -5.4901,  -5.3677, -13.2330,
         -6.9628,  -6.6660,  -7.4941,  -3.5969,  -7.9089,  -6.6665,  -4.5450,
         -4.4762,  -2.9129,  -5.1322,  -7.4411,  -5.4388,  -5.0573],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9725, -7.1884, -7.5585, -7.2035, -7.4982, -8.0417, -7.0219, -7.9107,
        -6.9173, -7.1637, -8.1793, -7.2653, -6.8151, -7.5181, -7.7370, -7.1891,
        -6.6198, -7.2643, -6.9184, -6.6573], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6378, -3.7290, -6.0663, -7.2984, -5.8647, -6.0823, -4.3105, -3.3445,
        -4.7298, -6.0695, -6.7274, -6.1657, -4.7259, -3.5929, -5.6842, -3.2755,
        -5.0065, -6.3436, -5.1681, -6.1782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.9277,  -5.7822,  -7.1380,  -5.7306,  -7.1252,  -3.4536,  -9.4026,
         -5.9396,  -4.5903,  -4.0643,  -3.2707,  -5.5561,  -7.6158,  -5.5103,
         -5.4964,  -4.0322,  -4.8973,  -6.5035,  -7.4581,  -6.3890],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5349,  -3.9315,  -7.3789,  -6.0221,  -4.9723,  -5.0862, -23.3977,
         -3.2869,  -7.3779,  -3.4908,  -8.3885,  -6.5705,  -7.1744,  -4.7873,
         -5.0670,  -2.5128,  -5.8286,  -6.9960,  -5.7088,  -5.6662],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0095, -6.3050, -7.2987, -6.0978, -4.8843, -3.7507, -9.1861, -6.5552,
        -9.5747, -4.6882, -9.5063, -6.6168, -4.2784, -4.8051, -4.9702, -4.5018,
        -5.9703, -7.0834, -6.3187, -6.8310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1116, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0166, -11.4881,  -6.6659,  -4.9185,  -6.3592,  -7.4358,  -5.4823,
         -6.1290,  -7.9314,  -7.2170,  -6.2008,  -6.8564,  -3.3031,  -7.7624,
         -7.8035,  -5.2934,  -5.9303,  -6.8992,  -3.4700,  -5.2747],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3507, -6.3104, -7.3426, -3.9761, -3.8257, -4.7717, -7.7679, -6.1200,
        -5.1227, -3.6639, -3.0975, -3.4525, -5.6474, -6.5408, -6.3214, -5.9041,
        -2.7272, -3.0428, -4.4485, -7.0305], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1737, -4.3642, -4.5184, -5.0231, -6.0664, -6.8418, -6.1320, -5.5713,
        -3.9375, -4.1518, -3.0921, -7.6904, -6.7644, -6.2626, -4.2792, -4.6725,
        -4.1297, -6.4504, -7.5665, -6.1461], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3917, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9570,  -2.7872,  -6.6024,  -7.4275,  -9.1751,  -7.8560, -11.4220,
         -8.3453,  -4.6084,  -6.6008,  -7.7803,  -8.1298,  -4.5620,  -7.8230,
         -7.5649,  -8.9163,  -8.8461, -13.6667,  -7.1849,  -6.7243],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9621, -3.8331, -8.0744, -3.5746, -6.2826, -6.7564, -6.3067, -4.8371,
        -5.8366, -5.0306, -4.3466, -6.0894, -7.0303, -5.7591, -4.8243, -4.1613,
        -5.0773, -3.8664, -7.1025, -5.9756], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3124,  -2.1888,  -5.6009,  -7.7636,  -5.9690,  -4.5464,  -3.4799,
        -21.5850,  -8.1238,  -6.9511,  -7.6104,  -3.8116,  -8.3230,  -6.3704,
         -5.3435,  -3.8795,  -3.8379,  -2.9190,  -7.8291,  -6.2387],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2430,  -6.2401,  -5.6385, -23.4514,  -5.7557,  -7.4599,  -5.9334,
         -6.8242,  -6.0647,  -6.9990,  -6.4438,  -4.0723,  -4.0146,  -3.3640,
         -4.2082,  -6.5571,  -6.5837,  -6.0342,  -4.5096,  -3.0695],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0025, -6.1776, -3.6530, -7.3713, -5.9190, -5.0509, -4.2219, -3.8231,
        -4.1920, -6.2437, -6.7457, -6.3383, -4.4437, -4.2632, -4.0348, -5.1041,
        -7.7916, -5.5211, -4.1288, -3.4468], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8548, -5.6733, -6.8555, -6.5260, -5.6296, -3.6502, -4.2435, -5.0422,
        -7.0514, -5.5855, -4.5327, -8.2964, -4.2731, -4.2420, -6.5069, -7.3133,
        -6.3756, -5.0717, -4.5414, -3.2334], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0671, -4.3831, -3.3568, -2.5118, -4.5365, -7.3903, -5.6280, -4.9757,
        -3.5874, -2.7110, -3.2164, -6.5534, -6.4800, -4.9296, -5.7292, -5.0791,
        -3.8703, -6.3556, -7.6374, -6.1938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7077, -16.7755,  -5.7161,  -6.8165,  -7.4421,  -6.8526,  -7.9329,
         -5.4483,  -7.2773,  -6.2347,  -6.5165,  -4.2136,  -2.7328,  -4.7807,
         -7.3297,  -5.7299,  -4.8865,  -6.3268,  -2.9192,  -5.2282],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4474, -6.0330, -4.3591, -3.9145, -7.7802, -3.5051, -6.0931, -6.4532,
        -5.9740, -5.0100, -5.2020, -4.2877, -5.2066, -6.0811, -6.8932, -6.2578,
        -4.7873, -3.2811, -2.5871, -5.6788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5695, -4.2216, -3.9621, -2.8816, -7.4191, -7.5787, -5.1991, -6.6068,
        -3.0738, -9.5970, -6.4153, -7.6676, -6.6369, -6.2934, -5.1151, -3.6146,
        -5.9469, -8.0104, -6.1495, -4.6418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8464, -6.1454, -3.2931, -4.5677, -6.3531, -6.8199, -6.2239, -4.2519,
        -4.0652, -2.4888, -4.1898, -7.6824, -6.2190, -5.3161, -3.7799, -3.4431,
        -6.2122, -7.1173, -6.0947, -5.1658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3337, -5.0073, -6.6375, -6.1210, -4.2537, -5.7830, -6.6543, -6.3321,
        -5.2991, -2.8319, -3.8012, -6.0956, -7.0381, -5.5604, -4.6324, -3.3716,
        -4.0383, -5.5748, -6.7042, -6.6293], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1320, -5.2027, -7.0990, -5.8944, -4.3329, -3.3361, -2.3422, -5.0973,
        -7.5814, -5.3149, -5.6341, -6.3372, -3.7225, -3.9811, -7.0158, -6.7045,
        -6.3039, -4.0312, -2.9977, -4.2232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6004,  -7.7547,  -5.9722,  -7.6430,  -5.9718, -21.9991,  -7.6322,
         -7.2734,  -4.3780,  -8.3492,  -6.1630,  -5.1059,  -4.3236,  -3.1714,
         -5.2733,  -7.7417,  -5.7207,  -5.2851,  -3.6318,  -2.5173],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5757, -6.4646, -6.2951, -7.6474, -2.6673, -9.9631, -6.9289, -5.5317,
        -5.1257, -4.6562, -4.5400, -5.4451, -7.1835, -5.9565, -6.5237, -4.0219,
        -4.7606, -5.4668, -7.7762, -5.4595], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5623, -32.4876,  -6.5231,  -8.0179,  -3.5352,  -7.6891,  -6.3216,
         -6.2129,  -4.1906,  -3.1960,  -5.4437,  -7.7312,  -5.7751,  -5.1000,
         -5.3345,  -3.6046,  -4.9672,  -6.7159,  -7.3390,  -6.6164],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6526,  -5.7084,  -6.8894,  -6.1130,  -4.9483,  -5.0201,  -3.1069,
         -3.9455,  -5.4145,  -7.5944,  -7.0773,  -4.0896,  -4.0921,  -2.3509,
         -7.7659,  -7.5286,  -5.5072,  -6.5128,  -4.2473, -14.7359],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1387, -3.8340, -5.2853, -4.8405, -6.0313, -7.4234, -6.1860, -7.0953,
        -2.9618, -3.0985, -4.5197, -6.7820, -6.2496, -3.7690, -3.8844, -3.6353,
        -7.4015, -7.5946, -5.2857, -5.5534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2964, -7.0908, -6.1906, -5.7672, -3.7059, -2.9111, -4.5665, -7.3438,
        -5.8642, -5.8928, -3.3925, -3.0649, -4.6043, -6.9529, -5.5346, -4.4940,
        -4.3322, -3.1102, -4.9802, -7.5932], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9806, -4.1456, -3.5361, -2.8977, -3.6708, -7.3254, -5.5958, -4.9453,
        -3.6576, -4.6522, -6.1450, -7.5103, -6.2240, -7.0445, -3.2129, -2.7224,
        -4.1548, -7.4050, -5.9298, -5.1822], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3300, -4.9248, -6.0994, -6.8082, -6.5871, -7.4835, -6.7132, -7.6399,
        -6.9591, -4.0711, -5.3941, -3.3174, -4.5246, -7.2565, -5.9018, -6.0808,
        -4.8040, -4.8788, -6.8739, -7.3097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9114, -4.0219, -7.5268, -5.4376, -4.9238, -7.7014, -5.7627, -4.4315,
        -3.8949, -3.1358, -4.5924, -6.9949, -6.1988, -3.0860, -4.2932, -3.1142,
        -4.6140, -7.4412, -5.3426, -6.0554], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4291, -3.6046, -7.5268, -3.9648, -5.9926, -6.9063, -6.5625, -3.7968,
        -3.4359, -2.8552, -4.6531, -6.7012, -6.1254, -4.9286, -3.5168, -2.9047,
        -5.5365, -7.3095, -5.8605, -6.7705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2568, -5.9345, -6.3169, -4.2001, -3.8846, -5.7784, -6.2379, -6.3460,
        -6.3675, -4.1947, -3.2021, -3.1139, -5.0458, -6.7629, -5.0864, -5.4001,
        -3.3186, -3.5135, -4.2215, -7.5174], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9600, -4.0651, -3.3973, -3.4288, -5.2666, -6.9993, -5.7647, -4.4637,
        -2.8606, -3.7629, -3.9457, -7.6663, -6.4719, -5.9106, -3.9292, -2.5753,
        -5.0774, -8.0130, -5.5827, -6.1387], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6068, -5.0272, -6.5349, -7.5688, -6.2969, -7.4071, -9.9243, -2.6085,
        -4.2558, -7.1961, -5.3005, -6.6986, -4.0572, -6.0821, -4.3774, -6.6097,
        -6.3955, -6.1682, -4.6873, -3.7765], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0195, -5.7022, -4.3380, -3.8994, -6.0504, -6.6771, -6.1800, -4.7043,
        -3.8661, -4.8217, -4.1725, -7.1390, -5.8269, -4.5157, -4.3840, -3.3166,
        -2.8334, -6.2656, -7.2604, -5.4962], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8997,  -2.7735, -10.4377,  -6.5981,  -4.7964,  -5.1105,  -8.1856,
         -5.2725,  -8.0712,  -7.3974,  -6.8060,  -6.4483, -17.6345,  -5.3979,
         -5.8706,  -7.1821,  -5.8886,  -7.6208,  -3.2245, -10.6257],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4875, -3.9630, -8.7933, -6.3381, -4.9786, -4.0766, -4.0823, -3.9733,
        -5.9811, -6.8536, -5.9854, -4.5800, -3.0138, -3.6602, -4.7262, -6.8552,
        -5.7601, -4.0257, -4.1817, -3.4294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1373, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6760,  -5.8315,  -7.6967,  -5.6989,  -5.1231,  -4.7352,  -5.0184,
         -4.3268,  -7.1027,  -7.6970,  -6.6555,  -5.0979,  -4.1558,  -2.5925,
         -5.9297,  -7.6175,  -5.7132,  -4.8404,  -4.1767, -13.6845],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4384, -5.0300, -6.9180, -7.6580, -6.7524, -7.1079, -5.6793, -5.4089,
        -6.9086, -7.1509, -6.1188, -5.0729, -4.2630, -4.0924, -3.4508, -7.4509,
        -5.8840, -5.0968, -4.6663, -5.8710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0782, -5.4728, -3.9211, -6.6000, -7.5331, -6.2750, -4.4897, -4.4692,
        -3.5047, -6.3576, -7.7622, -5.1926, -6.5256, -5.1679, -2.4991, -4.3403,
        -6.3554, -7.5519, -6.4148, -7.7547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4701, -3.3222, -4.8061, -6.6359, -6.5278, -5.7334, -4.2982, -2.7662,
        -4.6052, -7.2174, -6.3206, -2.9510, -4.2685, -5.7513, -3.1873, -6.2093,
        -6.7607, -5.7546, -4.0913, -3.9008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0510,  -6.4831,  -7.7747,  -6.1048,  -5.9381,  -4.6422, -13.2562,
         -7.2627,  -6.2221,  -7.0988,  -4.8537,  -7.7151,  -5.8392,  -6.1187,
         -5.5694,  -4.2317,  -4.5779,  -4.8345,  -6.8399,  -6.1590],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5585, -6.4298, -7.8057, -4.1052, -8.3759, -6.2102, -4.8493, -3.7729,
        -3.7727, -4.1896, -7.4831, -5.6479, -4.2511, -3.7311, -4.1656, -4.2946,
        -6.1566, -6.6974, -5.8545, -5.0763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0024, -6.7753, -7.3697, -3.8362, -8.2356, -5.8129, -5.8417, -4.2872,
        -3.6139, -4.1006, -6.2051, -6.6858, -6.2622, -4.8128, -3.5204, -2.9788,
        -5.0978, -7.8837, -5.6053, -4.7508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7793,  -6.6203,  -6.1387,  -4.9599,  -4.6995,  -2.5081,  -6.8079,
         -7.5310,  -5.3593,  -6.0481,  -7.1268, -24.9348,  -5.8053,  -6.7143,
         -7.4913,  -6.1244,  -7.5863,  -6.0293,  -6.7332,  -4.6923],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5557, -5.9758, -3.3448, -3.8732, -5.1077, -7.6084, -5.7582, -5.3064,
        -6.0822, -3.7887, -4.5795, -7.5180, -6.4712, -3.5850, -3.4291, -3.8038,
        -5.6499, -8.0370, -6.1138, -3.9046], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8992,  -7.7632,  -5.2315,  -5.5400,  -7.0153, -17.2750,  -6.4083,
         -8.5020,  -5.4709,  -5.3086,  -7.4270,  -5.9269,  -5.2776,  -6.8327,
         -9.4032,  -7.7281,  -9.0045,  -3.6935,  -5.6020,  -7.0708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8571,  -4.5585,  -5.1864,  -4.8729,  -6.1301,  -6.9142,  -6.3166,
         -4.0288,  -4.0816,  -4.0862,  -5.8234,  -7.2524,  -5.4283,  -5.0840,
         -3.6438, -14.6086,  -5.9418,  -7.0549,  -6.7013,  -7.5687],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0300, -5.8731, -5.0881, -3.3257, -3.0781, -5.5442, -6.6103, -5.7725,
        -4.3730, -3.0148, -3.5321, -4.3706, -6.6043, -6.7662, -6.3838, -4.1243,
        -3.4490, -3.4772, -4.8660, -8.0061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3761, -4.4060, -7.2467, -5.9770, -4.6836, -3.4473, -5.9703, -3.4210,
        -6.4649, -6.9463, -6.0473, -4.8117, -4.0469, -5.4140, -4.5372, -5.6769,
        -6.5713, -6.0125, -4.4669, -3.3932], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6903,  -6.9266,  -6.7378,  -2.9213,  -8.0109,  -5.9542,  -4.4730,
         -5.7247,  -3.2636,  -6.2618,  -7.6858,  -5.3495,  -6.4865,  -4.9543,
        -12.7617,  -6.0345,  -6.8547,  -5.9223,  -6.8286,  -5.0405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1685,  -5.8875,  -8.3576,  -4.3569,  -3.8905,  -7.3363,  -7.4674,
         -5.2403,  -5.6155,  -4.2502, -15.5088,  -7.8979,  -7.3780,  -7.3159,
         -3.1651,  -8.3314,  -6.8826,  -5.7639,  -3.8272,  -7.3947],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2933,  -7.9014,  -5.4800,  -9.5017,  -6.7666, -10.5035,  -4.6649,
        -10.1819,  -9.0631,  -6.0245,  -5.7089,  -5.3693, -18.1834,  -7.9094,
         -7.4429,  -7.4543,  -4.3979,  -9.2051,  -6.5347,  -5.5361],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0825,  -5.7291,  -6.0142,  -3.4841,  -4.4380,  -5.5906,  -7.1234,
         -6.1185,  -4.7682,  -4.2139,  -3.8049,  -6.9853,  -7.7190,  -5.7818,
         -4.9213,  -6.9045, -11.9426,  -7.7438,  -6.2859,  -7.5464],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5261, -4.2015, -3.9614, -5.3630, -7.5221, -5.8987, -3.3188, -4.0332,
        -3.4142, -4.5540, -8.1462, -5.8215, -5.6144, -4.6585, -3.5721, -4.1041,
        -5.8252, -6.5900, -5.7577, -5.9440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9225,  -3.5680, -11.1417,  -6.8705,  -5.5980,  -5.5412,  -7.0962,
         -6.1912,  -8.2230,  -5.8710,  -6.0616,  -4.1867, -17.9775,  -7.0407,
         -6.2199,  -7.1097,  -4.9755,  -9.7679,  -7.2733,  -5.1743],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3394, -6.9960, -6.6975, -6.3225, -4.5306, -4.6636, -3.0631, -5.2184,
        -7.1575, -5.5976, -5.1038, -6.3027, -3.0236, -4.9329, -7.1257, -6.5117,
        -4.6838, -3.4017, -4.6001, -4.6121], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.0597,  -5.9737,  -7.1899,  -6.0427,  -7.4994,  -3.7644,  -9.9118,
         -6.5202,  -7.0617,  -4.6732,  -2.9867,  -3.6000,  -6.5900,  -7.0420,
         -6.0185,  -4.7319,  -3.4617,  -2.7959,  -3.6426,  -7.2262],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8240,  -5.3965,  -6.1041,  -6.5122, -21.7191,  -6.6207,  -7.4991,
         -8.2914,  -5.9146,  -7.0999,  -6.0768,  -6.3401,  -3.6759,  -6.3211,
         -2.5236,  -8.0584,  -6.2024,  -4.9772,  -4.5890, -13.5834],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9066, -5.3250, -5.5299, -4.8065, -6.9720, -7.9907, -5.7853, -5.4429,
        -4.3027, -9.3980, -6.1730, -6.8658, -6.5820, -7.3896, -5.4765, -9.3412,
        -7.1222, -5.2912, -3.9625, -3.1935], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6909,  -4.0191,  -6.0534,  -6.9001,  -6.4438,  -5.0874,  -6.1687,
         -6.3779, -10.9519,  -6.2185,  -7.5656,  -4.0207,  -8.0912,  -5.9838,
         -4.9353,  -4.0799,  -5.2082,  -3.2867,  -7.2286,  -6.2472],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9672, -4.0014, -5.0189, -7.8415, -6.2988, -4.0942, -4.1041, -5.5840,
        -3.6667, -7.1653, -6.6607, -6.0164, -6.1538, -4.0388, -2.7366, -5.7441,
        -6.9246, -5.9782, -4.9037, -2.9634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7424, -5.4298, -7.6664, -6.5045, -5.3947, -4.7389, -6.1939, -6.5858,
        -7.0612, -5.6437, -4.2502, -3.8808, -5.1717, -4.2448, -5.8983, -6.9523,
        -6.0086, -5.0301, -3.3378, -4.3952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5832, -4.6645, -5.0163, -7.2534, -6.1455, -5.5075, -5.0924, -9.0043,
        -4.7297, -5.3350, -7.3598, -6.4256, -6.2648, -5.3570, -3.8329, -4.1846,
        -6.5882, -6.5110, -5.4206, -5.2943], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6782,  -6.1691, -20.8224,  -5.8259,  -8.0473,  -3.2698,  -6.7733,
         -6.5041,  -5.0418,  -6.3255,  -4.5027,  -3.1588,  -4.5201,  -7.7982,
         -6.6274,  -6.5940,  -4.4759,  -2.8240,  -8.0026,  -7.8054],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9659,  -4.1043,  -3.0866,  -5.2838,  -7.2814,  -6.1133,  -3.2127,
         -4.3548,  -4.2045,  -8.8513,  -7.7005,  -5.3674,  -5.0915,  -6.2895,
        -11.9112,  -7.2254,  -6.6397,  -7.4312,  -4.5079,  -8.0919],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9552,  -3.8168,  -7.4365,  -3.9295,  -7.5732,  -5.2337,  -6.0809,
         -4.2834, -12.7240,  -7.4120,  -6.5996,  -7.5183,  -4.3708,  -8.3615,
         -6.1723,  -5.9890,  -4.5025,  -3.0623,  -6.7398,  -7.9263],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3155, -5.7184, -4.8102, -3.3350, -7.9370, -7.2787, -5.3794, -5.2702,
        -4.9085, -5.7187, -5.5643, -8.4053, -6.3046, -7.9538, -3.4554, -9.6468,
        -6.5135, -5.6151, -4.7580, -6.0620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3787, -2.6565, -4.7000, -7.5862, -5.4057, -5.1038, -5.6981, -5.3692,
        -3.9350, -6.5499, -6.9681, -6.2663, -5.4249, -3.8171, -3.0053, -4.6074,
        -7.1692, -5.3365, -5.2475, -4.2612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1243, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1770, -3.4285, -4.8159, -3.1020, -7.3820, -6.1106, -7.4597, -4.8508,
        -2.8840, -3.6721, -8.3904, -6.1042, -4.0480, -6.3788, -3.5892, -4.8641,
        -6.8409, -7.5689, -6.3219, -3.5709], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0986, -5.6409, -5.2053, -4.1543, -2.9594, -5.4419, -7.6960, -6.0286,
        -5.5623, -2.7024, -3.1137, -6.5922, -7.4678, -6.5366, -6.2478, -4.0710,
        -3.2705, -4.9068, -7.5814, -5.6422], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0307, -7.7291, -6.7006, -5.0210, -3.9198, -4.5986, -4.6314, -6.3140,
        -7.2519, -6.1022, -5.3485, -5.6088, -5.4741, -3.5530, -6.5100, -6.5444,
        -6.1200, -4.0249, -2.9478, -2.9924], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7243, -2.5115, -4.1012, -7.3787, -6.5762, -5.6804, -4.2785, -5.0317,
        -8.8090, -6.9768, -7.3605, -4.5839, -7.5704, -5.4323, -5.2244, -4.6246,
        -3.6197, -3.8751, -6.1234, -6.9795], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9414,  -6.2571,  -5.8401,  -7.6182,  -6.7506,  -6.3231,  -4.9611,
         -3.7039,  -5.0670,  -7.8519,  -5.3337,  -4.2389,  -8.1267, -12.0986,
         -7.6439,  -8.3821,  -4.6505,  -8.5581,  -6.7273,  -5.8135],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7684,  -7.4996,  -7.1923,  -7.2777,  -4.1630,  -9.6059,  -6.3118,
         -6.3992,  -5.4902,  -3.2250,  -5.0970,  -7.3795,  -5.9288,  -6.3533,
         -6.4052,  -3.6794,  -4.9444,  -7.6281,  -6.7079,  -5.2924],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2163, -3.8157, -2.9337, -3.4992, -7.0303, -5.8627, -4.6235, -5.8082,
        -4.1580, -4.7477, -7.3849, -6.7850, -5.2552, -4.1379, -3.8752, -4.8589,
        -7.9533, -5.4772, -5.4714, -4.1402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1568, -6.2839, -6.2490, -3.5967, -2.8660, -4.2458, -6.9862, -5.9058,
        -3.9104, -3.9173, -2.5695, -4.1625, -7.8244, -5.8204, -5.1394, -3.7738,
        -3.8432, -3.6203, -6.3306, -7.1462], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9082, -2.7001, -7.7717, -6.1298, -5.4849, -5.9309, -3.6406, -3.2672,
        -5.8969, -6.9324, -6.1149, -5.0334, -3.7085, -5.1385, -4.7299, -7.1014,
        -5.6426, -5.4871, -4.3169, -4.1519], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9159, -5.2770, -6.5098, -6.3256, -5.8459, -4.4305, -4.0013, -3.7520,
        -4.6891, -5.9336, -6.6271, -6.3732, -4.2400, -4.0163, -6.3500, -4.3970,
        -5.7822, -6.5891, -5.9528, -4.9771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2068, -3.2143, -2.1711, -6.1838, -7.2201, -6.3909, -5.7877, -4.4582,
        -3.5877, -4.3129, -6.6105, -6.4441, -6.1549, -5.0611, -4.3188, -2.5056,
        -3.7132, -7.5625, -6.2554, -6.0201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3451, -3.4761, -4.5973, -6.8007, -5.7546, -5.0990, -3.4917, -3.3546,
        -4.5712, -7.7591, -6.4048, -3.7187, -3.8000, -4.1869, -6.5733, -7.6660,
        -5.7562, -5.0761, -3.4990, -4.4953], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8827, -6.2967, -4.0216, -4.6136, -3.9064, -5.5452, -6.9034, -5.3459,
        -4.5504, -4.1839, -2.6741, -4.2776, -6.9049, -6.9268, -6.7578, -4.9059,
        -3.5989, -3.0463, -3.5663, -6.5322], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1770, -3.8178, -3.6675, -3.6299, -6.8711, -5.6898, -5.0718, -4.3848,
        -4.5169, -3.6734, -6.0792, -6.8047, -5.7044, -4.4860, -4.1204, -5.7005,
        -4.4650, -6.1250, -7.2717, -6.0229], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1140, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2724,  -3.4856,  -2.8601,  -4.3990,  -6.7755,  -5.9889,  -3.7328,
         -3.8173,  -2.6196,  -7.6230,  -7.8411,  -5.2464,  -5.2593,  -5.8344,
        -12.5908,  -8.2235,  -6.1656,  -7.3323,  -4.5131,  -8.0800],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0182, -6.2694, -4.0696, -2.6916, -4.4509, -4.2054, -7.0596, -4.9539,
        -5.1600, -5.0192, -4.7062, -4.5935, -5.5008, -6.9405, -6.0323, -4.6050,
        -3.6412, -5.6514, -2.8858, -6.4045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3958, -7.3884, -4.0309, -3.8781, -4.5111, -5.9263, -7.1088, -6.1942,
        -3.6439, -3.3689, -2.3049, -4.3831, -7.2704, -5.5337, -4.7582, -2.9800,
        -4.7599, -4.4556, -6.6899, -7.1571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0870, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1643, -8.0440, -5.4096, -4.9154, -5.1557, -3.6362, -4.9547, -6.8766,
        -7.4538, -5.8127, -6.5796, -4.6931, -8.1958, -3.3676, -6.5696, -6.6327,
        -5.6379, -4.7174, -3.7823, -3.9379], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1761,  -5.7928, -25.4163,  -5.5431,  -7.6087,  -3.8736,  -8.0805,
         -5.9951,  -4.6656,  -4.4773,  -7.7617,  -3.7801,  -4.6848,  -6.8216,
         -6.1307,  -6.9363,  -3.7322,  -3.3000,  -4.5614,  -6.4893],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9829, -3.6016, -3.9360, -4.9902, -2.6327, -5.5473, -6.6099, -5.2351,
        -5.8531, -9.0547, -2.6617, -4.6456, -5.4825, -6.0424, -5.9215, -4.5274,
        -6.6779, -4.0935, -3.6113, -6.1814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0022, -3.3613, -4.1552, -6.8635, -7.2668, -6.7019, -3.9842, -2.8867,
        -3.1647, -6.1364, -7.1070, -5.2769, -4.5022, -3.9781, -4.7840, -4.8612,
        -6.4518, -6.6873, -5.4388, -4.7113], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3380, -3.1060, -5.0473, -7.8761, -5.3335, -4.0217, -3.5550, -6.2238,
        -3.0049, -6.5816, -7.1546, -5.0894, -5.0829, -4.0503, -3.9505, -4.2638,
        -5.2381, -6.8631, -6.2235, -4.1859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2066, -3.8889, -2.2541, -5.0878, -7.4354, -4.8651, -4.3115, -3.8815,
        -4.1651, -4.4658, -6.3303, -6.9680, -5.3361, -4.2829, -5.6275, -2.6138,
        -4.0953, -6.4059, -6.9147, -6.5379], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9837, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.7879,  -5.9813,  -6.5938,  -7.1763,  -7.1145,  -7.4409,  -4.1507,
         -8.9459,  -6.1654,  -5.9768,  -4.6414,  -2.8488,  -6.5458,  -7.4936,
         -5.0023,  -5.2035,  -7.4375, -18.3117,  -6.5857,  -6.8748],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2389, -7.6632, -5.6341, -5.6237, -3.5065, -4.2234, -3.1385, -6.8451,
        -7.1512, -5.4607, -5.9230, -3.1706, -2.1544, -4.8207, -7.0007, -6.2520,
        -7.0655, -3.6998, -4.0950, -3.9319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6204, -2.8079, -3.1684, -4.2104, -7.6043, -5.0077, -4.9452, -4.1216,
        -9.9625, -4.9820, -5.6708, -6.7140, -5.6507, -4.6880, -3.8662, -4.3295,
        -3.5653, -6.8299, -6.7265, -5.5668], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2519, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0592, -5.6381, -6.3855, -5.6694, -5.1270, -3.6704, -1.8884, -4.9518,
        -6.8532, -5.4274, -5.0636, -4.3691, -7.8402, -5.1502, -6.4584, -7.5340,
        -6.6115, -6.2162, -4.5891, -2.9008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8111, -4.8652, -3.2270, -7.3001, -5.2515, -5.5933, -3.9142, -2.5293,
        -4.1139, -6.8645, -7.4123, -6.3617, -6.4021, -2.9098, -2.2481, -6.4648,
        -7.1147, -6.1696, -5.0615, -3.3539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9180, -3.8287, -5.9363, -4.2733, -3.3467, -6.6670, -6.6161, -5.3823,
        -4.4139, -4.4856, -2.6520, -3.9393, -7.4091, -5.5192, -4.8715, -2.9439,
        -3.4700, -7.1766, -7.7506, -5.4716], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5612, -3.4683, -7.0017, -5.2557, -4.2329, -4.2670, -7.8728, -2.9600,
        -6.1713, -7.2282, -5.0973, -5.2848, -4.1305, -2.7854, -3.9030, -5.7041,
        -6.6609, -5.5984, -4.8471, -3.3292], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0219, -7.1217, -7.4033, -6.6330, -5.4509, -4.4664, -6.0164, -3.8658,
        -6.7501, -6.6064, -5.6119, -5.5603, -3.1910, -2.9317, -5.4760, -7.6480,
        -5.4662, -6.0479, -4.7022, -4.3874], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8873, -4.6665, -4.5887, -3.1149, -6.0854, -6.6677, -5.9306, -4.3930,
        -3.6475, -2.7010, -5.2737, -7.3701, -5.6248, -4.0526, -4.0013, -6.6998,
        -2.2853, -6.9201, -6.2800, -5.5196], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6670, -4.8439, -3.6723, -5.9571, -4.9430, -6.2633, -6.7221, -5.9262,
        -3.9467, -3.4376, -2.1572, -4.6325, -6.8688, -5.9045, -6.6048, -3.3765,
        -1.9795, -8.0668, -7.7245, -5.2961], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7399,  -5.9941,  -4.4885,  -3.9412,  -5.7133,  -4.1002,  -6.4834,
         -6.5288,  -5.6674,  -3.6715,  -3.1752,  -2.8311,  -6.2415,  -7.6386,
         -4.9709,  -4.8713,  -8.6422, -14.3403,  -6.5404,  -6.4869],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9458, -6.5411, -6.1722, -3.2569, -9.3636, -6.0625, -4.4727, -4.4196,
        -4.6528, -2.9617, -5.6439, -6.7217, -5.1630, -5.4223, -3.8004, -3.1761,
        -3.7480, -6.7442, -6.7435, -6.1611], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5963, -6.8492, -5.2564, -5.6923, -3.3367, -4.2145, -3.7796, -6.8475,
        -6.8629, -6.1891, -3.7878, -2.9495, -2.4638, -4.0767, -6.6692, -5.5068,
        -4.4928, -4.0219, -1.7848, -5.0185], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3792,  -7.5628,  -7.4279,  -3.9764,  -8.0693,  -6.3946,  -7.0863,
         -4.3576,  -2.4358,  -8.1246,  -7.6724,  -5.0019,  -5.9348,  -6.2326,
        -17.1001,  -7.4599,  -8.2123,  -4.7601,  -7.4411,  -5.9537],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4583, -3.4171, -4.5702, -7.4489, -5.9488, -5.1648, -4.7492, -4.6015,
        -4.1427, -6.4189, -6.4348, -5.9949, -3.9836, -3.4662, -4.5671, -6.2084,
        -7.3625, -6.1824, -4.7925, -5.1851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7723,  -2.6908,  -4.9126,  -7.4281,  -4.6568,  -5.0450,  -3.3999,
         -4.3306,  -4.9894,  -6.0308,  -7.0766,  -5.4066,  -4.5856,  -3.1971,
         -3.0837,  -6.7883,  -7.4928,  -4.6260,  -5.6550, -10.1318],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6071,  -8.2187,  -5.7651,  -5.1642,  -4.8301,  -3.5905,  -5.7508,
         -7.5648,  -5.8667,  -6.0385,  -5.1254, -11.7109,  -5.4983,  -7.8779,
         -3.6188,  -8.0360,  -6.4480,  -5.0645,  -4.5634,  -3.2945],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9127,  -4.7365,  -8.0082,  -5.9296,  -5.6021,  -4.0926,  -3.3143,
         -4.5847,  -7.2858,  -6.2590,  -4.6209,  -4.1266,  -4.2009,  -5.2630,
         -7.0408,  -5.6562,  -6.4473,  -3.9061,  -7.6242, -12.4110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6039, -3.3914, -3.3687, -1.9752, -5.6063, -7.0884, -5.3430, -4.2232,
        -3.6316, -4.9850, -4.0323, -7.4349, -5.0623, -4.6240, -5.1520, -8.1175,
        -4.8691, -6.7296, -6.4280, -7.2086], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5050, -3.4284, -4.7672, -5.1918, -6.9630, -5.7509, -4.3439, -3.1261,
        -2.5669, -5.4312, -7.0534, -5.6950, -5.7277, -3.4490, -3.3731, -5.4707,
        -7.3851, -5.1974, -6.0540, -6.5759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2376, -3.4070, -5.1475, -6.6505, -6.7925, -5.3672, -4.8478, -3.9743,
        -7.4150, -3.2435, -6.6415, -6.5224, -5.7724, -4.0458, -4.3623, -2.6319,
        -5.6476, -6.2898, -6.6581, -5.5932], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3673, -4.1634, -4.1917, -7.5477, -6.5111, -4.5538, -3.7869, -3.8522,
        -2.4673, -7.0685, -5.6826, -4.1170, -4.0485, -5.2631, -4.2056, -4.7403,
        -6.3435, -5.7880, -5.3001, -4.0798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2735, -6.4573, -6.0616, -5.3458, -6.3893, -4.2749, -6.5118, -7.7808,
        -5.8810, -6.4029, -6.1741, -3.9187, -5.4049, -7.3563, -5.7190, -5.6328,
        -3.2255, -3.0799, -5.1570, -6.8966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0619, -7.4746, -5.9176, -4.2163, -2.8839, -2.2041, -4.0479, -6.9502,
        -4.9569, -4.5812, -2.9996, -2.6905, -4.3665, -7.4230, -5.6968, -3.4776,
        -4.2852, -4.0489, -5.1784, -7.8310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7106, -4.0086, -7.6451, -5.0562, -5.1163, -3.8944, -3.7319, -4.9377,
        -5.8689, -6.9184, -5.6250, -3.6054, -3.7411, -3.2253, -3.8900, -7.3018,
        -4.9328, -5.1063, -7.4985, -5.3978], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0106, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8374,  -6.3981,  -6.2493,  -8.6457,  -3.7687,  -6.3806,  -7.4661,
         -6.0541,  -6.6292,  -4.2475,  -5.1104,  -9.0299,  -5.8828,  -4.7199,
         -7.2603, -13.2617,  -7.1409,  -5.8962,  -8.5283,  -4.3390],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8635,  -5.7882,  -5.4820,  -5.1359,  -8.8076,  -8.2319,  -5.3613,
         -7.6998, -10.4317,  -7.2262,  -6.1679,  -7.1051,  -6.1259,  -7.7749,
         -6.9039,  -4.4440,  -5.2277, -12.2007,  -6.2816,  -6.9367],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.8351,  -4.8560,  -7.5452,  -4.6743,  -7.3534,  -5.4478,  -5.3792,
         -3.9700,  -7.4557,  -4.9575,  -6.9146,  -6.8399,  -5.8190,  -6.2948,
         -4.2667,  -3.1828,  -7.7734,  -7.7924,  -5.3532,  -5.0081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4908,  -7.8472,  -4.7407,  -2.1287,  -7.6821,  -7.8330,  -5.2120,
         -4.2758,  -7.4879, -16.0630,  -5.8420,  -8.0554,  -4.4685,  -6.2713,
         -7.3478,  -5.6277,  -4.0900,  -3.6459,  -3.0490,  -8.0595],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1761, -5.9819, -2.9781, -3.6016, -3.3750, -4.9078, -6.9788, -5.1650,
        -4.5403, -5.7681, -2.9288, -3.6291, -7.0065, -7.1932, -6.6299, -5.4555,
        -5.9076, -2.1595, -5.8188, -7.6239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1584,  -3.2615,  -7.4437,  -7.4993,  -6.4170,  -6.2941,  -4.6639,
        -10.8353,  -7.5317,  -6.9156,  -7.1567,  -3.6538,  -8.6025,  -5.6877,
         -4.2897,  -5.3384,  -4.0279,  -3.4296,  -5.9273,  -6.9230],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9697, -5.7143, -4.9368, -9.8815, -6.6845, -7.2507, -6.8610, -4.6639,
        -7.5617, -6.0482, -5.2544, -4.3150, -3.5323, -6.4245, -7.3788, -5.1617,
        -5.4823, -3.7020, -2.8334, -4.0715], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9338, -3.4953, -4.0050, -2.8170, -5.2552, -7.6398, -5.3844, -4.3010,
        -5.6673, -5.7879, -4.8595, -4.6880, -6.8384, -5.5924, -3.9245, -3.8050,
        -6.1828, -5.3263, -7.7574, -5.5075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2384, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4865,  -4.2614,  -4.2014,  -7.9562,  -5.6941,  -4.0499,  -4.1584,
         -3.2615,  -7.4437,  -7.4993,  -6.4170,  -6.2941,  -4.6639, -10.8353,
         -7.5317,  -6.9156,  -7.1567,  -3.6538,  -8.6025,  -5.6877],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1099,  -7.6882,  -4.8902,  -4.6529,  -3.4019,  -3.8081,  -3.0060,
         -5.4380,  -6.3630,  -5.3562,  -5.9578, -10.0628,  -2.8034,  -5.6608,
         -7.0486,  -5.8063,  -4.7354,  -4.2989,  -3.0548,  -6.3243],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6484,  -4.6124,  -7.8808,  -3.4385,  -4.9386,  -7.2369,  -7.1682,
         -5.8325,  -7.0894,  -5.7789, -19.8411,  -5.0475,  -6.5897,  -4.4279,
         -8.5948,  -6.4679,  -4.5922,  -5.7343,  -6.4301,  -9.5594],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7136, -3.5398, -7.4939, -5.9405, -5.1553, -4.4272, -3.6236, -3.9243,
        -8.4330, -5.6134, -4.9544, -5.1204, -6.4435, -4.1516, -5.5403, -6.6490,
        -5.2291, -7.9958, -3.8995, -2.6285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0848,  -4.0783,  -6.5064,  -7.4392,  -7.1220,  -7.7375,  -3.7961,
         -2.6476,  -5.3665,  -6.3051,  -5.1304,  -7.0500,  -3.6951, -19.4771,
         -6.2530,  -7.0921,  -6.6055,  -2.3260,  -9.0809,  -6.1807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4351, -4.4697, -4.4816, -4.2681, -5.4046, -8.2827, -4.9832, -7.1529,
        -5.3208, -3.1061, -9.1956, -7.3006, -7.6591, -5.3401, -8.5983, -6.4773,
        -6.6083, -4.1394, -3.6511, -5.3272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7234, -4.6666, -6.4289, -6.5977, -5.3914, -4.4627, -3.2813, -1.8119,
        -5.1323, -7.2970, -5.4065, -4.3110, -3.7993, -3.0819, -4.9287, -7.2083,
        -5.9843, -5.4100, -4.2931, -3.0225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3596,  -5.0338,  -3.9844,  -7.7365,  -5.2598,  -4.9246,  -4.0926,
        -14.0569,  -6.6616,  -6.5935,  -7.6444,  -4.7082,  -7.8812,  -6.3268,
         -5.2501,  -3.9490,  -2.7634,  -8.3549,  -7.3445,  -4.6864],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1088, -2.6031, -6.3823, -7.5929, -5.0690, -5.1658, -4.5775, -3.3781,
        -3.7332, -7.3263, -5.3729, -4.9564, -3.5046, -2.3462, -4.4324, -7.2692,
        -5.9101, -6.5021, -2.8556, -6.1097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7015, -5.8765, -4.2248, -3.8319, -3.1469, -5.6824, -7.1593, -6.1157,
        -4.2187, -4.5045, -2.2684, -4.8615, -6.8810, -5.4172, -3.8088, -4.0850,
        -2.4106, -6.2051, -7.5809, -5.6593], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4354,  -4.3037,  -4.1749,  -4.5328,  -7.1774,  -5.4495,  -5.9056,
         -4.0841, -14.7201,  -7.5336,  -7.6309,  -7.2326,  -4.1454,  -8.0326,
         -5.8452,  -5.4943,  -3.7007,  -3.5806,  -7.7679,  -7.7016],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6597, -5.5842, -6.0232, -3.3870, -2.5717, -4.7859, -7.0104, -5.3665,
        -4.5431, -3.4043, -3.5023, -4.8826, -7.7982, -5.8809, -4.2486, -3.2632,
        -3.4767, -4.6321, -7.6997, -5.5373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3136, -5.4925, -4.7354, -4.6751, -2.5946, -4.8477, -6.2897, -5.3338,
        -4.5635, -4.3129, -3.5960, -4.6235, -5.9063, -6.4590, -5.8020, -6.0941,
        -3.3287, -3.4289, -4.0211, -7.8982], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4460, -8.3666, -7.1739, -6.1097, -4.7365, -4.5976, -3.8269, -5.8865,
        -7.3360, -6.0681, -4.5113, -4.3793, -4.1523, -5.0533, -7.7404, -6.0172,
        -5.6949, -4.1983, -3.6039, -6.4674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8844, -3.6952, -2.6341, -4.8146, -7.5681, -5.2369, -4.0508, -3.0314,
        -5.2137, -4.8311, -5.5595, -6.3879, -5.8717, -4.0290, -3.3394, -2.4643,
        -6.2284, -7.1232, -5.6041, -5.4890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7467, -5.3125, -7.4168, -7.6566, -6.8922, -8.2583, -5.2804, -7.8259,
        -6.0313, -7.9400, -6.4808, -5.8015, -6.5647, -6.5075, -7.0590, -4.6417,
        -4.3772, -4.8501, -5.9002, -7.2065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8340,  -5.9972,  -6.4479,  -2.7761, -22.6242,  -6.5069,  -8.5324,
         -3.2866,  -8.8302,  -6.3253,  -4.9540,  -5.6546,  -8.8025,  -3.9559,
         -4.7122,  -5.8174,  -6.7547,  -5.1843,  -4.1167,  -3.8072],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3329, -3.5154, -7.5519, -3.5322, -7.1042, -5.3306, -4.6856, -3.6535,
        -2.5003, -3.0684, -6.0265, -6.1898, -4.9764, -4.7792, -2.9173, -2.6621,
        -4.8546, -6.9230, -6.0112, -6.0922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2456, -7.7413, -5.7282, -4.7706, -3.2556, -4.4088, -6.0545, -7.6333,
        -6.0010, -4.2320, -3.4547, -6.6281, -2.5631, -6.2682, -7.0779, -5.6926,
        -4.4136, -4.3666, -5.1777, -2.9701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4173, -4.5289, -4.0073, -2.2666, -3.3387, -6.4571, -6.6543, -5.7863,
        -3.7356, -3.5116, -3.4487, -2.8755, -6.8605, -6.9295, -5.9323, -4.6164,
        -5.4243, -3.3467, -3.8090, -6.3432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8897, -8.1429, -3.8305, -8.5332, -5.9291, -5.5528, -4.1695, -5.7547,
        -2.7319, -6.7569, -7.1414, -5.6604, -8.4010, -4.0273, -3.5538, -4.1375,
        -4.5189, -6.6588, -5.1391, -5.6936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9269,  -7.0658,  -5.7456,  -3.3631,  -3.2899,  -2.6478,  -9.1759,
         -7.2582,  -5.2626,  -5.3335,  -4.6864, -13.9999,  -5.2628,  -6.9237,
         -6.6641,  -7.8565,  -3.7154, -10.0637,  -5.5536,  -5.3750],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4294, -5.5251, -5.2446, -5.4460, -4.7115, -6.0651, -7.4855, -6.3075,
        -4.4412, -3.8086, -2.7854, -5.0109, -7.5924, -4.9458, -5.5108, -5.8396,
        -2.7567, -3.2752, -6.8736, -5.9096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2548, -5.8256, -7.1384, -5.5239, -5.1678, -4.8270, -2.8392, -4.8847,
        -7.3666, -5.7806, -3.8181, -3.5578, -2.7050, -5.2548, -6.7487, -5.3388,
        -4.4476, -3.3303, -3.6651, -3.9021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2438, -5.7579, -3.7133, -3.9963, -3.5099, -3.2576, -7.8566, -5.5060,
        -5.2196, -7.5884, -6.1155, -4.8948, -5.8191, -6.7418, -5.9815, -3.6314,
        -4.0133, -2.6870, -5.8223, -7.5436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7729, -7.6853, -5.5611, -4.9776, -4.5202, -3.4475, -4.5241, -7.4430,
        -5.5168, -5.6671, -3.1481, -5.1139, -5.7501, -7.4049, -6.8685, -5.6699,
        -6.7352, -7.3927, -7.0970, -7.3353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.8875,  -6.7702,  -9.4235,  -3.4792,  -8.8557,  -6.7201,  -5.6261,
         -4.3584,  -3.3679,  -5.0879,  -7.7064,  -5.5020,  -4.6094,  -3.5034,
         -3.5897,  -4.6372,  -6.7408,  -7.2266,  -6.1695,  -6.3372],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6116, -5.6085, -4.0820, -3.7694, -2.5249, -5.3331, -7.1169, -5.4648,
        -4.6611, -5.9215, -3.6875, -5.7529, -7.5360, -5.8814, -3.7497, -3.5251,
        -4.0748, -5.4045, -7.4434, -5.1766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1682,  -5.6651,  -4.5308,  -3.9993,  -6.8567,  -5.9638,  -4.4414,
         -5.4465,  -3.1953, -13.8282,  -6.7116,  -4.5473,  -7.2804, -18.1993,
         -5.9611,  -7.6103,  -6.8590,  -7.2247,  -3.4324,  -4.9375],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4196, -5.1738, -7.4866, -5.3567, -3.5014, -3.0794, -5.4639, -4.7398,
        -7.6951, -5.6114, -4.8109, -3.4602, -3.7592, -3.6289, -6.3800, -7.2488,
        -5.7673, -4.7152, -4.7695, -3.6929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7072, -5.5756, -4.0179, -4.5981, -4.4855, -2.2361, -5.7553, -6.8247,
        -5.1956, -5.1055, -5.0981, -3.1379, -5.6371, -8.2420, -6.4084, -6.7050,
        -4.7742, -5.0612, -6.0721, -7.5895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7878,  -5.7438,  -4.3264,  -3.8351,  -2.0660,  -4.8231,  -7.4674,
         -5.3097,  -5.1847,  -3.3435,  -2.2024,  -3.8346,  -7.0335,  -5.2259,
         -5.1312,  -3.5601, -10.4338,  -3.4185,  -6.4436,  -6.5833],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3870, -4.8359, -7.5317, -5.7468, -4.8301, -3.8059, -4.0882, -6.6167,
        -7.7451, -5.3707, -5.3759, -4.4342, -2.8558, -6.1591, -7.4968, -5.5283,
        -4.4818, -4.3961, -5.0038, -4.4801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8321, -5.8181, -6.9553, -5.2964, -4.7141, -3.1191, -2.4181, -4.3617,
        -6.8021, -6.1267, -5.6118, -3.0777, -3.4338, -3.9483, -6.0303, -7.3172,
        -6.5228, -8.7661, -3.9584, -2.0014], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8444, -2.1878, -4.8926, -6.6733, -5.4864, -4.3435, -2.8165, -4.3889,
        -5.3640, -6.6697, -5.3917, -4.3146, -4.6130, -2.8268, -4.5941, -7.7931,
        -5.8221, -5.4081, -5.0950, -2.4439], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2222, -3.3893, -6.8943, -6.4675, -5.9684, -4.3511, -6.2956, -2.8076,
        -2.9501, -7.0513, -6.0483, -5.3615, -3.7833, -3.7101, -4.2400, -6.2702,
        -6.7498, -5.9926, -3.7262, -3.5354], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3036,  -7.3099,  -5.4878,  -5.3331,  -3.2871, -14.3241,  -6.5133,
         -7.4598,  -7.5591,  -5.3706,  -8.5682,  -6.3615,  -7.0038,  -4.9639,
         -2.0949,  -3.9584,  -7.9480,  -5.4220,  -5.2037,  -6.8518],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8749,  -7.4738,  -4.9045,  -5.9988,  -5.0646,  -8.5457,  -4.4871,
        -10.2069,  -6.8051,  -6.7630,  -5.3550,  -3.1602,  -4.2973,  -8.0066,
         -4.7884,  -5.5216,  -6.5542,  -5.7871,  -5.0760,  -7.6438],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0513, -4.6685, -7.1398, -5.4610, -4.4443, -4.3120, -3.3348, -3.1250,
        -6.5288, -6.4708, -5.5527, -5.4930, -3.2685, -2.6502, -5.3835, -7.5117,
        -5.3320, -5.8830, -6.2422, -4.8451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3336, -7.5811, -6.4932, -6.0673, -4.0804, -3.8946, -6.3863, -7.5052,
        -5.1320, -5.9636, -3.0975, -3.9652, -4.5811, -6.5306, -5.1898, -5.0578,
        -5.0753, -2.9816, -4.1027, -5.7355], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2013, -5.2840, -3.7825, -4.9899, -2.5398, -6.2437, -6.9600, -5.6643,
        -5.1036, -3.8940, -3.2309, -4.3212, -7.7167, -6.0078, -4.0626, -4.2472,
        -5.4692, -8.6756, -7.4884, -5.4475], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3165, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3168, -6.7490, -6.7068, -6.3662, -4.1761, -3.7570, -2.5473, -3.8494,
        -6.9924, -5.5795, -6.1599, -3.6107, -2.6204, -3.4015, -7.8581, -6.2046,
        -4.9609, -4.1331, -5.3580, -4.1917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1119, -4.5066, -7.4128, -4.9882, -5.8107, -7.2078, -5.9915, -3.1178,
        -5.8770, -6.9637, -6.0824, -6.3115, -3.2069, -2.1876, -4.5230, -7.9615,
        -6.2303, -5.9452, -4.8845, -2.5237], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4075, -7.2663, -5.5238, -4.7781, -4.3850, -3.2729, -4.1436, -7.1282,
        -6.1367, -3.9579, -4.6916, -4.5726, -4.0004, -6.2543, -6.5011, -6.0475,
        -3.7336, -3.3036, -2.7908, -4.8393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9597,  -5.3993,  -3.9529,  -4.0349,  -2.1289,  -5.5965,  -6.9381,
         -4.8389,  -4.4936,  -3.4550,  -4.0265,  -6.0454,  -7.5365,  -9.7069,
         -7.0923, -12.9736,  -9.4511,  -7.1492,  -7.1144,  -5.4396],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7612, -7.1542, -5.6021, -5.9858, -5.8041, -6.2784, -5.8308, -6.1226,
        -3.5297, -5.6885, -7.5611, -5.9254, -5.7164, -4.3653, -4.6605, -3.9885,
        -7.4350, -5.7860, -6.2387, -3.8151], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9166,  -3.4584,  -8.5864,  -7.4980,  -5.3105,  -4.4352,  -3.1184,
        -21.4654,  -6.0754,  -8.2359,  -3.8485,  -9.5253,  -5.8193,  -6.2534,
         -4.5917,  -3.0663,  -4.6728,  -8.1883,  -5.5634,  -5.2417],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9267, -4.1416, -6.0080, -7.6190, -5.5834, -4.1195, -3.8375, -5.0247,
        -3.5569, -5.7792, -5.9937, -5.3071, -5.5339, -3.2479, -3.0684, -4.6998,
        -6.4337, -5.8262, -5.0517, -4.9290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0198, -5.6665, -3.5492, -5.0810, -5.9798, -7.4830, -6.4162, -3.7839,
        -4.0444, -3.3521, -4.6561, -7.2816, -5.8416, -5.5879, -4.2889, -2.5238,
        -6.8658, -7.6613, -4.8512, -5.7289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9224, -6.0416, -4.5721, -3.9034, -2.5292, -5.6684, -6.8864, -5.4440,
        -4.2061, -3.9490, -3.5779, -4.1628, -7.8107, -6.3372, -3.7307, -3.6219,
        -6.2222, -2.9023, -5.4310, -6.4169], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2494, -7.8073, -7.4229, -6.8963, -3.6905, -7.3380, -4.9278, -4.4575,
        -4.4563, -2.6396, -4.7990, -7.8917, -5.1946, -5.7739, -3.1874, -5.2167,
        -4.0243, -5.1897, -6.1125, -5.3105], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5129, -4.2946, -4.5968, -2.6824, -5.7363, -6.8394, -5.5994, -5.8293,
        -9.5839, -2.4691, -4.4712, -7.6703, -7.4438, -6.5162, -7.9887, -4.5974,
        -3.8157, -5.2688, -6.6106, -4.8675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0512, -4.8546, -5.6991, -5.3336, -3.3569, -5.3704, -5.8844, -7.2301,
        -6.3710, -3.1733, -4.3414, -6.6200, -3.8596, -5.4042, -6.7196, -5.2043,
        -5.6285, -4.6664, -4.3587, -4.3212], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9307, -6.7849, -5.9873, -5.3512, -3.8569, -2.4386, -4.2676, -7.9484,
        -5.1975, -4.4595, -4.7396, -2.7314, -4.4082, -6.0012, -6.7963, -5.8809,
        -5.1481, -4.9967, -3.4380, -3.9897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7722,  -6.6615,  -6.7054,  -5.9449,  -3.6572,  -3.4088,  -3.3190,
         -5.6099,  -7.5365,  -5.2828,  -4.8216,  -4.9513, -25.6099,  -6.3475,
         -7.7468,  -3.6283,  -7.8773,  -6.4543,  -6.3521,  -4.5257],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5106, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8687, -4.1347, -3.7803, -5.6520, -7.0167, -5.7208, -4.9956, -3.1530,
        -3.1170, -3.5590, -6.7664, -6.3272, -5.4332, -4.8312, -4.8383, -3.0722,
        -5.8952, -6.3127, -6.1856, -5.4904], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0575, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4276,  -5.5463,  -6.5868,  -5.3295,  -4.6896,  -4.5879,  -2.4714,
         -4.6509,  -7.6249,  -5.0125,  -4.0603,  -3.6384,  -6.4847,  -3.6838,
         -7.9299,  -6.5680,  -4.9963,  -5.0327, -11.6805,  -5.6390],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-25.6724,  -5.2821,  -7.1747,  -7.2653,  -5.9187,  -7.6969,  -5.4040,
         -4.5855,  -3.7953,  -4.7348,  -6.6138,  -6.7254,  -5.6183,  -3.7082,
         -3.9265,  -2.6544,  -3.1775,  -6.6126,  -6.7391,  -5.9270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3596,  -5.0338,  -3.9844,  -7.7365,  -5.2598,  -4.9246,  -4.0926,
        -14.0569,  -6.6616,  -6.5935,  -7.6444,  -4.7082,  -7.8812,  -6.3268,
         -5.2501,  -3.9490,  -2.7634,  -8.3549,  -7.3445,  -4.6864],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2560, -5.4471, -4.7977, -3.4785, -3.3041, -4.6134, -6.9088, -6.4334,
        -4.7181, -6.5539, -6.1276, -3.3818, -5.2695, -6.9260, -5.4323, -4.9979,
        -4.5530, -5.1613, -3.6055, -6.1033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7513, -4.3020, -7.8814, -5.4065, -5.3578, -4.1558, -5.3254, -3.0561,
        -6.3121, -6.3425, -5.3895, -5.4051, -3.5277, -2.3832, -5.3395, -7.4235,
        -5.3659, -4.9539, -6.6583, -2.8848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5196, -4.6643, -4.9490, -5.7233, -3.5633, -6.2234, -6.6875, -5.7798,
        -3.7200, -4.1311, -6.7855, -3.5024, -5.9898, -7.0021, -5.5288, -4.8511,
        -3.9954, -3.9440, -3.9589, -7.4019], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8560, -7.4944, -3.4811, -8.0471, -5.9921, -6.3054, -3.9636, -3.2369,
        -3.4539, -5.5185, -7.2095, -5.9658, -5.6126, -3.6456, -2.2905, -5.1818,
        -7.5031, -5.5827, -4.5907, -3.8699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1756, -6.7879, -3.9066, -6.2237, -6.5497, -6.0270, -4.8022, -3.6683,
        -8.2627, -4.3363, -6.0012, -6.4179, -5.7442, -4.2663, -3.2888, -6.0573,
        -2.9567, -7.8575, -5.5461, -4.6477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8885, -4.9055, -7.1584, -5.3641, -4.1178, -3.8179, -2.8517, -5.9692,
        -7.5185, -5.3323, -5.4565, -6.0526, -3.5641, -5.3738, -7.4955, -5.8597,
        -3.1256, -3.8613, -4.2463, -5.1698], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1887, -4.2188, -3.0005, -3.9569, -7.3661, -7.4534, -6.3827, -6.9056,
        -5.4781, -2.2733, -4.4793, -6.2833, -5.3962, -4.3207, -3.3354, -3.5165,
        -3.0423, -6.8385, -7.2372, -5.2535], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2163, -4.0326, -2.1141, -4.1348, -7.8783, -5.2083, -4.6788, -3.6945,
        -4.9870, -5.0980, -7.5427, -5.8281, -4.1020, -3.6113, -2.9883, -5.1975,
        -6.9156, -5.2695, -4.5484, -6.5815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6741, -7.1508, -3.5889, -9.0122, -6.0396, -4.7980, -4.1181, -3.0543,
        -4.5907, -5.6683, -6.8789, -5.4925, -3.6581, -3.5990, -2.6097, -6.4150,
        -7.1230, -5.3868, -5.1237, -6.2821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5444, -3.5579, -4.7104, -3.1444, -7.2524, -7.6321, -5.2790, -5.2472,
        -6.0390, -3.6688, -6.5356, -7.8172, -5.9138, -3.8077, -3.5340, -2.9509,
        -4.9467, -6.7974, -4.7761, -4.6496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5239,  -6.4614,  -5.5088,  -4.8493,  -3.1731,  -3.2561,  -5.8022,
         -7.6702,  -5.0967,  -6.9785,  -4.5786, -23.1284,  -6.8844,  -7.0377,
         -7.3087,  -5.7585,  -5.4021,  -7.4986,  -5.4089,  -4.9290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3941,  -4.9146,  -5.0523,  -5.7681, -12.5961, -10.1776,  -8.3113,
         -6.8094,  -7.8616,  -3.9439,  -8.1702,  -6.1810,  -5.1956,  -4.9953,
         -2.1432,  -5.1975,  -7.8990,  -5.0257,  -5.2750,  -6.6938],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8675, -3.5832, -9.0671, -5.9576, -5.9679, -3.9087, -2.8953, -3.3990,
        -6.7491, -6.4934, -6.2380, -3.6897, -2.8342, -2.2175, -4.9455, -6.7624,
        -6.0133, -4.2512, -3.2690, -3.7144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9420,  -5.9942,  -5.6966,  -6.2256,  -3.7738,  -3.5233,  -8.4008,
         -6.0159,  -5.6079,  -7.1139, -14.5798,  -7.7988,  -7.5846,  -7.3179,
         -3.9469,  -7.8093,  -6.0503,  -5.5523,  -4.3622,  -2.9305],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5916,  -5.7053,  -5.0425,  -4.7728,  -7.8606,  -3.4276,  -7.3552,
         -5.8700,  -4.4780,  -4.6362, -11.3921,  -6.2401,  -6.7007,  -6.3471,
         -7.6045,  -4.8150,  -9.1832,  -6.8817,  -5.5929,  -4.4402],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3605, -3.1730, -5.3302, -6.3673, -7.4829, -6.3598, -3.7565, -4.1686,
        -3.9840, -3.4550, -6.3799, -7.0560, -5.3905, -6.1657, -2.6338, -2.5702,
        -7.2961, -7.5097, -4.6556, -5.8907], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8681, -16.7816,  -6.4589,  -7.3142,  -7.4192,  -3.4941,  -8.7728,
         -6.3143,  -3.7691,  -4.0473,  -8.0695,  -5.3782,  -7.4711,  -4.8775,
         -4.5774,  -3.5325,  -2.8142,  -4.4957,  -5.7469,  -5.9802],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1999, -5.4660, -5.2531, -3.2335, -3.5479, -4.6716, -6.5282, -6.8425,
        -6.2448, -4.2924, -3.4899, -3.6636, -3.1662, -6.3191, -7.0250, -4.9441,
        -4.7038, -3.3461, -2.4361, -5.4850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8385, -6.1574, -9.6953, -3.8728, -5.9683, -6.5143, -5.5656, -4.3552,
        -3.3726, -6.0629, -2.8146, -6.3407, -6.3020, -4.9838, -4.8711, -3.3067,
        -2.6286, -5.9789, -6.5824, -5.3817], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5663, -5.5217, -3.3630, -6.1104, -6.8619, -5.9576, -4.2476, -4.4582,
        -5.1061, -4.3562, -6.5551, -6.6544, -5.4532, -4.9539, -5.1271, -2.5395,
        -4.9029, -6.9674, -4.7496, -4.6160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4696, -4.3525, -6.7157, -6.9092, -5.6609, -5.3034, -3.0109, -2.9117,
        -4.1509, -7.7750, -4.9893, -5.0722, -5.4484, -5.9716, -4.1016, -6.4235,
        -7.2489, -5.7837, -4.9262, -4.4625], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7099,  -5.4378,  -5.5677,  -3.7474, -11.7395,  -7.0973,  -6.7045,
         -7.0703,  -6.1685,  -4.6700,  -3.8246,  -2.3004,  -4.0296,  -7.5350,
         -5.9211,  -5.7465,  -3.1640,  -2.3048,  -4.3068,  -6.2661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9775,  -7.3437,  -5.5494,  -4.6320,  -3.0552,  -6.6175,  -3.3796,
         -6.8938,  -5.9346,  -5.5423,  -3.6953,  -5.0524,  -2.5959,  -7.1160,
         -5.4489,  -5.5997,  -6.4400, -22.8161,  -8.5631,  -9.4612],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7256, -5.2357, -4.9276, -5.6882, -4.5046, -7.4001, -7.2071, -6.4794,
        -7.6106, -4.1390, -7.9267, -2.8867, -7.0003, -6.6620, -5.7233, -4.4866,
        -4.7512, -7.0081, -6.3528, -7.6366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5902, -5.2424, -4.3882, -3.9393, -2.2545, -3.5294, -6.5450, -6.9857,
        -5.2689, -4.9855, -3.3597, -2.3603, -6.6588, -7.3889, -6.0890, -5.9058,
        -3.8439, -3.4623, -4.8539, -6.4276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3133,  -4.5162,  -3.4262,  -3.9915,  -4.7684,  -7.7623,  -4.8212,
         -5.2340,  -4.8032, -10.7985,  -6.4512,  -8.7399,  -5.3025,  -8.7147,
         -5.8309,  -4.8541,  -5.3902, -12.5911,  -7.2079,  -7.0446],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5166, -5.0297, -6.3501, -7.2009, -5.6123, -9.7831, -4.1459, -4.3488,
        -5.1472, -7.4604, -5.7471, -5.8453, -3.3922, -3.1549, -3.1264, -7.0821,
        -5.1587, -4.9646, -4.0157, -6.2200], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0393, -7.8682, -5.4288, -3.5126, -3.9713, -5.2917, -5.7694, -7.5940,
        -5.1755, -6.8980, -5.1064, -2.6203, -5.6596, -7.4577, -5.7055, -8.1398,
        -3.9628, -7.2167, -4.9604, -7.5629], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7497,  -6.2996,  -4.8327,  -4.3484,  -4.9229,  -4.0972,  -5.1120,
         -7.5218,  -5.2920, -12.3847,  -3.0370,  -2.8836,  -3.5619,  -7.1465,
         -4.9401,  -5.9162,  -3.0314,  -2.4560,  -3.3250,  -7.6025],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6834,  -3.7812, -28.6085,  -7.3122,  -5.1591,  -7.7016,  -5.7838,
         -5.8780,  -6.0780,  -5.4595,  -5.8927,  -2.7358,  -5.9561,  -3.7199,
         -7.3786,  -5.0322,  -4.1559,  -4.1348,  -2.6577,  -4.7909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4881, -6.9345, -5.1348, -4.1602, -3.1248, -2.1931, -4.7347, -7.2654,
        -4.8004, -7.0279, -3.2237, -2.5283, -4.2730, -7.1376, -6.0065, -4.0263,
        -3.6927, -4.0155, -4.0402, -6.0285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0554, -7.5261, -5.1839, -5.4893, -2.9945, -5.9594, -6.4797, -7.1797,
        -6.2448, -6.5109, -4.1445, -5.4202, -3.9361, -5.9501, -7.4287, -6.1723,
        -3.6776, -3.4315, -4.6492, -6.0339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6803, -7.3081, -5.1308, -4.6221, -5.7040, -7.5949, -6.0198, -8.5111,
        -3.8331, -4.6544, -2.7449, -6.9003, -6.2593, -5.4748, -4.6363, -3.6673,
        -3.1900, -5.0302, -7.3009, -5.0845], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9510,  -5.3958,  -6.4869,  -3.3874,  -2.8435,  -3.9938,  -7.7192,
         -5.5615,  -4.9473,  -4.6182,  -2.5040,  -5.8374,  -7.7964,  -4.4616,
         -5.4316,  -3.7251, -24.2355,  -6.8624,  -7.8329,  -5.8029],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0678, -5.0551, -5.4478, -3.0084, -3.1886, -3.8533, -7.4414, -5.2626,
        -5.0768, -3.0120, -5.3212, -3.8567, -6.0001, -7.1647, -4.5958, -5.5394,
        -3.7601, -4.8857, -2.6561, -6.4044], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4519, -3.7791, -3.5064, -6.4865, -7.0929, -5.0712, -3.7233, -4.1842,
        -3.7602, -2.2588, -6.8492, -6.4173, -5.2550, -5.5210, -3.5728, -2.1598,
        -5.5982, -7.6632, -4.9260, -4.5837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0145, -4.4245, -5.5125, -8.4255, -6.2969, -7.1572, -6.4984, -7.2323,
        -6.9249, -3.1404, -5.7803, -3.7195, -8.6847, -5.8122, -8.0830, -5.5091,
        -8.2640, -5.7793, -4.7677, -5.4439], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2474, -5.0512, -3.8029, -3.5464, -6.4695, -4.2076, -5.7446, -6.1527,
        -5.1002, -7.7434, -4.4807, -6.0006, -3.2694, -5.9134, -5.5875, -4.1662,
        -6.1422, -2.1847, -6.4648, -5.0028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4912, -3.4712, -3.9308, -4.3458, -6.8075, -6.0861, -5.2227, -4.7184,
        -3.0607, -5.3844, -2.9351, -6.5182, -6.4619, -5.2228, -5.3428, -4.6351,
        -3.7405, -3.4105, -6.4863, -6.3644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3181,  -3.1120,  -8.9930,  -5.3954, -10.2358,  -4.8582,  -4.4296,
        -18.7866, -11.1678,  -8.0088, -10.4157,  -8.1135,  -5.6667,  -4.9558,
         -4.7524,  -1.7839,  -2.8663,  -6.8601,  -7.1664,  -5.8171],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0235, -2.8458, -6.6978, -7.4124, -5.4324, -4.6319, -3.0856, -4.7392,
        -4.1428, -6.0359, -6.3743, -5.3516, -4.6981, -3.0961, -2.6520, -4.5852,
        -7.1770, -5.4046, -5.8158, -2.9232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9965, -4.7259, -3.7171, -5.7984, -6.5931, -5.2995, -4.5445, -3.1526,
        -2.2885, -5.5403, -7.5727, -4.6512, -4.4995, -3.5963, -3.0038, -4.4029,
        -7.5211, -6.0217, -3.2828, -3.8026], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7009, -2.1669, -4.2008, -7.7631, -4.6213, -5.5536, -3.8530, -3.1787,
        -3.2064, -6.3214, -6.6383, -4.9680, -4.6682, -3.6984, -3.0506, -7.4907,
        -6.8940, -4.8547, -5.0019, -2.7193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5552, -7.1565, -5.2963, -5.1217, -5.1660, -7.1169, -6.3910, -6.8304,
        -4.1192, -6.4730, -7.3739, -6.0448, -5.6318, -6.3786, -5.5614, -4.3382,
        -8.7460, -6.7210, -3.8862, -6.0953], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3382, -6.8519, -4.9520, -4.0650, -3.3748, -3.3961, -2.3182, -6.2681,
        -6.4795, -5.4917, -3.8740, -3.0158, -6.2079, -4.9050, -5.9530, -6.3674,
        -5.2271, -4.4288, -4.0134, -7.6024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9565, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4970, -6.9794, -5.2405, -6.3058, -3.5581, -3.0183, -5.8960, -7.1537,
        -4.8879, -5.1944, -4.4812, -2.0766, -4.2280, -7.0099, -5.5111, -4.9853,
        -3.9161, -4.9308, -4.3815, -7.5776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8036, -7.0364, -5.8066, -3.8725, -3.8720, -3.4250, -4.4987, -6.7562,
        -5.5710, -3.0701, -4.0865, -2.4440, -5.6103, -7.4005, -4.5498, -4.3120,
        -2.9205, -4.0408, -4.0235, -6.0899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4433,  -6.0214,  -6.7498,  -5.6224,  -3.6301,  -2.9811,  -3.2044,
         -3.1189,  -7.2884,  -5.7484,  -3.5460,  -3.7519,  -5.8573,  -2.1088,
         -6.3433,  -7.1055,  -5.1575, -10.9122,  -3.7630,  -4.3837],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3748, -6.0125, -4.4932, -5.1018, -2.8541, -2.5001, -6.2704, -6.7375,
        -5.7159, -4.5064, -2.3863, -4.2563, -6.6600, -7.1649, -5.7200, -8.1158,
        -3.9679, -4.9122, -3.8456, -7.2346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1992, -5.0729, -3.6078, -5.1570, -6.4124, -5.5916, -4.4670, -2.3658,
        -6.6877, -4.5049, -7.7486, -4.8603, -5.7099, -4.1137, -3.8942, -4.1705,
        -7.1423, -4.9677, -5.0854, -6.6449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8659, -5.1979, -6.3157, -3.7225, -6.3859, -6.9326, -5.8988, -4.7469,
        -3.7065, -2.3035, -5.4694, -7.5040, -5.4223, -5.3732, -6.2750, -1.8241,
        -2.4624, -7.0630, -7.3374, -6.1345], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2484,  -5.7488,  -4.7708,  -4.6076,  -9.4726,  -7.3327,  -4.2831,
         -7.4622,  -4.3042,  -8.6679,  -6.3479,  -4.8668,  -5.0768,  -3.5441,
         -5.7143,  -7.2587,  -5.2007,  -5.0927,  -3.9702, -19.2984],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1362, -7.2921, -6.2095, -6.9615, -2.8443, -2.8941, -4.5536, -7.4831,
        -4.9503, -3.8722, -5.1212, -5.3361, -5.4138, -6.7667, -7.8049, -5.9490,
        -8.0041, -9.5808, -7.8688, -6.5125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.1277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5453, -4.7073, -5.8107, -6.2562, -2.8985, -3.7515, -6.9209, -6.6904,
        -5.6997, -3.8572, -4.3518, -2.5398, -6.0815, -6.4977, -5.8759, -3.8380,
        -3.6915, -2.3787, -5.6190, -7.6165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0677, -4.2777, -3.5645, -3.3587, -5.1806, -6.5047, -7.0363, -5.0293,
        -4.9254, -4.6449, -6.2858, -4.6520, -6.0281, -6.7996, -5.9102, -3.7741,
        -2.8289, -2.9968, -2.7542, -6.6786], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8545,  -3.0726,  -7.9032,  -7.1069,  -4.8131,  -4.9101,  -8.2237,
         -2.6826,  -4.9383,  -7.3928,  -6.5882,  -6.9509, -10.3778,  -5.8068,
         -3.9669,  -6.0082,  -7.2435,  -5.2400,  -3.6365,  -4.7079],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6447, -5.7765, -7.3679, -3.4009, -1.8582, -6.2807, -7.1661, -5.5009,
        -4.2796, -2.5413, -2.8903, -6.8256, -7.0758, -4.9292, -4.1666, -3.0774,
        -3.7594, -3.1265, -6.9103, -6.9036], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4652,  -3.7662,  -2.2562,  -5.4873,  -7.3042,  -4.7022,  -4.2067,
         -6.3014, -14.2394,  -6.4554,  -5.7360,  -7.1581,  -3.4644,  -8.0035,
         -5.5951,  -6.2007,  -3.5290,  -3.5170,  -5.8290,  -7.5701],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9059, -7.5713, -3.7539, -9.9607, -5.4815, -4.7885, -5.4927, -1.9700,
        -2.4149, -6.6089, -7.1018, -5.7589, -4.9648, -2.9910, -2.6094, -6.0921,
        -7.1909, -5.2830, -3.4488, -3.4530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9669, -6.9729, -6.2560, -5.6585, -5.9643, -3.7587, -2.3005, -2.7353,
        -7.7032, -5.4866, -5.1700, -5.2930, -9.3440, -7.8157, -8.0853, -7.7209,
        -3.7844, -8.1361, -5.9498, -9.9373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9478, -6.2860, -4.0888, -3.2998, -2.1182, -4.7715, -7.7577, -4.4946,
        -5.9586, -4.8018, -3.0495, -3.9171, -7.8184, -5.6893, -3.9922, -3.4799,
        -3.9161, -4.1068, -6.3207, -7.2241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0539,  -3.6862,  -7.2820,  -7.4984,  -4.9941,  -4.6277,  -4.2300,
        -24.3355,  -6.2127,  -8.0749,  -6.2052,  -5.5830,  -7.2170,  -5.0091,
         -5.0465,  -4.2605,  -2.8005,  -4.6872,  -7.3643,  -5.9515],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9975, -2.1929, -2.7418, -5.7013, -7.1354, -4.8614, -4.1756, -5.4099,
        -2.8477, -4.7850, -7.0339, -7.2673, -5.9496, -9.7601, -4.4851, -4.2456,
        -5.7833, -7.2635, -5.4949, -5.8159], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4182, -3.6650, -4.9045, -6.6588, -4.8890, -5.4949, -2.5170, -6.8019,
        -2.7915, -5.7936, -6.3789, -5.5090, -4.7479, -3.0219, -2.8574, -5.3450,
        -7.5203, -4.8624, -3.6609, -4.1955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4777,  -4.2510,  -4.0113,  -6.9238,  -4.6517,  -4.0000,  -4.3145,
        -16.1542,  -5.3545,  -8.7479,  -2.9914,  -9.6844,  -5.7101,  -5.8041,
         -4.7010,  -2.9305,  -5.8014,  -7.7532,  -4.5331,  -4.2216],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1673,  -3.3897,  -2.8558,  -5.9035,  -6.7722,  -5.4747,  -5.0436,
         -3.4459,  -2.7988,  -5.0051,  -7.1574,  -4.7676,  -5.0832,  -4.5657,
         -4.9410,  -4.3895,  -5.7218,  -6.8219,  -5.7387, -11.2212],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4081,  -6.4958,  -1.9569,  -2.6273,  -4.6362,  -7.2268,  -4.8643,
         -4.1992,  -4.3723, -12.2919,  -5.7354,  -5.9411,  -7.8040,  -4.5109,
         -8.4809,  -4.9533,  -3.2866,  -4.3168,  -7.0257,  -4.2744],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0448, -6.2392, -5.8341, -4.6738, -5.4304, -3.2876, -2.5001, -6.0351,
        -6.5883, -5.1781, -4.1553, -3.2979, -1.6948, -3.2766, -6.7136, -5.0760,
        -5.0830, -4.7486, -3.2860, -3.8009], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8521, -3.6632, -3.4160, -6.6985, -6.3491, -5.1501, -3.5696, -3.4327,
        -2.5675, -4.5178, -6.7677, -4.7903, -5.2707, -2.8932, -7.0780, -4.0072,
        -5.7818, -6.6468, -4.8761, -5.3578], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7843, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6008, -6.7163, -7.5145, -4.9372, -4.3474, -3.2541, -3.8784, -4.6054,
        -6.4342, -7.1931, -6.4808, -3.7640, -2.5622, -7.0998, -3.7806, -6.3982,
        -6.4616, -4.9105, -4.4678, -3.3738], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.0641,  -6.2302,  -4.1325,  -4.7107, -14.9175,  -7.2399, -10.2073,
         -3.1810,  -5.8755,  -6.1156,  -5.1666,  -6.4253,  -4.3486,  -3.7127,
         -6.3205,  -7.6188,  -4.7777,  -3.4271,  -3.3185,  -5.7328],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0951,  -3.5283,  -2.1621,  -4.5059,  -7.4362,  -5.0967,  -6.1059,
         -3.0127,  -2.9779,  -5.2694,  -7.3801,  -4.7366,  -5.0081,  -6.6267,
        -32.1832,  -4.9655,  -7.6064,  -3.3348,  -8.3967,  -5.5982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9872,  -6.6111,  -3.3935,  -6.1848,  -6.8096,  -7.0436,  -6.1733,
        -10.4700,  -2.7882,  -4.0282,  -7.5308,  -5.8320,  -3.6044,  -3.8698,
         -2.7732,  -6.0249,  -7.8702,  -4.8659,  -3.9141,  -3.7473],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5472, -4.5400, -3.9403, -4.1833, -6.2646, -7.7334, -5.6026, -3.7591,
        -3.7020, -2.1113, -5.1479, -7.2376, -5.4472, -7.4291, -4.5072, -2.2648,
        -3.6385, -6.3259, -7.4148, -6.0934], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9496, -3.9032, -3.4679, -5.9418, -6.0827, -6.5364, -7.3133, -6.2098,
        -4.2383, -2.8190, -6.4388, -4.4602, -6.9879, -4.8149, -5.4525, -3.6430,
        -3.4267, -4.6460, -6.0262, -6.9060], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4507, -4.1906, -7.7119, -5.7980, -4.2868, -4.1538, -3.7933, -5.6464,
        -6.9960, -5.4457, -4.8412, -4.9959, -3.1794, -6.9937, -6.9253, -5.6115,
        -5.6484, -2.8869, -2.8383, -4.8241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6844, -17.9014,  -6.7842,  -7.6689,  -4.8573,  -7.6919,  -5.1152,
         -4.8193,  -3.2650,  -1.9359,  -5.3147,  -6.5089,  -4.4537,  -5.1556,
         -3.5536,  -3.0253,  -5.4568,  -7.8018,  -4.7471,  -5.4256],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1397, -3.7349, -3.2019, -2.2721, -3.8002, -7.3941, -5.5470, -4.8748,
        -3.6325, -5.7041, -4.0689, -6.0691, -6.9216, -5.4582, -3.7978, -3.7646,
        -4.2281, -3.4594, -7.2210, -4.8446], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9805, -4.1786, -3.9200, -5.8794, -7.0800, -5.7655, -5.3262, -3.3465,
        -3.3539, -6.3665, -6.9774, -5.0946, -5.0208, -3.9483, -3.0552, -3.8231,
        -6.4928, -6.3003, -5.2645, -4.2534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6779, -3.4036, -2.1557, -3.6186, -6.9421, -5.2833, -4.2320, -3.4320,
        -5.0173, -2.1600, -6.5586, -6.5534, -5.1160, -4.1641, -3.1271, -2.9072,
        -4.3053, -6.5355, -6.4256, -5.5621], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4497, -5.3254, -5.2010, -3.2656, -2.2738, -4.7892, -6.4043, -4.4802,
        -4.9689, -3.3742, -2.8829, -3.6847, -6.8570, -5.8592, -3.8600, -4.1026,
        -2.7413, -2.3785, -6.3586, -6.6720], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4909,  -7.4361,  -5.0931,  -3.6730,  -6.2742, -11.2002,  -8.3641,
         -5.4806,  -7.5288,  -4.5272,  -8.6711,  -5.8294,  -6.6838,  -4.2247,
         -2.8793,  -6.7415,  -7.3265,  -5.1293,  -4.5944,  -4.3422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3972, -5.4255, -3.7141, -4.2946, -4.8508, -2.9379, -7.1949, -5.6283,
        -4.0206, -3.7078, -3.7721, -5.5593, -6.8427, -5.1248, -4.3948, -4.4480,
        -1.7352, -5.0325, -7.5950, -4.5998], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8879,  -6.4970,  -4.6602,  -5.1987,  -5.3576,  -4.4606,  -4.8076,
         -5.9615,  -6.3142,  -8.1226,  -3.8172,  -9.0506,  -5.8769, -10.1068,
         -5.1103,  -3.3892,  -4.5681,  -7.4480,  -5.3321,  -4.0797],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3339,  -6.0913, -10.0169,  -3.9736,  -7.4525,  -4.1029,  -6.9072,
         -5.1581,  -4.4702,  -5.2217, -11.5305,  -5.2781,  -7.7973,  -6.9915,
         -7.3565,  -3.9090,  -8.6705,  -5.7670,  -5.5771,  -4.8845],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4074, -2.1642, -5.9993, -6.3952, -5.4242, -4.8071, -4.7215, -2.1465,
        -3.1495, -6.8092, -5.3885, -6.6562, -4.8185, -2.6256, -5.9741, -6.9182,
        -4.8642, -6.6070, -4.2216, -2.1091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0552,  -4.4405,  -3.6986,  -5.9331,  -6.1563,  -5.5859, -14.0886,
         -5.1038,  -2.2072,  -6.8784,  -7.4469,  -6.0171,  -6.8807,  -6.7441,
         -8.7785, -11.7188,  -8.7183,  -8.7329, -10.7033,  -6.4380],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9811, -5.3497, -3.2995, -6.1309, -2.9447, -5.8686, -6.1006, -5.8093,
        -4.9127, -3.7494, -3.0876, -6.0743, -7.6612, -4.4274, -3.9414, -4.0598,
        -7.3690, -4.4106, -7.1211, -5.2602], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6351, -2.9993, -4.0119, -6.5022, -6.4976, -5.9169, -4.2112, -2.8233,
        -2.5869, -4.6811, -7.9680, -5.7025, -4.4750, -3.3125, -2.7933, -3.6227,
        -7.6255, -5.7291, -4.7922, -4.1784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4906, -2.9224, -2.7685, -5.1227, -7.0869, -4.7070, -4.4472, -5.1377,
        -3.5270, -3.9850, -6.1881, -6.4722, -5.1823, -3.7891, -3.9315, -2.2316,
        -5.3885, -7.0794, -6.2182, -5.7123], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6725,  -5.6226,  -3.5960, -13.3023,  -7.3611,  -6.0767,  -7.1527,
         -3.7918,  -8.2157,  -5.4958,  -5.9541,  -3.5600,  -3.1645,  -4.3002,
         -7.7735,  -4.7867,  -5.5891,  -4.0523,  -4.3391,  -5.2085],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7508, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8025,  -8.9806,  -4.2912,  -5.1154,  -7.6859,  -5.6163, -10.3451,
         -2.8389,  -1.3337, -13.8665,  -5.8810,  -5.9820,  -3.6118,  -7.4028,
        -11.0945,  -5.4064,  -5.9823,  -6.9040,  -3.4675,  -8.9782],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2308,  -5.2177,  -3.8741,  -4.1741,  -2.8922,  -7.6530,  -7.4117,
         -5.1579,  -3.8843,  -5.8273, -16.3177,  -4.6560,  -6.0405,  -4.0308,
         -5.1120,  -6.3904,  -3.9860,  -5.3062,  -3.7946,  -2.6014],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3156, -5.3704, -3.8596, -2.7130, -3.5933, -5.4595, -6.9232, -5.4172,
        -5.0691, -3.1922, -2.6146, -4.4405, -7.5672, -5.3850, -5.1148, -3.9222,
        -3.9014, -4.3159, -6.0210, -6.8056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7462,  -5.6212, -16.6165,  -6.9983,  -6.8308,  -7.7852,  -4.1867,
         -8.0604,  -5.7164,  -5.0730,  -4.3195,  -5.8774,  -3.1425,  -7.7770,
         -5.0574,  -4.3586,  -4.1305, -16.1749,  -6.0709,  -6.6141],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.6788,  -6.1202,  -6.4241,  -6.0728,  -7.0286,  -3.3787,  -9.0831,
         -5.8089,  -4.1610,  -4.0323,  -3.1353,  -5.4134,  -7.4667,  -4.7383,
         -6.8552,  -3.3093, -17.4108,  -7.9324,  -8.3246,  -7.6274],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9648,  -5.0369,  -4.2664,  -4.7434,  -6.9326,  -5.3219,  -8.4483,
         -5.3953,  -4.5367,  -6.2591,  -5.7352,  -3.3072,  -5.8096,  -7.2388,
         -5.9751,  -4.7676,  -3.1312,  -2.7626,  -4.5198,  -6.9856],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6761, -5.4452, -3.0986, -6.1541, -5.9917, -7.5141, -5.4143, -5.4893,
        -3.3153, -1.6284, -6.4679, -7.2040, -4.9369, -4.8816, -3.7478, -2.4141,
        -2.9371, -7.1187, -5.4747, -6.9641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5995, -1.8952, -6.1795, -7.3287, -4.9123, -4.7203, -4.1792, -2.8874,
        -3.3024, -6.6338, -6.9098, -5.8719, -6.2884, -4.1727, -5.0466, -7.6640,
        -7.3275, -4.9827, -4.4968, -5.6554], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-28.7189,  -6.7952,  -6.9535,  -5.9309,  -3.3752,  -2.5057,  -4.7093,
         -7.0080,  -5.1697,  -3.4564,  -3.3707,  -3.2332,  -6.9302,  -7.3420,
         -4.9983,  -5.0120,  -2.7822,  -1.7799,  -4.8074,  -6.4465],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0262,  -7.4163,  -7.6317,  -4.5271,  -8.2451,  -5.0640,  -4.6548,
         -4.3692,  -3.3006,  -8.6756,  -7.4094,  -4.6802,  -5.3665,  -6.1270,
        -12.2257,  -6.0545,  -7.0103,  -7.5361,  -3.8248,  -8.4991],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3296, -5.2065, -5.3858, -5.3299, -4.5369, -2.8291, -6.2073, -6.3263,
        -5.4592, -3.2950, -4.6079, -3.4012, -3.3994, -5.9874, -6.3816, -4.8081,
        -5.1196, -2.7473, -5.9204, -2.6835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3765, -2.4560, -4.3665, -5.6663, -5.6474, -5.8910, -4.9300, -3.2746,
        -3.6591, -6.6272, -6.8762, -4.9735, -4.4959, -3.2601, -1.7954, -6.2456,
        -7.3330, -6.2147, -5.8037, -3.6360], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5311, -2.6343, -4.6526, -7.8037, -5.2974, -4.2633, -4.4587, -2.4597,
        -3.6862, -5.9479, -6.2342, -5.4566, -4.5832, -2.9245, -1.9690, -4.3940,
        -7.4520, -4.7823, -6.1004, -2.9944], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2371, -5.5286, -6.1521, -5.3486, -6.9177, -5.5329, -4.9832, -6.3300,
        -7.2446, -4.6085, -7.2018, -7.2595, -4.7688, -5.9648, -8.4245, -4.7424,
        -6.9874, -5.7794, -7.5736, -7.1845], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1411, -6.2820, -5.4410, -3.8029, -3.2045, -1.9799, -3.7993, -6.2576,
        -6.5518, -5.3302, -3.5539, -3.5177, -2.4579, -5.5930, -7.2519, -5.3819,
        -4.8883, -3.6916, -2.4764, -5.6461], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0897,  -6.5261,  -4.8022,  -4.5895,  -3.8801, -32.2608,  -4.9270,
         -8.1783,  -3.4163,  -7.7916,  -5.6629,  -6.0329,  -3.4770,  -4.4314,
         -3.5639,  -6.0750,  -6.6559,  -5.7851,  -4.2162,  -3.1206],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0223, -2.0241, -5.3511, -6.2773, -4.8461, -4.2961, -3.6562, -2.6592,
        -4.2111, -7.0230, -5.2809, -5.0728, -4.3647, -4.2647, -2.0524, -6.6660,
        -5.9871, -4.1845, -4.2728, -3.5577], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9996,  -6.1459,  -7.0469,  -5.7423,  -5.3037,  -3.9260,  -2.3861,
         -3.8009,  -7.6431,  -4.8813,  -4.4211,  -4.9408,  -4.0490,  -5.2338,
         -8.5127,  -6.7555,  -6.9656, -10.5718,  -4.3390,  -2.6703],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5060, -6.1048, -4.1315, -4.6393, -5.1547, -6.6438, -7.1673, -5.6029,
        -5.7366, -3.8467, -4.3152, -3.0128, -4.5991, -6.9440, -4.8922, -4.1243,
        -4.1665, -5.3756, -5.2513, -8.3135], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7152, -3.3689, -3.1393, -4.3733, -6.9984, -5.7058, -4.9198, -2.6779,
        -1.7422, -4.0011, -7.8000, -4.6180, -4.5134, -4.2809, -2.6830, -5.3555,
        -6.5898, -6.7016, -5.6542, -4.2954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5879, -5.4161, -4.3329, -3.5084, -2.0264, -3.8536, -6.6045, -5.1125,
        -4.6554, -3.6027, -4.4293, -4.7217, -7.2599, -6.0476, -3.0580, -4.6805,
        -2.5384, -4.5458, -7.0284, -4.4944], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9414,  -3.2112,  -5.0516,  -6.2171,  -4.1497,  -6.0573,  -4.6050,
         -3.5132,  -4.5812,  -7.1198,  -4.5516,  -4.3091,  -3.1140,  -2.4107,
         -3.9713,  -7.1026,  -5.0167,  -4.6669,  -2.9146, -19.2194],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4179, -4.4393, -3.5041, -6.1151, -8.4793, -5.1772, -4.6833, -5.1882,
        -3.6680, -6.1200, -7.4940, -5.6454, -7.1529, -3.6879, -2.9865, -6.6637,
        -6.5899, -4.4917, -5.0026, -3.5103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3776, -14.5494,  -7.4097,  -6.2231,  -7.4403,  -3.8378,  -8.3206,
         -4.1706,  -7.4315,  -4.9623,  -2.8508,  -5.7164,  -7.7721,  -5.4270,
         -4.8491,  -5.9200,  -2.0196,  -5.0040,  -6.7101,  -4.4457],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9284,  -3.6073,  -3.4180,  -5.1111,  -7.7219,  -5.2514,  -5.1668,
         -3.6285,  -4.6094,  -2.4219,  -6.3288,  -7.1328,  -4.8127,  -4.8493,
         -6.5282, -17.9306,  -6.8868,  -5.0193,  -7.6225,  -6.4644],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8043, -7.9511, -5.5734, -6.5086, -7.3665, -5.1066, -7.2682, -7.3718,
        -4.1636, -7.9836, -6.0630, -6.7370, -7.4086, -6.0784, -7.5629, -6.3808,
        -8.6657, -6.4145, -5.3700, -4.8960], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.4337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9699, -5.8529, -4.8874, -3.2848, -5.1558, -3.1040, -7.4650, -5.1888,
        -4.8121, -3.6660, -2.2817, -4.7665, -7.1550, -5.5487, -5.3153, -3.4041,
        -3.8707, -4.4210, -7.5682, -4.7702], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3502, -3.0334, -6.6453, -6.4532, -5.6639, -4.9324, -3.5252, -4.5867,
        -2.6188, -6.3161, -6.4435, -5.3713, -7.1201, -3.1666, -2.5695, -4.2972,
        -6.0044, -6.9158, -5.1504, -3.8274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8817, -5.5822, -7.5910, -5.5889, -5.3555, -3.6089, -2.6214, -5.3164,
        -6.5594, -4.7012, -3.7126, -3.2440, -4.3338, -2.6612, -6.3033, -6.4927,
        -5.4937, -3.1894, -3.7541, -5.0779], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9522, -3.4769, -4.4025, -8.1999, -5.2971, -2.9523, -3.3320, -2.6362,
        -5.5524, -7.5505, -4.5733, -4.2922, -5.3392, -4.2428, -2.5382, -7.0886,
        -7.3006, -6.2436, -5.0726, -3.1496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4668, -6.5842, -5.3519, -3.9774, -3.8761, -2.8050, -3.5255, -6.8389,
        -6.7790, -5.6019, -3.8940, -3.2607, -4.0112, -2.9919, -7.0388, -5.2764,
        -5.1530, -3.7077, -3.7740, -5.2484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5285,  -4.0116,  -8.0457,  -4.5363,  -4.4446,  -3.5235, -12.0305,
         -5.3697,  -7.4263,  -4.5539,  -5.4112,  -6.1796,  -4.0030,  -5.2206,
         -7.4195,  -3.9183,  -4.2208,  -6.2965,  -6.6714,  -5.7061],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4863, -2.9307, -3.4472, -3.4414, -6.4328, -6.5363, -5.4675, -5.8394,
        -2.7903, -2.4096, -5.3056, -7.3717, -5.1711, -5.7095, -4.8112, -2.8443,
        -4.1228, -5.6984, -6.3745, -4.9625], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3007, -7.6187, -7.1079, -5.9496, -8.0998, -6.4048, -4.0203, -4.4398,
        -5.3772, -3.5548, -5.7887, -6.5374, -5.0345, -5.6439, -4.4657, -2.8568,
        -3.1497, -6.7176, -6.1737, -5.6484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8844, -2.1862, -5.9897, -6.7360, -5.0805, -4.0615, -3.3263, -5.4169,
        -2.9621, -6.3406, -6.4994, -5.1107, -4.4473, -3.3686, -4.9978, -4.4733,
        -5.7609, -7.0882, -5.7127, -4.5436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8520,  -5.5054,  -5.6228,  -8.3208, -26.0399,  -5.7384,  -7.2192,
         -6.8940,  -7.2674,  -3.7218,  -8.2263,  -5.7791,  -7.9056,  -4.6722,
         -3.7166,  -8.1355,  -8.0642,  -4.6197,  -9.0511,  -2.5935],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2273, -3.5887, -3.5372, -2.8668, -5.3837, -7.3404, -5.2820, -5.2311,
        -3.1594, -2.6679, -3.2248, -6.9799, -5.3302, -5.2626, -4.3038, -2.3606,
        -3.2268, -6.9294, -5.3956, -5.7803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2134, -5.6299, -4.5311, -3.0716, -5.1048, -3.2474, -6.3408, -6.8936,
        -5.7533, -4.0593, -2.7686, -5.0076, -5.1842, -7.8551, -5.3032, -4.9723,
        -4.4818, -4.3271, -3.9768, -6.7279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8491, -6.8638, -4.5680, -4.4936, -3.2460, -2.6634, -4.5618, -7.1277,
        -4.7704, -4.4044, -5.0185, -4.0875, -3.0745, -7.1728, -5.7865, -4.3386,
        -3.0288, -2.1962, -5.0326, -7.7562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7052, -6.3349, -4.5337, -4.2488, -4.8023, -5.0053, -7.5775, -5.6708,
        -4.5226, -5.2592, -4.0933, -5.6561, -6.2428, -6.4500, -5.7435, -4.7200,
        -4.3234, -2.7203, -5.5148, -7.2714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7409,  -6.5983,  -7.6852,  -5.0254,  -5.3931,  -6.3073, -34.5428,
         -4.9915,  -6.5147,  -4.4929,  -7.2962,  -4.8692,  -5.2480,  -3.8318,
         -2.0102,  -4.8758,  -7.3638,  -4.7482,  -4.0549,  -3.1068],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2078, -6.2600, -5.6245, -4.7341, -3.8177, -5.5524, -4.0936, -5.6634,
        -6.8136, -4.7932, -4.6710, -4.2249, -4.5779, -3.7820, -6.5915, -6.9832,
        -5.6445, -3.9811, -3.0803, -2.1363], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9568, -7.4728, -4.9514, -4.0394, -5.3597, -5.4145, -2.8670, -5.5648,
        -6.1444, -5.4217, -4.7829, -2.4991, -3.0685, -3.9417, -5.8955, -6.6696,
        -5.4948, -3.7337, -3.5323, -2.2324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1840,  -2.9261,  -8.1876,  -5.7405,  -8.6584,  -4.0584,  -2.4596,
         -7.5824,  -7.4060,  -4.4429,  -4.9863,  -4.8539, -19.7813,  -7.2397,
         -6.1351,  -7.1507,  -2.2714,  -7.4803,  -6.0763,  -5.2977],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1489, -6.1965, -5.9798, -5.6399, -4.1691, -2.8636, -4.7218, -2.6735,
        -6.9887, -5.4713, -5.4291, -3.9961, -2.8408, -4.3910, -7.8569, -5.0657,
        -5.5255, -3.3183, -2.7193, -4.4559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5097, -5.1105, -5.1139, -3.4429, -2.2939, -3.0402, -7.3030, -5.5313,
        -5.7249, -3.6541, -1.8716, -2.9818, -7.5628, -5.3265, -6.7076, -4.3832,
        -1.6394, -2.6848, -8.0351, -5.3911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6435,  -6.5710,  -6.0778,  -5.6764,  -2.9116,  -2.6230,  -7.8120,
         -5.2648,  -4.0151,  -6.5833, -16.5491,  -5.7315,  -4.8172,  -7.8042,
         -3.6303,  -6.1523,  -7.2594,  -4.9588,  -5.7913,  -3.3911],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0284, -3.6283, -6.4690, -7.5568, -4.7564, -4.8254, -3.7718, -3.5884,
        -4.6058, -5.9864, -6.9723, -5.8354, -4.6312, -3.0740, -2.2243, -4.9473,
        -7.0673, -5.0284, -5.1938, -2.7176], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6242, -8.5504, -4.1509, -8.4008, -5.5645, -3.2910, -4.5083, -2.5989,
        -4.9961, -7.3194, -4.8801, -4.9282, -4.5298, -3.3808, -5.0345, -7.3975,
        -5.4495, -4.1715, -3.1205, -3.3147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1080, -2.6707, -4.8460, -7.2972, -5.1629, -4.9958, -3.6939, -3.4704,
        -4.1955, -6.9595, -7.3567, -6.4350, -4.8240, -3.2541, -2.5844, -5.3092,
        -6.9912, -4.9497, -4.2436, -4.9994], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3522, -6.0028, -6.8875, -5.5775, -3.5183, -3.1341, -1.6111, -5.4645,
        -6.8551, -5.1090, -3.4338, -3.5795, -3.2921, -4.0284, -7.1559, -6.4564,
        -5.6676, -5.3101, -2.8272, -4.2716], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7274, -6.0924, -2.7772, -2.4850, -3.0254, -7.0005, -5.0118, -5.4117,
        -2.8246, -5.1785, -5.4920, -6.2697, -6.8469, -5.5922, -5.9789, -3.4211,
        -2.1550, -4.1843, -7.3504, -5.2194], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9460,  -6.9782,  -6.5140,  -7.0668,  -3.4218,  -9.0029,  -5.7094,
         -5.5388,  -4.6227,  -2.1169,  -5.8348,  -8.1355,  -4.9372,  -4.5753,
         -5.5478, -18.7124,  -7.4285,  -6.0464,  -7.2220,  -4.1842],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3013,  -6.1877,  -5.0751,  -4.1759,  -3.0178,  -3.9738,  -6.0931,
         -7.8250,  -4.8995,  -3.5323,  -3.6832, -25.3445,  -7.9193,  -8.0828,
         -7.1662,  -4.8528,  -8.2939,  -5.5191,  -5.3916,  -3.4428],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2003, -4.4371, -3.5777, -3.8849, -3.6644, -6.7661, -6.6035, -5.3625,
        -5.5640, -3.0340, -5.1659, -2.7153, -6.1981, -6.5141, -5.5584, -4.7349,
        -4.3455, -2.8462, -3.7699, -5.9621], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0418,  -7.2497,  -5.6300, -10.0246,  -3.0715,  -6.7917,  -7.3056,
         -6.4019,  -5.6943,  -4.0392,  -4.0862,  -5.8552,  -7.6022,  -5.1530,
         -3.4323,  -2.9054,  -4.0413,  -3.4461,  -7.1786,  -5.4508],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3239, -4.6216, -3.8987, -6.6738, -7.0893, -5.8295, -3.6140, -3.2285,
        -2.3895, -4.3642, -6.8850, -4.8012, -4.7000, -3.8965, -3.1978, -3.5025,
        -6.4609, -6.3976, -5.6485, -4.7366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0380,  -9.7799, -14.6709,  -4.7415,  -5.2783,  -6.5830,  -5.9232,
         -7.7284,  -2.8026, -10.9144,  -6.1122,  -5.6667,  -5.2655,  -2.3681,
         -2.6723,  -7.7444,  -4.8118,  -5.8301,  -4.5277,  -4.0027],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7703,  -6.3164,  -6.0470,  -4.7179,  -2.6801,  -2.3867,  -6.8155,
         -7.4642,  -5.5828,  -5.3299,  -4.4336, -25.3722,  -5.9642,  -7.9681,
         -3.9577,  -4.1845,  -6.6304,  -5.7515,  -4.0224,  -3.1145],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7241,  -5.1449,  -2.6755,  -4.2475, -10.8937,  -7.9580,  -7.8262,
         -7.4318,  -4.9910,  -8.0357,  -5.0503,  -4.9596,  -4.0757,  -3.5419,
         -8.5739,  -7.4187,  -4.8461,  -4.6757,  -3.5488, -16.5683],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1144, -5.6186, -5.2154, -4.5018, -4.9011, -4.8377, -7.8004, -6.4776,
        -5.9128, -5.1092, -7.1588, -8.2105, -6.0328, -7.5963, -4.7214, -8.8948,
        -5.8423, -5.5354, -4.7573, -3.6905], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0360, -4.7989, -2.6635, -5.5672, -6.8931, -4.8867, -4.7700, -2.5325,
        -3.0803, -4.9227, -6.9114, -5.0447, -3.6926, -3.1949, -2.6170, -6.7642,
        -6.6766, -4.8340, -5.8626, -3.3958], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7816,  -4.9997,  -7.0556, -12.8207,  -5.3885,  -6.7719,  -8.0921,
         -7.0842,  -2.7289,  -8.0079,  -5.5862,  -4.4336,  -4.2671,  -2.1877,
         -4.5840,  -7.5417,  -5.3365,  -5.9407,  -2.9191,  -5.7604],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8144, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5649, -6.9012, -7.6564, -3.7717, -8.7558, -5.5396, -4.3890, -4.7368,
        -5.1770, -9.6232, -7.5103, -5.4562, -4.6342, -8.8167, -2.8027, -9.2310,
        -6.0684, -5.7560, -5.3225, -8.0381], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7460, -2.6330, -3.2483, -7.5061, -5.1601, -4.3260, -3.7601, -4.0363,
        -2.5425, -6.6037, -6.8043, -5.3857, -5.6104, -2.6015, -2.6953, -5.3936,
        -7.0397, -5.1467, -4.5695, -5.1251], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6040, -7.6553, -5.3261, -4.3062, -5.2584, -5.1864, -3.4975, -6.3990,
        -6.7475, -4.6274, -4.0390, -3.5349, -4.5112, -2.4860, -6.6582, -5.9024,
        -4.9237, -4.1862, -2.7758, -1.7337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3702,  -7.8377,  -5.3543, -10.3688,  -3.9316,  -2.4749,  -8.2040,
         -5.8632,  -6.1392,  -6.7417,  -8.2665, -18.5659,  -5.4144,  -8.0184,
         -5.4921,  -6.8904,  -3.8638,  -4.6041,  -2.8209,  -2.8625],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7105, -4.3840, -5.8678, -6.6017, -5.3497, -4.8327, -4.0517, -3.1791,
        -3.5644, -6.1547, -6.8359, -4.4098, -4.2247, -3.2446, -2.2457, -4.3640,
        -6.4829, -4.2629, -5.1361, -2.6813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0650,  -5.0951,  -2.7768,  -3.8715,  -2.4387,  -6.6223,  -6.4050,
         -4.5578,  -6.8135,  -3.4409,  -2.0004,  -3.7535,  -7.5940,  -4.9752,
         -4.8947,  -5.3780, -13.3241,  -7.1640,  -7.2033,  -7.5704],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9813, -4.9630, -2.3994, -1.7457, -5.1676, -6.5647, -4.8603, -2.8604,
        -2.7000, -2.7365, -3.0826, -6.6042, -6.4421, -4.9450, -4.1642, -2.3280,
        -2.6712, -6.3019, -7.1125, -4.5987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6477, -5.8132, -3.7512, -5.3431, -6.8382, -4.7975, -6.5450, -3.6885,
        -2.3938, -7.4528, -7.5899, -5.1939, -4.5623, -3.7599, -3.5826, -3.5231,
        -5.3700, -6.2830, -4.5486, -7.2487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6461,  -7.4429,  -3.8302, -16.2256,  -5.8787,  -7.3891,  -4.5553,
         -7.8377,  -4.6392,  -4.9341,  -4.4941,  -5.6388,  -2.4668,  -4.3877,
         -6.7336,  -4.6648,  -4.3762,  -2.8710,  -3.8709,  -5.8851],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6384, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6100, -5.7724, -5.3869, -3.4896, -5.6568, -3.5471, -4.5542, -3.6506,
        -6.3503, -4.7478, -5.3023, -4.2564, -5.6402, -4.4698, -7.5032, -4.0896,
        -4.0936, -3.2413, -7.1948, -4.1214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2356,  -5.6902,  -7.1501,  -4.6952,  -4.5250,  -3.0825, -28.6487,
         -5.2256,  -6.1303,  -7.3309, -10.5900,  -5.7979,  -6.0139,  -7.2678,
         -8.1421,  -9.5657,  -8.0457,  -7.8807,  -6.6855,  -9.4414],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4913, -4.8584, -3.0465, -3.1650, -3.7939, -6.9217, -4.8706, -5.8108,
        -5.6130, -2.4167, -3.9738, -6.8539, -5.1711, -2.8789, -3.6432, -2.1003,
        -6.2650, -7.2478, -4.3477, -5.0942], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2875,  -6.5834,  -7.1998,  -4.2864,  -4.7725,  -3.3865,  -5.3320,
         -4.4018,  -6.3571,  -4.5759,  -4.3034,  -6.1661, -26.0953,  -9.3754,
        -10.2004,  -8.1652,  -8.7048,  -5.8590,  -7.2455,  -4.7910],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3467, -7.1532, -3.6108, -9.1012, -4.8705, -6.5380, -3.8762, -2.5232,
        -3.0161, -6.2986, -6.7105, -4.6548, -5.4217, -2.7110, -2.2888, -5.0250,
        -7.5022, -4.4402, -5.2547, -5.9469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9157, -6.6585, -5.4486, -6.6824, -5.2134, -7.3949, -2.3213, -6.8375,
        -6.7217, -4.5745, -6.0569, -3.1094, -2.1738, -3.2263, -6.6561, -6.1222,
        -4.9471, -4.0213, -2.9225, -2.2894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.3726,  -6.2059,  -6.6362,  -6.1471,  -7.1764,  -5.3218,  -5.9758,
         -7.7994,  -4.6407,  -4.6946,  -3.4283,  -2.2016,  -4.8486,  -7.2121,
         -5.0873,  -3.4724,  -3.2284,  -2.9744,  -1.8163,  -7.4794],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.3189,  -7.2690,  -7.2727,  -7.3550,  -3.6917,  -7.4433,  -5.4875,
         -5.9833,  -3.6354,  -3.8089,  -5.4354,  -7.4768,  -4.4413,  -5.1288,
         -2.7614,  -2.4747,  -3.7592,  -5.8501,  -6.7251,  -5.1196],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9494, -6.7294, -3.0538, -3.6444, -2.4993, -6.8796, -4.5174, -4.2959,
        -3.7479, -2.7507, -6.3905, -6.6164, -3.9340, -3.8257, -3.1285, -3.3609,
        -3.3188, -6.6364, -4.7414, -2.8860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6047, -4.5718, -6.2150, -6.7903, -4.8415, -4.1610, -7.4921, -4.4663,
        -5.4308, -5.0197, -6.4562, -4.5728, -4.9341, -4.3192, -1.9701, -3.5384,
        -7.8771, -4.5310, -5.0990, -3.5320], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4135, -3.4609, -5.0474, -5.1259, -6.0914, -6.5914, -5.1429, -3.1661,
        -2.5443, -4.1528, -3.5154, -6.3924, -6.2537, -5.0475, -5.2929, -3.0330,
        -4.1129, -5.6445, -7.6112, -4.7862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.2518,  -4.7673,  -4.4397, -17.1880,  -7.8474,  -5.6923,  -8.8771,
        -14.5410,  -8.0311,  -6.3213,  -7.8301,  -3.2480,  -8.9375,  -5.6199,
         -3.8996,  -4.6172,  -2.8592,  -7.9395,  -7.1559,  -4.6400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7783, -2.4189, -5.8313, -7.4334, -4.5972, -4.2956, -4.8443, -1.8429,
        -4.1082, -6.9630, -5.1567, -9.1851, -3.1567, -1.6663, -2.8025, -7.5384,
        -5.0761, -7.8669, -7.3599, -8.4472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4733, -2.4457, -4.5031, -6.7949, -4.6856, -6.0915, -3.1145, -5.3732,
        -3.5452, -6.5802, -4.5482, -3.7915, -3.0540, -5.9657, -3.5980, -6.3517,
        -5.8913, -4.9458, -3.8086, -3.5277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0917, -6.6037, -4.7444, -5.0464, -3.1815, -7.2342, -3.0848, -6.2123,
        -5.9811, -4.7140, -3.0497, -3.3258, -3.0564, -3.4453, -6.2927, -6.5315,
        -4.8681, -4.0806, -3.1404, -6.9741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9263, -6.1055, -7.2594, -5.3248, -5.9421, -3.3649, -3.8378, -2.4102,
        -6.9334, -5.8900, -4.7072, -4.0139, -2.7088, -2.0258, -5.1079, -6.7348,
        -4.3756, -4.0117, -2.4517, -2.3455], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7324, -4.1817, -6.5860, -4.6751, -6.8824, -4.2108, -5.5230, -2.4363,
        -6.3212, -6.1656, -4.7595, -4.1632, -4.1828, -3.5988, -4.4873, -5.7316,
        -6.3978, -4.6596, -6.3903, -2.8515], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3554, -3.7691, -5.2217, -7.4362, -5.1812, -2.9689, -2.9202, -1.5875,
        -3.8299, -7.0816, -4.3765, -4.5486, -4.5753, -1.7154, -5.6600, -7.1811,
        -5.0239, -5.2556, -4.1447, -3.5847], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7964, -7.4378, -7.1436, -4.6271, -5.6435, -6.1683, -3.2896, -3.5333,
        -6.7572, -6.5172, -5.5813, -2.6930, -3.0639, -2.0209, -5.5892, -6.7257,
        -4.0524, -3.5136, -2.3314, -1.8974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3176, -6.0599, -6.3110, -5.0265, -3.1378, -3.0872, -1.9293, -2.5255,
        -6.5890, -4.5531, -9.1090, -3.3278, -4.9125, -1.8416, -5.9040, -6.6827,
        -4.8930, -5.6082, -4.5812, -6.3068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8874,  -4.9305,  -3.6822,  -3.4548,  -3.6822,  -5.4923,  -7.3905,
         -4.7511,  -5.7850,  -6.5036, -18.8802,  -7.2366,  -6.9399,  -7.1967,
         -4.0111,  -8.4583,  -5.0641,  -3.9426,  -4.8014,  -3.0843],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9395, -5.8710, -3.8628, -2.2136, -4.2327, -3.0341, -6.5217, -6.5609,
        -4.8705, -3.6224, -3.2183, -3.2174, -4.9782, -6.9039, -4.6016, -4.9726,
        -3.3424, -2.7933, -3.6995, -6.9551], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2804,  -4.4614,  -4.2058,  -5.4440,  -3.3296,  -3.1705,  -7.0853,
         -5.2648,  -3.8077,  -3.5126,  -5.7416,  -9.1045,  -7.0580,  -4.1795,
         -4.5424,  -4.5942, -24.7638,  -5.0979,  -8.0837,  -7.6884],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8579, -2.5964, -6.7489, -6.1531, -5.1501, -4.3680, -3.1662, -3.1110,
        -3.0291, -6.5997, -6.4859, -4.9382, -4.8405, -3.5624, -3.3281, -5.9539,
        -7.2822, -6.4172, -5.2025, -5.8955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1206, -6.6009, -3.7422, -2.9878, -4.5959, -7.9906, -4.7051, -6.4249,
        -4.2362, -3.8207, -2.5144, -7.3215, -4.7989, -4.1675, -3.6350, -4.2157,
        -2.6766, -6.5696, -6.2835, -5.1696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2906,  -5.9009,  -4.8014,  -5.1364,  -2.1536,  -4.5049,  -7.6208,
         -4.8290,  -3.3467,  -3.2165, -20.7400,  -5.5989,  -7.2185,  -8.6150,
         -5.5063,  -6.8438,  -4.6779,  -5.6032,  -3.5980,  -2.9914],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6505, -5.7301, -2.9681, -2.7719, -6.0905, -6.6705, -4.4441, -4.8502,
        -5.2667, -8.5661, -4.2591, -7.0155, -6.3125, -5.5200, -5.1596, -3.0696,
        -2.5871, -3.7978, -5.8590, -6.9495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2411,  -5.2707,  -4.7216,  -5.8925, -14.3167,  -6.8908,  -6.7143,
         -5.9325,  -4.0652,  -7.9902,  -4.8070,  -4.8927,  -4.5091,  -3.1275,
         -8.5475,  -6.5271,  -5.0289,  -4.9901,  -5.4144,  -2.5074],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9694, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8515, -5.3847, -4.2244, -5.3273, -9.8007, -7.2508, -9.3683, -5.1613,
        -7.5172, -5.3152, -3.9696, -3.6614, -3.0637, -7.0208, -6.9850, -4.6152,
        -6.0475, -2.7877, -1.8968, -3.8488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9935, -2.9916, -3.0185, -6.1812, -5.5562, -3.5289, -6.2116, -4.9105,
        -3.5313, -4.9401, -6.9115, -4.3979, -4.1364, -2.7635, -1.4963, -6.0555,
        -6.6047, -4.3764, -4.0501, -3.1909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4762,  -3.0945,  -2.1442,  -6.5931,  -7.0775,  -4.3827,  -4.9036,
         -3.4068, -12.0275,  -5.1855,  -7.0323,  -6.4556,  -5.0636,  -3.6687,
         -4.0706,  -3.8773,  -3.2758,  -5.5462,  -6.4917,  -4.9532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4104,  -7.4043,  -6.2858,  -4.7440,  -4.6646,  -2.9618,  -7.9128,
         -7.1770,  -4.5287,  -4.5546,  -3.9319, -10.9674,  -6.0487,  -6.4101,
         -8.5431,  -2.4485, -11.4719,  -5.4186,  -6.4264,  -5.5186],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9611, -4.2542, -3.0510, -3.8988, -4.0499, -7.2153, -5.3162, -5.4433,
        -3.2895, -2.6160, -4.7271, -6.9993, -4.3464, -5.9025, -3.0251, -2.3641,
        -2.7310, -7.0846, -4.9764, -7.9204], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7578, -3.6811, -2.6700, -4.9045, -4.0383, -6.6870, -4.7659, -7.8324,
        -2.8793, -2.6229, -3.4019, -7.4716, -4.7332, -4.9984, -3.2059, -3.9580,
        -4.3519, -5.7727, -6.4527, -4.9100], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7540, -4.2764, -5.0309, -2.6261, -4.8939, -4.0311, -6.2716, -6.0174,
        -5.0458, -3.6325, -2.6543, -2.2296, -4.5523, -6.7603, -4.7518, -4.6338,
        -2.9763, -2.2103, -4.3014, -7.1621], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6240,  -5.0789,  -4.6998,  -5.2684,  -2.1035,  -6.5444,  -7.6411,
         -4.8006,  -2.1991,  -4.1906, -11.6727,  -7.9637,  -8.0788,  -7.4044,
         -5.1555,  -7.8980,  -4.5056,  -4.7695,  -3.9824,  -3.6723],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0271,  -2.1355, -10.3326,  -5.5002,  -3.2164,  -5.4763, -14.9389,
         -4.7914,  -6.4542,  -7.2719,  -5.5189,  -2.7009,  -4.3101,  -1.7183,
         -5.2148,  -5.9699,  -6.1924,  -5.6829,  -4.5074,  -2.4840],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1541,  -5.8508,  -4.7023,  -3.4447,  -5.4750,  -2.3344,  -5.1260,
         -6.0305,  -5.2659,  -6.4064,  -1.4643,  -4.0096,  -5.0000,  -7.3653,
         -3.9057,  -3.6378,  -4.1240, -37.5612,  -4.3358,  -7.4450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.9257,  -4.7979,  -6.2953,  -7.5922,  -6.0764,  -6.7267,  -3.9443,
         -3.4076,  -3.7902,  -6.3892,  -4.4525,  -5.0758,  -4.2633,  -5.4197,
         -4.4669,  -4.8795,  -6.3703,  -4.7436,  -4.0015,  -3.0729],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7898, -7.0590, -4.5522, -4.3882, -3.7658, -2.8083, -3.1571, -6.6657,
        -7.1251, -6.1329, -2.5530, -3.1021, -2.4541, -5.8698, -6.4424, -5.0470,
        -4.9662, -3.0163, -2.7724, -6.7279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2622, -7.7009, -7.6000, -6.8915, -7.0572, -5.7419, -7.1115, -5.0613,
        -6.4392, -5.5869, -7.6538, -6.1832, -4.3350, -5.3054, -4.2899, -9.5302,
        -7.2567, -5.1258, -5.8063, -4.5879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7755,  -6.5671,  -5.9842,  -7.9120,  -5.8293,  -8.8900,  -5.9304,
         -6.6805,  -4.9977,  -3.4961,  -6.7341,  -7.4413,  -4.2506,  -4.7205,
         -4.6667, -26.0807,  -5.6163,  -7.8562,  -7.0229,  -5.4908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7271, -5.5595, -7.3728, -5.8565, -6.8019, -5.2403, -7.5074, -7.2491,
        -4.6552, -5.1211, -4.8089, -7.1506, -7.2383, -4.7487, -4.4862, -5.4299,
        -3.0785, -4.6540, -7.2575, -4.6826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5382, -10.2775,  -4.6545,  -8.8153,  -5.0047,  -4.3314,  -4.6555,
         -2.3647,  -3.6018,  -7.4378,  -4.6677,  -5.4159,  -3.3797,  -2.9828,
         -3.8967,  -6.1083,  -6.3717,  -5.1867,  -3.8566,  -3.8817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3599, -1.6903, -5.6208, -7.4652, -4.1637, -4.1612, -4.3947, -3.0880,
        -4.2397, -6.3722, -6.6703, -4.9420, -4.2513, -4.0850, -2.1216, -4.8012,
        -7.1457, -4.4654, -3.4230, -5.9253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6292, -4.2620, -4.7339, -6.2885, -5.3597, -4.3206, -2.3016, -2.4596,
        -4.6587, -6.6752, -5.0672, -2.9939, -4.6539, -5.8971, -4.6559, -7.6940,
        -5.1684, -3.8881, -5.9968, -7.5077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9106, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7851, -5.1109, -6.5856, -2.9982, -2.4289, -4.7346, -7.1501, -4.1193,
        -3.4807, -3.8245, -4.8412, -3.7248, -6.7100, -5.3853, -3.6866, -4.1851,
        -7.3154, -4.2094, -4.4513, -6.5205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4026, -2.7401, -4.9705, -8.6153, -5.4536, -4.9046, -5.7755, -5.1518,
        -4.5663, -5.0275, -6.3019, -5.0254, -5.3350, -3.3664, -2.9632, -4.9911,
        -7.3013, -4.9684, -5.4821, -4.7156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6317, -3.1104, -6.8976, -6.8269, -5.1372, -6.7856, -2.7762, -3.3001,
        -5.4629, -6.9896, -4.6056, -4.0937, -2.3421, -2.2568, -2.5310, -6.2913,
        -4.6289, -4.6687, -3.9747, -5.1400], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0088, -4.6354, -5.0315, -4.8995, -4.7937, -6.2637, -7.3080, -5.7416,
        -7.0124, -4.5512, -3.2086, -4.7375, -6.6518, -3.8548, -4.8099, -2.1025,
        -2.0276, -4.8810, -6.4440, -5.4956], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3986, -6.9323, -5.0989, -3.4091, -3.9993, -2.8442, -2.0519, -7.6714,
        -4.2275, -5.9701, -3.8470, -3.9664, -1.8253, -6.6590, -6.1858, -4.4641,
        -5.1116, -2.2769, -2.0297, -4.5139], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6815, -6.4100, -7.7528, -6.6283, -9.1305, -5.9237, -3.8607, -5.5774,
        -4.8309, -3.0452, -6.4744, -7.0968, -5.0162, -4.4222, -3.4287, -5.1914,
        -3.4594, -5.5762, -6.6202, -4.8608], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3007, -4.9507, -4.3175, -2.8915, -2.3796, -4.3458, -6.4298, -4.4244,
        -5.2810, -2.9030, -3.5589, -6.0476, -7.1578, -4.3265, -4.9992, -3.4737,
        -3.5081, -3.7323, -5.9936, -6.4082], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7995, -5.8931, -3.8596, -3.4179, -6.5352, -2.1727, -3.0872, -5.7963,
        -6.5675, -4.8355, -5.9999, -4.7767, -4.7680, -4.4020, -5.1710, -6.0588,
        -5.1018, -5.1719, -3.0302, -2.6579], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3833, -4.3538, -2.8236, -3.4266, -3.0076, -6.6911, -6.3282, -4.0678,
        -4.3720, -3.5040, -2.2649, -5.5732, -6.9387, -5.3788, -3.8453, -3.1792,
        -2.8880, -7.3205, -7.4189, -4.7724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6046, -5.8247, -7.6009, -5.1804, -5.3059, -6.5575, -2.8797, -5.7177,
        -7.3724, -5.0714, -3.9765, -2.7866, -1.7483, -3.8171, -6.7360, -4.3324,
        -5.0405, -3.2276, -2.9474, -7.1798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1506, -5.0847, -4.2421, -2.5380, -2.6148, -5.2988, -6.9680, -4.8351,
        -3.5351, -3.1717, -1.9754, -4.7841, -6.2479, -4.5889, -5.2698, -2.7273,
        -2.1655, -4.7311, -7.2312, -4.5304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4345, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3043, -5.0064, -4.2761, -3.4367, -6.1424, -5.0934, -7.2026, -5.3187,
        -6.2384, -4.3679, -2.0828, -5.1700, -7.1296, -4.7540, -4.3647, -3.0259,
        -5.7160, -4.0451, -7.2619, -5.1838], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9517,  -7.0881,  -4.4702,  -8.0616,  -3.2691,  -6.0990,  -5.8828,
         -4.9556, -11.1368,  -5.2547,  -3.0316,  -6.0872,  -6.4474,  -4.7726,
         -4.1695,  -3.3525,  -4.9592,  -2.3115,  -5.6557,  -6.6254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8903, -5.3789, -3.2677, -3.6682, -2.9293, -5.6705, -6.5130, -4.8444,
        -4.1099, -2.7430, -2.3312, -3.5299, -6.9953, -5.0381, -4.2353, -3.1429,
        -3.5594, -3.1450, -5.3020, -6.7172], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7398, -3.0030, -6.5796, -7.1495, -4.3820, -6.0172, -4.4464, -5.6682,
        -1.4565, -8.1685, -5.3827, -4.8307, -5.3536, -6.8295, -8.0395, -4.0094,
        -6.0018, -4.8778, -7.2701, -8.1738], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3876,  -5.4801,  -6.9310,  -4.7467,  -6.0627,  -4.0544,  -3.1105,
         -8.6139,  -6.7655,  -5.1213,  -5.2788,  -3.6689, -19.7054,  -7.3888,
         -6.3934,  -6.5319,  -4.0821,  -7.9994,  -5.0244,  -5.1395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2743, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0592, -6.8890, -6.7592, -5.2197, -5.2341, -2.9199, -3.1997, -5.5398,
        -7.7727, -4.2524, -3.5487, -3.4019, -5.2431, -3.1371, -6.8104, -4.6955,
        -5.2271, -3.6819, -3.6795, -4.9542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2611, -2.9886, -3.5047, -2.5433, -5.3455, -7.1339, -4.5832, -4.2288,
        -2.9432, -5.3183, -5.0579, -6.2355, -6.5268, -4.7171, -5.0332, -2.8074,
        -1.9104, -5.7504, -7.5295, -4.2588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9468, -4.2808, -6.7108, -4.2412, -5.7066, -4.3505, -4.2518, -3.0540,
        -6.0405, -6.8547, -5.4338, -3.5880, -4.5431, -4.0648, -2.1738, -5.1141,
        -5.6919, -5.0913, -4.9530, -3.0681], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9414, -6.8294, -4.6774, -6.2958, -5.1058, -3.6621, -2.8567, -5.0647,
        -5.7821, -4.2580, -5.6649, -6.0097, -4.3187, -5.5982, -5.3330, -6.3269,
        -5.3139, -3.1415, -3.5988, -4.3487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0064, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3681, -4.6347, -6.9113, -6.2312, -2.7177, -3.9460, -5.5373, -7.0745,
        -5.4423, -4.4876, -3.6942, -1.6682, -2.8991, -6.8778, -5.2582, -2.9145,
        -3.3608, -5.1597, -3.7149, -6.0223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1365, -4.6641, -7.0758, -4.3302, -5.3851, -3.5959, -3.6598, -3.4491,
        -6.1412, -5.9638, -4.4867, -4.0222, -2.5029, -2.9702, -6.1327, -6.9661,
        -4.8417, -4.5963, -3.2658, -2.6553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7189, -3.0065, -5.1975, -6.5800, -4.8659, -3.7323, -4.2197, -3.3685,
        -4.8386, -6.7066, -6.8851, -6.0045, -4.2641, -2.8829, -2.4089, -4.5587,
        -6.4193, -4.5709, -4.1773, -3.5041], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1289, -6.5165, -4.3315, -3.9923, -2.8972, -1.8615, -4.4620, -6.6056,
        -4.7999, -4.5775, -4.7903, -2.1429, -3.8932, -7.1326, -5.2325, -4.5026,
        -3.8775, -3.0964, -4.1189, -7.6275], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5266, -4.6347, -5.7596, -5.5954, -4.1771, -5.3176, -6.9632, -5.5276,
        -3.8281, -3.3782, -2.7813, -4.7266, -7.4656, -4.7745, -4.3503, -3.2470,
        -4.8068, -2.7042, -6.5712, -6.2758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4189, -4.0671, -6.0310, -6.2114, -4.8987, -4.2292, -2.9809, -2.1634,
        -5.7641, -6.2277, -4.7762, -5.6570, -3.2291, -2.9745, -4.1004, -7.3309,
        -4.1162, -4.6271, -2.7562, -2.5444], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7858,  -6.3750,  -6.6452,  -4.4587,  -4.5885,  -2.4679,  -3.0000,
         -4.1714,  -6.8255,  -6.2615,  -5.4122,  -3.1471,  -3.4408,  -2.6201,
         -3.6990,  -6.7499,  -3.9588,  -4.4818,  -2.6972, -12.8254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9490,  -3.4843, -10.0178,  -5.5574,  -3.9381,  -5.0875,  -2.2603,
         -5.1717,  -7.7033,  -4.8837,  -4.2412,  -5.9480,  -7.6874,  -6.7348,
         -6.3877,  -7.7382,  -2.4863,  -9.3484,  -5.7152,  -6.8317],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2861, -3.0617, -3.2776, -6.4364, -6.2313, -4.8952, -3.4193, -3.3159,
        -2.9444, -5.5711, -7.4095, -4.1801, -4.9220, -4.2398, -2.1953, -4.7665,
        -7.0448, -5.0065, -3.3891, -4.4679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8862, -1.7980, -5.8793, -6.8541, -4.8333, -6.2169, -3.7083, -2.1731,
        -3.1207, -6.9425, -6.8529, -5.5432, -4.5867, -2.8641, -2.6779, -5.6916,
        -7.2211, -4.7080, -4.8470, -3.2512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6285, -4.7455, -4.2486, -5.6447, -5.3481, -5.8177, -7.7896, -6.3552,
        -6.2727, -4.5444, -3.4712, -5.8795, -6.7342, -4.1545, -3.3914, -2.6601,
        -4.9272, -4.1124, -6.5535, -7.2742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4866,  -2.1714,  -7.4134,  -5.4528,  -3.8773,  -4.8604, -19.1701,
         -4.7272,  -7.7151,  -4.0155,  -8.0892,  -4.9592,  -4.1622,  -3.7479,
         -1.9851,  -6.2713,  -7.3700,  -4.6678,  -7.0488,  -3.8853],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9295, -3.2430, -3.2868, -7.3040, -4.5575, -7.2839, -5.1705, -3.6398,
        -3.1498, -5.5339, -6.4224, -5.1247, -4.4979, -2.6129, -1.7959, -6.2393,
        -7.2737, -3.9122, -3.4446, -2.5556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0836, -7.4303, -3.9813, -5.0540, -4.1533, -6.0428, -6.3916, -4.6073,
        -5.6776, -3.1911, -2.0422, -6.5212, -7.4207, -5.2365, -6.3604, -3.8435,
        -3.7341, -3.7349, -6.3778, -6.9091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2455, -4.4159, -6.9174, -3.4218, -4.3257, -5.5394, -6.3685, -6.8379,
        -5.1504, -4.7003, -2.9604, -4.6918, -5.3019, -5.5766, -6.4484, -4.7440,
        -6.2891, -3.0385, -3.4213, -4.3809], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1550, -5.5227, -3.0792, -4.6470, -7.5666, -5.3581, -4.3379, -3.3435,
        -7.0258, -3.3272, -6.3388, -6.5355, -4.3121, -3.3821, -3.5572, -3.5977,
        -5.6506, -6.7159, -4.3785, -4.6729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1590, -2.3711, -6.1778, -6.2035, -4.6430, -3.5087, -2.9827, -2.5476,
        -3.3225, -6.7835, -4.3153, -5.2824, -3.5305, -3.1082, -3.7890, -6.2449,
        -7.1071, -5.4343, -3.0066, -2.8882], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7151, -2.6237, -4.4740, -7.5131, -5.1404, -5.4095, -5.4907, -3.1804,
        -3.5703, -6.3823, -6.2128, -5.1348, -3.0581, -2.7413, -2.9337, -4.0453,
        -6.8110, -6.2987, -5.2394, -4.9644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6882, -3.3027, -3.9076, -6.4532, -6.1344, -5.0253, -4.3047, -3.3309,
        -5.0572, -3.0645, -6.6957, -6.6008, -5.5717, -4.6692, -4.5223, -5.0267,
        -3.9816, -6.7368, -7.0333, -5.9287], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3665, -3.9066, -7.0460, -6.7309, -5.9988, -3.0673, -3.3998, -4.8356,
        -5.8491, -6.7437, -4.3847, -6.4147, -4.6709, -5.5952, -3.0193, -6.3649,
        -6.1897, -4.8434, -5.5800, -2.7305], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6607,  -6.5013,  -7.1855,  -5.4658,  -9.3285,  -6.4234,  -5.8769,
         -5.9049,  -3.3655,  -3.2838,  -7.3898,  -5.2898,  -8.1376,  -7.7942,
        -16.6662,  -9.8493,  -7.5122,  -4.0953,  -7.1756,  -5.0955],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0554,  -2.2296, -15.0180,  -9.8228,  -4.3099,  -5.0243,  -5.2119,
         -3.6569,  -3.0358,  -6.1490,  -5.0756,  -4.4641,  -3.7908,  -2.3370,
         -3.7658,  -7.4496,  -5.0903,  -5.3314,  -2.9518,  -2.4728],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9478, -4.8187, -5.3036, -2.9689, -3.0775, -3.3574, -5.5691, -6.2189,
        -4.9104, -6.5442, -3.9773, -1.7554, -4.9044, -7.2567, -5.2220, -3.8815,
        -3.9158, -2.7831, -3.1652, -5.9870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0322, -6.3193, -5.4578, -5.0795, -3.8245, -2.7150, -4.2145, -4.1162,
        -6.4020, -6.4774, -5.2552, -5.1962, -2.6799, -2.7869, -6.9943, -7.2678,
        -4.5447, -5.4174, -4.8586, -2.7626], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8103,  -3.9821,  -2.5598,  -6.3675,  -7.7042,  -5.3735,  -5.4446,
         -7.9028, -28.8014,  -5.7155,  -7.1769,  -6.8819,  -7.2128,  -3.5799,
         -8.1642,  -5.2595,  -8.4317,  -4.5803,  -3.5630,  -8.4093],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2004, -3.5993, -2.1285, -5.0650, -6.6524, -4.7807, -5.5128, -4.2492,
        -2.1991, -6.3133, -7.0374, -4.8603, -5.2579, -2.9988, -5.8379, -5.0219,
        -7.5837, -8.1316, -5.3268, -9.1291], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9761,  -6.2498,  -4.2241,  -6.8883,  -1.9471,  -8.3405,  -6.0069,
         -6.3121,  -3.7692,  -3.0060,  -4.0891,  -5.3873,  -6.9112,  -4.7213,
         -4.5964,  -3.9475,  -3.4143,  -4.4334,  -6.2239,  -7.1395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4670, -3.7789, -5.3931, -7.6084, -4.7555, -4.5491, -2.8496, -2.4352,
        -4.3436, -6.8973, -7.3824, -5.6138, -5.0154, -4.4906, -2.6789, -6.8422,
        -7.0313, -4.3717, -5.5808, -6.9340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7602,  -5.0386, -15.9927,  -3.9955,  -7.5213,  -3.6678,  -8.4779,
         -5.2622,  -4.9524,  -3.9398,  -4.6605,  -1.4098,  -6.5292,  -6.6614,
         -5.4644,  -4.6621,  -6.5044,  -3.7561,  -6.0936,  -6.8336],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2434, -3.4202, -5.0455, -7.3062, -4.3219, -4.3993, -3.7663, -4.6781,
        -3.3787, -6.3810, -6.1649, -4.6520, -3.1665, -3.2173, -2.4134, -3.3830,
        -6.4018, -6.4580, -5.2117, -3.6767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6802, -4.5113, -3.7624, -3.3645, -4.4314, -3.6782, -5.5935, -6.1620,
        -4.8020, -3.4501, -3.2393, -4.4893, -2.6213, -7.9210, -4.5952, -4.6234,
        -4.8866, -3.1855, -4.8546, -4.8365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4889, -8.3726, -5.7877, -3.2624, -4.6883, -2.0599, -5.1522, -6.4977,
        -5.5418, -3.9375, -3.2273, -4.1918, -4.2181, -5.8230, -6.3974, -4.3315,
        -4.3101, -3.8501, -4.3068, -4.4257], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3837,  -4.1957,  -5.9420,  -7.3241,  -4.9100,  -4.4173,  -4.0205,
        -28.7598,  -6.1839,  -7.2689,  -3.6923,  -8.4672,  -4.6861,  -7.2104,
         -4.6832,  -4.6187,  -3.3065,  -4.8865,  -7.0706,  -4.3148],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5807, -6.1648, -4.9257, -3.8006, -2.6857, -5.1184, -4.7801, -6.5694,
        -6.2022, -4.9406, -4.3513, -3.4500, -4.6756, -4.1798, -5.7147, -6.4923,
        -5.1533, -5.0382, -2.9639, -3.6425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6172, -4.7165, -7.2357, -6.5287, -6.0176, -2.6493, -4.1514, -3.0916,
        -3.4510, -6.0393, -7.1409, -4.6840, -7.6115, -2.8367, -3.1796, -3.6010,
        -6.2477, -5.2028, -3.9625, -4.7805], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4976,  -6.8731,  -7.8359,  -8.9909,  -4.7360,  -8.9152,  -4.3782,
         -8.2936,  -5.1034,  -4.4118,  -4.9055,  -2.2979,  -6.9507,  -7.5494,
         -4.8469,  -5.1444,  -2.8724, -19.7289,  -6.0911,  -7.5201],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2754, -4.1473, -6.5728, -4.6986, -5.1813, -3.7790, -2.5868, -4.2790,
        -6.7989, -4.9279, -3.9669, -3.7166, -1.6433, -5.2679, -6.9171, -4.2284,
        -3.8229, -3.8827, -4.0767, -2.9925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9808,  -6.3982,  -4.3585, -24.5985,  -5.7521,  -8.4369,  -6.6849,
         -6.3467,  -5.3991,  -6.7741,  -5.2913,  -6.2479,  -2.7735,  -6.3879,
         -5.2855,  -7.6812,  -4.7160,  -5.0928,  -4.1067,  -2.5895],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9265, -6.3462, -4.7991, -4.6086, -3.2303, -5.2335, -3.8871, -5.7297,
        -6.3485, -5.1789, -5.3275, -3.1132, -2.0995, -4.0687, -7.6120, -4.2673,
        -4.4830, -2.8634, -5.4293, -2.7024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3706, -6.6888, -4.9095, -5.0778, -3.6509, -3.4606, -5.4670, -7.2646,
        -4.9599, -6.9580, -3.0736, -2.1270, -4.4724, -7.6295, -4.3559, -3.2108,
        -3.3791, -4.5511, -1.8683, -6.1839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6867, -5.1777, -3.7108, -5.2919, -6.5709, -5.3074, -3.5342, -3.6874,
        -4.2882, -5.2508, -7.4294, -4.7931, -5.1646, -2.9613, -1.5803, -5.0538,
        -7.0615, -5.2725, -5.3040, -3.9611], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3297, -7.2550, -4.8952, -4.7084, -2.8926, -2.4958, -4.1314, -6.6731,
        -4.4698, -3.9160, -3.0070, -2.8468, -4.5205, -7.2675, -3.9638, -6.8230,
        -4.5187, -2.1209, -4.2022, -6.0361], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7374, -1.6464, -5.0623, -6.7690, -4.9800, -3.9761, -3.6930, -1.9970,
        -5.1378, -6.6901, -5.3869, -5.1913, -3.1059, -8.0339, -3.0585, -6.0315,
        -7.1664, -5.3394, -6.5593, -4.3071], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5430, -6.1823, -4.8417, -3.6446, -3.0177, -5.0835, -4.0173, -5.8193,
        -6.2792, -4.6649, -5.2518, -3.1010, -3.1827, -4.6899, -6.0880, -6.5354,
        -5.2166, -4.3391, -2.7108, -4.6947], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7434, -3.1714, -2.1310, -3.6403, -7.0799, -4.7349, -4.8656, -3.7502,
        -2.6306, -5.3915, -7.1713, -4.7325, -2.9469, -3.3354, -2.0284, -2.6864,
        -7.9551, -7.1335, -5.6627, -4.5101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1973, -5.3351, -6.8621, -4.5453, -5.9583, -4.7379, -2.7308, -6.4302,
        -6.9647, -5.3485, -5.1448, -4.0486, -2.1134, -4.1852, -6.4862, -6.3445,
        -5.5939, -4.0500, -6.1384, -1.4394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0911, -4.7455, -3.8399, -3.2378, -2.0434, -3.9721, -6.3284, -3.9230,
        -4.0452, -3.1157, -3.4117, -6.6448, -7.2710, -3.8698, -5.1178, -4.8560,
        -3.6264, -5.9753, -6.6938, -7.1185], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6063,  -6.8685,  -6.1984,  -4.0895,  -7.6885,  -4.6001,  -9.4130,
         -6.2216,  -5.6910,  -5.0286,  -3.7886,  -5.2949,  -8.0539,  -4.4227,
         -4.6695,  -5.3613, -14.2148,  -6.8315,  -6.5488,  -6.8731],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5605, -3.4384, -4.1456, -3.6705, -3.8614, -4.9522, -6.7953, -5.2418,
        -4.2235, -3.7485, -2.8438, -5.2947, -6.9939, -4.7052, -3.0748, -3.5996,
        -4.4353, -3.2476, -5.6088, -6.1130], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2547, -4.6685, -5.9453, -3.9053, -5.4961, -4.7769, -6.1422, -6.2135,
        -4.6236, -4.5942, -3.6153, -3.4608, -4.5265, -6.4350, -6.7775, -5.8394,
        -7.6769, -3.3839, -1.9260, -3.3780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1152, -2.7646, -2.9198, -6.2685, -7.2679, -5.0712, -4.1824, -6.2355,
        -4.2152, -3.9349, -6.0727, -6.1304, -5.1153, -7.0292, -3.3213, -3.6826,
        -1.7253, -5.3652, -6.2858, -4.6706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4383, -4.7660, -2.3250, -2.5591, -5.2822, -6.6421, -5.0758, -5.0198,
        -3.8650, -1.7536, -4.3261, -6.7260, -5.2861, -3.6766, -3.6382, -6.3690,
        -3.9431, -5.2657, -6.7399, -4.6124], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3672,  -3.8626,  -4.3766,  -2.7699,  -6.1817,  -6.5356,  -4.8800,
         -5.9939,  -3.7996,  -2.4226,  -4.1374,  -5.8889,  -6.4701,  -4.3588,
         -5.0935,  -3.2348,  -8.3863, -11.5739,  -6.0891,  -3.7647],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8717,  -3.4043,  -2.2790,  -7.1540,  -7.5298,  -4.6922,  -3.6939,
         -3.8939, -10.0894,  -7.9718,  -8.3024,  -6.3441,  -3.9126,  -8.6557,
         -5.1476,  -5.8344,  -4.1335,  -2.1753,  -4.4027,  -7.7161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2821, -5.5997, -3.8140, -5.7263, -7.1219, -3.8862, -4.7006, -6.5780,
        -4.8642, -2.9640, -3.3757, -4.2220, -4.0872, -6.2359, -6.2515, -4.8398,
        -4.0257, -3.0701, -7.1201, -3.2000], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3810,  -7.0942,  -4.2661,  -3.3740,  -3.7651, -15.9515,  -5.6570,
         -7.6875,  -7.6385,  -4.8533,  -7.6977,  -4.5184,  -3.1188,  -5.1405,
         -2.0033,  -4.0893,  -7.7858,  -4.5627,  -4.8559,  -4.4812],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7603, -2.5595, -2.3652, -3.1748, -6.5867, -3.8106, -4.4820, -3.7156,
        -1.6132, -4.0197, -7.8128, -4.2970, -4.7699, -4.2156, -4.3420, -4.3276,
        -3.7641, -6.8204, -5.2262, -9.2693], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6665, -7.4927, -2.4578, -5.9442, -5.9340, -4.6112, -5.9707, -3.1438,
        -2.6725, -3.6381, -5.3843, -5.9417, -4.0245, -4.1344, -3.6325, -3.5786,
        -3.6637, -6.3485, -4.1215, -3.7627], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9497,  -4.0601,  -3.2962,  -4.5565, -22.5371,  -7.0881,  -7.2586,
         -8.0230,  -5.8998,  -9.0462,  -4.8971,  -3.9012,  -4.7131,  -3.4117,
         -1.8458,  -6.3345,  -6.5122,  -4.5178,  -4.6248,  -3.9228],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5468, -7.4437, -4.9443, -3.3439, -3.9844, -2.2614, -3.7834, -5.5011,
        -5.9397, -4.6493, -4.6352, -3.6636, -3.1074, -7.5210, -7.1075, -4.2896,
        -5.1482, -3.2758, -1.8253, -4.2327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9806, -5.3838, -7.5135, -4.6572, -3.8977, -6.4663, -6.1702, -7.6679,
        -5.4415, -7.6676, -5.1726, -8.5104, -5.9337, -3.8039, -4.5751, -4.2421,
        -4.6194, -7.5434, -5.0959, -5.1879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7265, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0455, -4.2666, -3.7927, -5.1502, -6.6139, -4.3109, -6.1145, -3.4585,
        -1.7827, -5.6184, -6.9694, -3.7785, -4.8729, -3.7830, -2.5436, -2.9915,
        -6.5285, -6.3278, -4.8332, -5.8057], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7646,  -3.7555,  -7.4230,  -4.3512,  -5.5594,  -7.5914, -19.7478,
         -6.1683,  -7.0533,  -7.5522,  -4.7750,  -7.4123,  -5.4974,  -3.5388,
         -5.2893,  -2.3609,  -2.5210,  -6.3956,  -3.9266,  -5.8382],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1406, -4.0732, -2.3433, -4.2160, -5.9962, -6.2966, -4.3513, -3.7713,
        -3.6431, -2.1121, -3.8331, -7.2030, -4.3908, -3.5402, -3.6494, -6.7331,
        -2.3075, -5.8590, -5.9613, -4.6555], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4949, -7.6309, -4.7541, -4.0379, -3.5812, -2.9892, -4.9859, -6.8737,
        -4.2768, -5.7510, -4.4483, -3.9060, -3.6800, -6.4120, -5.8919, -4.6369,
        -2.7199, -2.6378, -5.8660, -2.5785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-25.6065,  -4.1830,  -8.0498,  -4.9111,  -5.4029,  -6.7088,  -4.8433,
         -7.3745,  -3.1764,  -2.2016,  -3.4031,  -7.9698,  -3.7363,  -4.9511,
         -3.0330,  -1.9086,  -4.8891,  -6.6001,  -4.2079,  -3.3183],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7628, -2.3938, -5.2333, -2.3491, -5.2645, -7.8310, -4.9781, -9.0950,
        -3.3246, -4.4956, -3.4659, -6.7086, -4.5784, -7.6066, -3.0696, -2.2924,
        -3.1841, -6.7482, -3.9846, -4.5040], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.2963,  -6.2459,  -9.4278,  -3.1857, -10.6352,  -5.0060,  -4.6176,
         -4.5941,  -4.0850,  -2.7526,  -4.4803,  -6.6475,  -4.0792,  -4.0014,
         -4.3473,  -2.8652,  -4.3428,  -6.2632,  -7.0375,  -4.9247],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7006, -4.7879, -4.0274, -2.9935, -6.1486, -5.9936, -4.2804, -4.2832,
        -2.9845, -1.7766, -4.1459, -7.1770, -4.1963, -4.7566, -2.5808, -6.9289,
        -2.8238, -5.8084, -6.1097, -4.6815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3367,  -8.2208,  -5.1045,  -8.0256,  -3.9661,  -3.6027,  -9.6660,
         -6.7867,  -5.6134,  -6.5331,  -8.9369, -10.9090, -10.4466,  -8.3334,
         -5.5047,  -7.7040,  -4.3622,  -3.4924,  -3.2485,  -2.0270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2933,  -5.2507,  -6.8835,  -4.0624,  -4.7470,  -4.6142, -19.9626,
         -4.3317,  -7.7160,  -6.1619,  -9.3359,  -5.1477,  -3.6917,  -3.7112,
         -4.1201,  -4.7091,  -7.8143,  -5.3205,  -5.3940,  -5.0240],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9664, -6.6174, -4.1574, -6.6253, -4.4141, -4.5489, -3.6398, -6.2247,
        -5.8415, -4.6758, -3.4402, -2.8288, -2.5715, -4.2086, -6.9972, -4.3656,
        -3.9208, -3.4687, -3.2177, -3.7271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1566,  -4.6072,  -8.3339,  -7.1136,  -8.4605,  -5.4695,  -5.7611,
         -5.0147,  -1.8171,  -3.9947,  -7.3095,  -4.5482,  -3.6403,  -6.0610,
        -21.8812, -14.1186,  -7.9932,  -3.1258,  -8.7430,  -5.1765],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3224,  -4.0814, -10.5200,  -4.7344,  -4.3619,  -5.1520,  -2.6242,
         -3.8997,  -6.1659,  -6.4579,  -4.9614,  -2.6044,  -3.0058,  -2.7031,
         -4.7486,  -7.0075,  -4.1808,  -3.4936,  -3.4652,  -2.5949],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2560, -7.5788, -4.1508, -5.2462, -3.0528, -1.7027, -2.4296, -6.5070,
        -5.9217, -4.7023, -3.7250, -3.1690, -5.1368, -4.1403, -7.6012, -5.0492,
        -5.9513, -3.5477, -1.5466, -2.1773], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2965, -1.6671, -4.5275, -6.6224, -6.8788, -4.6450, -2.7973, -2.5963,
        -2.0600, -4.2917, -6.7763, -3.7992, -4.6134, -2.8123, -1.7112, -4.3963,
        -6.6304, -4.8862, -2.6041, -4.3410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9963, -4.8546, -2.4381, -1.7641, -3.0690, -6.6162, -4.1713, -5.4320,
        -3.2521, -2.1319, -4.1816, -7.5283, -3.9812, -3.4363, -3.0686, -5.8808,
        -6.1437, -6.5673, -7.1411, -5.4811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3837, -2.8478, -4.1910, -3.2398, -2.0730, -5.9458, -5.7895, -4.4971,
        -6.5069, -2.0785, -2.2584, -5.1349, -6.9911, -4.7503, -3.7183, -2.9802,
        -1.5083, -6.2360, -7.1080, -5.0522], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0522,  -2.8067,  -3.7280,  -3.4589,  -6.6354,  -7.0361,  -3.7126,
         -4.9101,  -4.4784, -22.4386,  -6.0746,  -5.8974,  -7.4965,  -5.0173,
         -5.5585,  -7.1417,  -4.1187,  -4.1009,  -3.2456,  -1.3822],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5397, -6.9000, -4.4170, -2.8899, -5.6105, -3.1674, -4.8625, -8.4654,
        -4.5953, -3.9563, -2.8422, -3.5174, -2.2545, -5.2711, -6.0358, -4.1651,
        -4.9576, -2.3782, -4.6309, -3.4402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9259,  -6.5702,  -4.8266,  -3.9686,  -3.5445,  -5.3157,  -4.9242,
         -7.6173,  -5.2914,  -3.4561,  -4.0107,  -5.1834,  -6.2159,  -7.4962,
         -4.8021, -19.5033,  -3.3614,  -4.8356,  -6.4939,  -6.9495],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8314,  -4.4471,  -3.8139,  -3.6402, -11.5174,  -7.8708,  -5.9680,
         -7.8355,  -5.0702,  -9.0125,  -5.5917,  -5.3834,  -5.1948,  -3.2575,
         -4.9821,  -7.0010,  -3.8006,  -4.6376,  -3.9973, -18.6495],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7877,  -3.8855,  -3.5358,  -7.1916,  -6.9553,  -3.8427,  -4.1143,
         -3.6920, -17.4862,  -7.7824,  -6.9353,  -7.3555,  -4.0248,  -5.3383,
         -6.2657,  -4.6517,  -5.3549,  -4.2690,  -2.6446,  -5.2309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5898,  -6.7071,  -5.3368,  -4.3586,  -7.6746,  -3.3404,  -2.5435,
         -3.8831,  -7.1596,  -4.5914,  -2.9824,  -4.7529, -28.9238,  -7.5547,
         -8.5841,  -4.3618,  -7.2546,  -4.3881,  -9.0836,  -2.5225],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8244, -2.0056, -2.7897, -4.8645, -7.1810, -5.0984, -3.6828, -2.8478,
        -2.5514, -3.9194, -5.4517, -5.9783, -3.8120, -3.3278, -3.3746, -5.9100,
        -3.0332, -5.8463, -6.4407, -4.1730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7084, -5.7118, -4.6244, -2.6216, -2.8149, -2.2742, -4.4239, -6.6192,
        -3.8196, -5.1397, -3.0414, -4.7560, -3.2323, -5.9680, -5.6739, -4.0393,
        -4.7042, -3.4098, -4.8290, -1.7465], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6669, -21.2300,  -4.2629,  -8.2809,  -3.5112,  -8.1962,  -4.6133,
         -7.1626,  -3.6664,  -2.0385,  -2.3583,  -5.6641,  -6.3201,  -5.1160,
         -4.7046,  -4.4809,  -1.6029,  -2.5759,  -5.4982,  -5.8667],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3380, -4.6928, -4.4950, -4.0266, -2.7763, -4.5294, -6.1960, -6.4369,
        -4.8272, -8.4107, -2.7845, -4.5935, -3.7886, -6.9660, -6.2834, -4.1339,
        -3.6478, -3.2392, -3.4639, -6.3638], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4033, -6.8731, -3.2763, -3.5505, -4.1328, -4.4192, -6.6159, -4.4024,
        -4.2379, -2.8853, -3.2113, -4.3055, -6.6656, -4.3449, -7.2780, -2.5591,
        -2.0263, -4.5265, -6.5020, -3.9872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1109, -4.8214, -9.1960, -3.0454, -2.0094, -3.1531, -6.7256, -3.6869,
        -3.2689, -3.5731, -3.5664, -2.7087, -6.1431, -6.1395, -4.2083, -3.9904,
        -3.3005, -3.1112, -5.7804, -7.0511], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1629,  -6.2255,  -4.0708,  -3.1523,  -5.5268,  -7.6527,  -4.0033,
         -5.4120,  -3.9344, -14.5940,  -5.9666,  -7.3525,  -4.3326,  -8.4088,
         -4.8271,  -7.8805,  -5.4962,  -3.3424,  -5.3405,  -7.5046],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7454, -5.8657, -7.0524, -4.5140, -3.9783, -3.6059, -4.4130, -3.3640,
        -5.5571, -5.1168, -4.5552, -5.1731, -3.1556, -2.7998, -4.3651, -6.1807,
        -4.5979, -4.5191, -4.7056, -2.0858], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1630,  -3.8593,  -5.8352,  -6.5684,  -4.5759,  -5.3483,  -3.2782,
         -4.1701,  -7.4584,  -6.6207,  -4.1375,  -7.6208,  -4.8535, -27.0777,
         -4.4277,  -6.1969,  -4.8904, -12.6116,  -2.8188,  -2.0453],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0271,  -2.4058,  -7.9927,  -5.1412,  -7.1146,  -3.9070,  -2.4383,
         -6.3022,  -7.5381,  -5.2892,  -5.2964,  -7.4515, -31.5263,  -5.6982,
         -7.1531,  -6.8864,  -7.1497,  -3.4729,  -8.0844,  -4.6874],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6113,  -5.3877,  -6.0709,  -4.1522,  -4.2096,  -2.0353,  -2.5813,
         -5.5439,  -6.3312,  -3.9201,  -3.3699,  -3.2016, -30.4106,  -6.8720,
         -8.0440,  -7.1090,  -2.1036,  -8.4069,  -4.4671,  -3.8064],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2119, -4.8639, -3.8058, -3.1437, -2.9812, -5.9140, -5.8380, -4.5419,
        -6.3149, -3.8432, -1.7339, -3.9967, -6.7239, -6.2606, -4.5277, -4.7692,
        -2.7233, -2.7421, -5.7274, -6.6565], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2446, -6.9812, -3.5817, -3.8636, -3.9077, -4.9272, -2.4452, -5.9550,
        -6.2864, -4.5736, -4.6361, -2.8486, -1.3231, -5.6727, -6.9496, -3.7929,
        -5.1303, -2.9223, -2.3773, -5.8140], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9104,  -4.6743,  -2.9965,  -5.6733,  -2.0701,  -6.7200,  -4.2865,
         -3.4470,  -5.4377, -23.7218, -13.3027,  -5.3508,  -5.6929,  -7.2915,
         -8.0260,  -3.9320,  -7.1960,  -3.9194,  -6.4465,  -3.8271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5296,  -6.6812,  -5.4258, -39.6410,  -7.9056,  -7.4465,  -5.5263,
         -7.7393,  -6.2885,  -8.2878,  -2.3545, -10.8552,  -4.6980,  -5.3941,
         -4.8888,  -1.9322,  -3.5807,  -5.9147,  -6.4103,  -4.3143],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8538, -10.3692,  -1.5400,  -1.5708,  -5.1014,  -7.5573,  -4.5177,
         -3.7109,  -4.2597,  -2.4315,  -5.0829,  -6.7010,  -6.6603,  -5.3806,
         -6.9158,  -2.4053,  -1.9398,  -3.2106,  -7.1630,  -4.2507],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3104, -6.4813, -6.0398, -7.2129, -7.9309, -7.4967, -6.5021, -7.1974,
        -7.1041, -8.0142, -6.5818, -7.4845, -7.2778, -6.7704, -7.0246, -7.4543,
        -6.8798, -7.3591, -8.0033, -7.2500], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.1688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7672,  -4.6930,  -7.2724,  -6.7031, -14.5891,  -4.7133,  -7.3764,
         -3.0631,  -7.5895,  -4.2282,  -4.7042,  -3.6353,  -2.5482,  -3.1896,
         -6.3091,  -6.1650,  -4.5649,  -2.6392,  -3.0993,  -4.2359],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0119, -2.3794, -3.1226, -6.1973, -6.8822, -4.9274, -5.0199, -2.9919,
        -5.6596, -5.6366, -6.6819, -5.9668, -4.8020, -6.5768, -2.7852, -3.1205,
        -5.1778, -7.0659, -4.1297, -4.7750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2361,  -4.4534,  -3.6871,  -4.2279,  -5.0278,  -6.9992,  -4.7540,
        -13.8246,  -3.1092,  -4.1167,  -4.9328,  -7.1142,  -3.6851,  -4.7146,
         -3.5406,  -3.3454,  -6.1217,  -7.0102,  -4.1395,  -4.5293],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7665, -2.8750, -6.2504, -6.2479, -4.2964, -5.1830, -5.0148, -4.1280,
        -2.8275, -6.6615, -4.1389, -6.0038, -3.8819, -3.1110, -3.3307, -4.5411,
        -6.6505, -4.7137, -4.4333, -3.6038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3016, -3.3730, -2.5137, -4.7800, -6.5621, -4.4192, -3.7944, -3.0240,
        -5.1849, -2.9375, -6.1834, -6.1247, -4.1041, -8.9375, -3.4626, -2.1097,
        -4.4163, -5.1062, -6.4839, -4.7011], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8885,  -4.4809,  -6.7852,  -3.3253, -41.0799,  -7.4524,  -8.0205,
         -2.4764,  -9.5803,  -5.1392,  -6.1890,  -6.3587,  -6.3697, -14.8870,
         -2.5644,  -7.5312,  -4.1488,  -7.1711,  -4.5856,  -4.1169],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9576, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0960, -7.4826, -4.7223, -8.5238, -4.2559, -3.2198, -3.9448, -3.1924,
        -1.9580, -5.3848, -5.5998, -4.7661, -5.7333, -3.1620, -1.5561, -4.1548,
        -6.8443, -6.4027, -4.7313, -9.2870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1461,  -5.3888,  -7.6083,  -4.3758,  -5.7145,  -3.7677,  -4.5459,
         -2.4106,  -5.5356,  -6.3817,  -3.8783, -10.7375,  -3.8008,  -2.0689,
         -5.0422,  -6.9635,  -4.8079,  -9.0897,  -2.9940,  -4.6453],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4818, -5.0346, -3.3321, -5.9884, -6.4264, -7.2182, -5.3029, -3.6720,
        -3.1629, -3.8636, -3.8476, -6.0378, -6.1095, -4.4557, -3.7899, -4.5397,
        -2.0009, -3.3552, -6.0966, -5.1891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9808, -6.1710, -3.5588, -4.4647, -6.5485, -4.5033, -5.2951, -4.3365,
        -7.3990, -4.1100, -3.6175, -6.8117, -4.5990, -5.4915, -2.6776, -2.0790,
        -4.3638, -7.6201, -3.7831, -3.9167], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6423,  -5.9291,  -6.3910,  -4.7936,  -6.9726,  -3.3639,  -1.5099,
         -8.0697,  -7.2432,  -4.1648,  -3.6621,  -4.9139, -33.9393,  -7.8298,
         -7.7972,  -3.1616,  -7.1950,  -4.4735,  -4.7003,  -3.9670],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7720, -3.7902, -7.6361, -5.1217, -3.3273, -4.1541, -2.2312, -3.0170,
        -5.8254, -6.3611, -3.8545, -3.2891, -2.7937, -3.8573, -3.0732, -7.4221,
        -4.4589, -4.0690, -2.7296, -3.3726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7092,  -4.6032, -20.4057,  -5.7790,  -7.4520,  -4.6430,  -8.3156,
         -4.3719,  -5.5163,  -4.3256,  -1.9238,  -3.1830,  -5.8678,  -6.1899,
         -4.1465,  -4.9016,  -3.1601,  -4.5183,  -4.4684,  -5.9600],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8144,  -4.2883,  -4.0243,  -4.5135,  -6.2210,  -5.8750,  -4.8273,
        -11.7497,  -3.7427,  -2.2486,  -4.3544,  -7.0534,  -3.8928,  -4.4274,
         -3.0946,  -5.3884,  -2.3275,  -6.1723,  -6.0384,  -4.6074],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7684, -3.7262, -2.9624, -5.3283, -4.5276, -5.5281, -6.2412, -4.1927,
        -5.5406, -3.3553, -1.7465, -3.6660, -5.9684, -4.9729, -3.4414, -6.2470,
        -4.9785, -2.2946, -6.1477, -7.0374], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5303,  -4.2814,  -8.2048,  -4.7964,  -3.6057,  -5.1801,  -3.4684,
        -11.8698,  -6.6477,  -3.1720,  -4.9678,  -3.4748, -29.0240,  -4.2096,
         -7.5927,  -3.1883,  -5.0847,  -6.8276,  -4.3537,  -4.3159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0802, -3.3851, -2.8443, -3.6158, -6.1620, -6.7159, -3.8283, -4.6300,
        -4.2375, -2.1060, -3.9029, -6.0358, -5.6532, -4.4402, -4.1079, -2.4667,
        -5.3052, -4.1396, -6.8557, -3.8583], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4398,  -5.5932,  -4.1654,  -7.1497,  -8.1909,  -5.0905, -11.9152,
         -5.8687, -11.4965,  -7.3961,  -5.0456,  -4.6938,  -3.6350, -12.6094,
         -4.0766,  -7.6983,  -6.8182,  -9.1759,  -6.4555,  -7.2422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1588, -7.0591, -6.0122, -4.9317, -4.2856, -3.1304, -2.7889, -5.3916,
        -6.0963, -3.8791, -4.0766, -7.5205, -4.4114, -3.2872, -4.6285, -7.0957,
        -4.7705, -1.8299, -4.0820, -1.9326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4863, -7.4895, -6.6165, -3.5826, -8.5297, -3.9266, -4.9050, -4.5685,
        -3.7403, -2.8917, -5.5968, -6.5994, -4.5430, -3.7798, -3.1984, -3.3843,
        -4.2919, -7.3383, -4.1321, -3.8253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4136,  -7.1710,  -6.0469,  -5.1806,  -5.0249,  -4.1196,  -7.8073,
         -6.9888,  -4.9732,  -3.9808,  -5.3734, -23.8101,  -6.2012,  -8.0786,
         -3.7825,  -8.0283,  -4.3853,  -4.1455,  -5.1116,  -3.5773],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3060, -5.0348, -6.9048, -3.6168, -1.4297, -6.3468, -6.4789, -3.4719,
        -5.3079, -2.9090, -2.3107, -4.2747, -6.6585, -4.5179, -4.3423, -2.8170,
        -3.0996, -7.3988, -7.2240, -3.6601], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7055, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8817, -1.7280, -3.5348, -6.4732, -4.0397, -4.9101, -5.3165, -4.7699,
        -4.2767, -7.0497, -4.8446, -3.3698, -3.1050, -4.5885, -3.6098, -7.3258,
        -3.5447, -6.7606, -3.2293, -4.3918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1739, -7.7858, -2.9120, -6.5429, -5.5827, -4.5801, -7.3685, -4.1141,
        -2.8584, -4.8935, -7.4795, -3.7207, -4.7699, -3.1968, -2.1119, -4.4644,
        -7.1820, -4.1578, -6.7231, -2.7332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4521, -4.3354, -8.4438, -3.9495, -3.7879, -4.2785, -2.1847, -4.9030,
        -7.3295, -3.2968, -4.7457, -3.0358, -3.6624, -6.0950, -6.8082, -4.2539,
        -4.7640, -2.9790, -2.0718, -5.3517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3487, -4.0515, -2.7032, -6.5498, -2.0858, -5.5509, -6.7803, -4.1142,
        -9.1806, -3.9288, -2.1992, -5.8591, -6.3948, -3.5726, -3.8036, -2.2752,
        -1.8813, -4.3940, -6.4794, -4.9462], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7050, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0727, -2.5444, -4.2401, -6.9995, -3.5152, -5.7722, -2.3510, -4.0323,
        -2.8943, -6.3293, -5.7788, -4.6923, -3.9549, -2.4824, -2.7386, -4.7925,
        -6.5984, -3.9858, -2.6291, -4.7324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8956, -3.3173, -4.3540, -2.3661, -5.3796, -7.4069, -3.5803, -3.7526,
        -3.4378, -2.8974, -3.3293, -5.6674, -5.8781, -4.8634, -3.6423, -2.3228,
        -2.7131, -4.6192, -7.3602, -3.6095], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2696, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3003,  -7.0281,  -4.0512,  -3.7596,  -5.6624, -24.3172,  -5.2414,
         -6.9532,  -4.0498,  -7.0522,  -5.0231, -12.5330,  -3.8763,  -2.9561,
         -5.7661,  -7.1631,  -5.0651,  -4.5437,  -4.5050, -28.3432],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8557,  -5.2073,  -5.5047,  -3.1560,  -4.0712,  -4.6989,  -5.1347,
         -6.6913,  -4.2919,  -7.1175,  -2.9743,  -4.5304,  -5.3623,  -7.0325,
         -5.7001,  -4.2385,  -6.9782, -14.4844,  -8.6804,  -6.1916],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.5512,  -5.8029,  -9.1750,  -6.9204,  -9.4921,  -4.8054,  -3.0940,
         -5.3425, -17.1824,  -4.1868,  -4.9811,  -7.7211,  -5.0792,  -8.6065,
         -5.3921,  -3.3520,  -4.6271,  -7.1751,  -4.3588,  -3.3204],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5583, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1615, -3.0996, -2.8743, -4.3412, -6.7139, -4.1608, -5.0769, -4.4325,
        -2.5185, -4.7157, -5.9040, -7.0491, -5.5812, -7.0732, -3.1763, -6.4249,
        -2.6737, -6.4294, -4.4636, -3.3224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1329, -4.9262, -4.4822, -4.0788, -2.7095, -5.5606, -6.3473, -4.3715,
        -3.7396, -3.3236, -4.7283, -1.9912, -5.8580, -6.5442, -5.1936, -3.2220,
        -2.5042, -2.5191, -2.9348, -6.5627], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6558,  -4.0674,  -4.6932,  -2.8469,  -4.0234,  -2.4212,  -5.6066,
         -6.1150,  -4.2924,  -4.3823,  -4.2954, -19.6831,  -6.5376,  -8.4178,
         -4.2338,  -8.2647,  -3.9811,  -4.9917,  -3.4689,  -1.9199],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5449, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.0170, -14.6823,  -9.1252,  -5.4679,  -7.4097,  -5.0833,  -4.8586,
         -3.5372,  -3.9947,  -3.4891,  -6.6679,  -6.4468,  -4.9220,  -2.5002,
         -5.1961,  -2.1903,  -3.9124,  -6.5283,  -4.0658,  -7.3317],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9293,  -6.0717,  -7.1274,  -5.5965,  -3.3827,  -5.6140, -13.2259,
         -5.2860,  -8.3008,  -3.6974,  -7.1039,  -5.8733, -11.4941,  -4.5848,
         -3.7440,  -5.0241,  -7.7896,  -4.9949,  -4.7566,  -5.7243],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1359, -8.7885, -5.0353, -4.2916, -3.8474, -3.6877, -6.0095, -7.1215,
        -4.1474, -4.1348, -4.5270, -3.9517, -5.1033, -7.2094, -5.3631, -5.2817,
        -6.7522, -1.8254, -3.4806, -6.7891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9978, -3.1081, -3.4226, -7.3138, -4.2286, -4.2488, -3.8960, -3.1051,
        -3.8083, -5.8624, -6.1702, -4.6188, -4.3356, -2.6630, -2.3671, -8.8601,
        -6.6321, -4.7103, -6.3125, -7.4449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7384,  -5.3162,  -4.1173,  -1.7897,  -2.0706,  -4.3659,  -6.2438,
         -3.9130,  -3.7489,  -3.8364, -27.5483,  -3.9928,  -7.6843,  -4.4437,
         -7.3570,  -4.2729,  -6.4935,  -3.5047,  -4.0540,  -8.9575],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2141, -5.6003, -2.7718, -3.3422, -5.9616, -6.5407, -4.5596, -3.3549,
        -5.3891, -3.1624, -2.8426, -5.7607, -5.7574, -4.7756, -3.3548, -4.0478,
        -7.3961, -2.6151, -5.2779, -5.8108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6268, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.9067,  -4.0394,  -4.0463,  -2.0000,  -5.5796,  -5.5142,  -4.0275,
         -6.4049,  -9.2302, -15.6427,  -4.3628,  -8.1074,  -4.5796,  -7.0274,
         -4.5424,  -5.0958,  -3.9262,  -4.3976,  -3.5015,  -5.8131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3244,  -7.7883,  -4.0376,  -3.0956,  -3.8521,  -5.8571,  -6.0484,
         -5.1042,  -7.3971,  -2.4453,  -3.7710,  -3.7489,  -7.2582,  -4.2723,
         -4.0400,  -6.9441, -13.4933,  -5.9904,  -5.8026,  -6.1276],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9864,  -7.1020,  -4.3589,  -5.9297,  -5.2015, -22.4483,  -5.6970,
         -7.2877,  -2.8680,  -8.5203,  -4.4234,  -3.9445,  -4.0194,  -2.9738,
         -3.5346,  -5.5350,  -6.3777,  -4.3577,  -5.7341,  -2.2284],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8092, -7.9027, -3.5915, -2.9022, -3.3666, -5.5040, -6.8636, -5.1345,
        -3.2344, -2.7042, -2.5381, -5.4887, -7.1432, -3.7610, -5.2246, -4.6246,
        -2.3545, -5.9026, -7.3507, -3.2821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4139, -5.0456, -3.0361, -2.6241, -1.8566, -2.6448, -7.7569, -4.4134,
        -6.1616, -4.2878, -4.0667, -4.3368, -4.9687, -6.0102, -4.2400, -6.1901,
        -2.8927, -3.4202, -3.7046, -7.6741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7742,  -2.7898,  -2.5656,  -6.4904,  -7.2705,  -4.4523, -10.2588,
         -6.7203,  -2.8507,  -2.3047,  -5.9680,  -6.9426,  -5.0268,  -4.0521,
         -2.8758,  -2.7754,  -4.3051,  -6.2477,  -4.3392,  -4.3979],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5915,  -4.8290,  -6.7347, -19.0607,  -5.1685,  -7.4062,  -5.3680,
         -7.5585,  -1.5346, -11.9928,  -5.0486,  -6.0122,  -4.9248,  -3.1220,
         -2.9892,  -5.3809,  -6.1701,  -4.8940,  -4.8786,  -3.6775],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4132, -3.3410, -7.8981, -5.2662, -4.4104, -3.3913, -2.4803, -7.1169,
        -7.2713, -4.1705, -3.1365, -3.2865, -2.8650, -5.5655, -6.2997, -6.8336,
        -5.3388, -3.3193, -3.8513, -3.9366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5782,  -6.7881,  -6.6033,  -4.0412,  -5.2440,  -4.0424,  -6.8343,
         -4.5384,  -5.8552,  -4.3757,  -9.2478,  -4.8707,  -6.8547,  -1.6183,
         -5.4551,  -7.2138,  -4.1019, -11.3536,  -4.4060,  -3.0030],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2682, -5.1664, -4.9301, -3.8095, -4.4528, -4.9080, -7.7144, -5.2998,
        -3.9683, -3.4448, -3.0338, -5.2866, -6.9678, -4.6682, -6.3800, -3.9566,
        -3.8408, -5.8771, -6.4860, -4.7196], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9816, -2.9805, -3.6983, -6.1579, -7.5007, -4.6493, -3.8780, -2.2111,
        -2.3325, -4.7076, -7.2306, -4.9842, -5.9060, -4.0676, -4.1058, -4.2737,
        -7.5248, -3.9979, -4.5876, -3.9745], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4073, -3.7353, -3.3759, -3.2563, -2.6048, -4.9917, -7.2938, -4.6476,
        -5.9201, -3.4214, -5.0295, -3.8125, -6.2749, -6.1241, -4.4802, -5.4583,
        -3.2106, -4.1628, -3.3105, -6.0197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5499, -4.3610, -4.4976, -3.9432, -4.6951, -4.7026, -5.7186, -4.7266,
        -5.3182, -4.6242, -3.1628, -4.6098, -7.0502, -4.0002, -6.1666, -3.2222,
        -4.4591, -2.3407, -6.0466, -6.7376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2596,  -3.9889,  -3.5487,  -2.1516,  -7.3582,  -5.0320,  -4.1861,
         -6.9140, -15.6258, -12.5922,  -8.9459,  -4.3239,  -6.9660,  -3.9735,
         -5.6697,  -3.1900,  -3.9227,  -2.7243,  -7.6484,  -4.3004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4038, -2.8156, -6.4937, -4.7988, -3.7400, -4.5194, -2.6675, -4.4417,
        -5.9927, -7.1265, -5.4531, -4.1762, -3.4067, -2.4575, -4.0617, -5.8472,
        -4.8019, -4.5911, -3.5022, -4.1662], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5606, -5.0291, -6.2012, -4.8514, -4.7788, -3.3382, -2.3597, -3.3243,
        -6.4932, -6.5585, -4.8165, -4.8678, -2.0598, -1.9945, -3.8453, -6.4983,
        -3.8437, -5.7958, -5.4519, -3.3051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7498, -16.2154,  -3.3993,  -6.8093,  -7.2667,  -5.4110,  -5.1015,
         -2.8633,  -2.2407,  -4.9695,  -7.3355,  -4.5122,  -3.2987,  -4.8560,
         -3.5746,  -4.1274,  -6.2091,  -6.0207,  -4.4333,  -7.5214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2174,  -4.8152,  -5.9104,  -7.2427,  -3.5075,  -5.9644,  -6.3960,
        -33.1536,  -6.6497,  -7.3678,  -3.5539,  -6.9002,  -4.4949,  -5.7219,
         -3.7854,  -2.2419,  -1.7290,  -5.7348,  -6.1533,  -4.4075],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4474, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4402,  -5.4998, -11.2976,  -5.2266,  -3.0878,  -3.6339,  -7.1511,
         -4.3226,  -4.8274,  -6.9320,  -5.4599,  -9.3846,  -7.1032,  -6.8490,
         -7.1129,  -3.9883,  -8.0881,  -4.9124,  -4.8212,  -4.2453],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0332, -6.1899, -6.4582, -4.8177, -3.5861, -4.3942, -6.6502, -4.8572,
        -4.5084, -2.4055, -2.1752, -4.8739, -7.3581, -4.2020, -5.6797, -2.3329,
        -1.8222, -3.4325, -6.7217, -4.0777], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7089, -3.8682, -6.0106, -6.6833, -7.0058, -5.4411, -6.3335, -4.2903,
        -5.2770, -3.5749, -6.3566, -6.1774, -4.6534, -4.3402, -3.2709, -4.2033,
        -4.8782, -7.0942, -4.3878, -3.4771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8876,  -4.8925,  -4.8509,  -3.1308, -16.9979,  -9.3124,  -5.5879,
         -2.3707,  -5.1193, -14.5272,  -5.9624,  -7.7595,  -5.1411,  -7.9914,
         -4.8647,  -5.1880,  -4.6094,  -9.9953,  -4.1602,  -6.1814],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8265, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4140,  -4.7373,  -4.0045,  -4.8125,  -2.5501,  -4.2208,  -6.7217,
         -7.2270,  -5.1501,  -5.3622,  -3.2441,  -3.5647,  -4.8584,  -7.3173,
         -3.8750,  -2.1862,  -6.4894, -15.1664,  -6.6769,  -7.5740],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6576, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0044, -8.2685, -3.7646, -4.4707, -3.7143, -3.9355, -3.8377, -5.2695,
        -6.5491, -4.6127, -5.3028, -2.7129, -2.0307, -3.2713, -6.6592, -4.5014,
        -2.8395, -3.8414, -2.7562, -1.9896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7992,  -3.8076,  -3.9671,  -3.8367, -18.1140,  -5.3099, -11.8598,
         -3.2477,  -9.2193,  -4.7005,  -4.6344,  -3.3303,  -3.0891,  -3.2598,
         -5.9846,  -4.8469,  -3.6028,  -4.8700,  -6.3601,  -3.4917],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9578,  -4.0714,  -4.1219,  -6.4094, -15.0462,  -5.8637,  -9.6564,
         -3.3450,  -8.4130,  -3.7851,  -5.6426,  -6.1152,  -4.0907,  -3.1888,
         -4.8298,  -6.9210,  -5.4273, -10.0228,  -3.8978,  -2.2841],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4514, -1.6976, -4.5883, -6.7436, -4.2237, -3.5246, -3.8455, -2.8581,
        -6.1315, -6.7386, -3.9118, -3.5707, -2.4091, -4.2373, -2.5712, -6.3919,
        -6.2666, -4.8619, -3.1305, -4.1234], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2359,  -4.3402,  -2.9725, -18.4638,  -5.9181,  -9.3331,  -4.1725,
         -8.6463,  -4.2711,  -7.7886,  -4.2902,  -2.9337,  -2.4736,  -8.4766,
         -4.3069,  -6.3553,  -3.6640, -28.1815,  -5.9694,  -7.2020],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1998, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5740,  -2.7735,  -4.2084,  -6.0579,  -7.8029,  -4.8732, -11.9213,
         -2.7930,  -1.7893,  -6.6353,  -7.0679,  -3.6467,  -4.0624,  -6.6886,
        -25.4086,  -7.2008,  -6.4594,  -5.8377,  -2.5120,  -8.6407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4141, -4.6758, -5.3993, -3.8891, -2.4517, -5.1605, -6.2064, -3.2931,
        -3.3572, -2.8135, -2.0782, -3.2265, -6.5164, -4.8873, -5.8737, -6.3358,
        -3.4848, -2.4817, -5.4099, -7.3538], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5332, -3.1030, -6.2876, -3.8723, -5.6282, -3.2331, -3.7993, -3.8842,
        -7.2987, -4.4580, -4.1275, -5.3688, -6.6133, -5.1309, -7.6749, -6.2406,
        -7.8453, -3.5690, -3.5933, -6.4149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8855, -7.8181, -7.2875, -6.7461, -7.3358, -7.5900, -6.4659, -7.7257,
        -6.4602, -7.8051, -7.9226, -6.2675, -7.1208, -7.6267, -7.2338, -8.1366,
        -6.6320, -7.5890, -8.1485, -7.0170], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5466, -2.8269, -3.0849, -4.0326, -5.6778, -6.5953, -4.1660, -4.5843,
        -2.6833, -2.3692, -5.5088, -6.8924, -4.2231, -4.9700, -5.6063, -2.6071,
        -4.9547, -6.8819, -4.2589, -2.4108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9928, -5.4091, -3.3221, -2.3084, -4.8111, -7.8131, -3.8251, -4.2597,
        -3.7184, -3.1424, -3.3814, -7.6711, -7.0946, -5.3392, -8.2054, -4.7583,
        -1.6715, -4.6095, -5.8855, -4.0375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0357, -3.8370, -6.5507, -2.9491, -2.7786, -5.5951, -7.2773, -4.8630,
        -6.3989, -4.3721, -4.0891, -3.4055, -6.5161, -6.4172, -4.6932, -5.1659,
        -3.1735, -4.1774, -3.0742, -5.9917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7923, -3.1723, -4.6359, -3.1337, -1.7007, -2.9185, -6.4061, -6.6683,
        -5.2395, -4.3035, -2.7390, -2.7350, -5.2195, -7.0684, -3.8950, -5.0247,
        -4.6175, -5.6339, -3.8458, -6.3907], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6395, -6.6602, -4.3048, -3.5621, -3.2253, -2.9340, -6.3060, -6.7979,
        -4.5479, -4.2411, -3.8964, -6.2628, -2.9572, -6.6878, -3.8129, -3.6200,
        -3.5545, -5.6380, -3.7312, -5.1700], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4820,  -4.3089,  -2.3576,  -6.5605,  -7.3551,  -4.3123,  -5.7865,
         -5.7132, -25.4169,  -5.9335,  -8.6411,  -2.5558,  -9.2698,  -5.1127,
         -5.4161,  -5.6184,  -4.7747,  -3.2584,  -4.4412,  -7.0349],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2288,  -6.3455,  -4.5931,  -3.4480,  -3.8940,  -3.7827,  -4.0166,
         -7.4109,  -4.3806,  -5.2407,  -3.7897,  -5.2753,  -1.6432,  -6.3230,
         -6.4408,  -4.4046, -18.2529,  -3.1909,  -1.7629,  -4.2148],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5708, -1.9747, -6.3142, -6.5775, -4.4597, -4.6066, -3.5915, -6.8437,
        -3.7070, -5.0889, -7.0225, -4.0126, -4.3951, -3.8822, -2.6598, -4.8955,
        -5.2925, -6.3173, -4.5177, -3.2826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7646,  -6.1449,  -2.4076,  -6.6768,  -6.3477,  -4.0300,  -5.0286,
         -3.3174,  -6.8561,  -5.6972,  -7.6751,  -5.2115,  -3.7426,  -4.4925,
        -10.1595,  -3.4814,  -6.4885,  -6.8517,  -5.6479,  -8.5714],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5482, -2.8175, -3.6717, -5.0257, -6.1471, -4.3049, -3.0541, -3.4409,
        -3.3740, -3.3625, -7.0372, -3.8387, -8.9355, -6.7277, -3.1775, -2.1472,
        -5.8993, -6.3293, -4.9554, -5.5650], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4421,  -5.9427,  -6.6440,  -3.8976,  -4.3391,  -2.8061,  -6.4035,
         -3.8283,  -6.2159,  -4.4509,  -3.7075,  -6.0929,  -3.1735,  -6.4507,
         -7.1195,  -3.8955,  -6.8790,  -4.5084, -27.3776,  -5.3081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8620,  -5.7276,  -4.0297,  -4.7474,  -3.0236,  -2.6601,  -6.3562,
         -6.6854,  -3.7589,  -4.3181,  -3.2056,  -1.5995,  -6.0885,  -6.9570,
         -3.7554,  -4.3768,  -5.6290, -19.9599,  -6.1659,  -7.5137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6710, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9104,  -2.5907,  -6.7231,  -6.7519,  -4.1300,  -3.6077,  -5.9620,
        -17.6519,  -4.0119,  -7.2928,  -5.8087,  -7.5862,  -4.2970,  -8.2181,
         -3.5297,  -5.0793,  -3.9419,  -3.7047,  -7.7064,  -6.9131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5433, -3.8383, -6.3516, -3.1491, -5.3855, -2.5389, -6.2856, -6.2917,
        -4.1311, -4.1278, -2.1048, -1.6968, -4.4791, -6.4592, -3.1921, -3.2675,
        -4.6369, -2.1251, -3.2740, -6.3927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8365,  -7.7530,  -5.6111, -16.1410,  -3.9128,  -1.7029,  -2.8298,
         -6.9285,  -3.5979,  -4.9278,  -2.9077,  -3.7160,  -3.8709,  -5.0596,
         -6.5617,  -3.9826,  -3.4395,  -4.9196,  -2.4643,  -5.1041],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3770,  -4.4783,  -7.6900,  -2.8705,  -2.8281,  -5.5118,  -6.5126,
         -3.5313,  -3.9193,  -3.2967,  -8.6889,  -4.8002,  -6.2925,  -6.2317,
         -4.4907, -10.2889,  -2.8986,  -1.6424,  -3.5309,  -7.3905],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6682,  -3.1893,  -3.9501,  -4.0766,  -1.6370,  -6.0397,  -5.7307,
         -3.9561,  -8.7919,  -4.6134,  -2.0376,  -2.2216,  -7.2688,  -4.3860,
         -2.5851,  -5.9757, -13.7825,  -9.9950, -10.5895,  -4.4276],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9146,  -5.3812,  -6.8339,  -3.8528,  -4.5044,  -3.6979, -20.1921,
         -4.0096,  -8.3000,  -3.6636,  -7.8354,  -3.6358,  -6.7177,  -3.9454,
         -5.5888,  -2.0659,  -4.6384,  -6.4799,  -3.3744,  -7.3562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0752, -31.3347,  -4.5936,  -7.5435,  -3.0370,  -8.2128,  -3.8978,
        -11.5910,  -4.1165,  -4.0683,  -2.8214,  -3.5962,  -6.2569,  -3.9534,
         -4.1400,  -2.4478,  -3.4683,  -3.8553,  -6.7171,  -5.5991],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3264,  -4.4541,  -2.3425,  -3.5298,  -3.6596,  -6.0703,  -6.5371,
         -4.3859,  -8.2661,  -4.1595,  -1.7295,  -3.0332,  -6.8312,  -3.3140,
         -3.3731,  -3.6875, -13.2187,  -4.9408,  -8.1991,  -7.8194],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1003,  -2.9887,  -4.8360,  -6.1400,  -3.1432,  -3.9857,  -5.3733,
         -7.6246,  -4.5430,  -7.9245,  -6.1384,  -8.1906,  -4.8142,  -8.2568,
         -4.6117, -12.8278,  -3.9082,  -2.1850,  -4.3449,  -6.1101],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2483,  -3.6701,  -4.3302,  -4.2708,  -3.0815,  -4.0477,  -5.0087,
         -6.4106,  -4.9321,  -9.1962,  -5.4440,  -3.1260, -10.1431,  -7.1966,
         -6.0431,  -5.2067,  -5.5871,  -2.9801,  -5.0426,  -6.9008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2562,  -2.2063,  -2.9335,  -2.7409,  -6.3143,  -5.8514,  -4.2172,
        -12.0012,  -2.2927,  -4.5601,  -3.4045,  -5.1999,  -6.8285,  -4.5242,
         -6.0569,  -6.1079,  -4.1208,  -4.1343,  -5.3271,  -6.6203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8273, -32.0136,  -4.9863,  -5.9758,  -6.7566,  -7.2704,  -5.5514,
        -10.0321,  -5.3170, -10.2452,  -5.7029,  -3.7241,  -8.1391,  -6.9563,
         -3.5539,  -5.9304,  -4.8354, -30.7503,  -5.3259,  -6.2449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3582,  -3.1826,  -4.3195,  -2.6125,  -6.6881,  -3.8456,  -3.6610,
         -2.9445, -17.3941,  -4.6121,  -7.1028,  -7.1084,  -7.5277,  -7.4067,
         -8.5617,  -4.8418,  -4.9754,  -4.4032,  -4.9633,  -2.9518],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.5079,  -7.4718,  -3.3519,  -7.2249,  -1.8473,  -9.9912,  -4.1642,
        -10.0291,  -4.8685,  -3.8306,  -5.5716,  -7.7531,  -3.7740,  -6.3849,
         -2.7015,  -4.2741,  -4.3720,  -6.0473,  -6.5627,  -4.3751],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5556, -2.4047, -3.0065, -7.7025, -4.4298, -4.7528, -3.1295, -1.5767,
        -3.1992, -6.7507, -6.0450, -4.4307, -2.8543, -2.7863, -2.7389, -6.4022,
        -6.7024, -3.6107, -5.8019, -4.3299], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1313, -1.8651, -3.4067, -5.1492, -5.8895, -3.5543, -4.9147, -4.0095,
        -2.1604, -4.7065, -7.1584, -7.1625, -5.1931, -5.7650, -3.4383, -1.4341,
        -5.3999, -6.6436, -5.1981, -6.6050], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3478,  -3.6874,  -3.9291,  -6.0094,  -6.7681,  -5.2741,  -6.8505,
         -3.3566,  -2.3615,  -5.8084,  -6.8283,  -3.5430,  -4.9033,  -2.5770,
         -6.1752,  -4.1668,  -6.3084,  -4.2966,  -4.5259, -12.2763],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0247, -3.5305, -3.9750, -6.8096, -4.5538, -3.5587, -3.5168, -3.9158,
        -3.6732, -7.0628, -4.2816, -3.2759, -3.9988, -2.2452, -4.1361, -6.9834,
        -3.6502, -4.6164, -3.4208, -2.9178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2074, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5316, -6.3759, -5.4089, -4.1259, -2.5682, -2.1759, -3.3278, -4.9039,
        -6.5319, -3.9738, -5.5361, -3.7364, -2.2325, -3.3299, -5.8159, -5.5141,
        -3.8340, -3.4912, -2.7163, -2.6447], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8639,  -4.8122,  -7.6770,  -4.0323,  -4.7952,  -5.1981, -18.9467,
         -7.1653,  -7.3445,  -7.3493,  -4.4459,  -7.8461,  -3.9394,  -9.4322,
         -3.7556,  -6.7486,  -8.7077,  -5.5596,  -2.4498,  -5.0904],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5659, -3.9879, -4.5424, -2.9605, -2.6361, -3.5268, -6.1885, -6.7253,
        -3.9471, -4.2113, -3.5054, -3.2994, -3.6841, -6.6955, -3.0832, -3.8222,
        -3.7927, -2.7758, -5.2420, -7.5634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5613,  -4.1460, -10.6728,  -4.6934,  -3.5514, -12.6207,  -7.6703,
        -12.6286,  -9.0375, -21.2862,  -6.9996,  -6.3482,  -7.2793,  -5.1035,
         -6.5502,  -7.7830,  -5.0447,  -4.9620,  -6.1706,  -4.6117],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0514,  -5.0435,  -2.5222,  -6.1331,  -6.8874,  -4.1750,  -4.8652,
         -5.0923, -22.1278,  -6.9767,  -7.0901,  -6.6214,  -7.1361,  -6.2346,
         -7.4922,  -3.3891,  -6.7966,  -4.9134,  -2.7100,  -6.7592],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4964, -3.5570, -3.7349, -6.7549, -4.1497, -2.9387, -2.3981, -4.1273,
        -3.2397, -5.6306, -6.1136, -4.1661, -3.7094, -3.0132, -5.1452, -6.3680,
        -7.2001, -3.7235, -5.1065, -3.8933], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7143, -5.1857, -5.2316, -5.3210, -5.2445, -7.3074, -6.9875, -6.5914,
        -4.2433, -8.3729, -5.1366, -3.5867, -4.5951, -2.9932, -7.5325, -5.2156,
        -5.4425, -8.9202, -7.2956, -8.9470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1600,  -6.6529,  -4.9217,  -3.0265,  -5.5959,  -3.9181, -11.9314,
         -6.8995,  -3.6349,  -3.8378,  -4.7107,  -9.0467, -12.6949, -10.4216,
         -7.4403,  -5.0607,  -7.4107,  -3.7279,  -4.0881,  -2.5842],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1096, -2.0680, -3.7313, -7.0876, -3.7939, -4.5223, -2.4794, -2.9022,
        -2.4342, -5.8214, -6.1850, -5.2379, -4.0329, -2.6191, -2.0193, -2.6196,
        -6.5999, -6.1156, -4.3958, -5.8870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4636, -3.4384, -6.0158, -6.0195, -3.8617, -4.5642, -3.4854, -4.4510,
        -3.7460, -6.2276, -5.6417, -3.8727, -3.5036, -5.0331, -2.8781, -2.2007,
        -6.7003, -4.5563, -6.0455, -3.7528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7585,  -2.7383,  -3.9365,  -3.6272, -23.2770,  -3.3238,  -8.1830,
         -4.0440,  -8.0713,  -3.9411,  -4.0567,  -5.0880, -10.5780,  -4.6785,
         -5.3493,  -7.1913,  -5.1799,  -4.8891,  -3.9173,  -2.0567],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2846, -11.6640,  -1.9901,  -8.8828,  -4.3654,  -7.8216,  -4.2360,
         -5.5607,  -4.0693,  -4.1171,  -2.8738,  -5.7999,  -6.3015,  -4.2769,
         -5.8900,  -3.1675,  -1.8397,  -3.5298,  -7.5650,  -4.4821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3298, -3.0232, -7.7312, -4.3927, -3.3385, -3.8083, -7.2169, -5.5021,
        -3.5759, -3.4726, -2.4661, -5.4208, -7.3750, -3.4613, -6.2801, -5.6951,
        -1.7806, -4.6001, -7.3237, -4.5541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4997, -6.4849, -3.9053, -2.7875, -3.7491, -1.8500, -3.4521, -6.6274,
        -4.3163, -5.8939, -3.7044, -1.4862, -3.4022, -6.9853, -3.9828, -4.6240,
        -3.6366, -2.7814, -4.9162, -7.0930], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8683,  -7.2938,  -4.9057,  -6.4413,  -6.7469, -20.2634,  -7.9157,
        -10.9652,  -7.9154,  -8.2276, -10.0782,  -7.0081,  -9.7005,  -4.8356,
         -4.8526,  -6.0240,  -6.4789,  -6.4562,  -6.9961,  -5.4371],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7705, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7664, -2.6916, -1.9956, -3.7422, -6.3301, -3.5871, -7.6772, -2.3860,
        -3.0673, -2.4203, -6.0588, -5.9113, -3.9486, -5.0154, -2.5954, -2.4749,
        -4.0926, -6.7785, -3.6606, -3.5609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5349, -4.9890, -3.4709, -3.9810, -2.4832, -6.9296, -7.2654, -3.7395,
        -4.3154, -2.1971, -2.8556, -6.0004, -5.9262, -4.2012, -3.6295, -2.7207,
        -2.3074, -4.1777, -6.3910, -3.2717], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3694, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5312, -6.5182, -5.0737, -6.3307, -6.7814, -5.9567, -6.3641, -7.8668,
        -7.3401, -4.3385, -4.5218, -1.9899, -6.9304, -5.5901, -6.9085, -5.0975,
        -7.2017, -3.6715, -5.6419, -6.0657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8223, -7.3958, -7.1159, -3.9757, -9.1095, -3.9033, -3.2039, -4.6375,
        -2.3815, -2.7854, -6.4267, -4.0794, -3.5786, -4.4141, -3.2964, -5.0035,
        -4.9563, -6.1553, -4.1173, -5.2853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9202, -6.7904, -4.9689, -4.1045, -3.8572, -2.9284, -1.2263, -2.9303,
        -6.2230, -4.1517, -3.9899, -3.0439, -7.7422, -4.7911, -4.7781, -5.8735,
        -3.6073, -4.1636, -2.6538, -1.8588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6479, -3.6719, -4.8274, -5.2065, -7.2028, -2.8754, -5.6236, -2.2263,
        -1.7871, -3.9324, -7.0038, -3.5754, -6.7937, -2.5976, -1.6480, -2.9776,
        -5.5169, -5.8902, -3.8165, -6.4508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1761,  -3.6858,  -7.0228,  -5.7614, -34.4731,  -6.5445,  -6.7276,
         -7.5208,  -6.4645,  -8.5095,  -2.0584, -10.6867,  -5.0724, -10.7400,
         -4.3428,  -2.9085,  -2.4387,  -4.2281,  -6.8958,  -3.9349],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8604, -6.4314, -6.7175, -4.5459, -2.9805, -4.8132, -3.6173, -3.4102,
        -7.1785, -3.0295, -4.9404, -3.2746, -5.7823, -6.6780, -7.1921, -4.8610,
        -3.6405, -2.9706, -3.6126, -3.7226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4366,  -6.6718,  -3.3569,  -6.4859,  -2.9588,  -6.6031,  -4.7398,
         -6.9818,  -5.8699,  -3.7479,  -5.7339,  -2.6439,  -5.7944,  -6.6024,
         -8.4428,  -4.6487, -12.0734,  -4.1457,  -2.7849,  -6.1791],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6775,  -6.6820,  -4.5683, -10.5435,  -2.7951,  -3.3863,  -5.1111,
         -6.4778,  -3.7678,  -4.0322,  -4.4218,  -5.6300,  -3.1878,  -5.6531,
         -6.1200,  -3.7625,  -9.3898,  -5.0511,  -1.5063,  -8.4563],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4060, -3.2768, -4.3894, -3.4961, -2.9926, -6.2588, -5.8888, -4.1619,
        -3.1210, -3.3261, -2.7908, -3.9834, -7.4698, -5.3261, -5.1299, -5.7875,
        -2.1671, -3.5206, -4.8082, -4.9253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7628, -5.3691, -4.2283, -3.1616, -3.8325, -4.2135, -6.8309, -4.5199,
        -3.9592, -2.6180, -2.0384, -2.4135, -6.5570, -3.8104, -5.5151, -3.4728,
        -5.2530, -4.5654, -5.6248, -5.9899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7982,  -1.8274,  -6.4237,  -7.0673,  -4.2861,  -4.8594,  -4.2035,
        -10.3685,  -3.9870,  -9.0076,  -3.6006,  -7.4837,  -4.3358,  -6.3922,
         -6.6317,  -5.1863,  -8.1757, -15.8843,  -5.0956,  -4.9176],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9542,  -3.3178,  -8.2551,  -3.0728,  -3.1030,  -3.7650,  -6.2799,
         -6.8924,  -4.8739, -17.4788,  -2.7243,  -3.1036,  -5.3692,  -6.4794,
         -3.1026,  -4.0587,  -3.6873,  -3.8904,  -2.7324,  -5.6363],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2303, -5.6855, -3.9705, -3.8027, -3.6614, -1.5148, -4.3397, -6.2320,
        -3.8884, -7.2653, -3.8965, -2.4642, -6.1140, -6.5560, -3.7510, -4.2000,
        -3.3389, -2.1850, -2.5820, -6.7376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4903, -3.8818, -5.5325, -4.0013, -6.0579, -3.1830, -5.8470, -5.9727,
        -3.6212, -5.0359, -3.4338, -3.6510, -5.7952, -7.2392, -3.5715, -5.2126,
        -4.8156, -3.6063, -3.6304, -5.1328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9343, -1.3821, -5.1965, -5.8606, -3.7335, -3.3491, -2.9146, -2.2506,
        -6.2239, -6.2693, -3.5822, -3.3066, -3.0999, -2.1864, -2.8851, -7.2840,
        -3.9150, -4.1979, -3.2835, -3.7453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8800, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8798,  -4.2263, -21.4614,  -4.2221,  -8.4011,  -3.3902,  -7.5031,
         -3.9297, -11.2118,  -3.5356,  -4.8743,  -9.6984,  -7.7136,  -4.1045,
         -8.1957,  -8.2966, -20.0032,  -5.3910,  -5.6917,  -6.8066],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1578, -15.0493,  -1.9117,  -6.2029,  -8.4830,  -3.9408,  -6.8104,
         -4.6724, -48.3829,  -6.7426,  -6.4003,  -7.3019,  -5.1471,  -5.3721,
         -5.9890,  -4.4578,  -7.3110,  -2.4671,  -3.7667,  -2.6301],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9406,  -7.3027,  -4.0428,  -6.8809,  -5.5542, -13.0834,  -7.1512,
         -7.6486,  -7.2892,  -6.1367,  -8.1883,  -4.1570,  -8.9264,  -4.7405,
         -1.7985,  -6.4208,  -6.9856,  -5.3065,  -3.1514,  -8.1767],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3941, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3701,  -4.6051,  -1.9169,  -7.6060,  -7.3898,  -5.3151,  -3.9161,
         -7.0248, -22.4504, -15.5107,  -5.3960,  -8.8297,  -5.6496,  -7.0298,
         -5.0792,  -8.6492,  -3.6210,  -5.2158,  -4.7919,  -3.0277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7425, -6.7972, -7.4284, -4.1963, -7.9042, -5.0203, -4.9477, -3.6120,
        -4.0548, -5.2389, -6.9781, -3.1741, -3.6433, -2.9772, -2.0237, -2.6248,
        -7.1357, -4.1429, -4.4335, -3.1704], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5581,  -4.9248,  -6.7990,  -4.2617,  -3.5275,  -3.4603,  -2.9604,
         -7.2249,  -6.8420,  -3.5069,  -4.5749,  -3.3112, -22.9190,  -5.3518,
         -8.8250,  -4.6063,  -8.5104,  -3.5502,  -5.1276,  -4.0448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5641, -2.0123, -3.6127, -6.8689, -6.1025, -4.5369, -4.2966, -3.1558,
        -3.9618, -3.8853, -6.4137, -4.5708, -4.3859, -4.0596, -3.8595, -6.7961,
        -8.1233, -3.7160, -4.7633, -3.4111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8922, -5.1003, -5.5265, -2.8412, -2.3258, -5.9173, -7.0490, -3.4237,
        -3.9146, -2.3215, -2.8908, -3.7128, -7.4704, -6.0614, -3.7430, -6.3174,
        -2.0732, -3.3940, -5.2536, -7.0641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2756,  -5.1376,  -3.5664,  -2.3573,  -4.5447,  -5.4350,  -5.9502,
         -4.4821,  -4.6498,  -2.3333,  -2.8820,  -6.7240,  -6.7803,  -4.1892,
         -4.9609,  -3.0720, -21.2408,  -7.5277,  -7.8937,  -7.0527],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4350, -15.6999,  -7.5682,  -6.0036,  -7.5297,  -4.0261,  -8.8020,
         -4.1100,  -3.7658,  -3.7843,  -1.8107,  -2.9592,  -7.3189,  -3.9342,
         -4.3513,  -3.4395,  -1.8263,  -7.1436,  -7.3582,  -3.8614],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1000,  -7.0313,  -4.3982,  -4.3500,  -4.3065,  -4.0246,  -6.3106,
         -6.8032,  -3.6064,  -6.8363,  -6.2801, -26.2464,  -4.8759,  -7.5697,
         -6.0763,  -7.7843,  -6.0794,  -8.8164,  -4.8334,  -5.9070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1877, -11.1262,  -3.8529,  -2.0006,  -4.9269,  -6.8559,  -3.2999,
         -4.5063,  -2.9649,  -2.9545,  -3.1370,  -6.4920,  -5.9975,  -3.8025,
         -6.1719,  -4.3696,  -3.8209,  -2.8560,  -5.8722,  -5.7264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4330,  -2.4981,  -1.9941,  -4.8315,  -6.1147,  -4.1839,  -3.4891,
         -3.3532,  -1.6334,  -4.2275,  -7.0821,  -3.3364,  -3.6145,  -4.3633,
        -15.2491,  -6.6900,  -9.3251,  -4.2144,  -7.5417,  -3.5253],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1545,  -3.8873,  -2.1109,  -4.2631,  -3.6993,  -6.5330,  -4.5853,
        -14.9454,  -2.9671,  -2.2818,  -2.7288,  -7.4444,  -4.6771,  -4.6515,
         -4.6762,  -2.5895,  -3.7963,  -6.8552,  -7.1754,  -5.2442],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3416, -4.6105, -5.1467, -4.4506, -3.5255, -3.0820, -5.7323, -6.3943,
        -3.8970, -5.7263, -2.6426, -2.6409, -4.7185, -6.8925, -4.2474, -3.0199,
        -3.6250, -2.5110, -5.4611, -6.7047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0831, -6.5292, -6.3186, -3.7534, -3.9347, -3.3664, -1.5371, -3.9355,
        -6.1487, -6.5054, -4.4530, -3.9881, -2.3166, -2.4191, -5.3204, -6.2543,
        -3.8392, -2.7974, -3.4846, -1.9274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7803, -5.9387, -5.7600, -4.4000, -9.1500, -1.8888, -2.8618, -4.3935,
        -6.7135, -4.0390, -4.2483, -4.8530, -3.0727, -3.7147, -6.3114, -5.9804,
        -4.3132, -3.8123, -3.1159, -3.5735], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2382, -3.4064, -3.0567, -2.5262, -5.0816, -6.5380, -4.0300, -2.4875,
        -3.6830, -2.5370, -6.5694, -7.0494, -4.2285, -5.1015, -2.1915, -1.9650,
        -4.6142, -6.6827, -3.6676, -5.0892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2749,  -3.2289,  -3.8540,  -6.3274,  -7.1819,  -5.0770, -12.3053,
         -3.5529,  -1.7688,  -3.0027,  -6.7910,  -4.1510, -20.2239,  -2.9056,
         -3.3929, -14.3934,  -6.4417,  -2.6153,  -5.8370,  -9.7782],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4955, -5.3110, -6.4380, -3.7530, -4.0455, -2.6094, -3.8352, -6.8722,
        -6.8281, -2.8356, -4.7102, -3.2545, -6.3331, -4.5595, -6.9504, -6.2696,
        -4.8494, -7.8683, -2.9709, -3.1111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6005, -3.2301, -3.3703, -3.4831, -2.7662, -6.4813, -5.9578, -3.9900,
        -4.3006, -2.4469, -1.3505, -3.4283, -5.8201, -4.7307, -4.9693, -4.5214,
        -3.0820, -3.6956, -5.8404, -6.3564], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9285, -6.9584, -3.3482, -5.6272, -3.6857, -1.4247, -5.5905, -6.7584,
        -4.2158, -8.9250, -2.8271, -3.1697, -3.3185, -6.5908, -3.3749, -5.3354,
        -3.3351, -1.7730, -3.4063, -6.7254], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0146,  -4.1661,  -3.7686,  -6.2299, -11.2263,  -6.6553,  -6.5688,
         -7.8392,  -4.8441,  -8.9718,  -4.2444,  -3.5214,  -3.7694,  -2.4707,
         -6.0294,  -6.8397,  -3.6669,  -4.5139,  -2.6855,  -2.9235],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0678, -1.4718, -5.2361, -6.3556, -3.6172, -4.3351, -4.9838, -2.9081,
        -3.2223, -6.2616, -6.7537, -4.6759, -6.8047, -3.4263, -3.0517, -9.4090,
        -6.7686, -3.8064, -5.5468, -9.1238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6526,  -9.0234,  -2.6223,  -1.6229,  -3.7464,  -6.3566,  -4.8221,
         -3.3417,  -9.5891,  -1.9247,  -8.2489,  -7.0684,  -4.1670,  -9.0428,
         -5.6626, -32.7715,  -5.2952,  -4.1286,  -6.3115,  -7.4626],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7383, -11.1289,  -6.3038,  -8.4524,  -6.2109,  -3.5054,  -7.6006,
         -4.2676,  -5.0369,  -3.3907,  -1.7582,  -4.5100,  -6.8731,  -4.7725,
         -3.7302,  -2.8769,  -2.4858,  -3.8155,  -6.5163,  -3.4099],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4316, -8.2248, -5.3240, -6.3773, -7.4480, -5.0377, -7.0989, -7.1042,
        -3.6675, -7.7573, -5.8737, -6.7266, -7.2660, -5.9517, -7.5046, -6.6183,
        -8.3981, -5.7883, -5.4468, -4.7667], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5842, -4.6762, -5.4736, -2.9109, -3.8095, -2.7905, -7.2201, -3.7321,
        -4.1397, -3.0663, -1.9269, -4.9877, -6.8617, -4.3359, -8.9900, -2.9255,
        -2.8988, -5.0749, -7.0571, -3.2373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3951,  -3.8615,  -3.0577,  -3.4564,  -6.3120,  -4.0604,  -4.5491,
         -4.1832,  -9.1686,  -7.0466,  -7.5005,  -6.7862,  -5.0884,  -7.3644,
         -5.8308, -11.2222,  -3.5682,  -3.7533,  -3.2283,  -5.6136],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2559, -6.5070, -3.6364, -2.6559, -3.2053, -2.2408, -4.4714, -7.4216,
        -3.3951, -5.9986, -3.5938, -4.7687, -1.4423, -6.6786, -5.7515, -4.1124,
        -3.1271, -3.4093, -3.9768, -4.8455], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3747, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1417,  -3.5978,  -7.5768,  -3.6888,  -3.3933,  -4.4766, -15.6083,
         -7.2893,  -8.0335,  -6.3704,  -7.6021,  -3.0054, -11.3187,  -4.0687,
         -5.7570,  -5.9523,  -1.6160,  -2.2295,  -5.9240,  -6.1507],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8400, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6466, -6.8522, -6.2633, -4.1947, -5.6177, -3.0062, -1.7225, -3.5033,
        -6.3279, -3.7969, -6.0641, -2.5044, -1.1847, -4.9546, -6.6034, -4.1838,
        -5.0465, -2.6689, -2.1965, -2.0825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9513,  -4.6502, -33.2246,  -7.0765,  -7.2877,  -6.6631,  -6.4110,
         -5.6631,  -6.5289,  -3.6852,  -5.6262,  -2.4968,  -2.7157,  -5.9043,
         -7.0673,  -3.7083,  -4.5970,  -2.5777,  -4.6024,  -2.3492],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5181,  -5.5891,  -6.6859,  -4.6618,  -5.2898,  -3.9703,  -3.4355,
         -3.2066,  -6.6595,  -4.0817, -10.5371,  -2.9324,  -3.9449, -12.1734,
         -6.2086,  -2.7646,  -5.2598, -17.6265, -14.8862,  -6.0855],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1120, -7.8581, -4.1426, -8.2054, -3.9160, -3.9842, -3.5013, -4.1126,
        -5.8654, -6.5482, -3.2475, -3.9846, -3.6852, -3.3617, -6.4042, -6.8395,
        -3.9744, -7.0134, -3.9223, -5.4855], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5865,  -6.6669,  -5.7845, -27.7989,  -5.6242,  -7.1717,  -7.0099,
         -7.1238,  -4.4420,  -6.5361,  -3.8919,  -3.8942,  -5.1357,  -2.7693,
         -3.7775,  -5.3617,  -6.0710,  -3.9614,  -3.9073,  -2.1890],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4278, -7.2406, -4.5155, -4.3015, -3.5506, -3.1019, -6.0041, -6.5344,
        -3.8553, -3.9512, -2.3529, -6.1355, -3.9137, -6.1624, -6.4401, -4.3647,
        -2.7281, -2.1698, -2.5412, -6.0230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3935,  -4.8932,  -4.7239,  -6.2410, -18.2123, -14.3099,  -8.9547,
         -5.5372,  -7.6233,  -4.0843,  -6.6764,  -2.8987,  -2.6285,  -2.9795,
         -8.0507,  -4.6222,  -3.9616,  -5.9333,  -3.8150,  -5.4915],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1188, -4.8683, -6.6914, -4.8595, -3.0739, -5.6952, -7.4304, -4.0902,
        -5.7524, -3.2891, -2.2613, -4.2249, -7.5312, -3.8770, -9.7980, -4.9482,
        -4.3026, -5.6829, -5.0399, -7.4207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6545,  -2.5981,  -2.5648, -14.4245,  -4.2536,  -5.0834,  -3.8208,
        -13.1426, -12.8635,  -5.5841,  -6.9694,  -7.3060,  -6.0412,  -8.2101,
         -4.4408,  -6.7540,  -3.3716,  -2.8091,  -3.4134,  -5.0476],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1737,  -7.9193,  -1.9070,  -1.9959,  -3.6941,  -6.3254,  -3.2802,
         -3.9322,  -2.1145, -15.5755,  -8.0149,  -8.3551,  -7.4739,  -4.9642,
         -7.3391,  -3.7548,  -3.5024,  -2.8593,  -2.3632,  -2.5094],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9014, -4.4347, -3.8161, -3.3549, -3.5086, -4.2815, -7.2255, -3.4859,
        -5.6754, -2.6727, -1.7840, -3.8532, -6.6732, -3.3960, -3.1583, -4.4607,
        -1.8143, -5.9185, -6.3783, -3.5015], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0842, -4.0184, -3.9242, -3.1677, -4.4669, -6.1343, -6.6983, -4.1069,
        -4.5933, -4.2193, -4.0935, -3.1253, -7.1953, -4.5499, -7.8161, -3.0264,
        -3.5741, -6.3208, -7.0793, -4.0521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8334,  -5.4356,  -6.9895,  -4.8854,  -3.9367,  -3.5394,  -3.7069,
         -6.8918,  -6.7439,  -3.5438,  -3.3273,  -3.1639, -19.1433,  -4.9443,
         -6.9275, -13.7026,  -6.1376,  -7.0681,  -3.8371,  -5.4373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5929,  -6.8852,  -3.6613,  -4.5684,  -3.2667,  -2.7590,  -3.4847,
         -6.2444,  -6.3452,  -5.1261,  -7.0700,  -3.4895,  -5.8200, -11.7503,
         -5.5700, -11.9155,  -6.6929, -35.8834,  -5.8422,  -6.1319],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3550, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8759,  -3.9163,  -6.0943,  -3.8601,  -3.1177,  -3.3620,  -2.2512,
         -6.7527,  -7.0411,  -3.4001,  -5.5130,  -2.8698,  -2.2312,  -5.1416,
         -7.1771,  -3.5854,  -5.7893,  -2.7221, -32.9283,  -5.0794],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9798,  -4.4332,  -5.3905,  -7.0791,  -3.9168,  -6.9000,  -4.3743,
        -32.3880,  -5.5336,  -7.4212,  -7.3309,  -7.2797,  -4.3270,  -8.7289,
         -3.7882,  -6.8083,  -3.4254,  -2.1327,  -3.5303,  -5.4357],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1431,  -7.7976,  -5.4848,  -5.7346,  -4.3975,  -3.1424,  -6.3936,
         -7.4688,  -2.9705,  -8.1147,  -3.6753, -33.6305,  -6.9639,  -7.5048,
         -7.5936,  -1.8822,  -8.9177,  -4.0997,  -8.7915,  -4.6314],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1878, -2.0386, -4.6171, -6.8489, -3.7919, -2.8334, -2.2647, -4.5022,
        -2.7301, -4.4435, -4.4611, -4.2559, -6.2464, -2.0144, -2.5597, -4.8992,
        -6.3422, -5.9826, -4.1507, -6.9466], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9986, -3.5959, -3.6010, -3.4303, -2.7391, -6.4491, -5.8747, -4.0245,
        -4.1572, -3.2636, -1.7095, -4.1459, -6.4631, -3.3264, -5.3262, -1.8555,
        -3.2094, -4.0450, -6.1956, -5.8447], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1407, -18.9162,  -4.7010,  -6.9616,  -5.1336,  -7.6355,  -6.5713,
         -7.9071,  -4.6556,  -3.9359,  -3.7132,  -2.2141,  -4.7256,  -5.3166,
         -5.9244,  -4.4390,  -4.1973,  -2.7387,  -1.8227,  -4.3852],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1495, -6.5716, -7.0379, -4.9938, -3.3835, -2.9898, -3.0639, -4.4796,
        -6.3303, -3.2045, -4.7031, -3.5059, -1.9091, -4.5921, -7.0324, -2.9827,
        -2.9824, -3.3717, -1.2420, -3.7398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3140, -6.4061, -6.8935, -3.3784, -5.5087, -4.1513, -8.9137, -5.1580,
        -6.9014, -4.9237, -5.3085, -9.8676, -3.8500, -3.8787, -7.1151, -3.2838,
        -5.2002, -3.0257, -3.3789, -2.2061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2812, -2.2512, -2.9040, -6.0159, -4.0779, -4.1542, -3.8962, -4.9878,
        -1.9646, -5.7866, -6.5337, -4.7487, -4.9476, -5.2320, -4.2755, -4.4028,
        -6.6862, -4.4608, -2.4617, -2.7950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9327,  -6.4469,  -4.9119,  -9.1350,  -3.6941,  -1.7267,  -7.2742,
         -6.8791,  -4.1681,  -4.1742,  -5.4857, -13.1995,  -7.3919,  -7.2326,
         -7.6154,  -4.0687,  -8.0468,  -4.0590,  -4.2732,  -3.5873],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7019, -6.7126, -9.3216, -4.8486, -6.3516, -4.4155, -4.2079, -2.9091,
        -2.4061, -3.4436, -6.9058, -4.0334, -4.5253, -3.9101, -1.6146, -5.3052,
        -6.5890, -3.5639, -2.8847, -3.1191], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8192, -3.8123, -5.5034, -2.7000, -8.0690, -2.3875, -7.3945, -6.5012,
        -3.9964, -6.2767, -4.0710, -3.4453, -2.7983, -5.2704, -5.8269, -3.6377,
        -5.1236, -4.3771, -6.2931, -3.9858], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0000, -2.3080, -4.7439, -6.9203, -4.2360, -4.3724, -2.7944, -6.4849,
        -5.6781, -7.1985, -4.4024, -2.9336, -3.7532, -1.8733, -6.3504, -7.2197,
        -3.2407, -6.2087, -3.0960, -1.2696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1206, -5.5660, -7.5863, -3.7907, -4.5839, -4.3648, -5.0416, -9.1062,
        -6.8563, -4.1396, -4.1780, -2.9107, -3.8791, -6.2128, -7.4327, -4.4640,
        -5.8591, -3.8405, -3.1756, -5.7376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6864, -5.1307, -2.3272, -8.7532, -2.1538, -6.8590, -5.8745, -4.6899,
        -6.3162, -3.8496, -2.3221, -4.1356, -6.6956, -5.8623, -3.9789, -5.0748,
        -2.7598, -2.1698, -6.5051, -6.8354], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6073,  -4.0304,  -3.0068,  -2.1364,  -5.2431,  -6.4885,  -3.6237,
         -4.4034,  -3.0216,  -1.2247,  -3.2762,  -6.0925,  -4.5687,  -4.5389,
         -5.4130, -22.9059,  -6.0554,  -8.9545,  -3.4198,  -6.3029],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8514, -2.8929, -2.2612, -7.0489, -6.6708, -3.2618, -5.9076, -5.1524,
        -3.8667, -3.3208, -5.9264, -3.2641, -3.4977, -2.6544, -4.8243, -2.4729,
        -6.5499, -3.2454, -4.7515, -3.5515], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2555, -26.2221,  -6.8543,  -8.2392,  -7.7934,  -6.3007,  -9.0275,
         -4.7237,  -3.6735,  -5.3409,  -2.8675,  -3.8238,  -7.4924,  -3.2957,
         -4.2080,  -4.6253,  -8.0147,  -6.9149,  -6.4995,  -8.1946],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6094, -6.6293, -4.8020, -4.3552, -2.9454, -3.3758, -5.8161, -6.7227,
        -3.8098, -2.8997, -3.2489, -4.2009, -1.8677, -5.6063, -5.6016, -3.8185,
        -6.0754, -4.2925, -4.7762, -2.2499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7935, -5.7022, -4.1375, -4.3166, -3.5596, -3.2428, -4.4761, -7.1237,
        -3.5561, -4.1246, -2.7898, -3.6111, -3.0888, -6.4092, -6.2824, -4.4179,
        -3.0659, -2.3729, -2.5718, -2.9850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5091,  -4.2892,  -3.8285, -19.1355,  -8.0313,  -6.5889,  -7.2854,
         -3.8396,  -8.6938,  -3.7469,  -9.1570,  -4.0204,  -3.0856,  -7.8762,
         -6.2738,  -3.4090,  -3.7456,  -8.0561,  -6.3178,  -9.5895],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7567,  -3.7218, -23.0625,  -7.9166,  -7.2214,  -6.7665,  -5.2160,
         -7.7278,  -4.3791,  -4.4620,  -3.5204,  -4.4688,  -7.4449,  -6.8223,
         -3.8510,  -5.8145,  -4.6188, -28.9634,  -5.5908,  -8.2110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5473,  -5.0625,  -3.2495, -15.7346,  -7.4139,  -3.1321,  -4.3936,
         -5.1585,  -3.0365,  -4.3567,  -7.2514,  -4.7515, -12.9028,  -2.5557,
         -2.6997,  -3.9590,  -6.9817,  -3.4872,  -3.9712,  -2.9855],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5815, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5850, -3.3009, -7.2916, -7.7939, -3.6502, -3.3785, -3.1575, -3.5496,
        -3.4903, -7.1778, -3.6434, -4.9690, -3.7649, -1.8181, -3.7798, -5.1691,
        -6.0425, -3.0809, -5.6358, -2.9701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7288,  -5.1968,  -3.2734,  -2.0510,  -6.7154,  -7.3629,  -3.1884,
        -10.3576,  -4.3532,  -9.9286,  -5.3398,  -5.4368,  -6.3486,  -9.9638,
         -3.3318, -12.8480,  -4.7646,  -5.1597,  -3.9821,  -5.2859],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4493, -3.8837, -6.4616, -3.5870, -6.8721, -2.7339, -5.7330, -6.4549,
        -4.7763, -5.5281, -3.6894, -1.9938, -3.4681, -6.8537, -4.7264, -2.8372,
        -3.3920, -2.5102, -3.0641, -6.8776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5946, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6721,  -7.4142,  -7.2979,  -4.9070,  -7.0576,  -4.1841,  -8.0577,
         -2.9509,  -2.0659,  -5.0098,  -7.3002,  -3.5365,  -3.8379,  -4.3580,
         -3.0389,  -4.5533,  -5.5226,  -6.7934,  -4.6124, -10.9678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8279,  -4.0761,  -4.5528,  -2.2992,  -3.0361,  -7.3885,  -3.5341,
         -4.6680,  -4.7379, -10.5435,  -7.2854,  -6.2373,  -7.8095,  -6.3755,
         -7.2214,  -4.5382,  -6.4065,  -3.9249,  -3.7115,  -3.1373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5288,  -6.8930,  -7.0050,  -4.0480,  -7.6840,  -3.9314,  -5.6260,
         -3.1314,  -3.3152,  -2.3921,  -7.8109,  -3.5305,  -5.3185,  -4.2958,
        -14.8405,  -7.2475,  -6.4864,  -7.6276,  -2.5132,  -8.2470],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5642,  -8.2609,  -6.9538,  -7.6569,  -6.2837,  -6.8972,  -5.0618,
         -7.6047,  -4.1221,  -3.7704,  -3.5823,  -4.1446,  -6.8815,  -6.7359,
         -4.8915,  -3.6901,  -4.8378, -16.6931,  -6.7373,  -7.1454],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6974, -4.1721, -7.2518, -4.3947, -2.6375, -5.7762, -6.7906, -8.1479,
        -5.8318, -6.3934, -7.1010, -5.0690, -7.8779, -5.6024, -3.8038, -6.7199,
        -1.7987, -6.0718, -7.0390, -3.1620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0747, -1.5230, -4.9929, -6.3942, -4.2673, -4.4882, -1.7990, -2.8598,
        -5.0082, -6.0627, -3.4025, -4.5926, -3.2767, -2.6715, -3.8279, -6.7952,
        -3.7537, -2.9100, -2.9044, -2.2446], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3245, -3.8151, -2.2361, -6.5510, -5.3567, -3.5720, -8.2014, -2.9954,
        -3.2788, -2.3519, -5.3504, -5.2761, -3.0081, -3.9627, -3.2936, -3.5640,
        -2.4309, -6.0581, -5.6480, -4.0193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2190,  -2.8577,  -5.6623,  -7.1980,  -3.5161,  -4.7438,  -3.0212,
        -31.3338,  -5.6240,  -7.6685,  -6.5523,  -7.4372,  -3.3364,  -8.5852,
         -2.9724,  -4.9169,  -3.6571,  -2.0057,  -3.7357,  -6.6004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4779, -3.5726, -3.6736, -3.1507, -7.7913, -3.3312, -4.9505, -3.3470,
        -3.5557, -4.2867, -5.1567, -6.7219, -3.7544, -9.5337, -2.8576, -3.6478,
        -8.5587, -6.4584, -3.5130, -4.4327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3605, -6.1732, -3.3394, -4.5902, -6.4178, -9.8142, -3.0183, -6.8841,
        -5.8822, -4.8057, -4.0747, -1.7777, -1.5987, -3.1485, -6.6605, -5.5083,
        -3.6467, -3.9762, -3.7033, -2.0815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2478, -6.7436, -2.3849, -3.1180, -3.8627, -6.8657, -2.9538, -5.9467,
        -1.9054, -5.7727, -3.4198, -6.5431, -6.0747, -3.9786, -5.0991, -2.5525,
        -2.3781, -2.7496, -6.3194, -5.3541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1198,  -6.0588,  -6.0281,  -3.3127,  -3.1066,  -2.9911,  -2.9368,
         -6.4865,  -6.8452,  -4.9864,  -7.7855,  -4.7360, -10.5825,  -4.4837,
         -6.1942,  -5.4722,  -6.6396,  -5.1752,  -6.3839,  -2.5844],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1289, -4.5881, -2.6678, -1.7857, -2.7882, -6.2219, -3.2289, -4.2430,
        -3.3955, -2.5908, -3.7189, -5.9852, -5.4698, -2.9920, -4.0791, -2.9008,
        -5.1478, -3.4111, -6.4927, -6.3077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0113,  -6.9718,  -3.8430,  -2.5428,  -6.1287,  -9.7304,  -3.3125,
         -5.1511,  -7.3416,  -4.2674, -22.9213,  -2.5176,  -2.9180,  -5.3275,
         -6.1347,  -3.5929,  -4.3676,  -3.1678,  -3.8909,  -3.0077],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5785,  -5.7351,  -3.4979,  -2.2364,  -2.8179,  -1.5905,  -3.0683,
         -6.7130,  -4.4884, -16.7906,  -2.8927,  -2.7813,  -5.9606,  -6.9258,
         -3.0134,  -3.5681,  -3.3694, -25.2173,  -4.9637,  -7.9864],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2720,  -5.4640,  -4.8766,  -4.3262,  -5.1076,  -6.2842,  -3.6401,
         -5.6005,  -2.3768,  -3.9807,  -5.3473,  -7.0773,  -9.3760, -32.2148,
         -8.7961,  -8.8886,  -8.4288,  -8.9253,  -9.3138,  -8.7724],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2670,  -7.0932,  -5.4303,  -4.8473,  -4.6754,  -2.6155,  -3.4724,
         -1.3069,  -6.6898,  -5.4330,  -3.4690,  -4.5246,  -2.7925,  -2.8403,
         -4.2188,  -6.7937,  -3.6684,  -4.5029,  -4.5492, -25.5940],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1183, -5.4517, -3.1564, -3.8724, -3.3490, -2.5750, -5.0619, -5.8628,
        -5.8227, -3.3619, -3.6494, -2.4992, -1.9456, -3.1090, -7.1525, -3.6511,
        -5.4774, -2.6176, -3.3278, -3.5455], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4150, -3.7137, -3.1076, -4.2991, -4.6720, -5.9017, -5.6842, -3.0176,
        -3.6526, -3.0248, -4.2476, -4.9562, -7.2711, -3.1624, -5.9296, -4.3565,
        -2.6444, -3.4791, -6.5135, -5.5795], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0049, -2.9894, -1.7011, -6.0854, -6.6254, -2.8731, -3.4367, -3.1672,
        -5.0167, -4.5406, -6.5757, -6.2099, -4.3520, -4.4685, -2.3238, -1.8986,
        -3.2191, -7.7757, -3.4334, -8.5583], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7077,  -6.9880,  -2.6948,  -6.4984, -11.1929, -13.7170,  -7.1298,
         -6.6241,  -6.5749,  -4.4862,  -7.5833,  -3.3412,  -6.9150,  -3.3188,
         -3.5095,  -6.2362,  -7.0839,  -4.1613,  -8.1855,  -4.8986],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2486, -9.0505, -2.3877, -1.8247, -5.6341, -6.6215, -4.0954, -5.3279,
        -2.7023, -2.1917, -4.0080, -5.7354, -5.6373, -3.1381, -3.6799, -2.3022,
        -3.7201, -5.1568, -6.0142, -3.7620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2365,  -3.4010,  -6.6892,  -5.5791,  -3.5108,  -7.3014,  -4.3214,
         -1.5352,  -6.9132,  -6.7519,  -3.6610,  -6.5937,  -6.1059, -13.4969,
         -8.9546,  -8.4161,  -5.4173,  -6.9707,  -2.9021,  -5.9980],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4063, -7.2369, -3.2679, -5.7069, -2.2424, -4.2134, -4.0609, -7.3662,
        -3.8016, -3.7423, -2.6627, -3.6636, -5.5746, -6.0039, -2.9178, -4.7818,
        -3.4389, -2.2188, -3.1006, -6.1086], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3999,  -4.0313,  -7.2431,  -1.7158,  -2.2392,  -5.6536,  -6.3646,
         -3.4373,  -3.6144,  -4.5762,  -3.0591,  -4.7178,  -7.0431,  -6.8403,
         -4.4499, -15.6820,  -4.3319,  -3.0945,  -5.9676,  -6.6067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2930,  -8.2530,  -6.5532,  -7.8641,  -5.9158,  -4.1500,  -9.3334,
         -6.2902,  -5.8571,  -7.3412,  -4.2181,  -5.3542,  -6.7252,  -5.7734,
         -2.4565,  -5.0702,  -3.1028, -10.3897,  -7.2330,  -7.5951],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9849,  -9.3967,  -4.9319,  -7.4992,  -2.8267,  -5.9613,  -4.9898,
         -4.3347,  -7.2372,  -7.6339,  -4.1773,  -4.3810,  -2.9934, -17.3318,
         -6.8759,  -6.7648,  -6.6771,  -3.5221,  -7.2798,  -3.4334],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2116, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6790, -3.5972, -2.5050, -3.4068, -3.8134, -4.7713, -6.0993, -5.5586,
        -2.9609, -3.9050, -2.6883, -1.8730, -3.6716, -6.4467, -5.0146, -3.7506,
        -3.7090, -2.0724, -2.2301, -4.6767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7302, -8.4226, -6.7699, -8.5360, -5.2981, -3.1698, -5.1297, -2.3243,
        -4.2700, -6.9268, -3.0438, -4.0323, -3.8570, -3.5203, -5.0339, -7.3362,
        -4.7342, -4.4065, -2.9104, -1.7395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4554,  -3.4328,  -6.8305,  -6.6486,  -3.8174,  -3.8635,  -5.2971,
         -4.3275,  -5.3222,  -5.9860,  -6.9432,  -4.4911, -10.9518,  -3.0065,
         -5.0017,  -1.7189,  -6.1857,  -3.4952,  -6.0046,  -2.9477],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0215,  -4.3989,  -2.1442,  -4.1323,  -6.3294,  -5.6360,  -4.9514,
        -10.4474,  -3.1318,  -5.1553,  -6.5424,  -6.7105,  -4.4706,  -3.5155,
         -2.8588,  -3.2135,  -5.5796,  -6.8775,  -2.8163,  -6.6734],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6081, -2.0168, -2.7421, -5.8981, -5.6343, -3.7606, -4.0927, -1.8598,
        -2.8563, -4.5234, -6.5276, -4.6016, -4.6353, -8.8823, -4.7652, -2.9687,
        -5.9678, -6.0152, -3.4139, -6.7793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6310,  -3.4507, -23.6897,  -6.3149,  -7.8522,  -4.7019,  -8.5105,
         -2.9045,  -5.5462,  -3.7941,  -5.3949,  -2.4416,  -5.4545,  -6.1960,
         -2.8825,  -4.3489,  -3.3746,  -1.6024,  -4.6926,  -7.0279],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2676, -4.8519, -6.1881, -2.6240, -2.5746, -3.5565, -6.3082, -6.0283,
        -4.3680, -5.7715, -2.6152, -1.8665, -5.1674, -6.9903, -2.8214, -2.7475,
        -3.4538, -1.4625, -4.1011, -6.4807], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6928, -3.0325, -4.1592, -7.1081, -3.7298, -4.5334, -4.2668, -7.9198,
        -8.4506, -5.7292, -8.7327, -2.4224, -9.9427, -4.1151, -7.4312, -5.4533,
        -1.2624, -1.3812, -5.5875, -5.8872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6199,  -4.0867,  -6.5419,  -7.2326,  -3.5137,  -3.0600,  -3.7417,
        -18.5169,  -6.9734, -10.1323,  -7.2143,  -4.2006,  -7.3048,  -3.2870,
         -7.2899,  -3.9552,  -4.2965,  -5.8092,  -7.1344,  -3.7498],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.4852,  -3.6473,  -7.0406,  -4.3876,  -2.0085,  -2.8780,  -6.0688,
         -6.3778,  -3.1017,  -4.3207,  -2.5149,  -1.3703,  -3.5777,  -7.0982,
         -3.1111,  -5.5991,  -3.1473,  -3.8093,  -1.8085,  -6.2136],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5791,  -4.4995,  -2.9815,  -3.2312,  -2.4645,  -7.2597,  -3.3631,
         -6.5917,  -3.1655,  -4.6997,  -6.6648,  -7.2016,  -2.7035,  -4.0544,
         -4.7748, -24.8352,  -3.9496,  -7.6805,  -3.7192,  -7.4282],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7480,  -3.1171,  -2.3208,  -5.1414,  -2.6371,  -6.1885,  -6.1429,
         -3.7235, -14.7207,  -3.1947,  -2.1251,  -3.8268,  -7.8309,  -3.0288,
         -4.5987,  -3.4310,  -3.0818,  -3.1487,  -7.8037,  -3.2374],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0562,  -6.0627,  -3.6690,  -2.4445,  -4.6013,  -4.6463,  -6.8924,
         -7.0834,  -4.3153, -21.3279,  -2.4787,  -3.8495,  -6.5085,  -6.0729,
         -3.0908,  -6.9648,  -3.3434,  -3.6090,  -4.8429,  -6.0830],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4632,  -6.3428,  -3.2803, -10.0231,  -2.1462,  -4.5894,  -4.7765,
         -6.7731,  -6.9466,  -4.3976, -10.3406,  -3.5705,  -2.3520,  -4.6457,
         -6.7403,  -3.7349,  -2.9454,  -2.3127,  -2.4108,  -3.0831],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6748,  -3.8068, -30.4678,  -3.3215,  -7.3735,  -3.5713,  -7.3032,
         -3.4455,  -6.2170,  -3.5911,  -3.1757,  -9.3300,  -7.4131,  -4.5188,
         -5.2856,  -3.5374,  -2.1459,  -3.0895,  -6.8031,  -3.8272],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2449, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4695,  -7.2268,  -6.0683,  -6.5871,  -2.2856,  -8.6843,  -3.5026,
         -3.3747,  -4.9520,  -2.6516,  -5.8286,  -7.2932,  -3.9833, -11.7475,
         -4.5685,  -1.1218,  -5.5208,  -5.6672,  -2.7132,  -4.1546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4681,  -2.9353,  -3.3258,  -2.8886,  -5.7880,  -6.0363,  -2.8654,
         -9.5119,  -4.2797, -10.3785,  -6.2939,  -7.0009,  -4.7486,  -8.9228,
         -2.4705,  -1.6820,  -5.9562,  -6.5560,  -3.5830,  -5.2876],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4597,  -3.4378,  -5.2755,  -3.1506,  -4.3514,  -7.6373,  -6.5250,
         -3.0718,  -4.3704,  -5.8337, -15.6704,  -5.1859,  -6.6147,  -6.4686,
         -7.3799,  -4.7813,  -9.7707,  -3.0449,  -4.0000,  -6.1331],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0641,  -7.0153,  -4.1197,  -5.2757,  -3.4116, -28.1357,  -3.4783,
         -6.8386,  -4.8984,  -7.3541,  -3.1813,  -6.2323,  -3.3107,  -3.2852,
         -4.1304,  -7.5755,  -3.1221,  -6.7919,  -4.2802, -13.6313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1242, -3.4189, -6.5971, -2.9495, -4.5896, -3.0025, -2.5363, -2.3455,
        -5.9022, -5.9563, -3.6147, -3.1737, -3.1202, -2.4568, -4.2393, -6.5594,
        -4.0645, -2.1869, -3.6698, -1.7968], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5480, -6.0495, -3.6200, -2.7700, -2.2707, -2.7983, -2.0613, -6.9372,
        -5.2721, -3.8620, -3.0295, -2.7737, -6.5348, -2.6535, -5.8657, -5.3603,
        -3.2818, -5.3988, -2.0364, -2.2808], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4711, -9.8384, -6.5443, -3.5270, -5.1404, -6.9326, -4.8264, -6.7084,
        -6.8633, -4.9867, -7.1697, -4.2460, -5.2606, -5.9987, -5.9264, -5.6317,
        -6.6796, -6.8985, -6.9536, -8.3250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.3464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5922, -3.7932, -4.7309, -6.7788, -4.3972, -9.7538, -2.2826, -2.2936,
        -3.1168, -5.9551, -3.9590, -2.6509, -2.9772, -2.1490, -2.5271, -5.3476,
        -5.2299, -4.9236, -4.4517, -2.3596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0471,  -2.5702,  -3.0742,  -5.6752,  -5.5586,  -3.1108,  -4.3581,
         -4.2946, -11.1848,  -7.9518,  -6.5455,  -7.1379,  -5.0054,  -8.5808,
         -5.1838,  -3.4378,  -6.0733,  -4.1355,  -5.1077,  -6.7363],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3393,  -5.9259,  -3.3620, -12.3054,  -1.9437,  -3.2832,  -4.1202,
         -6.7278,  -3.0559,  -5.0609,  -3.7371, -20.1123,  -2.6156,  -6.5721,
         -4.3521,  -6.8860,  -3.5561,  -3.8245,  -5.1434,  -3.3956],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7169,  -7.0890,  -8.1904,  -4.1999,  -5.5780,  -4.5027,  -6.9275,
         -0.6834,  -4.7343,  -7.0351,  -5.2078,  -4.4179,  -5.0593, -21.0179,
         -2.8567,  -8.3601,  -5.7332,  -6.8761,  -3.4820,  -4.7562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1063,  -7.5239,  -3.1218,  -3.7622,  -2.4889,  -4.1085,  -1.8529,
         -6.6534,  -3.9650,  -3.6178,  -4.3110, -15.6959,  -6.6272,  -7.1687,
         -6.3052,  -8.0020,  -2.4876,  -9.6214,  -3.4030,  -6.7399],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0520, -3.1443, -2.5159, -6.6208, -7.2687, -3.5341, -8.3033, -2.0121,
        -1.4866, -3.5823, -6.5742, -5.5796, -3.3733, -4.0675, -2.4207, -3.5118,
        -3.1508, -7.5745, -3.1762, -9.4047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0776,  -1.3719,  -3.8040,  -6.9053,  -6.0266,  -3.8170, -10.4931,
         -3.9337,  -1.3805,  -3.6307,  -4.9581,  -4.9902,  -3.4223,  -3.8351,
         -2.7081,  -3.2910,  -3.8516,  -6.8157,  -4.7778,  -3.4673],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5301, -3.5778, -2.2267, -2.3059, -3.5143, -6.1916, -5.5881, -3.5737,
        -5.2842, -3.6592, -3.0033, -7.3100, -6.9866, -3.9959, -3.4928, -4.5816,
        -3.5650, -2.7913, -6.2388, -5.6450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8286, -3.1104, -3.3702, -4.1185, -1.7663, -6.2855, -6.1327, -3.1019,
        -6.8029, -2.0189, -1.8452, -4.6040, -6.7362, -3.3756, -4.0737, -3.1629,
        -4.1091, -8.8542, -6.4647, -2.4272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2297,  -3.6714,  -2.9928,  -4.5998, -19.9679,  -6.6789, -11.0345,
         -3.9799,  -4.7561,  -6.5122,  -3.7389,  -5.9720,  -2.4181,  -1.8926,
         -3.3111,  -6.6552,  -3.6506,  -4.1372,  -3.2300,  -3.2928],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4861, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9488, -4.2843, -3.8965, -1.8493, -4.6862, -6.6796, -3.1906, -2.3833,
        -3.9088, -3.0117, -4.1651, -5.6876, -5.6134, -3.0694, -6.3151, -3.2477,
        -2.4983, -4.8299, -6.6986, -2.8728], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4260, -3.9549, -3.6133, -3.6347, -3.9176, -6.3212, -3.7122, -7.4378,
        -3.7591, -2.5265, -3.0827, -6.5675, -5.9031, -3.4633, -3.9844, -2.3504,
        -2.4678, -5.2379, -6.0795, -2.9728], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1736,  -2.3600,  -7.7677,  -5.3801, -47.5067,  -7.3815,  -9.4850,
         -5.9677,  -7.4680,  -1.4586, -10.3711,  -3.6995,  -6.0592,  -4.6687,
         -2.7026,  -3.2442,  -6.0196,  -6.5354,  -3.9036,  -5.2475],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4750, -11.9066,  -4.9567,  -2.9905,  -3.6263,  -7.1509,  -6.9404,
         -4.4541,  -8.5085,  -2.8698,  -1.9389,  -5.1016,  -5.8373,  -3.3195,
         -4.6538,  -3.3370,  -2.7577,  -3.9883,  -6.7677,  -3.1976],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1236,  -6.7878,  -2.1379,  -6.0487,  -4.2592,  -9.1920,  -5.2561,
         -1.9240,  -8.3647,  -6.8316,  -3.9495,  -3.6336,  -5.6174, -10.7375,
         -6.6252,  -6.6152,  -7.8927,  -4.8244,  -8.7549,  -3.6641],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8402, -5.5051, -7.3427, -3.1397, -5.4778, -3.7913, -2.4826, -3.6376,
        -6.4713, -4.6993, -5.5915, -5.4045, -5.3434, -3.6406, -6.7795, -5.4019,
        -3.5586, -3.4042, -3.8067, -2.5361], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0856,  -5.6825,  -3.8475,  -3.3785,  -3.1055,  -2.9593,  -2.6949,
         -6.5834,  -5.3131,  -4.3686,  -7.1074,  -2.3734,  -3.4045,  -2.3231,
         -5.7242,  -5.5807,  -3.4876,  -3.8568,  -2.1226, -17.3551],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9539, -2.8240, -3.5106, -3.5022, -2.5796, -5.9442, -6.7259, -4.3472,
        -5.7562, -2.7400, -3.8837, -2.9012, -6.3729, -5.6716, -3.3985, -3.5420,
        -2.6510, -2.8289, -4.5422, -7.7147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2848,  -6.6446,  -4.1391,  -6.7357,  -3.2832,  -2.2776,  -7.1461,
         -6.6462,  -2.9388,  -7.1057,  -4.0864, -27.8304,  -5.5001,  -9.7370,
         -6.7461,  -6.1580,  -5.4193,  -5.9834,  -3.8629,  -6.1497],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6557,  -5.6483,  -1.7403,  -3.9025,  -6.0929,  -5.7363,  -4.2517,
         -3.7749, -14.8388,  -5.5314,  -4.5375,  -7.6099,  -4.6645,  -8.7154,
         -5.4795,  -4.6077,  -5.8627,  -8.9091,  -8.7450,  -5.0812],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-24.3314,  -5.3271,  -7.9362,  -4.7081,  -7.3212,  -3.5950,  -4.7737,
         -3.6566,  -2.3852,  -8.7652,  -6.4647,  -3.9206,  -3.9968,  -2.0274,
         -1.3044,  -4.5888,  -6.5881,  -2.8848,  -4.5129,  -3.5130],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7087,  -3.3460,  -5.8518, -32.3242,  -3.4874,  -6.9538,  -4.0533,
         -5.1119,  -2.9853,  -5.3637,  -4.7407,  -7.5746,  -7.7497,  -8.3825,
         -7.9147,  -2.8180,  -9.3362,  -3.6261,  -7.7297,  -4.3580],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.3629,  -5.9764,  -6.9888,  -7.0522,  -7.1431,  -7.2613,  -4.2203,
         -8.4535,  -4.3066, -10.9132,  -4.5593,  -3.1060,  -6.0220,  -6.6462,
         -3.1485,  -4.8583,  -5.2000, -24.1085,  -6.2362,  -7.1575],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4755,  -7.1977,  -2.9833,  -6.6065,  -2.8239,  -2.8940,  -2.0685,
         -6.9284,  -5.9304,  -3.9173,  -6.9954,  -2.5960,  -2.4331,  -4.7176,
         -6.5837,  -4.0534, -12.0375,  -3.1012,  -2.5258,  -3.3064],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8056,  -3.2290,  -2.9579,  -2.0998,  -5.4521,  -5.9047,  -2.8708,
         -4.1932,  -3.4585, -38.5829,  -5.5436,  -7.8041,  -4.4696,  -6.8100,
         -2.9519,  -5.5781,  -5.0608,  -4.0567,  -2.2155,  -5.3275],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2141,  -5.5670,  -3.7175, -11.0912,  -8.5384,  -4.7410,  -3.1529,
         -5.2075, -11.0505,  -5.6876,  -6.6798,  -7.6259,  -6.6887,  -6.0911,
         -8.2287,  -5.8188,  -4.3001,  -6.8868,  -1.7396,  -2.4755],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0275, -3.7858, -1.1746, -6.0635, -6.1725, -3.6008, -5.0618, -2.7481,
        -4.8102, -4.6629, -6.5843, -3.8569, -2.5264, -4.2129, -3.9061, -5.9811,
        -6.7741, -3.6450, -5.0181, -4.4797], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4741,  -5.7058,  -6.0818,  -2.7513,  -3.7420,  -4.8464,  -7.3050,
         -3.3364,  -8.2427,  -4.0185,  -5.1910, -10.4589,  -8.4996,  -7.4604,
         -5.4785,  -8.4349,  -5.2458,  -5.0287,  -4.6299,  -4.2951],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8209, -3.8986, -4.0841, -3.7142, -2.1070, -6.4394, -6.2497, -3.0662,
        -2.9456, -2.4446, -3.3233, -4.2570, -6.6347, -5.7046, -6.1269, -5.7903,
        -2.9211, -3.0402, -4.6407, -6.8826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8015, -3.8758, -5.5011, -6.0475, -3.5870, -3.6889, -2.4285, -5.2553,
        -2.9739, -6.2427, -5.6796, -3.2690, -6.7228, -3.2018, -1.4971, -4.4051,
        -7.0455, -3.2097, -6.3202, -3.8663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7654, -1.6474, -4.3798, -7.1357, -3.8534, -4.9752, -3.2018, -2.3403,
        -2.4418, -7.1289, -3.3061, -5.1949, -3.3945, -1.6392, -2.3980, -7.7131,
        -3.7811, -5.5711, -3.4617, -3.9632], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7470, -36.4676,  -6.5655,  -7.3048,  -7.6460,  -2.6833,  -5.3096,
         -6.2408,  -4.5278,  -3.7616,  -3.1844,  -3.6503,  -4.9440,  -6.6860,
         -3.9867,  -6.9791,  -3.5944,  -4.5461,  -2.7825,  -7.1621],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5766, -3.6311, -6.1539, -4.9764, -3.4078, -6.6360, -6.6315, -3.4747,
        -5.7434, -4.7268, -3.9881, -1.7137, -6.2446, -5.2798, -4.0812, -5.4677,
        -6.4345, -1.5978, -6.2941, -6.5938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8672, -2.2667, -2.2431, -3.2199, -6.7528, -5.2200, -3.5916, -2.4958,
        -2.7409, -2.1546, -4.4582, -6.2300, -2.7413, -4.7969, -2.6406, -4.1280,
        -2.9497, -6.0272, -5.0926, -2.9332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6793, -7.0917, -6.5396, -4.4900, -6.4785, -2.6595, -3.2047, -4.8605,
        -6.1018, -3.1147, -6.2550, -2.5282, -3.3638, -2.0994, -6.5305, -5.4969,
        -3.9133, -5.2650, -2.6539, -1.6052], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8583, -3.6190, -6.1428, -2.8737, -4.8152, -4.3594, -5.3526, -2.6762,
        -3.6794, -7.1902, -6.8124, -4.4225, -4.6041, -3.5711, -3.1081, -4.2133,
        -7.2595, -4.2990, -3.6092, -3.3911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9656, -2.4401, -4.0142, -7.1194, -3.7291, -4.1425, -4.8032, -3.4665,
        -4.2771, -6.1596, -6.5350, -3.9937, -5.1817, -2.7596, -1.3616, -6.0255,
        -7.2042, -3.6667, -8.3028, -4.3308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9499,  -5.8665,  -6.2085,  -5.9910,  -4.3948,  -3.5790,  -3.9031,
         -8.5467,  -7.6838,  -4.1360,  -5.2422,  -5.7346, -11.0321,  -4.8312,
         -6.5210,  -7.7720,  -6.0958,  -7.0393,  -5.4248,  -4.0874],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9123, -3.9973, -4.9568, -1.8530, -4.9353, -5.8108, -5.3104, -6.9924,
        -5.1069, -2.9430, -2.3413, -4.9973, -7.2834, -4.2760, -8.2581, -3.8894,
        -2.9308, -5.0717, -6.9720, -3.6670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6960,  -3.1841,  -7.0535,  -6.7593, -25.0255,  -5.3626,  -6.8940,
         -6.1529,  -7.4973,  -4.3601,  -8.6276,  -3.3162,  -8.4514,  -2.8945,
         -2.0478,  -5.8987,  -6.6779,  -3.1723,  -3.5666,  -6.7781],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3228,  -3.1415,  -6.9417,  -7.5891,  -3.7427,  -5.3672,  -3.7053,
         -3.2684,  -5.4694,  -6.8592,  -3.4414,  -2.1838,  -3.6181,  -3.5249,
         -7.1399,  -6.8981,  -2.8077, -13.5969,  -4.5046, -21.0307],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9574, -6.6315, -2.6225, -2.2804, -2.8424, -5.0457, -2.4322, -6.7314,
        -6.3312, -3.5570, -4.4138, -3.4961, -2.4338, -4.3591, -6.5884, -2.8372,
        -6.9209, -3.0385, -3.4041, -3.4372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4920, -7.8680, -3.7936, -5.2081, -7.3145, -4.8296, -3.8385, -5.8296,
        -6.1770, -6.6556, -4.3572, -2.5325, -3.1333, -1.3670, -5.3489, -5.9045,
        -3.5417, -2.9475, -2.2528, -1.2712], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4421,  -7.7922,  -4.3080,  -5.3065,  -5.5706,  -3.1671,  -8.0227,
         -6.8156,  -3.6342,  -6.5760,  -4.6643, -19.9034,  -4.6238,  -5.3660,
         -7.1541,  -6.9377,  -7.1883,  -4.0575,  -9.5289,  -3.2808],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2844,  -6.7435,  -7.7820,  -3.3487,  -7.6829,  -5.7016,  -3.7489,
         -3.2453,  -7.5094,  -3.6666, -11.1672,  -9.3570,  -9.1404,  -4.2256,
         -7.3898,  -8.3621,  -5.5845,  -6.9900,  -4.4217, -16.2273],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5760, -16.2830,  -3.8469,  -8.1919,  -2.2210,  -8.0327,  -3.4978,
        -11.7922,  -4.9628, -12.3198, -17.9474,  -6.5773,  -9.3171,  -7.2741,
        -35.4870,  -7.2662,  -7.4307,  -5.8415,  -5.8479,  -5.9236],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.2318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4122,  -3.5852,  -4.0320, -13.3100,  -7.6789,  -4.3233,  -7.1786,
         -5.3007,  -8.1983,  -5.2352,  -3.6414,  -6.1914,  -3.1648,  -4.9375,
         -7.0511,  -4.4970,  -6.3214,  -5.6069, -15.8669,  -6.5277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9214, -3.4557, -3.8730, -5.5223, -5.2021, -3.7723, -3.4984, -2.7211,
        -1.8270, -4.0997, -7.2801, -3.2867, -5.8816, -3.3090, -1.4798, -3.9390,
        -7.7639, -3.3140, -4.9497, -4.0367], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0472,  -1.9595,  -3.5725,  -6.9670,  -3.3755,  -4.2108,  -5.9720,
         -6.3323,  -2.5398,  -3.8303,  -7.3159,  -5.6501,  -6.5759, -19.3600,
         -4.3345,  -4.9092,  -7.4227,  -4.3397,  -3.0197,  -3.5520],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6157,  -7.0981,  -2.8221,  -3.7680,  -2.7327,  -1.8631,  -4.3126,
         -6.8417,  -3.0644,  -3.2706,  -3.2150,  -3.7604,  -2.2590,  -6.2237,
         -6.3283,  -4.4097,  -5.2018,  -5.1428, -11.1511,  -9.8530],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8467, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9538, -3.2177, -3.6041, -6.0990, -2.1674, -4.5018, -7.2557, -6.9104,
        -5.1496, -4.6988, -5.6253, -2.2062, -6.1182, -6.6381, -3.4285, -6.8439,
        -3.2738, -1.9118, -6.1637, -6.2674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4393, -4.3716, -3.3749, -1.9439, -6.1237, -4.1269, -6.2838, -5.4768,
        -3.5973, -5.5672, -3.1817, -3.0024, -3.9798, -7.4351, -3.9216, -5.7568,
        -3.8925, -2.8056, -3.5846, -7.4502], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6158, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3388,  -2.6820,  -3.1139,  -4.0298,  -3.7720,  -6.5436,  -5.5312,
         -3.6261,  -4.6451,  -2.1970,  -1.6816,  -3.7508,  -6.5079,  -2.7455,
         -4.9568,  -4.0975, -14.6367,  -4.2996,  -6.9273,  -3.5037],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4583, -6.4457, -6.7053, -3.2696, -4.0217, -2.5608, -3.4200, -4.5629,
        -4.6244, -4.9681, -4.3880, -4.8096, -3.1307, -1.6582, -3.6831, -7.0920,
        -3.4101, -4.3264, -3.3652, -2.7969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4332, -3.3088, -5.6538, -6.2595, -3.6246, -4.3063, -3.0709, -4.0368,
        -2.7042, -6.7653, -5.8213, -3.3943, -5.8670, -2.4622, -2.9354, -5.0948,
        -7.4136, -2.8787, -4.3841, -3.1078], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7653,  -4.5379, -10.6586,  -2.6608,  -3.8485,  -2.8006,  -6.0380,
         -3.0707,  -2.9802,  -2.7161,  -5.0915,  -4.0616,  -5.5819,  -5.6339,
         -3.5869,  -3.2112,  -3.6027,  -2.8029,  -4.9134,  -6.6044],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5584, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8942, -6.1573, -3.8713, -3.3718, -5.4066, -2.0379, -2.5917, -7.2890,
        -3.8296, -3.0948, -3.4549, -2.6857, -2.0561, -6.0561, -3.5733, -6.6775,
        -4.0023, -6.4898, -5.0869, -5.3474], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6535,  -6.6699,  -5.5286,  -3.4850,  -3.5621,  -2.9817,  -4.1646,
         -2.7732,  -6.5410,  -5.4090,  -3.4584,  -8.7151,  -3.5983,  -5.0829,
         -5.8511,  -7.0625,  -5.2725,  -5.5287,  -4.5256, -34.1876],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8628, -5.8227, -3.3619, -3.6494, -2.4992, -1.9456, -3.1090, -7.1525,
        -3.6511, -5.4774, -2.6176, -3.3278, -3.5455, -5.9894, -6.0357, -3.4243,
        -4.2408, -2.9249, -1.8242, -3.8119], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8231,  -5.4730,  -7.4963,  -7.0406,  -4.3109,  -8.1717,  -3.1132,
         -1.9910,  -2.4810,  -6.5124,  -4.2450, -13.5938,  -4.6534,  -1.9257,
         -4.0493,  -7.1302,  -3.9634,  -7.0592,  -4.6866,  -4.8656],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1739, -3.7024, -6.6688, -2.3194, -4.2774, -5.4714, -6.5281, -4.1055,
        -4.4511, -2.7748, -1.9695, -6.2711, -6.5513, -3.1981, -3.0225, -3.0732,
        -2.8334, -5.7899, -6.3553, -3.0299], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2172, -6.2581, -3.9584, -4.1543, -4.7322, -2.2406, -4.4018, -6.8277,
        -3.1842, -2.8672, -2.9803, -5.1996, -2.5983, -6.1490, -5.0824, -3.8372,
        -3.6858, -2.5104, -2.5451, -5.0585], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1950,  -3.5064,  -5.9927,  -3.0151,  -3.0968,  -5.6856,  -7.1001,
         -2.8916,  -5.8780,  -3.7092,  -3.5959,  -4.7466,  -5.5218,  -7.0360,
         -4.1876, -18.9918,  -3.2840,  -6.7430,  -2.9264,  -5.4175],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7296, -3.3552, -2.6979, -6.3664, -6.7126, -3.5037, -6.0510, -2.9126,
        -1.7930, -3.2003, -6.8864, -3.0912, -5.8898, -3.8572, -2.4475, -4.7214,
        -6.8544, -5.9271, -4.2630, -4.3664], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7113, -10.0592,  -4.9479,  -2.8415,  -3.1257,  -7.9354,  -3.2892,
         -4.0831,  -7.8007,  -2.7526,  -6.0464,  -5.9321,  -7.4283,  -4.5492,
        -18.6221,  -4.6335,  -2.5041,  -2.5228,  -6.5519,  -6.9587],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0164,  -5.5127,  -3.8510,  -4.4232,  -2.4716,  -4.4710,  -3.2767,
         -5.6022,  -5.5032,  -4.0251,  -7.6112,  -1.5745, -12.3180, -10.4783,
         -5.2373,  -2.6439,  -4.7442, -12.7121,  -5.0850,  -6.0454],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1031,  -3.4289,  -7.4000,  -6.2549,  -3.7084,  -5.1918,  -8.9810,
        -22.1450,  -5.3688,  -6.8570,  -5.8556,  -8.5849,  -3.2182,  -5.0818,
         -6.6169,  -3.9671,  -3.6487,  -4.7834,  -3.6617,  -3.4700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-32.7002,  -4.6866,  -7.8833,  -3.2986,  -6.4688,  -2.9252,  -4.4402,
         -5.2711,  -7.3850,  -4.0426,  -4.4207,  -6.6874,  -3.6788,  -5.1008,
         -4.7039, -11.9186,  -5.9524,  -9.4722,  -5.2039,  -5.1173],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4491,  -2.9810,  -0.9902,  -3.5772,  -6.5082,  -3.5440,  -4.9227,
         -5.9549, -11.7979,  -7.5387,  -7.2055,  -7.4151,  -3.8381,  -8.1986,
         -3.3914,  -6.0498,  -4.9072,  -2.0074,  -5.3326,  -6.8034],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3440, -2.9419, -2.7522, -2.7020, -5.6201, -2.1018, -6.3856, -6.3041,
        -3.7309, -2.4203, -3.6838, -3.9872, -2.2319, -5.9484, -5.8583, -3.9304,
        -4.2062, -3.5183, -3.1349, -6.9242], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.0277,  -7.5037,  -7.1937,  -7.8251,  -6.1060,  -7.1464,  -3.1946,
         -5.9344,  -5.3052,  -1.7901,  -2.8828,  -5.7865,  -6.1300,  -4.0899,
         -5.2476,  -4.4357,  -2.9699,  -3.5884,  -5.5316,  -5.2199],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1401, -5.3755, -3.4731, -3.4424, -2.9081, -2.2184, -4.0975, -6.7423,
        -3.0399, -6.6761, -4.4824, -2.3230, -5.9120, -6.7234, -3.9425, -5.2365,
        -2.3520, -2.2845, -5.0179, -6.7568], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9490,  -6.4736,  -2.9370,  -5.1052,  -3.0199,  -2.6162,  -3.3062,
         -6.0341,  -6.2655,  -3.6142,  -3.3921,  -3.6775,  -3.3836,  -4.9086,
         -6.9697,  -3.5409,  -3.3665,  -3.3032, -17.2490,  -5.3635],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0697, -6.4646, -3.2336, -3.0784, -3.8359, -4.6344, -5.9747, -3.1900,
        -4.3478, -4.1136, -3.8103, -1.4486, -6.7904, -6.0205, -4.3002, -5.0099,
        -2.6696, -2.0458, -5.6990, -6.8133], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4540,  -6.6200,  -2.9193,  -4.3018,  -3.3667,  -6.9852,  -2.8357,
         -6.4975,  -3.3941,  -6.6099,  -4.9853, -13.9068,  -7.3307,  -4.1925,
         -7.2894,  -5.1606,  -8.0205,  -3.1182,  -4.6302,  -3.6393],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2660,  -2.8490,  -2.8563,  -3.7508,  -6.0040,  -6.6146,  -3.4885,
        -19.4748,  -3.7041,  -7.3178,  -5.4357,  -7.0632,  -6.6849,  -4.4490,
         -7.4068,  -5.1544, -11.3135,  -8.1648,  -6.6501,  -8.1455],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1497, -5.6788, -2.9779, -3.9422, -2.5348, -1.6210, -4.8173, -6.9357,
        -2.8091, -5.0543, -4.1185, -3.1823, -3.0688, -5.2815, -5.9248, -3.4847,
        -7.0561, -3.6767, -2.9163, -7.3523], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4352,  -7.8131,  -3.5422,  -3.8877,  -3.7953,  -3.3424,  -5.0783,
         -7.6536,  -6.4374,  -4.4850,  -5.0586,  -3.8617, -12.5341,  -6.5008,
         -6.3565,  -6.1314,  -4.1539,  -2.1191,  -3.1995,  -1.1611],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4201, -6.2884, -5.6971, -3.1831, -3.8230, -2.1015, -2.0381, -4.1437,
        -6.4004, -2.6308, -4.0176, -4.2172, -2.5041, -3.6035, -5.0002, -5.4280,
        -3.9866, -6.7829, -2.8890, -2.4844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5495,  -2.8747,  -4.2276,  -7.7634,  -4.4505,  -4.3992,  -4.7887,
        -15.9743,  -6.7421,  -6.1817,  -7.6730,  -5.0942,  -8.9849,  -3.8534,
         -4.3191,  -4.9840,  -3.0203,  -3.1739,  -5.3602,  -6.8491],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9025, -11.0115,  -6.0171,  -7.0749,  -3.2442,  -7.7981,  -3.2466,
         -6.1773,  -3.3684,  -2.0499,  -4.0639,  -8.6705,  -4.0249,  -4.9583,
         -3.7037,  -2.2510,  -2.1300,  -6.7515,  -5.9520,  -3.1206],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8555,  -2.8187,  -3.5273,  -6.3379,  -5.8875,  -2.3740,  -3.7596,
         -4.7450, -17.5976,  -6.0590,  -5.4448,  -7.5193,  -3.3737,  -8.5842,
         -3.2128,  -5.8970,  -3.9249,  -4.1601,  -1.6866,  -5.2741],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.0034,  -6.8218,  -8.4445,  -4.4186,  -6.9538,  -3.3151,  -3.7179,
         -3.1682,  -2.3106,  -4.8282,  -7.6491,  -2.8526,  -4.1062,  -3.2007,
         -1.8384,  -2.9374,  -6.1275,  -3.0282,  -3.9749,  -4.1230],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2186,  -7.4047, -16.5260, -12.1873,  -7.7144,  -4.6689,  -6.6309,
         -3.1897,  -4.7005,  -3.9953,  -1.7999,  -5.0209,  -7.3424,  -3.7520,
         -2.8575,  -3.5560,  -2.6597,  -1.8095,  -6.2601,  -5.4552],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6164, -5.2633, -5.4449, -3.0919, -3.7288, -2.8968, -1.7191, -2.5709,
        -6.2868, -3.1748, -5.6694, -3.3725, -3.7055, -3.7384, -5.5239, -5.6686,
        -2.6195, -4.3999, -4.0011, -4.5148], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6077,  -3.3376,  -2.6722,  -6.6256,  -6.5440,  -3.9909,  -7.6586,
         -2.8766,  -2.2729,  -5.4922,  -6.5403,  -2.3258,  -4.0949,  -3.5705,
        -13.2920,  -8.4900,  -9.8229,  -7.2959,  -4.5824,  -6.9868],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4244, -4.5845, -4.0936, -6.7031, -2.0143, -2.5143, -4.8255, -6.4363,
        -3.6015, -4.1317, -2.0148, -1.7340, -3.8741, -6.6136, -5.2682, -3.0839,
        -7.8075, -2.7256, -2.2660, -4.8421], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7549,  -3.6194,  -7.5348,  -6.2769,  -3.5048,  -5.7754,  -4.4958,
        -17.6526,  -5.7538,  -6.5226,  -8.0025,  -2.3428, -12.0770,  -3.1043,
         -6.4342,  -5.2712,  -4.0528,  -4.6306,  -7.2888,  -6.2292],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5333,  -3.3099,  -3.1985,  -5.5287, -18.7028,  -4.1736,  -6.4478,
         -4.9208,  -6.2749,  -2.9691,  -8.1464,  -3.6388, -23.8695,  -7.6430,
         -8.8087,  -3.8246,  -8.6058,  -3.0317,  -4.6496,  -7.1286],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6881,  -2.2518,  -4.6376,  -5.9525,  -4.1080,  -4.0918,  -3.0856,
         -4.7553,  -3.1947,  -5.7584,  -5.5416,  -2.8704,  -4.1195,  -3.4675,
         -3.2004,  -4.1930,  -5.7548,  -5.8411,  -2.3693, -11.3883],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1933, -3.4435, -3.6082, -2.7306, -2.6201, -1.9400, -6.6194, -2.7916,
        -7.4448, -2.9525, -2.0302, -3.6125, -6.1103, -6.0506, -3.5479, -2.5829,
        -3.8351, -2.5331, -5.1841, -5.8466], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6574, -4.0211, -4.3725, -7.6138, -7.1834, -5.3520, -8.1848, -4.6437,
        -8.2233, -5.3302, -4.6966, -4.3040, -3.0222, -2.8387, -5.9702, -6.1883,
        -3.5881, -7.1357, -3.0489, -2.5834], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9248,  -4.3981,  -6.9766,  -3.3401,  -7.3284,  -5.6396,  -3.5220,
         -7.1650,  -6.7442,  -3.8151,  -4.1373,  -4.2193, -13.2312,  -6.4971,
         -6.5222,  -6.3345,  -3.1226,  -6.7717,  -4.1246,  -4.4256],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8235, -3.1548, -3.5742, -4.1938, -3.5677, -7.2118, -2.5346, -4.1710,
        -2.2779, -2.4004, -2.5971, -5.9203, -5.9175, -3.6591, -6.8727, -2.5365,
        -1.9156, -6.5691, -6.5851, -3.9292], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9775,  -4.4177,  -6.8803,  -2.9003,  -3.6795,  -2.4723, -10.8976,
         -8.3489,  -8.1291,  -7.8539,  -3.8635,  -7.4213,  -3.7281,  -3.7345,
         -6.4462,  -3.3702,  -1.9918,  -4.9300,  -5.3004,  -3.8703],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9008,  -3.4111,  -2.8921,  -3.4937,  -2.7322,  -2.5377,  -5.7081,
         -5.9852,  -3.0282,  -9.9454,  -2.4831,  -2.1918,  -3.0944,  -7.1506,
         -4.0375, -12.5526,  -5.2982,  -1.7694,  -7.9793,  -6.3887],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9290, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2160, -5.3408, -3.2774, -7.5956, -6.4593, -4.5887, -3.7876, -2.2805,
        -1.5396, -5.0183, -5.5320, -3.3143, -2.8227, -3.7489, -3.8779, -4.7429,
        -7.0111, -3.4721, -8.3031, -3.1558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1524, -7.4912, -6.8480, -3.8981, -4.5203, -3.2239, -4.9062, -3.7011,
        -6.0721, -5.7889, -3.5487, -2.1991, -5.4737, -2.3534, -2.3039, -6.8895,
        -5.6857, -3.2445, -2.8106, -2.0114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1677, -2.7193, -1.4386, -3.0988, -7.0092, -2.7591, -3.8846, -3.6521,
        -4.7187, -2.1077, -6.3390, -5.5100, -5.5425, -4.3021, -8.2743, -3.1004,
        -2.6445, -6.7391, -6.2633, -2.9023], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5308,  -6.7188,  -3.4132,  -2.0925,  -3.7486,  -6.5532,  -5.6007,
         -3.9499,  -7.9112,  -3.3649,  -2.9939,  -7.0908,  -6.2488,  -3.2692,
         -7.9241,  -3.7686, -23.7659,  -2.2817,  -8.1472,  -4.4960],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9890, -6.5059, -2.9672, -3.2222, -3.1463, -6.2342, -2.6853, -6.2383,
        -6.7037, -3.6074, -3.6098, -3.7176, -4.0199, -4.7146, -7.1622, -3.9437,
        -4.2546, -3.0089, -2.2013, -4.3516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2331, -6.3092, -2.3816, -2.6753, -3.5374, -3.2038, -3.1556, -5.4344,
        -6.4058, -3.4354, -1.4732, -3.5522, -4.5407, -2.1843, -5.3287, -5.1162,
        -3.0509, -4.8458, -3.4264, -2.2034], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6617,  -5.5740,  -5.7272,  -3.0046,  -6.0438,  -1.3761,  -2.8668,
         -3.4674,  -6.3479,  -3.0407,  -5.4067,  -2.6268, -15.0060,  -6.2807,
         -6.9848,  -7.6686,  -6.9466,  -7.6068,  -3.0461, -18.1945],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3601, -5.0678, -3.4324, -3.9666, -2.2520, -2.0215, -4.4055, -6.1724,
        -2.3990, -3.2682, -3.7034, -6.1996, -1.9118, -5.8228, -5.1984, -2.8447,
        -5.6705, -3.5669, -2.1456, -2.4690], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7786,  -4.1207,  -6.0762, -15.7408,  -7.2862,  -8.3916,  -5.4412,
         -7.0988,  -2.9277,  -9.7015,  -3.1664,  -3.4113,  -3.0283,  -5.6534,
         -5.7867,  -2.5318,  -3.4792,  -3.2228,  -3.2452,  -3.2447],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.6986,  -6.4007,  -9.3365,  -5.2231,  -4.6947,  -4.0272,  -5.3196,
         -4.0899,  -8.3444,  -5.4241,  -6.8829, -11.1935,  -7.5261,  -6.1883,
        -11.2954,  -4.8294,  -7.5220,  -7.4407,  -5.4230,  -7.0735],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3865, -7.4786, -4.0394, -1.5336, -5.8538, -6.3743, -3.1101, -3.4723,
        -2.7158, -2.1760, -3.5712, -7.0886, -3.1105, -5.1869, -3.7548, -3.3098,
        -1.8807, -6.5220, -5.4429, -2.6493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2468, -2.7591, -5.1192, -2.8618, -4.1540, -4.2974, -5.9083, -6.2409,
        -3.7074, -9.1627, -2.8413, -3.8015, -5.9640, -6.4693, -5.0948, -2.9019,
        -3.3372, -3.1677, -2.1883, -4.1297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0565, -2.8518, -6.8263, -2.9686, -2.8643, -3.6161, -2.9763, -2.8396,
        -5.9999, -5.9492, -2.5743, -4.0236, -2.6982, -3.5797, -1.7588, -6.7954,
        -5.8481, -3.9934, -4.0342, -3.0617], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9158, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4878, -6.3536, -5.1631, -2.8551, -4.0328, -2.7431, -5.9955, -2.0239,
        -6.2705, -5.2609, -2.8099, -4.5575, -3.6268, -3.7895, -3.5458, -6.0231,
        -5.8202, -3.0733, -4.5258, -2.9190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7545, -3.0529, -1.7940, -2.4525, -6.0406, -3.9259, -4.0084, -3.5948,
        -4.6757, -1.6703, -6.8241, -5.6545, -2.7021, -8.5892, -2.4325, -4.1005,
        -6.6042, -7.0000, -2.7970, -4.4860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3080, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6724,  -3.7369, -21.3742,  -2.2198,  -8.6410,  -4.2778,  -7.3211,
         -2.6717, -14.1789,  -3.8354,  -2.3932,  -6.9713,  -6.4535,  -2.2567,
         -4.4279,  -3.0138,  -4.4248,  -2.3115,  -7.0760,  -5.3639],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8153, -5.1403, -5.2876, -2.1552, -5.0087, -5.4960, -2.6536, -5.6022,
        -3.5768, -3.0526, -4.2648, -6.2504, -5.6647, -3.0093, -2.6299, -3.8268,
        -1.9016, -6.8006, -6.5240, -2.6476], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4749, -3.7026, -6.1929, -2.4697, -4.0157, -3.4513, -3.4841, -5.6867,
        -7.2228, -6.8952, -3.9417, -5.0450, -3.3635, -1.1341, -5.2004, -6.3303,
        -2.2259, -2.9947, -2.9919, -2.0125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6492,  -4.6537,  -7.3498,  -3.6083, -10.9602,  -2.6584,  -2.8641,
         -3.9642,  -5.3781,  -5.7114,  -2.8516,  -4.4594,  -3.5889,  -0.9128,
         -4.9725,  -6.1699,  -2.5319,  -4.0910,  -2.8602,  -2.5402],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6417,  -4.3609,  -6.9042,  -6.8588,  -6.3349,  -5.7035, -19.6912,
         -1.8762,  -4.4424,  -7.1620,  -5.4791,  -2.3566,  -4.0786, -25.7592,
         -7.0683,  -6.5369,  -6.9944,  -4.4645,  -7.6913,  -3.9640],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4149,  -5.2769,  -3.3295,  -9.9598,  -1.7632,  -7.2094,  -8.8365,
         -3.8397,  -4.7669,  -5.1632,  -9.8243, -13.8326, -13.5710,  -7.9359,
         -4.9764,  -6.8206,  -2.4865,  -6.4179,  -3.2395,  -4.2312],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2641, -1.9632, -2.5660, -6.2779, -5.9091, -3.1964, -3.2451, -3.4889,
        -1.7133, -2.9214, -7.2809, -5.5532, -4.1798, -1.3076, -3.0033, -5.0168,
        -1.7384, -6.2489, -5.5945, -2.8176], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1277, -3.2661, -6.7621, -3.1063, -5.9866, -3.1325, -4.2753, -0.5833,
        -6.9307, -5.9026, -3.0025, -3.5270, -5.9624, -4.7634, -4.2203, -4.2642,
        -5.6405, -3.7431, -1.9524, -5.3864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0579,  -3.2100,  -6.6550,  -6.7142, -13.4193,  -7.1087,  -7.8457,
         -3.6605,  -7.1227,  -2.6199,  -5.2438,  -1.9098,  -4.3565,  -3.9766,
         -7.4918,  -5.1991,  -2.6720,  -5.6379,  -2.3994,  -2.6513],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4608, -4.6566, -7.8682, -5.7534, -5.6010, -7.5024, -2.2210, -1.2981,
        -5.2960, -4.7451, -5.6201, -4.8300, -3.8606, -1.7888, -3.9467, -6.5582,
        -5.5775, -3.7141, -4.3002, -3.5896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2574, -3.7818, -2.5308, -3.8226, -2.6536, -3.3195, -6.5799, -2.5564,
        -5.3089, -2.8888, -3.4024, -3.9553, -5.8797, -5.8570, -3.1211, -3.6149,
        -2.6449, -2.2074, -4.1126, -6.0394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8984,  -7.4421,  -2.5839,  -2.0509,  -4.0387,  -6.5092,  -2.5205,
         -5.2939,  -2.4239, -29.9734,  -4.3703,  -7.8639,  -4.6670,  -7.2900,
         -2.8254,  -5.3386,  -3.4300,  -3.9275,  -1.7004,  -6.2150],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7314,  -2.0580,  -6.0908,  -6.3977,  -3.2297,  -5.5046,  -5.0753,
         -1.4905,  -3.4612,  -6.8461,  -3.7678,  -3.2356,  -6.1064, -13.3258,
         -8.3043,  -7.4094,  -6.7791,  -4.3650,  -7.3316,  -2.8270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6685, -4.5879, -4.8178, -3.4210, -6.2383, -1.8617, -1.4617, -3.5179,
        -6.4094, -2.8885, -3.8779, -3.6424, -4.0689, -5.8866, -9.5107, -6.1134,
        -6.2685, -9.3264, -2.4786, -8.2402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8439, -11.1748,  -3.8052,  -3.0857,  -5.9481,  -7.7925,  -2.5156,
         -3.8643,  -3.2243,  -1.2694,  -5.8711,  -6.5427,  -2.7947,  -4.8214,
         -2.5432,  -1.6294,  -4.4008,  -6.4131,  -3.4121,  -3.8417],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1302, -2.3832, -5.2414, -3.7313, -5.6418, -5.5205, -4.7538, -6.3131,
        -3.4928, -8.9020, -2.5795, -5.0790, -2.8688, -6.1217, -5.2294, -3.3935,
        -3.9151, -2.7658, -1.9488, -4.8404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5570,  -2.5795,  -6.8501,  -5.6141, -11.5869,  -7.5671,  -7.4421,
         -7.3508,  -4.2563,  -7.4643,  -2.4853,  -6.1557,  -3.7020,  -3.8105,
         -2.2348,  -6.8247,  -5.6701,  -3.0021,  -3.5975,  -3.7532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2435,  -9.6674,  -4.2222,  -8.1317,  -3.5207,  -5.9155,  -5.3199,
         -3.8435, -12.3530,  -4.2074,  -2.6414,  -6.0847,  -5.5616,  -3.0310,
         -3.6475,  -2.9367,  -3.7195,  -1.3775,  -5.7359,  -6.0200],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6598,  -2.6143,  -8.4607,  -4.0502,  -1.9010,  -5.8711,  -6.3968,
         -3.0932,  -5.9878,  -7.8583, -16.8415,  -7.7765,  -6.2561,  -6.5527,
         -2.9904,  -7.0793,  -2.9913,  -5.5034,  -3.8410,  -2.9687],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1286, -5.7882, -5.9321, -5.8136, -8.2099, -6.3806, -5.4233, -6.9520,
        -3.0097, -5.3315, -6.2837, -6.7484, -6.3144, -6.7420, -6.9793, -6.6789,
        -5.9486, -4.3967, -3.9742, -3.5194], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6613, -5.5647, -7.2999, -3.1369, -2.8871, -3.6844, -7.4665, -2.9829,
        -5.2482, -2.2980, -3.6948, -3.6506, -5.9949, -5.6406, -2.4969, -6.1589,
        -3.7753, -3.7916, -2.3866, -7.0007], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4348, -3.8705, -5.8645, -6.8823, -2.9256, -5.8482, -4.2858, -2.7853,
        -3.1501, -6.1873, -6.8579, -3.9292, -3.6468, -3.1817, -2.2089, -4.8683,
        -6.5519, -2.8119, -4.9575, -3.2033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6318, -3.4601, -6.1602, -6.0914, -3.8621, -7.2558, -2.0734, -2.9337,
        -4.3953, -6.6459, -2.5882, -2.6464, -2.4910, -3.5579, -2.6624, -6.6254,
        -5.4578, -3.0353, -3.3997, -2.9186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0182,  -3.1231,  -3.1024,  -1.7525,  -3.8706,  -6.5573,  -3.1902,
        -15.6983,  -3.3147,  -1.2586,  -4.0916,  -7.0592,  -3.0635,  -6.8704,
         -2.9879,  -3.2620,  -2.5752,  -6.1795,  -5.7722,  -3.2110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0640, -3.6081, -3.2613, -3.0868, -5.8086, -6.8602, -2.2789, -5.5823,
        -3.3998, -1.6181, -4.7759, -6.6573, -3.4083, -2.9901, -3.8349, -2.7440,
        -2.6086, -6.4499, -5.9926, -3.9352], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2469,  -4.9984,  -7.4530,  -5.9055,  -6.8731,  -3.5319,  -7.9225,
         -2.7865,  -4.6290,  -3.7057,  -4.8050,  -6.0694,  -6.6378,  -3.6640,
         -4.8207,  -4.3997, -26.5966,  -5.0361,  -6.7724,  -3.5884],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4721, -6.2710, -5.3587, -5.0721, -4.8489, -6.9775, -4.5377, -7.1531,
        -6.2542, -6.8087, -5.1003, -7.0946, -9.1522, -3.2376, -3.0282, -6.2727,
        -5.1678, -2.5727, -3.5415, -3.4048], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3542, -6.2106, -3.0215, -1.5720, -3.5604, -6.4240, -5.0229, -3.2970,
        -2.3379, -1.9570, -3.0418, -5.4285, -6.1862, -3.3297, -5.6096, -3.5541,
        -2.2566, -3.4036, -5.8688, -5.1304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5962, -2.8064, -4.1058, -4.9770, -3.3662, -3.5053, -5.9793, -6.1597,
        -2.9880, -5.8316, -2.8798, -3.4353, -6.1413, -6.5436, -3.3461, -4.1987,
        -3.4001, -1.4541, -2.9907, -6.7137], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3803, -3.4638, -2.9799, -2.4821, -6.1909, -5.8099, -5.2814, -3.1628,
        -9.6559, -2.0949, -1.8531, -6.5803, -6.0565, -2.5068, -3.2207, -3.1343,
        -5.9818, -1.7468, -5.4804, -3.5449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3910,  -2.7985,  -5.5169,  -3.1108,  -3.9955,  -5.4029,  -6.2231,
         -6.8922,  -4.0883, -11.6347,  -3.2824,  -2.1259,  -6.3462,  -5.7659,
         -2.7301,  -5.3982,  -3.4450, -19.2036,  -8.4559,  -7.0768],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7734, -6.6042, -2.7963, -7.1338, -2.8683, -2.7354, -4.7341, -6.9453,
        -3.3996, -4.8173, -6.4770, -2.5324, -4.1915, -5.8335, -6.5356, -4.6978,
        -4.3170, -3.6253, -3.4344, -5.4501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6565, -4.0128, -4.7569, -6.9614, -3.2644, -3.8170, -2.5830, -2.1661,
        -5.7386, -6.5644, -2.5345, -4.3262, -3.1097, -2.1063, -6.3133, -6.4608,
        -2.5008, -2.9480, -2.0706, -1.3141], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9899,  -4.0199,  -6.1320,  -2.5622,  -2.6353,  -3.3891,  -4.5783,
         -5.9345,  -5.9292,  -2.7807,  -4.4326,  -3.3614,  -1.9457,  -5.4160,
         -6.7525,  -2.6817,  -3.9647,  -2.5356, -40.3839,  -8.5944],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8133, -2.9662, -3.0809, -3.6465, -5.6693, -2.4191, -3.7207, -3.0898,
        -2.3052, -3.3863, -6.4036, -5.6879, -2.6832, -2.9640, -2.0977, -2.8099,
        -3.4153, -6.0361, -2.9315, -4.3987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0377,  -6.1178,  -4.5738,  -3.8057,  -4.1258, -19.5709,  -6.7281,
         -7.1580,  -7.2589,  -3.6728,  -7.3570,  -1.9162,  -4.9050,  -5.7510,
         -2.1786,  -4.5461,  -6.8638,  -1.8839,  -3.9464,  -5.0187],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2851, -4.2182, -3.4888, -6.4839, -5.8434, -2.0349, -2.8828, -2.4229,
        -3.6827, -3.4339, -6.5985, -6.6662, -3.3799, -4.7430, -2.1211, -1.8657,
        -5.1676, -5.9226, -3.4150, -3.3895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9929,  -7.9376,  -7.6288,  -2.6454,  -7.0873,  -6.2681, -39.6470,
         -5.1855,  -6.5396,  -8.9499,  -8.1274,  -6.0914,  -5.2052,  -6.2069,
         -6.1914,  -2.9132, -10.6211,  -2.9615,  -3.0684,  -6.4873],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1017, -6.0996, -2.4373, -5.4722, -2.4662, -5.0958, -6.8758, -6.7919,
        -6.7509, -4.6826, -3.0439, -4.2318, -3.7821, -4.1740, -6.4183, -5.1807,
        -3.4570, -7.7086, -3.6354, -1.8431], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1300,  -2.7832,  -6.8717,  -2.6444,  -4.1828,  -3.9411,  -6.8986,
         -5.4785,  -3.3413,  -3.3213,  -3.9983, -19.2664,  -2.5678,  -7.8644,
         -4.6600,  -6.9446,  -2.9660,  -5.3423,  -3.7513,  -3.8949],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7822,  -4.1995,  -5.3348, -25.5141,  -5.0882,  -6.8004,  -7.1459,
         -3.6138,  -5.1898,  -5.6833,  -2.9069,  -4.2117,  -2.3576,  -1.9789,
         -6.3988,  -6.8919,  -4.0164,  -5.8994,  -7.2087,  -3.4954],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1073, -4.8202, -6.9362, -6.6184, -4.3219, -4.3114, -4.5907, -7.7256,
        -4.1473, -6.7474, -5.8151, -3.6812, -4.2987, -3.5030, -2.5248, -5.8504,
        -6.7274, -3.8635, -4.5062, -3.5031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7165, -3.3886, -0.8349, -3.2096, -7.5184, -3.1527, -6.0707, -3.1501,
        -2.4509, -3.0274, -5.7026, -5.1956, -2.6687, -7.3548, -2.4319, -2.6574,
        -3.9870, -6.7000, -3.4122, -4.6746], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2722,  -2.7915,  -4.1473,  -4.0664,  -5.8970,  -6.1820,  -3.3573,
         -7.1766,  -2.2489,  -2.3700,  -5.4132,  -6.8689,  -2.2732,  -3.5304,
         -5.6191, -11.9018,  -7.6529,  -8.9091,  -6.5771,  -4.2634],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7877,  -3.3540,  -7.0641,  -4.5408,  -4.7311,  -5.6777,  -7.5811,
         -4.1996,  -7.4913,  -4.2504,  -9.2636,  -7.3354,  -6.9549,  -5.6267,
         -7.1985,  -5.5063,  -7.2688,  -3.6695, -10.4321,  -4.0420],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1488, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9029,  -4.4509,  -3.3763, -33.5723,  -5.8510,  -6.0468,  -7.0890,
         -5.7760,  -4.7570,  -6.1405,  -2.9722,  -5.4423,  -2.2991,  -2.1151,
         -4.1299,  -6.7816,  -2.3848,  -9.1735,  -1.8456,  -3.7271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6805,  -3.4456,  -3.6446,  -6.2625,  -6.5929,  -3.5803,  -4.3986,
         -4.6584, -17.1174,  -6.5195,  -8.2352,  -6.2682,  -6.1517,  -5.5439,
         -2.8620,  -3.5433,  -3.0823,  -2.7479,  -2.8209,  -5.1194],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9867, -4.0395, -2.8248, -2.1969, -6.6105, -6.2138, -2.9200, -3.6387,
        -3.0527, -2.3128, -3.4763, -6.9842, -5.6319, -2.5415, -4.8058, -2.5509,
        -2.1100, -4.0849, -6.8473, -3.4817], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4269,  -3.6128,  -2.3785,  -4.4367,  -6.8918,  -2.6116,  -5.4359,
         -2.5317, -14.6404,  -4.5559,  -9.4309,  -5.2934,  -6.6339,  -2.0549,
         -4.1978,  -2.8389,  -3.1644,  -2.6949,  -6.7393,  -5.1067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0476,  -2.0453,  -6.8207,  -6.2023,  -2.7957,  -3.1564,  -3.8326,
        -19.4690,  -5.5606,  -7.6594,  -2.5396,  -5.7648,  -6.5664,  -4.0405,
         -9.9107,  -3.7092,  -3.6718,  -1.8463,  -6.2258,  -5.8345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0792, -3.2482, -1.2177, -2.9135, -6.7640, -2.8490, -5.8180, -2.4845,
        -3.0506, -3.6465, -5.9393, -4.8121, -2.9137, -6.1547, -2.5408, -2.0694,
        -4.3801, -6.9480, -2.7193, -3.9375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9743, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9140,  -4.5890,  -1.6551,  -4.1735,  -6.4930,  -3.5737,  -5.9000,
         -2.3567, -20.1822,  -6.6308,  -7.0774,  -6.2280,  -3.0145,  -4.1186,
         -5.1820,  -3.7698,  -5.7671,  -2.3223,  -2.3766,  -4.0295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3079, -8.9757, -7.9067, -2.5937, -7.5084, -3.4941, -5.7146, -4.4678,
        -2.9647, -2.0685, -6.0324, -5.8291, -2.3647, -8.3350, -5.7771, -3.6699,
        -3.1403, -7.4524, -5.1812, -5.1414], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9690, -3.2762, -4.6188, -2.2532, -3.3033, -5.9845, -5.8312, -3.5802,
        -5.2544, -2.7110, -2.6246, -7.5854, -5.5799, -2.4814, -2.7883, -3.4542,
        -6.1873, -3.6421, -6.5931, -5.5366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1839,  -4.3863,  -5.4328,  -5.7186,  -3.6233,  -5.0915,  -3.0925,
         -2.1777,  -2.9363,  -6.2400,  -2.5114,  -4.4257,  -2.6263,  -1.6348,
         -2.8585,  -6.3649,  -2.9842, -10.5255,  -3.7903,  -4.0275],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9767,  -8.0317,  -7.4093,  -7.6645,  -4.9318, -11.1897,  -4.7811,
         -2.6162,  -7.5487,  -7.6731,  -3.4778,  -6.2764,  -5.3280, -19.8886,
         -6.4341,  -6.3819,  -7.8302,  -3.6251,  -7.3865,  -3.1403],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0655, -5.5964, -3.7631, -1.7645, -3.3176, -5.8901, -5.3314, -4.0879,
        -4.9463, -1.6453, -3.5568, -6.2953, -6.6284, -3.6816, -3.6931, -4.0164,
        -1.8481, -5.4481, -6.3052, -2.7727], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8578, -7.1891, -3.8070, -4.9357, -1.8084, -5.6128, -5.1134, -3.0509,
        -4.6966, -3.2043, -3.8772, -3.5909, -6.3851, -6.9817, -4.3535, -4.2395,
        -2.9092, -3.5488, -4.8688, -6.7170], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4874, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1673,  -3.5606,  -3.1547,  -3.9077,  -7.1747,  -3.0022,  -5.2545,
         -3.9532,  -3.5424,  -4.6453,  -7.8389,  -6.6650,  -4.1367, -20.7267,
         -3.7536,  -6.0066,  -4.2799,  -6.1414,  -4.7839,  -3.2156],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5165, -3.4931, -3.3930, -4.2042, -6.4379, -5.5703, -2.4410, -3.5415,
        -2.6786, -1.3638, -3.7708, -6.2850, -2.6821, -6.6542, -3.1389, -2.1219,
        -4.9769, -6.7529, -2.3109, -4.4582], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5379,  -2.0176,  -2.1634,  -3.8129,  -6.8992,  -3.6846,  -7.8101,
         -3.1960,  -1.4878,  -7.0095,  -6.0761,  -3.7544,  -2.7454,  -4.4609,
        -30.2868,  -4.3868,  -9.2360,  -3.7777,  -4.8424,  -6.6518],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5311,  -3.5322,  -5.8785,  -6.9125,  -3.5368,  -5.0502,  -2.5046,
         -1.9366,  -3.1803,  -6.2349,  -4.0322,  -5.0351,  -3.2771,  -2.1613,
         -4.2778,  -7.0177,  -4.1781,  -6.9055,  -3.4665, -10.3907],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5830, -13.6832,  -4.4273,  -2.2973,  -4.4001,  -7.0474,  -3.4667,
         -3.4105,  -4.5142, -13.5012,  -5.6893,  -7.3184,  -7.1977,  -2.7317,
         -5.8917,  -3.8824,  -4.4292,  -4.9856,  -3.5398,  -3.7299],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0319,  -3.8042,  -5.5043,  -4.1991,  -3.7910,  -6.8768,  -6.6497,
         -5.0835,  -3.5380,  -4.4864, -17.8600,  -5.4238,  -6.7417,  -6.0949,
         -7.3689,  -2.2611, -10.7423,  -3.9616,  -9.2785,  -4.3952],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2608, -6.5991, -5.1298, -2.9307, -8.3117, -3.0051, -3.1623, -2.2608,
        -5.3665, -5.0243, -2.4234, -3.8062, -3.3760, -3.3001, -2.3218, -6.1068,
        -5.4603, -3.5715, -5.1334, -3.1553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3281,  -2.7606,  -6.3821,  -6.6775,  -2.3415, -13.5354,  -2.0354,
         -1.4537,  -4.5672,  -5.7945,  -2.7348,  -3.3876,  -3.2651, -11.2331,
         -7.3122,  -5.0142,  -7.9119,  -3.3310,  -8.3021,  -3.4838],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8239, -5.8638, -3.2343, -4.0220, -3.0436, -2.8839, -4.3063, -6.7962,
        -2.3993, -5.7109, -3.0746, -3.1425, -5.1855, -6.5139, -2.3756, -3.6397,
        -3.4141, -4.2960, -4.6509, -6.6384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3508, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6028,  -2.9225,  -3.9954,  -4.4366, -20.8475,  -3.7694,  -8.7143,
         -6.5684,  -6.5029,  -3.1807,  -3.2851,  -4.0111,  -2.3918,  -4.1665,
         -7.0920,  -2.9156,  -4.6567,  -2.9052,  -2.6352,  -3.5623],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2130,  -5.9282,  -3.3378,  -8.4401,  -8.6433,  -3.4647,  -6.4706,
         -3.4730, -19.5399,  -8.2201, -10.6476,  -6.7984,  -4.7894,  -7.4863,
         -5.1260,  -5.7886,  -6.8912,  -4.2114,  -3.9254,  -6.1973],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9701, -5.6855, -7.0654, -2.8872, -6.4015, -4.1226, -3.1993, -1.5113,
        -5.8471, -5.0394, -3.3748, -4.7931, -2.9769, -2.1167, -1.3749, -5.6391,
        -5.5539, -3.4339, -3.2870, -2.8442], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3062, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1375,  -6.5781,  -6.8029,  -5.4465,  -5.2452, -16.5637,  -2.5173,
         -3.9003,  -6.5757,  -5.6455,  -3.0631,  -3.1703,  -3.4259,  -1.6873,
         -5.4540,  -6.5024,  -3.5591,  -3.3515,  -2.7495,  -2.2027],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2724,  -4.1166,  -3.6437,  -5.9178,  -5.8151,  -3.2029,  -9.6754,
         -3.6181,  -2.4836,  -4.0609,  -7.6466,  -2.6013,  -2.9637,  -4.0452,
        -13.6567,  -6.3045,  -7.7185,  -7.7736,  -6.3752,  -6.2932],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.1464,  -6.6074,  -6.9646,  -7.0055,  -7.6226,  -4.5413,  -8.1279,
         -3.3363,  -5.7137,  -4.5557,  -1.5564,  -4.9393,  -7.1726,  -3.7904,
         -2.4276,  -5.7122, -26.2587, -13.0077,  -8.7859,  -4.5826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3401, -5.1373, -7.7579, -2.2964, -4.3561, -6.5707, -7.0243, -4.1887,
        -4.5478, -3.6290, -1.5336, -4.9122, -5.7847, -2.4512, -3.3127, -3.5619,
        -4.4474, -2.4945, -5.2119, -5.2195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0348, -2.1058, -1.8797, -6.5166, -6.1267, -3.7253, -8.6947, -2.8264,
        -2.4360, -5.0765, -6.8898, -4.8080, -3.0814, -2.7145, -3.2000, -3.8948,
        -1.5889, -6.7721, -5.4197, -3.3452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6267, -2.4000, -7.2294, -3.3238, -3.3439, -5.1290, -6.0302, -5.3679,
        -3.2134, -2.4048, -3.1228, -3.5617, -3.6624, -6.2994, -5.0228, -3.2220,
        -3.3520, -2.9144, -4.9181, -3.8824], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2517, -6.3489, -3.4438, -6.2485, -3.4969, -4.3396, -6.5355, -3.1162,
        -5.0844, -3.3969, -2.5907, -5.2817, -6.9800, -3.9114, -9.6340, -2.8678,
        -4.2456, -2.7610, -6.0303, -6.2404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6271, -7.2709, -2.3548, -1.4938, -3.1534, -6.4889, -2.6752, -3.8070,
        -2.5770, -5.1430, -3.5370, -6.6153, -5.2488, -3.3624, -3.5447, -2.2632,
        -1.9158, -2.3186, -6.4908, -2.5734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5943, -2.3637, -8.0044, -3.7830, -5.5035, -4.5939, -3.8453, -3.1732,
        -6.3833, -6.8678, -3.4964, -3.8094, -3.0144, -2.1231, -5.9011, -6.0534,
        -2.5064, -8.1935, -2.5524, -2.8881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3327,  -6.5829,  -3.8674, -19.1504,  -4.6354, -12.3772,  -5.2479,
         -6.1110,  -3.4140,  -5.4158,  -2.8434,  -3.3195,  -3.5000,  -7.2438,
         -3.8041,  -7.9579,  -4.1829,  -3.8611,  -3.5273,  -5.9024],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5616, -6.1312, -3.8038, -2.5459, -2.9205, -1.3581, -4.2969, -6.2540,
        -2.2411, -4.4812, -3.0266, -3.2745, -3.5741, -6.3120, -6.3112, -3.9392,
        -7.5944, -2.4685, -1.3151, -4.8336], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6981,  -5.0325,  -8.3724,  -4.8824,  -4.3450,  -3.3805, -12.8520,
         -5.2850,  -7.2934,  -5.2129,  -7.4238,  -5.5015,  -6.5744,  -3.4810,
         -4.5294,  -5.1130,  -7.0969,  -4.7402,  -3.9114,  -4.2095],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6578,  -6.2571,  -3.4429,  -3.3276,  -3.6408,  -1.9500,  -3.0556,
         -7.1810,  -2.2920,  -5.6906,  -4.9563,  -3.6169,  -5.9264,  -7.3018,
         -3.8455, -16.5075,  -3.0068,  -4.0378,  -5.3199,  -6.3445],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6317,  -4.5679,  -5.1606,  -4.0167,  -5.5416,  -6.9030,  -3.6781,
         -5.7100,  -4.1874, -10.9534,  -5.8859,  -7.7637,  -6.4151,  -7.1846,
         -6.4508,  -8.4383,  -3.3616,  -6.0194,  -3.4194,  -2.6490],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4898,  -5.9626,  -5.7088,  -3.3186,  -6.6452,  -3.0895,  -2.7401,
         -3.8895,  -6.1172,  -6.3302,  -3.6461,  -5.3035,  -2.5785,  -1.3005,
         -6.0032,  -5.4145,  -2.5892,  -6.3011,  -3.3316, -21.8803],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2515,  -8.5736,  -6.9793,  -4.8937,  -7.4176,  -2.2970,  -5.9566,
         -2.9196,  -3.4522,  -6.9740,  -6.6301,  -2.7185,  -4.1546,  -2.8047,
        -20.9043,  -7.7673,  -6.4149,  -7.1706,  -4.8858,  -8.0082],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0847,  -7.1943,  -4.1831,  -2.4220,  -7.1150,  -6.4148,  -3.6461,
        -11.4642,  -3.4485,  -2.4653,  -6.7001,  -6.7571,  -3.0636, -12.1038,
         -3.2463,  -3.7661,  -3.6984,  -6.3719,  -3.6848,  -5.3252],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1951, -6.7062, -2.6700, -6.5648, -2.6290, -2.8326, -2.3348, -7.3157,
        -2.6888, -5.8909, -3.7234, -6.6344, -4.3202, -6.2468, -6.7452, -4.3645,
        -7.3764, -4.6960, -2.8242, -9.8505], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9376, -8.8511, -4.4270, -7.7673, -6.4457, -2.5461, -1.7463, -6.0383,
        -6.1346, -4.7937, -4.7867, -3.6740, -3.6320, -4.1159, -4.9728, -5.6322,
        -2.7512, -2.6967, -2.2896, -1.6125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2598,  -3.7236,  -7.1077,  -7.2470,  -3.5435,  -2.7286,  -4.2552,
        -31.0772,  -7.3761,  -8.2476,  -7.4158,  -3.6811,  -7.2033,  -2.6076,
         -4.9513,  -2.8795,  -3.1150,  -6.9452,  -6.9346,  -2.4800],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4553, -4.5245, -7.3547, -6.2064, -5.5795, -7.8448, -2.5350, -4.6406,
        -6.8104, -6.5057, -4.4027, -4.0003, -3.3534, -3.0953, -5.8529, -6.4045,
        -3.3920, -3.6595, -2.7108, -2.0535], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3280, -17.4822,  -3.7660, -19.1423,  -8.9985, -10.9931,  -7.9451,
         -4.3667,  -8.3659,  -2.8835,  -7.0451,  -3.5998,  -1.9778,  -4.1766,
         -6.4515,  -2.3348,  -3.8249,  -3.3904,  -2.3367,  -3.9265],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0742, -4.3191, -1.0725, -6.2936, -5.3752, -2.6975, -4.8614, -4.3209,
        -5.3332, -3.1368, -4.0708, -5.5847, -3.4101, -1.6771, -4.8419, -4.9736,
        -4.5725, -6.0263, -6.1212, -3.0516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3322, -5.3638, -2.3865, -3.0436, -4.4053, -3.1788, -3.4649, -6.8188,
        -6.7272, -3.8512, -3.5734, -2.2830, -1.7008, -3.9178, -6.5889, -2.8532,
        -2.9245, -5.6360, -2.9374, -5.6120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1800, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8022, -6.6451, -2.9960, -4.0862, -2.6556, -2.1391, -4.2253, -6.3762,
        -3.2386, -8.2601, -2.9249, -2.4821, -4.7919, -6.6981, -3.1088, -6.5666,
        -3.7335, -4.0346, -2.8706, -6.7398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-26.4761,  -6.2920,  -7.8716,  -3.9004,  -6.9132,  -2.7580,  -6.2727,
         -3.4692,  -5.8205,  -2.0728,  -6.2898,  -5.9885,  -1.6697,  -6.0800,
         -5.4199,  -3.5098,  -3.7497,  -7.6707,  -5.4654,  -8.4074],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8104,  -4.1794,  -2.2131,  -1.6892,  -3.0897,  -6.6697,  -3.7518,
         -3.8664,  -4.7692,  -1.6225, -10.4312,  -6.6058,  -6.2712,  -7.2680,
         -1.9033,  -8.5837,  -5.8851,  -6.3054,  -6.6749,  -5.1183],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4460,  -7.3362,  -6.1001,  -6.6741,  -2.2668,  -8.4757,  -2.9391,
         -3.6506,  -4.9910,  -2.7358,  -5.9014,  -7.1841,  -3.5733, -12.2980,
         -4.6140,  -1.0348,  -5.5001,  -5.4285,  -2.3231,  -4.2872],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3753,  -2.7217,  -6.8274,  -2.7728,  -5.4987,  -2.7189,  -7.9042,
         -4.2461,  -7.2380,  -4.4379,  -6.5497,  -4.4403,  -2.7866,  -7.1521,
         -6.4176,  -2.9278,  -6.2979,  -3.3154, -23.5573,  -5.3795],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9516,  -9.0478,  -2.6285,  -2.8735,  -5.6110,  -6.8850,  -3.4345,
         -5.9094,  -5.0912, -33.7512,  -4.7421,  -7.4995,  -5.9427,  -5.7409,
         -6.6824,  -2.9778,  -4.8452,  -3.2078,  -3.1776,  -2.6445],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5453,  -6.2896,  -2.8248,  -7.7354,  -4.5497, -28.9351,  -7.8087,
         -8.8312,  -6.8230,  -3.1023,  -7.7938,  -2.6237,  -7.6218,  -4.8273,
         -2.9183,  -2.2071,  -6.1081,  -6.2877,  -3.6195,  -2.2149],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2113, -2.9003, -2.1949, -3.5462, -5.5481, -5.3553, -2.7792, -4.3768,
        -2.3270, -2.1981, -4.6014, -5.5818, -2.2511, -6.2228, -5.0483, -8.5596,
        -7.0486, -5.9353, -7.4360, -6.0470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3614, -4.8104, -6.1765, -8.1433, -6.4147, -8.5439, -3.4298, -9.0517,
        -5.4556, -3.6525, -6.9140, -6.6041, -3.4834, -4.7748, -5.4063, -2.6540,
        -3.2163, -5.0169, -6.6041, -6.9917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0316,  -5.6705,  -5.7667,  -2.4356,  -3.5530,  -3.0962,  -2.3598,
         -4.6899,  -6.4321,  -3.5256,  -6.6530,  -3.5683,  -3.1924,  -6.4964,
         -6.6884,  -3.2840,  -6.3809,  -3.3410, -34.3802,  -6.3627],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9927,  -1.3561,  -4.3700,  -7.2864,  -3.0682, -11.1673,  -4.1082,
         -3.9817,  -2.6011,  -5.6400,  -5.2649,  -2.2309,  -6.9605,  -2.9988,
         -3.6521,  -3.7351,  -6.6895,  -5.4973,  -3.1394,  -2.2056],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4058, -7.4902, -6.1969, -3.8879, -5.2019, -3.0199, -2.1292, -4.4187,
        -6.1801, -3.8486, -6.2681, -2.4426, -1.8357, -3.0073, -6.5971, -5.6709,
        -3.3210, -3.8144, -2.9632, -1.0974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7662,  -5.0733,  -7.2651,  -3.7919, -15.1092,  -4.0733,  -2.6769,
         -7.0477,  -6.5163,  -2.7263,  -6.8020,  -2.7517,  -1.4086,  -5.3162,
         -5.2799,  -4.1644,  -3.5186,  -2.9925,  -2.2829,  -6.2907],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9605,  -3.6926,  -7.1307,  -6.6987,  -2.6904, -13.6848,  -3.4331,
         -2.1880,  -4.4383,  -5.7117,  -4.8267,  -6.8642,  -4.1497,  -9.6127,
         -6.2379,  -7.6663,  -5.9905,  -8.0028,  -7.8901,  -7.6363],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2851, -2.2392, -4.9364, -3.2714, -2.5642, -3.7210, -6.4267, -5.3872,
        -3.0440, -4.6797, -2.9899, -2.6755, -5.8809, -6.1359, -2.2414, -8.4652,
        -4.4215, -6.4253, -7.9553, -6.7300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8019,  -6.2982,  -3.3043,  -2.1885,  -5.2518, -15.6541,  -8.6481,
         -5.3751,  -8.4749,  -3.6932,  -7.9942,  -2.0894,  -6.7237,  -3.6052,
         -3.4544,  -7.3964,  -6.3348,  -2.2667,  -5.2046,  -2.5014],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6166, -3.1339, -2.6914, -5.5777, -5.9366, -1.6518, -5.9950, -2.5252,
        -2.4837, -5.6421, -6.6800, -1.5052, -2.9814, -2.7959, -2.5461, -2.1912,
        -6.2053, -5.8418, -3.3812, -4.4919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0621,  -1.7675,  -7.2427,  -6.2612,  -2.2274,  -3.3063,  -2.4954,
         -4.2251,  -3.5126,  -5.7874,  -5.4963,  -2.8990, -20.6148,  -1.9277,
         -3.0605,  -3.2314,  -7.0361,  -2.6098,  -7.8824,  -2.9171],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1713,  -3.6810,  -4.3898,  -4.1964,  -1.2638,  -6.7262,  -5.1270,
         -1.8405,  -7.6777,  -2.4813,  -1.8395,  -3.9701,  -6.5552,  -2.7892,
        -14.1356,  -2.5110,  -2.2993,  -2.4166,  -6.0600,  -6.2750],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3028, -3.8996, -3.3685, -6.8527, -6.5144, -3.1843, -4.3934, -3.7552,
        -2.3181, -5.0068, -5.8337, -2.8477, -4.9246, -3.1176, -2.3581, -5.2537,
        -6.5908, -3.2024, -5.3099, -3.9482], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9618,  -2.5804,  -6.6025,  -6.3461,  -2.0506,  -7.7953,  -3.1304,
         -3.1665,  -2.8090,  -6.8238,  -3.2409,  -2.6862,  -4.2213, -35.8855,
        -14.2020,  -7.4151,  -6.6898,  -7.7824,  -5.5488,  -4.4005],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6450,  -2.2902,  -3.8758, -11.5564,  -6.1148,  -7.7276,  -7.0374,
         -6.4393,  -5.2199,  -5.9619,  -2.9150,  -5.1324,  -2.2117,  -2.2281,
         -5.5929,  -7.2149,  -2.0758,  -6.8233,  -4.0722,  -3.3189],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.6925,  -7.7682,  -4.5267,  -7.4490,  -4.9961,  -6.9203,  -2.5943,
         -4.9911,  -4.7807,  -3.1122,  -4.4645,  -6.7210,  -3.1485,  -2.8905,
         -4.1885,  -3.2246,  -6.0413,  -6.7704,  -6.5543,  -3.6681],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1884,  -6.8399,  -3.5266, -19.0868,  -3.8346,  -3.5674,  -3.2860,
         -5.8883,  -2.1108,  -6.9704,  -2.5610,  -2.8515,  -2.7862,  -5.8388,
         -5.4459,  -2.8053,  -3.3036,  -3.3843,  -3.3699,  -2.4706],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8249, -1.6982, -4.0980, -6.1082, -3.3284, -1.9417, -2.5809, -6.5202,
        -0.8015, -7.1779, -5.5574, -2.6160, -6.4311, -4.9472, -4.9585, -2.1095,
        -4.0613, -5.6373, -2.2436, -7.1838], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9263, -3.4815, -2.1557, -4.5803, -4.2987, -5.9005, -2.4698, -9.8703,
        -2.7133, -2.9663, -3.2755, -6.8280, -2.5049, -5.4705, -3.1779, -2.7986,
        -4.1534, -5.7775, -5.5479, -2.8401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6406, -1.9187, -5.5792, -2.4334, -3.1212, -3.8075, -6.3729, -4.9846,
        -2.6365, -3.3591, -2.5118, -2.0774, -4.8494, -6.0332, -2.3377, -5.2055,
        -2.1386, -2.0285, -4.2825, -6.6181], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8348, -4.4890, -7.1152, -3.0663, -8.3540, -3.9307, -2.0227, -4.5828,
        -6.4308, -2.5622, -3.4617, -2.1362, -1.8946, -2.9488, -6.0696, -3.5306,
        -2.5546, -2.8756, -1.5849, -3.9558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9309, -3.8510, -3.3795, -5.1642, -7.2489, -6.6382, -4.3994, -6.2141,
        -2.8347, -5.0477, -2.9034, -6.5467, -4.8866, -2.2624, -6.7670, -2.2026,
        -1.8272, -4.9464, -6.4519, -2.4395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1713,  -3.6810,  -4.3898,  -4.1964,  -1.2638,  -6.7262,  -5.1270,
         -1.8405,  -7.6777,  -2.4813,  -1.8395,  -3.9701,  -6.5552,  -2.7892,
        -14.1356,  -2.5110,  -2.2993,  -2.4166,  -6.0600,  -6.2750],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8184,  -6.1170,  -5.3320,  -2.4773,  -3.3918,  -3.6500,  -2.3568,
         -3.1719,  -6.2399,  -6.4200,  -3.2667, -15.5098,  -2.9298,  -1.4596,
         -4.0796,  -5.9315,  -4.9279,  -4.2424,  -5.4865,  -1.7918],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7064, -3.8958, -3.0249, -3.8769, -1.8112, -4.4883, -5.8402, -2.5756,
        -4.9580, -2.6460, -3.2292, -3.5856, -5.9163, -6.2722, -3.3626, -6.9914,
        -2.4397, -2.4599, -3.0716, -7.3258], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5718,  -6.7541,  -6.7835,  -1.8472,  -6.0959,  -5.0781, -30.3468,
         -5.9478,  -5.5454,  -8.2022,  -6.3614,  -6.8757,  -8.0772,  -7.0310,
         -3.2552,  -4.1980,  -4.1935,  -2.0446,  -2.5905,  -6.9537],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3231, -2.3895, -5.4443, -4.6925, -3.1978, -5.8426, -2.7158, -4.7957,
        -4.4946, -6.0384, -3.3701, -7.3931, -4.0323, -3.3356, -5.3634, -6.5587,
        -3.7525, -8.1060, -3.3893, -3.1916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2697, -3.2144, -4.6014, -3.3623, -5.7990, -5.8358, -2.2144, -6.4182,
        -2.7825, -2.1136, -2.6624, -6.2718, -5.5138, -2.3825, -4.1008, -4.8582,
        -3.3843, -2.1337, -6.0664, -5.4781], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2563, -6.7753, -2.6436, -4.9076, -4.3319, -4.3468, -3.4456, -6.5786,
        -5.7303, -2.1771, -3.2187, -2.8931, -3.2726, -1.9813, -6.7648, -4.7156,
        -3.3171, -3.5492, -2.5541, -1.4450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4446,  -6.9068,  -2.2272, -19.8272,  -4.0547,  -2.9383,  -6.8125,
         -5.1397,  -7.5442,  -6.8964,  -7.6329, -17.5954,  -4.8980,  -8.5507,
         -6.0198,  -5.7200,  -1.4620,  -4.0328,  -2.5631,  -3.1858],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7696,  -3.4463,  -7.2100,  -2.9467,  -3.8366,  -3.6175, -13.7484,
         -5.4463,  -8.6724,  -3.0975,  -6.2443,  -2.5423,  -5.8848,  -4.5236,
         -3.1213,  -2.7867,  -5.1833,  -5.6600,  -2.4592,  -3.7846],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2193, -2.8416, -5.4713, -6.5553, -3.0593, -5.5381, -3.6201, -1.5117,
        -4.5694, -7.1111, -2.0050, -3.5542, -3.1802, -2.7212, -2.8217, -6.5664,
        -5.9907, -3.4894, -2.4026, -2.9415], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1985,  -2.7255,  -3.1692,  -2.6776,  -1.6878,  -5.0366,  -6.7422,
         -1.4746,  -2.4768,  -2.3451,  -3.1280,  -6.7422,  -6.9014,  -2.4730,
        -11.8567,  -5.3673,  -5.9703,  -2.6315,  -6.1504,  -2.2031],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6759,  -3.7078,  -7.2671,  -2.1159,  -3.3422,  -3.6076,  -2.2797,
         -4.3911,  -6.2371,  -1.8680,  -4.1172,  -2.3102, -15.3204,  -7.3356,
         -8.6939,  -8.2392,  -4.5919,  -8.0807,  -2.2849,  -6.7563],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8112,  -6.8843,  -3.2566,  -7.3970,  -4.1127, -33.1121,  -5.6012,
         -7.2987,  -7.2263,  -6.8254,  -7.0989,  -3.8695,  -8.3623,  -2.4118,
         -5.3710,  -4.6209,  -3.4944,  -5.9751,  -6.3002,  -3.5457],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0730, -7.3446, -2.9381, -3.9098, -4.0960, -2.6403, -3.7901, -6.7188,
        -6.1995, -3.3549, -7.4124, -3.9786, -2.8377, -3.7684, -6.6873, -1.9037,
        -5.7195, -3.3828, -3.2951, -6.2685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3530, -5.9635, -7.8925, -7.6558, -7.0636, -3.8514, -4.4843, -4.5745,
        -6.6916, -3.1290, -6.1229, -5.0034, -3.2809, -4.2024, -3.2465, -0.9960,
        -3.0735, -7.8457, -3.9853, -8.2460], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3990, -3.2981, -6.0795, -6.6248, -3.7902, -9.6001, -4.2856, -2.5861,
        -6.2146, -5.9231, -2.5595, -4.0409, -3.2959, -1.9265, -5.0362, -6.3626,
        -4.5822, -4.9534, -3.0294, -3.2167], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1689,  -3.1166,  -7.1554,  -6.4022,  -3.8705,  -3.0416,  -2.3856,
         -2.6959,  -3.7791,  -6.7017,  -6.3067,  -3.1575,  -3.8776,  -2.7451,
         -2.8449,  -7.5687,  -6.4773,  -3.6046,  -3.9295,  -2.7478],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7777,  -2.8339, -14.2105,  -4.4811,  -4.0737,  -5.9704,  -5.9500,
        -12.1889,  -5.9252,  -7.1734,  -6.4781,  -4.3201,  -8.1985,  -1.9175,
         -5.3778,  -4.4713,  -3.2973,  -1.4409,  -5.8244,  -5.8071],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4977, -5.8818, -3.8250, -4.1448, -6.4804, -2.1660, -5.4293, -5.6095,
        -7.7625, -2.9987, -3.7805, -2.3119, -1.9121, -5.1465, -6.1343, -1.9652,
        -3.1153, -2.5261, -4.7211, -4.6184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4498,  -4.2443,  -7.0925,  -6.3673,  -3.6443,  -4.5798,  -4.5494,
        -15.5149,  -5.2803,  -8.2420,  -7.0285,  -8.1722,  -2.6472,  -9.2799,
         -2.8475,  -6.5840,  -5.9368,  -3.4718,  -2.2597,  -6.1308],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9678, -3.3253, -1.5277, -5.4359, -5.3452, -2.3787, -2.5076, -3.8484,
        -1.7035, -3.8837, -6.9544, -3.1313, -5.2653, -4.1293, -4.1738, -1.3740,
        -6.1403, -5.4945, -2.9769, -7.7165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9017, -3.4072, -2.1226, -3.2291, -6.1637, -5.4521, -2.1758, -2.3227,
        -2.7627, -3.8727, -2.9793, -6.4350, -5.5519, -1.9829, -8.4046, -2.9283,
        -2.7698, -5.0571, -6.5231, -2.9441], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9942,  -6.7011,  -3.0288,  -4.9511,  -4.1234,  -3.0836,  -6.7834,
         -6.5629,  -2.6777,  -4.3439,  -3.8454, -16.7430,  -5.9286,  -7.3227,
         -5.0082,  -6.0242,  -2.4033,  -7.3625,  -5.6503,  -4.3223],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0607, -4.9729, -3.3054, -3.5473, -5.8270, -6.5826, -6.3597, -2.7775,
        -2.5099, -2.8786, -1.6166, -4.4081, -7.0301, -1.9435, -5.5830, -2.7976,
        -2.2574, -2.1788, -5.8211, -5.0207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8391, -7.3910, -6.2824, -4.1394, -4.1178, -3.4918, -3.1888, -6.8660,
        -6.2878, -2.1718, -8.3607, -5.0076, -2.7567, -4.8200, -5.8302, -2.3062,
        -5.4217, -3.3018, -3.0128, -5.2406], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3612, -4.9362, -4.4008, -2.9758, -6.4985, -6.1532, -3.2504, -8.0604,
        -3.5615, -2.1558, -5.7401, -6.7680, -4.3972, -4.9582, -4.8747, -1.0434,
        -1.7991, -7.4188, -6.9723, -4.2675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3392, -2.3877, -2.9221, -4.6803, -6.7384, -2.1991, -5.8312, -2.1804,
        -2.3192, -6.6861, -6.8203, -1.7740, -2.8717, -2.6046, -2.7694, -2.2852,
        -6.4958, -4.9388, -2.2832, -3.3065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9077,  -3.6258, -15.8569,  -8.0438,  -7.1884,  -8.0132,  -5.3403,
         -7.4829,  -2.8948,  -7.3839,  -3.9445,  -3.0102,  -5.6422,  -6.7273,
         -2.7411,  -5.1263,  -4.5149, -22.0590,  -6.0588,  -6.5738],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.3752,  -3.0943,  -3.0435,  -5.5367,  -5.1616,  -2.8580,  -3.7645,
         -2.1374,  -2.6014,  -5.0168,  -6.0024,  -2.2554,  -4.1250,  -2.4316,
         -2.7593,  -4.2781,  -6.0925,  -2.0580,  -4.9561,  -2.8955],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6993, -2.2427, -4.8904, -3.7535, -9.8490, -5.7415, -6.7985, -6.1732,
        -2.6253, -1.8098, -4.8754, -6.2546, -4.2924, -5.9606, -6.6443, -3.0617,
        -3.5198, -3.6365, -7.0575, -1.4692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3446, -2.1435, -2.7910, -6.5879, -5.4865, -2.7958, -3.5891, -2.2515,
        -1.7849, -3.4739, -6.6074, -2.2615, -6.1926, -3.0571, -3.2868, -3.9566,
        -5.8158, -5.0695, -2.8254, -9.0910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4747, -6.3357, -4.2870, -3.1200, -2.0469, -6.3493, -6.3535, -2.5178,
        -5.5578, -2.1883, -2.8241, -2.6526, -7.0374, -1.8884, -7.3191, -2.4750,
        -3.3809, -2.6735, -5.8163, -4.7855], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0981, -5.7893, -6.5465, -4.6048, -4.0387, -3.2137, -2.1103, -5.9156,
        -6.3277, -3.2010, -5.1804, -4.8307, -2.4543, -5.8850, -6.7858, -2.6500,
        -7.4156, -2.9386, -1.4767, -4.3703], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7361, -4.1912, -4.0206, -3.6176, -5.8610, -1.4730, -5.6003, -6.6236,
        -3.9000, -5.5273, -7.4681, -3.7292, -4.4871, -4.3084, -2.7269, -7.0381,
        -6.2386, -3.9569, -4.6002, -2.9238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3548, -2.9239, -4.9449, -7.3861, -3.6924, -5.6851, -3.2149, -2.5407,
        -3.4824, -6.1907, -5.9467, -3.5593, -7.1492, -2.0409, -3.1046, -4.3590,
        -6.4646, -2.0501, -2.6022, -2.5262], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3564,  -3.1997, -10.5201,  -7.9422,  -8.4017,  -7.5179,  -4.4924,
         -7.0782,  -2.1662,  -3.9641,  -4.3045,  -3.6108,  -5.5965,  -6.6622,
         -3.7123,  -7.1832,  -5.0541, -13.8269,  -7.5025,  -7.0740],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4063,  -5.6533,  -7.0845,  -3.4328,  -4.7500,  -1.8821,  -2.3554,
         -3.0819,  -5.3100,  -3.1517,  -2.7977,  -6.4297, -13.4705,  -4.5569,
         -3.9746,  -6.6657,  -3.3355, -11.7436,  -3.0437,  -2.0144],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4510,  -7.3206,  -4.3029, -28.7531,  -4.7235,  -7.4953,  -6.3583,
         -7.7732,  -9.2710,  -7.4496,  -2.1869,  -5.2730,  -4.0915,  -1.5834,
         -1.2715,  -6.2164,  -2.6321,  -4.4477,  -4.8297,  -4.4264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1405,  -7.1719,  -2.8178, -10.5127,  -3.8194, -10.2103, -11.2318,
         -5.2506,  -5.4024,  -4.5587, -24.5185,  -7.3253,  -7.0597,  -7.9089,
         -5.9275,  -6.9055,  -3.6511,  -6.9314,  -5.2336,  -4.1841],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3127, -2.5396, -4.3332, -1.4705, -6.8011, -5.3739, -2.5807, -6.5495,
        -3.1885, -2.7751, -5.5793, -5.9284, -2.5366, -3.0384, -4.6962, -2.5149,
        -6.8317, -6.6471, -2.6604, -2.7357], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2289,  -3.2594,  -1.7640,  -3.9459,  -5.7903,  -6.2150,  -3.7786,
         -2.6542,  -2.3294,  -1.7370,  -6.1489,  -5.9725,  -1.9047,  -6.3997,
         -4.3914,  -5.4184,  -4.9410,  -5.8847,  -5.2996,  -2.2714],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7867, -3.3561, -3.3279, -2.4881, -1.3184, -5.3544, -6.0900, -2.3788,
        -6.0829, -3.4805, -2.5768, -3.3587, -6.0619, -2.9924, -3.9202, -5.2005,
        -3.9537, -4.2327, -5.6900, -5.5560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1603, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6504, -2.3847, -7.9663, -3.3805, -5.7751, -4.5711, -3.9421, -3.1231,
        -6.4453, -6.8102, -3.1273, -3.8655, -2.9718, -2.1590, -5.9876, -5.9273,
        -2.1119, -8.2495, -2.4979, -2.9881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8740, -2.2778, -4.1170, -3.9412, -3.3662, -3.6513, -6.5135, -6.4533,
        -3.6544, -3.4471, -1.7039, -1.5159, -4.2501, -5.8081, -2.8021, -3.8478,
        -3.6990, -3.1435, -4.1795, -5.8171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9681, -3.5577, -6.7345, -2.4828, -2.7173, -5.4843, -5.1067, -3.2856,
        -5.0196, -2.7289, -3.1289, -5.0398, -6.2210, -3.0392, -3.0029, -2.3921,
        -2.5957, -5.6362, -6.2106, -2.3612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7301, -8.6000, -2.1722, -3.3040, -3.3310, -6.0808, -2.1811, -4.4972,
        -3.6557, -2.4984, -2.0303, -5.7951, -5.9829, -2.9697, -4.2689, -2.9983,
        -3.7970, -1.9241, -6.7704, -5.2524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2643,  -4.4130,  -2.9820,  -5.4435,  -6.6364,  -3.6125,  -2.3599,
         -3.2778, -11.3729,  -4.0346,  -9.2649,  -5.4243,  -6.8883,  -2.2154,
         -5.9579,  -3.7258,  -3.2891,  -6.6740,  -6.4312,  -2.2168],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1924,  -2.2873,  -3.1243,  -7.2783,  -5.5032,  -3.9467,  -4.7021,
         -2.6509,  -1.9276,  -2.4756,  -7.3032,  -2.3728,  -1.7327,  -5.7690,
        -23.1318, -16.5721,  -8.5201,  -3.8746,  -6.8010,  -2.1576],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5148,  -6.8430,  -3.7560,  -6.8293,  -2.6282,  -5.3018,  -2.9736,
         -3.5382,  -6.4176,  -6.1425,  -3.3862,  -3.5216,  -4.5114, -18.5955,
         -6.1134,  -8.3648,  -6.7871,  -7.5720,  -4.8598,  -7.3269],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3094, -5.9052, -3.5861, -9.8693, -5.2854, -2.0667, -7.1801, -6.5392,
        -3.7881, -3.6818, -4.8269, -9.5423, -6.5913, -6.7947, -8.0066, -4.3906,
        -8.2753, -2.4700, -4.4452, -3.6298], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3815,  -2.0293,  -5.4200,  -3.0366,  -1.6069,  -4.4165,  -6.3621,
         -1.9632,  -4.1495,  -2.5825,  -2.9163,  -3.4359,  -4.6838,  -4.5516,
         -5.8593,  -6.0590,  -2.4271,  -2.1305, -11.4082,  -4.3471],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.7884,  -6.4060,  -7.9437,  -7.0474,  -4.7760,  -8.0495,  -1.7273,
         -4.8377,  -4.5934,  -2.0941,  -5.2668,  -6.3137,  -3.3861,  -4.9471,
         -3.9254,  -3.0053,  -3.6905,  -6.5558,  -6.8581,  -3.9498],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3502, -8.0694, -4.4231, -6.2959, -5.4416, -2.9861, -4.0509, -3.3727,
        -1.2392, -4.9092, -5.5773, -2.2399, -4.6768, -2.8894, -1.4837, -4.4429,
        -6.9632, -3.1800, -3.8402, -2.8413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9680, -4.5368, -2.7337, -2.7917, -5.5367, -5.3162, -5.2267, -2.4528,
        -5.3307, -3.2358, -2.3235, -5.3655, -6.9180, -2.8888, -4.1392, -4.6994,
        -2.7558, -3.7961, -5.7547, -6.1448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2336, -3.4726, -2.6604, -2.5214, -6.0780, -5.1465, -3.0045, -4.4649,
        -2.9517, -2.1006, -3.0502, -6.3191, -5.4337, -2.9867, -4.8662, -2.4717,
        -1.7300, -1.8062, -6.4353, -2.5655], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8341,  -2.2787,  -4.7394,  -3.7434,  -2.8673,  -1.9539,  -5.9488,
         -4.9318,  -2.7992,  -3.8554,  -2.4888,  -0.8431,  -3.3611,  -6.1450,
         -2.4188, -15.3488,  -2.7597,  -2.8477,  -5.5520,  -6.7137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2105, -3.9184, -5.0350, -3.1337, -2.0609, -2.8495, -6.1072, -2.4701,
        -6.0047, -3.0437, -4.2696, -2.3047, -6.2571, -5.2160, -3.7693, -4.2469,
        -2.8368, -3.5715, -2.2958, -5.5574], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9895,  -8.3343,  -7.0517,  -3.6589,  -3.6228,  -5.0537,  -3.1910,
         -7.5230,  -4.8897,  -7.2866,  -6.0685, -10.8571,  -5.7238,  -9.5774,
         -5.4321,  -7.7169,  -5.9902,  -7.2264,  -4.1614,  -9.1821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1015,  -6.1929,  -9.9331,  -5.1438, -10.7964,  -6.1024, -31.7572,
         -6.7939,  -7.3462,  -7.8930,  -6.5280,  -3.9978,  -7.3577,  -2.4175,
         -4.7936,  -6.0747,  -5.4690,  -2.7397,  -3.9996,  -6.3232],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1297,  -5.7780,  -2.8560,  -6.5225,  -2.8901,  -2.1447,  -3.3566,
         -7.0799,  -2.5545,  -1.7949,  -6.0314,  -1.7589,  -4.4820,  -6.3989,
         -6.6443,  -3.5657, -14.5342,  -2.5302,  -4.3915,  -6.0242],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8989,  -8.5180,  -3.6336,  -6.6787,  -5.4036,  -1.2497,  -2.6148,
         -5.6182,  -5.7008,  -3.1030,  -3.3662,  -3.7094,  -9.6213,  -6.4253,
        -11.0517,  -5.3671,  -5.3094,  -5.3704,  -4.1880,  -9.2688],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9117,  -3.3057,  -3.3714,  -5.9225,  -6.6163,  -2.5392,  -4.8237,
         -3.8232, -26.2963,  -7.0939,  -9.8929,  -7.5827,  -4.3822,  -7.8468,
         -2.0520, -14.5196,  -3.4781,  -2.6627,  -6.0945,  -6.4008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7702, -5.3381, -3.8890, -3.5464, -7.2407, -7.5152, -5.8171, -7.3160,
        -4.9292, -6.7757, -5.0752, -3.6032, -5.0624, -6.9229, -7.4447, -3.7919,
        -7.0575, -5.2256, -7.7677, -4.9740], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7438,  -2.7338,  -3.8520,  -5.3524, -14.2152,  -6.3165,  -7.0172,
         -7.8528,  -4.8777,  -7.7362,  -2.9429,  -8.3766,  -3.5785,  -5.1429,
         -3.6653,  -4.4316,  -6.2038,  -2.7133,  -5.3955,  -3.1140],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7695, -3.3022, -3.6370, -5.7629, -2.8345, -5.6574, -9.7521, -4.6540,
        -9.1361, -3.5400, -3.3375, -4.4618, -2.7595, -4.8544, -6.7618, -2.2609,
        -4.7201, -6.3891, -3.1482, -3.3115], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2375, -1.5430, -4.5446, -6.6402, -3.4810, -4.8248, -4.7815, -6.8996,
        -7.9041, -7.0507, -6.8821, -7.9931, -4.4305, -7.4122, -4.6612, -5.6684,
        -3.9841, -2.9047, -4.8179, -6.7279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3694, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4644,  -6.7914,  -2.4889, -16.1359,  -2.1465,  -1.2184,  -6.2551,
         -6.0808,  -3.1245,  -6.8127,  -2.5114,  -1.2767,  -4.8870,  -5.6476,
         -2.0468,  -3.6575,  -3.3448,  -2.2474,  -2.4719,  -6.7141],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0659,  -3.7048,  -4.1784,  -3.4790,  -4.1821,  -6.0744,  -6.9257,
         -4.2339, -17.0966,  -4.4397,  -4.0125,  -3.3345,  -6.4741,  -3.3894,
         -4.9187,  -3.9776,  -3.7192,  -4.8688,  -6.8617,  -6.3181],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6491,  -5.7836,  -6.6573,  -2.7415,  -4.5190,  -2.6494, -20.5361,
         -5.7215,  -8.7230,  -6.0901,  -7.0370,  -4.8498,  -6.5574,  -1.8508,
         -4.3039,  -3.6004,  -2.3925,  -6.3443,  -6.2772,  -2.7108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3955,  -3.4364,  -6.7988,  -6.4899,  -3.3370,  -3.6920,  -3.6178,
         -1.1270,  -7.0584,  -6.2287,  -4.4712,  -7.7087,  -5.3393, -23.6507,
         -5.7066,  -6.7206,  -2.9742,  -7.0877,  -2.4849,  -5.4309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0572, -2.8693, -3.8191, -1.1813, -6.5271, -5.1202, -2.3389, -6.4808,
        -2.7196, -1.9640, -5.2559, -6.7634, -2.0697, -2.6626, -2.5281, -1.3609,
        -4.2692, -6.4518, -6.3089, -3.1071], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6799,  -3.3857,  -6.5273,  -1.9745, -13.0895,  -6.1228,  -2.2603,
        -16.6854,  -7.1885, -13.7440,  -6.9367, -14.5319,  -6.9826,  -3.4329,
         -5.7926,  -4.4306,  -3.0120,  -6.1995,  -2.3381,  -2.4524],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5900,  -1.5257,  -4.9522,  -3.9313,  -3.5179,  -3.1650,  -6.5800,
         -6.8089,  -3.6919,  -3.0880,  -3.6897,  -3.8545,  -7.0597,  -5.8691,
         -4.8615,  -4.8101,  -3.3447, -13.8982,  -6.9487,  -7.5496],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7966, -2.8741, -7.2426, -6.6181, -3.8694, -3.8007, -2.9777, -2.3176,
        -5.7709, -6.7336, -2.8658, -5.2582, -2.8703, -4.2531, -5.9988, -6.4316,
        -3.4205, -7.3429, -4.2034, -5.2133], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3740,  -6.1403,  -5.4945,  -2.9769,  -7.7165,  -3.0062,  -1.7216,
         -6.8096,  -6.3188,  -3.9930,  -2.9542,  -5.3821, -17.8179,  -5.6443,
         -5.6469,  -3.5492,  -5.5424,  -5.4757,  -3.1294, -10.8277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1406, -2.8225, -2.2728, -4.3755, -5.9510, -3.6995, -7.3042, -3.1308,
        -3.2028, -3.6075, -6.2512, -5.2841, -2.3561, -2.7559, -3.8656, -1.8890,
        -4.2033, -5.7696, -2.4651, -3.2921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9139, -5.7630, -6.3635, -3.6302, -6.2279, -2.8946, -6.3506, -1.6164,
        -6.1369, -4.8797, -2.6776, -3.1514, -3.3416, -2.3150, -3.7684, -8.2004,
        -2.4458, -7.9123, -4.0111, -2.7372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4767, -2.7554, -6.0994, -5.8184, -2.7698, -5.4798, -3.6846, -0.7769,
        -6.1914, -5.8681, -3.2337, -3.9434, -1.9102, -2.8367, -4.8160, -6.5653,
        -2.5203, -7.8278, -2.9971, -4.8774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8965,  -6.3824,  -3.2739,  -4.7045,  -2.3109,  -1.8338,  -5.2244,
         -5.7992,  -2.0781, -14.6031,  -3.0620,  -8.4104,  -8.5469,  -4.8826,
         -7.8136,  -2.8968, -18.8406,  -5.6446,  -7.3145,  -6.6277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3073, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8516,  -1.8190,  -1.2954,  -5.9271,  -5.7507,  -3.0630,  -4.1676,
         -3.3327, -14.6975,  -5.2878,  -9.6841,  -5.0857,  -6.9871,  -2.1783,
         -7.9588,  -3.3790,  -1.8192,  -5.3615,  -6.7853,  -2.4645],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3047, -5.9006, -2.4421, -8.1525, -5.4460, -1.9135, -3.4321, -7.1782,
        -3.5918, -5.8238, -4.3263, -1.9410, -5.9333, -5.6175, -2.1603, -3.6324,
        -2.5350, -2.0376, -5.6939, -5.9993], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9129,  -7.7459,  -3.8286,  -4.5554,  -6.7348,  -6.4709,  -3.1676,
         -8.2764,  -4.6041, -10.8313,  -8.1808,  -7.3531,  -5.5295,  -4.3406,
         -3.6395,  -9.3082,  -2.2427,  -3.8600,  -4.2274,  -7.9107],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8270, -6.5137, -7.0290, -5.7481, -6.9178, -7.6963, -4.0488, -6.7996,
        -3.9396, -5.1902, -6.6878, -4.3719, -7.5755, -8.8851, -4.5863, -4.9361,
        -4.8554, -8.4089, -7.8796, -5.2041], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0550, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9865, -15.2847, -12.9182,  -8.2703,  -7.2040,  -7.5692,  -6.5540,
         -5.7920,  -2.2042,  -4.9306,  -4.3562,  -4.0110,  -1.7329,  -6.6082,
         -5.3527,  -2.1648,  -5.0321,  -3.1424,  -2.7907,  -4.4479],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9992, -3.9861, -7.2704, -2.6823, -5.8859, -3.6828, -4.5013, -3.6335,
        -6.7484, -5.5729, -3.7227, -5.7112, -2.4321, -2.7994, -5.5638, -6.1328,
        -2.5442, -4.3959, -3.9005, -2.5690], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1818, -5.9020, -3.0048, -3.8528, -2.6224, -1.1720, -1.9437, -6.5203,
        -2.1934, -5.5717, -3.1560, -4.4377, -2.0612, -6.1740, -5.6719, -2.3130,
        -5.6995, -2.9793, -3.4132, -3.4340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9411, -2.1792, -6.5517, -5.5491, -2.2116, -3.5914, -2.6999, -3.0815,
        -3.9305, -6.7474, -4.7494, -1.9254, -6.4566, -2.6893, -2.5678, -2.7298,
        -6.4139, -2.7171, -8.9227, -4.7798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7215, -6.0100, -2.2510, -5.9839, -3.2686, -3.6104, -4.9754, -6.3886,
        -2.9646, -3.4788, -3.2734, -1.0275, -4.0696, -5.5776, -1.8771, -3.5060,
        -2.9973, -2.5291, -3.0722, -6.4184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8325, -6.4547, -3.4921, -2.6075, -4.1945, -3.5397, -6.8988, -5.9649,
        -2.0277, -4.6152, -3.4673, -2.4924, -6.1051, -6.7951, -2.9922, -6.4992,
        -3.9334, -1.5073, -3.8361, -6.5902], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5869,  -2.2690,  -2.2534,  -6.7136,  -5.9991,  -2.3205,  -9.5581,
         -3.7200,  -2.0414,  -2.0464,  -6.3754,  -2.7308,  -3.5492,  -4.5332,
        -13.3624,  -5.7438,  -7.0436,  -7.9113,  -5.6745,  -7.2928],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6218, -4.1483, -5.3662, -6.4794, -3.0578, -4.4357, -2.6382, -3.7471,
        -3.0303, -6.5649, -2.4943, -4.8092, -2.8203, -3.8935, -4.0519, -5.4549,
        -5.1040, -2.5735, -3.5612, -2.1504], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3105, -10.2921,  -7.2460,  -4.0200,  -7.9466,  -2.3860,  -5.2974,
         -5.1669,  -3.0999,  -5.2342,  -6.6198,  -2.9883,  -3.7605,  -3.0505,
         -2.4975,  -3.4200,  -6.9765,  -3.1209,  -5.7488,  -4.1432],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9026, -6.8969, -5.7968, -3.0839, -3.5927, -2.9766, -1.7488, -4.7802,
        -6.5595, -4.2709, -6.0403, -3.8548, -3.3393, -3.2397, -6.2302, -5.6867,
        -2.3978, -5.3029, -2.4243, -3.2577], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5513,  -3.2221,  -3.5648,  -2.4758, -21.1903,  -6.0092,  -8.2365,
         -7.7461,  -3.5011,  -6.7700,  -1.9324,  -3.9864,  -2.6385,  -1.7895,
         -6.6868,  -5.9721,  -2.7223,  -3.0720,  -2.6040,  -2.8605],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6654,  -4.1409,  -3.0135,  -3.6213, -23.5929,  -6.4936,  -8.1008,
         -4.1753,  -8.4463,  -2.1158,  -3.7357,  -4.7257, -31.2815,  -3.7254,
         -9.7876,  -3.3792,  -6.6317,  -1.9535,  -5.6818,  -3.8429],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2555, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6842, -8.3504, -3.2536, -6.7166, -2.0218, -4.1427, -4.5156, -4.8317,
        -2.7780, -4.5027, -6.1197, -2.0897, -9.5701, -3.9345, -2.8033, -4.6558,
        -7.4141, -2.8164, -5.2728, -4.2450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1967, -5.5647, -4.8839, -3.2849, -3.7725, -2.5300, -3.5642, -3.7097,
        -5.5572, -5.8250, -2.4558, -4.9588, -2.9882, -3.2420, -4.0465, -6.3933,
        -5.2552, -2.6648, -5.9573, -3.3233], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1587, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1277,  -3.4066,  -3.0711,  -5.2615,  -3.8302,  -6.3277,  -6.7933,
         -3.8009, -13.0414,  -5.8896,  -6.7924,  -5.2644,  -5.2148,  -5.1164,
         -2.3039,  -2.4343,  -3.0938,  -2.6236,  -3.3676,  -6.4808],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8121, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6513,  -5.2328,  -2.2149,  -2.1126,  -2.7234,  -1.7746,  -3.0740,
         -6.5793,  -3.6246, -17.2012,  -2.8831,  -2.6338,  -6.0116,  -6.6719,
         -2.3693,  -3.5590,  -3.0840, -21.6570,  -5.0945,  -8.0427],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4817, -1.5674, -6.9744, -4.7913, -1.9892, -5.5046, -5.5034, -1.8220,
        -5.6615, -4.5328, -9.9289, -7.2059, -5.6061, -7.2976, -5.8407, -7.8193,
        -3.8062, -7.7252, -2.9804, -3.4705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2427,  -2.1097,  -3.9350,  -2.4716,  -9.9674,  -5.9393, -11.2870,
         -4.8480,  -8.3063,  -2.4036,  -7.5592,  -5.3358,  -9.7610,  -6.7210,
         -7.9097,  -3.4814, -10.3883,  -3.5472,  -8.6019,  -6.9938],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4361,  -8.5860,  -7.8138,  -2.8151,  -8.4161,  -2.6659,  -7.4107,
         -5.2482,  -3.5953,  -4.6394,  -6.7428,  -3.3248,  -5.2731,  -5.4074,
         -2.7402,  -4.8476,  -6.6343,  -6.6002,  -4.0223, -14.9401],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2116,  -3.8119,  -2.1505,  -4.8987,  -6.3597,  -2.3710,  -7.8300,
         -4.5666,  -2.1209,  -5.4069,  -6.3968,  -2.5286,  -5.2290,  -2.5166,
         -4.3893,  -1.8344,  -6.9012,  -2.2634, -10.4164,  -5.0974],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2251, -3.3315, -6.8009, -2.1362, -6.8729, -2.7950, -1.9199, -5.7393,
        -6.6103, -2.7055, -8.0196, -2.8678, -4.8079, -0.7769, -7.1953, -5.7701,
        -2.3406, -4.4269, -3.2350, -3.6269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1048,  -8.9677, -22.8639,  -5.8743,  -6.6484,  -5.0203,  -2.9318,
         -1.9737,  -5.1700,  -5.7738,  -3.0932,  -3.6811,  -2.2387,  -1.9127,
         -5.0864,  -6.4601,  -2.1935,  -4.1502,  -2.9493,  -4.7696],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7810, -11.2949,  -3.0073,  -1.7134,  -4.7032,  -5.4857,  -3.0761,
         -2.0178,  -3.5794,  -2.2296,  -6.2818,  -6.3260,  -3.4297,  -3.5042,
         -5.8390,  -9.2142,  -2.9682,  -6.1167,  -6.1906,  -4.4320],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1424, -5.8998, -2.0085, -6.4952, -3.8423, -2.9436, -3.3882, -5.9107,
        -3.1839, -4.9921, -2.3882, -2.0062, -3.9438, -6.6259, -1.7447, -5.9841,
        -1.8556, -4.7976, -1.9799, -6.1381], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3830, -3.1705, -5.2109, -3.4086, -4.2700, -2.7754, -6.6140, -4.7283,
        -2.6309, -5.6248, -2.8947, -2.2001, -6.1241, -6.7577, -3.4339, -7.3910,
        -2.7770, -2.2471, -3.6236, -7.3289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0281, -6.9123, -6.3721, -1.8927, -5.5695, -3.6504, -4.2940, -4.6976,
        -6.1019, -6.6960, -3.7191, -5.4431, -3.8331, -1.9134, -3.3363, -6.0602,
        -5.9080, -3.6626, -4.4042, -2.3735], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9919,  -5.0917,  -6.9842,  -2.4487,  -7.3743,  -4.1727,  -0.9616,
         -4.6530,  -5.8868,  -1.1546,  -3.3529,  -2.9400,  -2.0906,  -4.1903,
         -6.2048,  -2.7334, -10.5035,  -3.5090,  -2.4166,  -3.7135],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0904, -2.3103, -4.7705, -2.4422, -1.8892, -2.4758, -6.1652, -5.6258,
        -3.1528, -3.6020, -2.5684, -1.6569, -4.2372, -5.9233, -2.4204, -3.3815,
        -2.5785, -2.8199, -1.5198, -6.0311], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4668,  -2.9625,  -3.9221,  -5.9444,  -5.7013,  -3.7961,  -2.0412,
         -3.4435,  -3.4566,  -5.2326,  -5.9805,  -2.5823,  -4.0012,  -2.6681,
        -16.8352,  -5.2131,  -8.5130,  -6.8095,  -7.6540,  -4.0546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5954, -4.9001, -2.4074, -8.0207, -2.7482, -2.5506, -5.4842, -6.4624,
        -1.5006, -2.1727, -2.7285, -5.0120, -2.1329, -5.6009, -5.5079, -1.6705,
        -3.3822, -3.0272, -1.4892, -3.4372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7112, -9.1046, -5.1484, -6.5728, -2.1304, -5.1965, -4.3711, -4.3269,
        -3.4581, -6.6241, -5.5362, -1.7181, -3.2164, -2.8576, -3.2409, -1.9203,
        -6.7825, -4.5357, -3.1575, -3.4813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7828, -5.2759, -5.7830, -3.2826, -3.2553, -3.0236, -1.9681, -4.0768,
        -6.1846, -1.9926, -5.9661, -2.9337, -2.3692, -2.8087, -7.4500, -1.8127,
        -3.7282, -2.7754, -3.9554, -2.1359], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3180, -1.8855, -6.5775, -3.2270, -2.0858, -3.6156, -6.8719, -1.6641,
        -4.2608, -2.7709, -3.3212, -6.9579, -7.1322, -3.2814, -6.5680, -4.8950,
        -1.6869, -5.7764, -5.5133, -2.6615], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5137, -2.6297, -5.9570, -5.8732, -2.6546, -8.3961, -2.7451, -3.9289,
        -3.0277, -5.9104, -1.7823, -4.6503, -3.0386, -4.6510, -4.7877, -6.9784,
        -6.8057, -4.1821, -7.1786, -3.1444], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3774, -5.5903, -6.3307, -3.9962, -5.6618, -2.6770, -1.7294, -2.9124,
        -5.9433, -2.2868, -4.2586, -5.3441, -2.6403, -3.8605, -7.1448, -6.6157,
        -3.7644, -2.9214, -2.6157, -3.0989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5823, -6.4783, -6.1064, -2.6501, -5.2947, -2.6796, -1.7038, -2.1710,
        -6.2147, -3.7454, -1.2841, -6.3026, -4.1801, -1.8433, -4.5550, -5.8877,
        -1.7045, -4.6270, -6.5326, -4.1473], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4917, -6.0474, -1.5928, -2.7261, -5.6535, -6.0121, -1.8045, -2.8820,
        -2.3462, -2.6379, -3.4930, -6.5413, -5.6206, -3.2248, -4.1218, -2.2474,
        -2.0776, -4.0255, -5.7693, -1.6553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5783, -6.1795, -5.1462, -2.2034, -1.8442, -3.1625, -0.9069, -4.2515,
        -5.8132, -1.6638, -5.5156, -2.8856, -3.8786, -4.3551, -5.9512, -2.6997,
        -5.0446, -3.6681, -2.1432, -3.9051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1284, -5.9047, -3.3652, -3.5947, -1.5170, -6.7334, -4.6904, -2.1096,
        -5.4523, -3.4702, -2.3279, -3.9168, -6.6367, -1.9294, -5.8463, -3.3773,
        -3.1194, -1.8156, -6.8567, -5.4620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2153, -6.5725, -2.3712, -6.7971, -2.6037, -1.8546, -3.8999, -7.1625,
        -7.0159, -4.1082, -6.0426, -8.8864, -3.7945, -2.7552, -6.8672, -5.5593,
        -3.3448, -5.1411, -3.2435, -1.8663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6392, -1.5349, -4.1464, -1.8805, -2.4654, -5.2244, -7.2486, -2.8137,
        -1.7594, -3.7130, -4.4230, -6.6338, -6.2522, -1.1634, -5.9550, -3.7102,
        -3.4707, -5.2423, -6.9642, -5.0809], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1515,  -1.9107,  -1.6752,  -5.8733,  -6.4797,  -2.9585,  -3.6002,
         -3.8754,  -3.7629,  -3.0378,  -7.1388,  -5.5670,  -7.2619, -14.6121,
         -3.6630,  -3.1384,  -7.0225,  -1.8159,  -5.6239,  -3.4974],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6091,  -8.8096,  -1.4930,  -6.9644,  -5.1599,  -1.6864,  -1.5298,
         -5.2936,  -5.4652,  -4.3538,  -3.5063,  -3.5689, -16.4758,  -7.5046,
         -7.4719,  -7.5664,  -4.2977,  -7.3978,  -3.1080,  -6.6376],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8332, -2.3785, -1.4915, -3.8507, -5.8660, -2.7103, -4.5641, -3.2626,
        -2.0453, -4.0203, -6.2500, -2.1846, -5.8643, -2.8451, -3.7402, -1.2836,
        -6.2224, -4.9882, -4.1469, -3.2439], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8556,  -7.7477,  -1.7741, -13.3589,  -5.3063,  -3.3242, -12.7204,
         -6.6415,  -4.3943,  -7.1608,  -2.5491,  -2.4722,  -4.4294,  -5.9934,
         -6.0979,  -3.8494,  -9.8105,  -3.6099,  -2.9619,  -5.3073],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3085, -2.9738, -4.9457, -5.8532, -3.0902, -3.5948, -3.7456, -1.5391,
        -4.3322, -6.3237, -1.5021, -6.0337, -4.1091, -6.9833, -8.1882, -9.2971,
        -7.9752, -7.6072, -6.7188, -3.8728], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.0433,  -3.2268,  -2.8513,  -5.5938,  -5.0368,  -2.6256,  -3.8730,
         -2.1500,  -2.7611,  -5.0140,  -5.8578,  -1.9371,  -3.8692,  -2.3830,
         -2.9289,  -4.2457,  -5.9512,  -1.6862,  -4.9983,  -2.9935],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8164,  -4.0229,  -2.2291,  -2.5142,  -3.2359,  -6.4316,  -6.0039,
         -2.9272, -10.1065,  -2.8897,  -2.1901,  -4.3620,  -5.5086,  -2.0641,
         -3.5784,  -3.0144,  -2.6186,  -4.3409,  -6.6602,  -2.8967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0029, -14.3124,  -7.7347,  -6.4757,  -8.1167,  -2.8893,  -7.5808,
         -2.0564,  -7.0840,  -3.5985,  -1.7736,  -4.1613,  -6.1770,  -2.3650,
         -3.9393,  -2.9756,  -3.4880,  -2.0820,  -6.0687,  -5.5700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3905,  -4.2616,  -3.3507,  -4.1706,  -6.2095,  -6.2275,  -2.2106,
         -4.0397,  -2.7306,  -2.2054,  -4.4488,  -6.4635,  -2.1999, -11.0858,
         -2.6223,  -2.5902,  -5.3793,  -5.3467,  -1.2377,  -4.6103],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1191, -2.5739, -2.2819, -4.9313, -5.5568, -2.1098, -7.6630, -3.1537,
        -1.8902, -3.5336, -6.8180, -5.6586, -3.1325, -3.0420, -2.8816, -2.6405,
        -6.1968, -6.4550, -3.1101, -3.4194], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.8806,  -5.6218, -10.3675,  -6.8536,  -6.0042,  -5.4865,  -5.1678,
         -1.9289,  -6.4047,  -2.7152,  -5.0291,  -5.7966,  -6.9815,  -1.9993,
         -6.0808,  -3.6959,  -2.2573,  -4.1640,  -6.4096,  -4.0486],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6254, -1.2637, -3.6599, -3.4968, -2.4036, -5.9108, -6.7613, -1.3581,
        -4.2090, -4.4563, -3.6856, -1.3579, -6.1307, -4.9278, -2.0978, -3.4173,
        -2.2168, -1.8452, -3.1312, -6.1895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0300,  -3.9259,  -4.1658,  -6.4286,  -2.0669,  -5.0120,  -4.6526,
        -27.0198,  -5.5477,  -9.3160,  -3.3940,  -7.6723,  -3.9570,  -7.8643,
         -4.6658,  -8.7532,  -3.9188,  -5.8149,  -6.1968,  -2.8481],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7429, -4.6173, -6.9892, -2.3039, -6.3410, -3.2974, -2.7620, -5.7242,
        -6.4932, -4.4444, -7.3418, -2.9911, -1.6185, -5.2945, -6.1444, -3.7296,
        -5.7670, -4.1413, -3.1114, -4.4925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9296,  -4.1861,  -7.0055,  -2.9518, -18.8918,  -2.8453,  -2.9449,
         -6.9557,  -5.7376,  -1.6231,  -3.1475,  -2.8410, -30.3897,  -4.1147,
         -7.7776,  -3.1816,  -6.8871,  -1.7162,  -9.4086,  -3.2146],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9487,  -2.0268,  -6.1392,  -6.1498,  -3.8864,  -3.5422,  -2.4577,
        -17.5984,  -7.6226,  -7.6524,  -6.3608,  -5.1346,  -6.5342,  -1.4850,
         -5.5405,  -3.8339,  -8.8657,  -6.5915,  -8.1192,  -7.9891],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2558, -3.7948, -5.8851, -6.9452, -6.8967, -2.9480, -4.4844, -2.6596,
        -3.0173, -6.7283, -5.9688, -1.4669, -3.9842, -3.7307, -3.6356, -2.7386,
        -6.7725, -5.4381, -4.6563, -6.8132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0236,  -7.7446,  -4.6761,  -5.2300,  -3.2855,  -3.7746,  -6.7566,
         -6.6006,  -3.8171,  -7.0432,  -5.2257, -14.4668,  -6.1311,  -7.5468,
         -4.4965,  -6.6636,  -2.8809,  -5.3976,  -4.2590,  -3.5748],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4008,  -6.2003,  -7.1590,  -4.1117,  -4.3220,  -6.7245,  -4.5487,
         -3.8752,  -8.3643,  -1.8112, -10.9641,  -6.5636,  -3.9367,  -5.9411,
         -3.8610, -20.6272,  -3.6197,  -6.9898,  -7.5224,  -4.9222],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3105,  -6.8173,  -2.3877,  -9.7639,  -3.6938, -32.4327,  -5.6407,
         -5.6156,  -5.9869,  -6.5175,  -6.1731,  -1.8568,  -5.5800,  -3.6771,
         -1.7540,  -2.6812,  -5.6219,  -6.1505,  -1.7642,  -3.6735],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2550, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9026, -6.9090, -3.1588, -3.1042, -3.1157, -6.8800, -2.5056, -7.0795,
        -2.8427, -2.1907, -3.7755, -5.1468, -5.9859, -1.6422, -4.0595, -2.5582,
        -1.0471, -4.0180, -5.7051, -1.8397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0989, -2.6978, -7.1530, -3.6539, -3.0324, -3.7835, -5.3944, -5.3018,
        -1.6860, -3.2762, -3.7058, -1.6023, -4.0717, -6.7339, -6.1074, -6.8445,
        -3.5739, -3.1419, -3.3558, -6.5223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2818,  -7.2724,  -3.8566,  -2.9102,  -7.5271,  -6.2478,  -3.2287,
         -4.6398,  -3.3548, -10.9677,  -4.7204,  -7.6115,  -9.7743,  -1.7439,
        -11.4322,  -2.6946,  -6.8903,  -5.6205,  -4.6701, -12.7284],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7056, -1.8888, -4.3041, -2.6035, -4.1865, -4.8154, -5.8673, -5.6538,
        -3.1853, -3.4568, -3.6349, -1.6849, -3.9904, -6.3235, -2.8893, -4.4042,
        -3.3803, -7.0776, -8.7584, -7.2541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6223, -4.0698, -3.0530, -3.8917, -6.6259, -4.3134, -6.6484, -3.6970,
        -2.0475, -3.1511, -6.3850, -5.3199, -2.3349, -7.9670, -3.4348, -2.0074,
        -6.4317, -6.1414, -1.5027, -5.1035], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2967,  -7.2791,  -2.2703, -10.6069,  -1.8433,  -2.9264,  -1.6233,
         -6.1384,  -1.9297,  -5.5977,  -3.0340,  -3.5187,  -3.3136,  -6.3807,
         -5.0574,  -3.1431,  -5.6202,  -2.4184,  -4.1807,  -2.1308],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4882, -3.0358, -3.0553, -7.3332, -2.1361, -4.6936, -3.3461, -2.3818,
        -4.0380, -6.7586, -5.4047, -2.3621, -3.7599, -3.3666, -2.1731, -3.1109,
        -5.8687, -1.2601, -5.6747, -3.6035], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7490, -3.2770, -2.6158, -4.2661, -2.0959, -5.9404, -4.9388, -2.4308,
        -8.9426, -1.8708, -3.0475, -5.6062, -6.4706, -3.6285, -3.4333, -3.8721,
        -2.2375, -3.3169, -5.3410, -4.7942], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9728, -5.5118, -6.3282, -4.4097, -3.1117, -9.3660, -4.8323, -3.0836,
        -1.7661, -6.7496, -5.3611, -2.6789, -8.1856, -4.9348, -3.6239, -3.5228,
        -4.9284, -5.3240, -2.1557, -4.4223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6130, -4.1951, -2.2226, -2.4786, -6.5881, -5.7025, -2.0344, -2.2754,
        -4.1602, -3.1580, -4.1575, -5.6899, -1.5370, -7.3197, -3.9289, -4.1919,
        -5.4903, -6.7209, -3.8294, -5.6837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6976,  -2.1983,  -5.1508,  -3.4059, -19.4946,  -6.5599,  -9.6334,
         -2.6215,  -6.8980,  -1.7516,  -8.9812,  -3.9267,  -4.1082,  -6.2330,
         -6.8043,  -2.2731,  -4.1384,  -3.1665, -23.6954,  -8.2478],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4907,  -3.2081,  -4.9663,  -7.0399,  -5.4229,  -3.2432,  -4.5131,
         -4.5889, -10.5456,  -6.4528,  -7.4413,  -7.6939,  -3.1816,  -7.6482,
         -1.6912,  -9.2316,  -3.6688,  -3.3120,  -3.4628,  -6.7312],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0472, -5.6701, -3.6026, -6.8140, -7.2463, -4.5075, -5.2518, -3.8588,
        -7.0370, -7.9089, -4.6031, -7.8652, -5.1310, -6.5021, -4.2623, -3.4592,
        -5.1136, -4.0439, -6.3145, -6.9446], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4145, -2.3071, -6.6253, -3.0707, -2.0055, -4.9235, -6.8052, -1.7918,
        -4.6492, -3.3028, -2.2799, -2.4152, -6.9907, -2.0973, -5.1481, -2.9529,
        -4.4244, -2.5711, -5.5773, -4.9329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3568,  -6.2336,  -1.5550, -15.5802,  -2.6294,  -3.8100,  -4.0329,
         -7.4261,  -1.8725,  -7.1096,  -3.7124,  -1.2696,  -1.6248,  -6.1731,
         -2.5648,  -3.4104,  -4.5627, -16.6669,  -8.1162,  -7.2615],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3261,  -3.3729,  -2.3845,  -2.6302,  -7.5048,  -6.5116,  -1.9708,
        -12.8562,  -2.7498,  -1.0108,  -4.5098,  -5.6221,  -1.7507,  -4.8123,
         -3.2471,  -3.0987,  -2.5390,  -6.0522,  -5.2040,  -1.7269],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9571, -2.8455, -5.2740, -6.2786, -5.4436, -3.6351, -4.0797, -3.1865,
        -3.4999, -2.1687, -5.8023, -5.2698, -0.5149, -4.2508, -4.3007, -4.1450,
        -3.3603, -7.0666, -6.8927, -3.2879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0423,  -4.0314, -14.2317,  -9.1840, -15.2042, -18.5198,  -7.8390,
         -3.3333,  -9.7027,  -6.1705, -15.9083,  -4.1153,  -4.6199,  -7.6766,
         -7.0933,  -8.0655,  -6.0409,  -7.1872,  -1.4230,  -8.5306],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7827, -1.7306, -5.9669, -4.9383, -1.6950, -7.4087, -2.3329, -1.5686,
        -2.5926, -5.6487, -5.6636, -3.6941, -4.5938, -3.7793, -3.2769, -2.3074,
        -7.1801, -5.2082, -3.0379, -3.9661], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9163, -4.8877, -2.6899, -3.4816, -2.5434, -6.6298, -4.9178, -1.5612,
        -6.8931, -2.8685, -4.3624, -1.2126, -5.1942, -3.2284, -3.9578, -5.5626,
        -2.7312, -3.0720, -5.6198, -5.8497], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5910,  -5.5833,  -2.5595,  -5.4784,  -6.1431,  -2.3158, -11.9622,
         -4.9490,  -3.2272,  -2.5839,  -6.6243,  -5.1783,  -3.4319,  -5.6349,
         -2.2365,  -1.8547,  -4.6827,  -6.4502,  -3.1734,  -5.4254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4615, -8.0598, -4.1672, -7.2738, -1.4088, -5.7220, -3.8411, -3.6985,
        -4.8065, -6.2644, -2.6736, -4.9272, -5.4831, -3.1350, -5.1865, -6.0077,
        -6.9683, -4.2160, -7.6485, -3.0545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2846, -2.3824, -5.0347, -6.1055, -2.5601, -3.0472, -2.4129, -2.3053,
        -3.2893, -6.5791, -5.3094, -2.8334, -2.8287, -3.5061, -3.1512, -6.4627,
        -6.3946, -4.4242, -5.0328, -2.8969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7958,  -1.2275,  -6.2100,  -5.0449,  -2.3797,  -4.7792,  -2.6163,
        -11.5939,  -3.0385,  -8.6151,  -3.7495,  -7.5390,  -2.6924, -18.9714,
         -3.6484,  -2.4924,  -4.0170,  -5.5019,  -5.7572,  -5.4799],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2063, -19.1670, -10.3683,  -6.8907,  -6.3409,  -6.3090,  -7.2553,
         -5.6194,  -5.6205,  -4.9782,  -3.5687,  -7.8591,  -6.0427,  -5.3284,
         -7.2785,  -8.7780,  -4.5392,  -7.6657,  -4.2845,  -1.9061],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0040, -3.7159, -2.0701, -4.2257, -7.4983, -2.5147, -4.1205, -3.5580,
        -3.8871, -4.7901, -7.7446, -6.1116, -3.7393, -5.5197, -4.2036, -8.9456,
        -5.9978, -6.4546, -5.6188, -3.0216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0227,  -6.7210,  -6.3048,  -3.6696,  -3.3670,  -3.0433,  -4.3592,
         -1.9153,  -6.3351,  -2.0710,  -3.7843,  -3.5773, -10.2864,  -8.6301,
         -6.8935,  -6.6616,  -8.6300,  -4.6900,  -7.1654,  -2.4981],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1471, -3.9074, -3.3268, -3.6225, -5.2047, -5.2091, -2.3559, -5.3471,
        -3.2747, -1.6134, -3.9307, -6.6990, -2.0183, -3.1095, -2.7739, -3.3409,
        -3.1148, -5.7470, -5.1824, -2.4701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7189, -3.2022, -4.4765, -4.7692, -4.6714, -5.8905, -6.7012, -4.0587,
        -3.6488, -4.8462, -3.6103, -5.4718, -6.9526, -3.1857, -7.7718, -4.3718,
        -4.1688, -6.9126, -6.4318, -2.3156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5587, -2.6604, -8.4076, -3.3243, -2.1122, -5.6319, -6.4469, -2.2471,
        -4.2469, -1.9233, -4.5292, -7.3057, -6.4631, -2.6760, -3.6129, -2.7421,
        -1.6727, -5.3086, -6.0970, -4.4340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6248, -6.4270, -6.2756, -1.5012, -3.5160, -2.5183, -3.4583, -2.7759,
        -4.9311, -5.0014, -3.4931, -6.1787, -2.4050, -1.8185, -4.4377, -6.3341,
        -3.0786, -3.6995, -3.5522, -3.9555], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6161, -1.9558, -1.7078, -6.0551, -2.1652, -4.9260, -2.3354, -2.0733,
        -2.2031, -6.3136, -5.5475, -2.0468, -2.9948, -3.3266, -1.8186, -3.0194,
        -7.2925, -5.3045, -3.5008, -1.3119], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4440, -6.9900, -5.7381, -4.3443, -7.7610, -7.3169, -6.3969, -3.2442,
        -6.9843, -3.4582, -3.4200, -6.9397, -6.7955, -2.5322, -3.6050, -4.7214,
        -2.8481, -6.9871, -7.0810, -1.8409], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4432,  -6.7285,  -1.3259,  -3.2764,  -3.3527,  -5.9109,  -5.8976,
         -3.1979,  -4.1544,  -3.7739,  -2.4874,  -3.6460,  -6.3707,  -5.9562,
         -3.0087,  -3.0185,  -3.4974,  -3.0620, -13.1745,  -4.9429],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7818,  -5.4140,  -6.4802,  -4.5983,  -3.3908,  -2.5559, -15.2971,
         -5.8931,  -5.2261,  -7.0468,  -7.7577,  -5.4876, -10.4757,  -3.2176,
         -8.5902,  -5.4663,  -2.9498,  -6.7012,  -6.4389,  -2.1164],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5465,  -6.1417,  -1.7367,  -3.8524,  -3.3055,  -1.8669,  -2.2947,
         -6.2003,  -5.0292,  -2.0469,  -2.2114,  -3.6381,  -1.9097,  -2.8658,
         -7.3478,  -2.3066, -14.9419,  -3.2436,  -4.6633,  -0.8789],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2527, -14.1321,  -5.5084,  -9.7656,  -4.8060,  -6.7588,  -1.6459,
         -8.1306,  -3.3826,  -1.7620,  -5.3268,  -6.6208,  -2.2494,  -4.7381,
         -4.8032,  -6.6673,  -7.2636,  -7.6396,  -7.8498,  -7.8771],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0090, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.3484, -2.9174, -2.5903, -4.4853, -6.7570, -2.6982, -6.1013, -3.4818,
        -7.2412, -4.6659, -6.3264, -5.5721, -2.2660, -4.5636, -2.3059, -2.1830,
        -4.0388, -7.0095, -2.6939, -4.4253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5364, -3.2010, -4.7900, -3.9960, -7.0457, -8.2481, -5.8407, -8.7564,
        -2.1147, -9.4414, -2.9188, -8.1703, -5.5699, -1.4875, -1.2737, -5.5905,
        -5.3113, -3.0953, -7.1403, -2.8030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8932, -3.6305, -2.8091, -2.4813, -5.6772, -5.3383, -1.7303, -4.3662,
        -2.0580, -1.6424, -5.1718, -5.8463, -1.5937, -3.9499, -2.8187, -1.8111,
        -5.9268, -5.8328, -1.9613, -4.9030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3285, -6.6623, -3.1018, -6.8075, -2.3975, -3.6278, -2.9918, -6.2030,
        -1.2994, -3.3470, -3.1743, -4.0318, -1.8638, -6.9103, -6.2463, -3.6567,
        -3.1559, -3.1358, -2.3052, -6.8085], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5847, -6.3019, -2.5873, -5.1393, -2.8385, -2.6251, -4.7078, -6.4134,
        -4.1982, -3.2325, -2.5745, -2.8875, -2.9010, -5.8222, -2.4393, -4.0254,
        -3.3849, -2.6527, -7.1154, -6.3450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9168,  -3.9857,  -8.5726,  -2.6874, -11.2806,  -5.0090,  -8.1419,
         -3.3622,  -7.8623,  -2.1365,  -9.6381,  -6.0729,  -2.1402,  -6.5770,
         -6.5754,  -4.3277,  -6.4234,  -4.0116, -20.1556,  -7.0753],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3718, -3.3491, -1.9201, -4.0080, -5.6366, -2.3345, -5.4537, -7.8833,
        -5.3019, -4.4509, -3.3411, -6.5711, -5.7324, -3.3698, -3.0636, -4.1513,
        -2.5984, -5.4487, -6.7671, -2.7118], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1060, -6.3115, -2.6605, -6.4204, -3.3499, -4.8907, -3.7185, -6.4259,
        -6.2801, -3.6661, -2.7752, -2.9704, -1.2490, -5.8864, -5.5336, -3.0736,
        -5.1459, -3.2572, -4.2405, -3.4456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6744,  -6.5059,  -1.6260,  -6.4611,  -4.1139, -25.1181,  -5.1850,
         -8.4418,  -7.1421,  -7.2482,  -3.4743,  -7.9989,  -4.4100,  -4.7719,
         -4.5419,  -3.4976,  -3.7847,  -6.2539,  -2.4198,  -3.7389],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5445,  -6.0537,  -2.1930,  -4.5878,  -3.9894,  -2.5399,  -5.5005,
         -6.0218,  -2.8788,  -3.4772,  -3.4679,  -2.7320,  -9.0758,  -5.9710,
         -1.4940, -12.0741,  -5.6416, -13.5923,  -8.1627,  -7.6307],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7255, -6.5710, -3.4567, -3.0746, -6.8109, -6.0715, -3.1778, -1.3702,
        -3.3063, -3.3614, -6.0355, -5.7198, -2.7866, -8.3266, -3.7731, -6.1269,
        -2.3607, -6.6586, -6.2504, -3.1402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6019, -2.9392, -4.5422, -1.7621, -1.8963, -4.1949, -6.1034, -1.7463,
        -6.9125, -2.9352, -4.8267, -4.1147, -7.0008, -6.5978, -4.2160, -6.4731,
        -2.5318, -4.0653, -3.2617, -6.3866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0143,  -6.8480,  -7.7686,  -4.7584,  -7.9039,  -4.4522,  -3.6848,
         -4.5160,  -6.2548,  -2.5435,  -3.8402,  -4.8042, -21.0457,  -5.3688,
         -8.5835,  -2.9240,  -5.9804,  -5.8956,  -2.3413,  -7.0118],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3753,  -3.0332,  -3.1401,  -5.8970,  -6.5304,  -3.2855,  -5.7109,
         -2.0295,  -2.4128,  -3.7409,  -6.1126,  -2.3327,  -3.9491,  -4.2274,
        -18.7714,  -6.9267,  -7.3792,  -7.0461,  -2.9178,  -6.1362],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.8481,  -8.1719,  -5.0921,  -4.9413,  -7.5820,  -5.1104,  -5.6318,
         -7.0981,  -1.5100,  -4.9714,  -5.7820,  -5.3699,  -2.9051,  -4.7751,
         -2.9418,  -2.3382,  -6.2868,  -6.0356,  -2.2758,  -4.5523],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8369, -6.4770, -1.3675, -6.5738, -3.0276, -1.1226, -4.7620, -6.6658,
        -2.2112, -4.1557, -2.4178, -1.9120, -2.5429, -6.7460, -4.8026, -2.3139,
        -3.2336, -3.1113, -3.4360, -5.5763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5923,  -3.7373, -10.3185,  -3.0889,  -4.0984,  -7.2910,  -2.9641,
         -5.6151,  -5.2695, -31.1475,  -6.9354, -11.1816,  -6.1227,  -5.2893,
         -8.7234,  -7.7484,  -7.0131,  -1.9786, -12.9751,  -6.1882],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1130,  -3.1462,  -5.3489,  -2.8854,  -3.2097,  -2.8236,  -5.8165,
         -3.1262, -14.2621,  -2.7857,  -4.7642,  -3.8529,  -6.4429,  -1.8273,
         -4.2009,  -5.1160,  -5.5753,  -6.4683,  -4.4277,  -7.3067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6858, -3.0119, -3.5226, -2.5924, -3.4773, -4.2849, -6.5687, -2.7090,
        -3.9163, -4.4681, -3.5000, -5.0182, -6.1094, -1.7743, -4.2718, -3.0361,
        -1.6605, -3.4445, -7.8538, -2.5263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5751, -6.2484, -4.7013, -1.8720, -4.3072, -2.2844, -1.8579, -6.0231,
        -6.4107, -1.4462, -3.6410, -2.5259, -2.6552, -3.8795, -5.5592, -2.1106,
        -4.5997, -5.5825, -1.9515, -4.1457], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5559, -15.2971,  -5.8931,  -5.2261,  -7.0468,  -7.7577,  -5.4876,
        -10.4757,  -3.2176,  -8.5902,  -5.4663,  -2.9498,  -6.7012,  -6.4389,
         -2.1164,  -9.5213,  -3.3935, -14.0174,  -6.0303,  -7.4450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3704, -4.3296, -4.7421, -5.2047, -5.3230, -2.2038, -5.9033, -5.8451,
        -2.1054, -6.7983, -2.7883, -2.6249, -4.3264, -6.4564, -2.2633, -6.5952,
        -4.4748, -3.6907, -1.9991, -6.6667], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.6678,  -6.1691,  -6.1689,  -5.0380,  -6.4154,  -6.9296,  -2.3656,
         -4.2426,  -2.9383,  -1.8142,  -2.0843,  -5.4448,  -4.6772,  -3.0708,
         -3.4659,  -2.3157,  -1.9568,  -3.0295,  -6.9393,  -4.7004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4519,  -4.3265,  -5.6917,  -2.5678,  -4.1017,  -3.2636,  -1.4740,
         -5.6891,  -6.8086,  -1.7935,  -4.6576,  -3.9569,  -2.9904,  -6.7511,
         -7.0250,  -2.4011, -10.9373,  -3.3264,  -3.7042,  -6.4018],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0842, -6.0575, -5.9038, -1.5028, -4.0733, -3.6542, -3.5023, -3.9241,
        -5.0821, -5.2813, -2.6536, -5.9171, -2.4613, -2.5428, -4.0888, -6.6903,
        -2.0128, -3.2777, -3.2616, -1.3816], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0071, -2.6466, -3.3266, -3.3829, -4.8659, -0.9251, -7.0317, -5.1385,
        -1.7570, -4.0492, -3.3140, -2.0813, -7.8655, -6.0750, -1.0201, -3.7633,
        -2.9387, -5.8196, -4.8617, -5.8505], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4652,  -7.9877,  -3.8445,  -3.3049,  -8.6987,  -6.5764,  -2.6496,
         -3.9965,  -4.4206, -18.5981,  -6.2742,  -6.7799,  -6.5715,  -4.1312,
         -5.5308,  -6.2391,  -2.0126,  -3.7158,  -3.3594,  -3.8737],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2904, -1.8356, -5.9118, -6.4634, -3.6933, -9.4349, -2.3288, -4.0598,
        -2.9943, -6.8353, -4.6494, -2.4457, -9.8652, -2.9774, -3.7660, -6.5189,
        -6.0369, -3.1222, -4.1907, -5.9469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1706,  -6.2549,  -2.6758,  -7.6947,  -3.0250,  -1.8099,  -5.3689,
         -6.6662,  -2.6320, -10.3585,  -2.3466,  -1.5103,  -5.2945,  -5.0917,
         -2.1674,  -7.5418,  -2.9051,  -2.7084,  -6.8155,  -6.3072],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8880,  -1.9700,  -6.6222,  -6.5190,  -3.3019,  -8.3728,  -4.6949,
        -13.2338, -17.3344,  -7.4683,  -6.6232,  -3.9018,  -7.7128,  -6.0811,
         -1.6837,  -7.6675,  -3.3989,  -2.3202,  -5.5550,  -6.4505],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1900, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0353, -3.7737, -4.4616, -4.2926, -7.3287, -6.2867, -3.6210, -4.1904,
        -3.0974, -3.1926, -6.1930, -5.8505, -1.5941, -6.3259, -2.7129, -2.1564,
        -4.8416, -5.7160, -2.9349, -3.8091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5045, -3.7147, -8.0121, -6.2511, -3.6197, -6.9238, -3.1739, -2.7863,
        -5.6353, -5.7280, -2.9689, -6.0009, -4.2407, -2.9435, -5.0486, -6.1860,
        -2.4976, -4.9739, -3.0309, -2.2039], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8238, -17.0008,  -6.0522,  -7.8083,  -2.5936,  -5.5616,  -3.2655,
         -2.6317,  -5.7207,  -5.9671,  -3.5563,  -5.0405,  -8.4519,  -2.4841,
         -9.6261,  -4.5192,  -4.2891,  -6.4144,  -7.0875,  -3.3313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4728, -4.3807, -6.2885, -2.5896, -8.6794, -2.8696, -2.1184, -3.6213,
        -6.0560, -2.1849, -4.4841, -3.0554, -1.8507, -4.5377, -6.4909, -2.0703,
        -5.9203, -3.8625, -2.2299, -6.5878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9521,  -6.8325,  -6.7269,  -9.4257,  -9.8564,  -5.6619,  -5.1983,
         -5.3474, -12.4088,  -7.3584,  -7.6285, -13.9208,  -9.1553,  -4.1422,
         -3.8398,  -6.0511, -11.3467,  -4.2340,  -6.7411,  -6.7173],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3315, -3.3380, -4.3671, -5.5395, -5.1475, -2.0419, -7.2923, -2.2439,
        -2.4013, -4.7288, -5.2713, -2.4722, -3.1598, -3.1580, -1.6995, -3.0945,
        -5.4471, -3.6200, -2.3170, -5.6482], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8876,  -1.7538,  -3.3443,  -3.3668,  -3.4427,  -6.6569,  -6.2653,
         -2.8232,  -6.4818,  -3.9081, -14.7854,  -6.5110,  -6.6986,  -7.4572,
         -3.8768,  -7.9907,  -3.2114,  -4.4101,  -5.7002,  -4.2247],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4244, -6.8527, -4.9677, -2.0730, -3.9832, -3.7000, -3.3327, -2.7354,
        -5.6234, -4.9998, -2.7414, -4.4258, -2.4629, -2.6372, -2.3239, -5.4354,
        -5.0602, -4.3291, -4.5279, -4.2143], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1300, -4.0002, -8.2096, -1.4084, -5.2898, -2.8502, -2.0457, -1.7467,
        -7.1266, -1.8207, -7.0376, -3.0390, -2.3415, -1.8647, -6.6467, -5.5393,
        -3.6721, -6.2191, -3.2114, -3.1710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4431, -6.6576, -2.4772, -6.7618, -3.6728, -2.5831, -4.1745, -6.5830,
        -2.9333, -5.7904, -3.5033, -3.3235, -3.1373, -6.2643, -5.1304, -1.9132,
        -4.6014, -2.9874, -1.4494, -3.6372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1772, -1.3975, -3.8289, -6.1221, -2.7561, -5.5906, -3.5227, -3.9793,
        -2.0282, -6.8515, -5.3652, -2.3987, -5.1914, -2.7052, -2.5899, -2.8089,
        -5.4230, -3.5439, -3.0775, -4.3143], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4474,  -2.0149,  -4.6712,  -6.7755,  -5.5174,  -3.1750, -19.4670,
         -3.9500,  -4.1012,  -4.5449,  -6.0082,  -5.8534,  -1.7101,  -7.4607,
         -2.1083,  -3.7893,  -3.6598,  -5.4923,  -1.3284,  -7.0692],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6580, -4.1362, -5.7600, -4.4937, -2.4599, -6.4103, -6.5684, -1.4436,
        -6.9793, -3.7665, -2.6505, -3.7156, -6.9977, -6.0396, -3.0272, -4.1477,
        -2.5192, -1.6819, -4.5947, -6.1402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8756, -2.3011, -6.8111, -2.5590, -6.2332, -2.3044, -5.4485, -5.3348,
        -1.7684, -4.2177, -3.3226, -2.5134, -3.3248, -6.3204, -5.3594, -2.2807,
        -7.0062, -3.3502, -1.6877, -3.8372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5265, -12.2070,  -3.4230,  -2.6390,  -3.0990,  -7.4004,  -5.2467,
         -3.3589,  -5.5973,  -2.6707,  -2.3437,  -4.3138,  -6.6316,  -2.5656,
         -5.8849,  -2.7975,  -3.1424,  -4.1377,  -6.6808,  -5.6975],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5792, -6.9992, -1.5983, -6.9008, -2.6385, -2.5894, -3.3666, -6.5911,
        -5.9641, -3.0766, -4.6667, -3.1550, -2.8093, -4.4779, -6.4556, -2.8083,
        -5.5210, -3.2723, -1.9714, -2.8486], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0973,  -5.5327,  -6.7797,  -2.5219,  -3.6752,  -3.0431,  -3.9920,
         -6.2005,  -6.7117,  -3.8638,  -7.2037,  -3.2245, -12.4761,  -7.9437,
         -8.8759,  -7.2315,  -4.3292,  -7.7854,  -2.0168,  -4.6617],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0540, -2.4231, -3.1717, -5.9690, -6.1088, -2.5897, -5.4022, -2.6923,
        -2.4472, -5.6071, -5.8797, -1.5644, -5.3572, -3.0581, -4.0518, -5.7392,
        -6.3886, -2.5526, -8.0751, -5.0807], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6553, -3.0625, -3.3105, -1.2905, -5.3682, -6.3594, -3.0547, -3.5345,
        -2.4419, -2.5128, -3.9575, -6.8398, -4.9897, -2.3866, -3.3573, -3.0704,
        -2.6381, -4.4522, -6.1589, -3.0736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6757, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3983, -2.8444, -2.2666, -4.4398, -0.9229, -6.6906, -4.6583, -2.2193,
        -5.6469, -2.7517, -1.6560, -3.4455, -5.3178, -3.9227, -3.2024, -6.4049,
        -2.8170, -3.3797, -2.9847, -5.8152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3724, -1.6693, -6.6082, -6.4043, -1.6723, -3.6957, -2.8314, -3.4309,
        -3.9873, -6.0131, -6.2167, -3.2002, -4.5491, -3.3123, -5.7227, -3.7881,
        -5.2497, -5.0278, -2.1518, -3.4853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7900,  -2.4512,  -3.2616,  -5.8785,  -3.3519,  -2.7440,  -4.5227,
        -15.7859, -10.9039,  -6.0622,  -7.5866,  -7.0304,  -5.0895,  -5.5075,
         -4.5133,  -7.7538,  -3.3803,  -2.8908,  -3.2716,  -5.7035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5740, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7125, -5.5414, -7.5663, -3.5793, -7.4895, -2.0106, -7.1293, -3.7845,
        -3.5093, -7.8510, -6.2168, -2.7002, -3.9942, -4.9604, -8.0237, -7.7258,
        -7.6889, -8.2459, -3.1524, -7.7306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3827,  -2.5922,  -8.1713,  -6.6979,  -8.9082,  -6.6207,  -3.9584,
        -17.3001,  -8.9045,  -8.4580,  -3.4877,  -7.0227,  -1.2252,  -5.5524,
         -3.9825,  -3.8843,  -7.4728,  -6.4046,  -3.0585,  -5.2350],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7586,  -2.6826,  -3.8086,  -0.7899,  -5.8087,  -4.8716,  -2.7752,
         -3.6490,  -2.3642,  -2.8408,  -2.6099,  -6.3146,  -4.6992,  -2.7620,
         -2.8005,  -3.1701,  -5.5573,  -1.6064,  -7.0045,  -5.5516],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6803, -1.4282, -4.7073, -2.9009, -3.2901, -2.1457, -5.8087, -5.7314,
        -2.4168, -2.8387, -2.9035, -1.5111, -5.2702, -6.1006, -2.0332, -2.7418,
        -4.5331, -3.7283, -2.1044, -4.2591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0009, -2.5315, -7.0892, -1.9523, -6.9725, -5.1356, -2.7067, -5.2723,
        -6.7383, -4.0447, -3.1795, -3.9793, -2.7183, -2.8816, -6.2856, -4.8555,
        -1.6020, -8.2197, -3.0994, -2.0659], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4165, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0762, -4.5140, -2.7978, -3.5567, -2.1701, -5.9485, -5.2624, -1.0195,
        -5.5643, -2.4603, -2.7612, -2.9144, -6.3002, -5.3455, -1.8707, -2.5996,
        -3.0030, -3.2561, -3.5164, -7.1596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9305,  -3.6062, -17.4460,  -4.6654,  -9.0207,  -3.4472,  -6.7515,
         -1.4521, -11.5205,  -3.4891,  -3.0578,  -8.3291,  -6.2039,  -2.1236,
         -5.9285,  -3.0043,  -3.5436,  -5.6824,  -6.8922,  -3.4047],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.1200,  -6.3027,  -7.4170,  -7.6325,  -4.3811,  -6.6391,  -1.3087,
         -5.4093,  -3.3664,  -3.3866,  -7.2019,  -5.9556,  -1.5458,  -4.5601,
         -4.7882, -13.1513,  -6.2604,  -7.3198,  -7.2193,  -3.5820],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2105, -5.0861, -4.2982, -1.2201, -5.9437, -2.4318, -2.9683, -3.6336,
        -6.3871, -4.5944, -2.1942, -3.4610, -2.6110, -1.7819, -5.0398, -5.7486,
        -1.7007, -5.5877, -1.8984, -1.8936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1990,  -6.0973,  -5.9821,  -6.6401,  -3.6542,  -3.3510,  -3.5719,
         -5.5605,  -5.6467,  -5.4984,  -2.1567,  -7.4089,  -3.7480, -13.4085,
         -6.7382, -10.3863,  -7.8282,  -6.9058,  -6.7972,  -1.8484],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3241, -4.8994, -2.0543, -3.2632, -2.8503, -2.3466, -2.7880, -7.0358,
        -5.5093, -2.7504, -5.0623, -2.6555, -1.7086, -5.8025, -6.5865, -1.0591,
        -6.0451, -2.8185, -2.0456, -5.6444], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6114, -6.1555, -5.3539, -1.7936, -2.9613, -2.7961, -2.2527, -6.8345,
        -6.2047, -4.0928, -8.5488, -5.4803, -6.5584, -4.7792, -6.3289, -5.4473,
        -6.4377, -4.5715, -6.4915, -2.6501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7459, -5.0563, -2.3461, -1.9300, -2.8561, -5.6328, -1.7502, -4.2524,
        -3.2771, -3.0183, -2.7422, -6.0483, -4.6867, -1.3422, -4.5899, -2.4520,
        -3.8778, -2.4264, -6.5242, -5.6545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7940,  -4.3384,  -3.3713,  -4.5829,  -3.2398,  -5.3986,  -5.3043,
         -1.3989, -11.0328,  -2.6769,  -2.6468,  -2.6916,  -6.7337,  -5.9162,
         -2.9941,  -6.8733,  -3.2416,  -1.4042,  -7.0673,  -6.3113],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1157, -4.8233, -3.8403, -4.7232, -6.1043, -6.0127, -3.9732, -5.1717,
        -2.1957, -3.1278, -6.3239, -6.5975, -1.9688, -5.4156, -2.7666, -1.9831,
        -2.0985, -6.5329, -5.8638, -3.2965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3645,  -5.5710,  -6.2219,  -3.1237,  -4.0054,  -4.0762, -11.2880,
         -8.3863,  -7.4176,  -7.8746,  -3.7854,  -6.9027,  -2.6393,  -6.2993,
         -3.2531,  -2.6304,  -4.8991,  -6.4883,  -2.4808,  -6.4807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5969, -6.5008, -6.0117, -1.5710, -4.2972, -3.1492, -2.7399, -2.7249,
        -6.9215, -4.9896, -2.4891, -3.0244, -2.7919, -2.2347, -5.8233, -6.7771,
        -1.7206, -6.0256, -2.2099, -3.8542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5395, -2.8337, -5.7886, -2.3110, -2.5914, -3.8265, -6.2064, -1.5562,
        -7.7954, -3.3636, -4.4979, -1.1896, -5.5975, -4.6278, -2.4905, -7.4426,
        -2.2242, -2.6527, -5.9369, -6.3638], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9383,  -6.4658,  -4.6533,  -1.5520,  -3.6407,  -2.4115,  -2.9507,
         -5.9025,  -7.1554,  -2.3515, -11.2025,  -3.3202,  -3.9705,  -3.6890,
         -6.0782,  -5.1365,  -2.2862,  -5.8380,  -2.7874,  -2.0432],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7690, -2.8873, -2.2290, -5.6108, -6.3635, -1.2312, -6.2627, -1.7339,
        -2.3086, -3.4755, -5.4578, -3.9039, -2.7093, -6.8335, -3.4109, -3.1227,
        -5.8319, -6.2150, -2.6318, -3.0402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8203, -3.6064, -1.9952, -3.9040, -6.2932, -1.5695, -7.3670, -5.1630,
        -3.0221, -6.4734, -6.1523, -1.8749, -4.5967, -3.5115, -4.2122, -2.1196,
        -7.1794, -4.8446, -2.4309, -3.4572], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2606, -2.6320, -3.1890, -5.7431, -2.8782, -2.0744, -5.6585, -5.5788,
        -2.5041, -4.6086, -2.8931, -3.2527, -1.5001, -6.3613, -6.0041, -2.7934,
        -2.6746, -2.4302, -2.2261, -5.6215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2621,  -1.9100,  -5.6490,  -5.1480,  -2.6858,  -6.4356,  -3.6019,
        -20.8686,  -4.4428,  -8.5358,  -3.9652,  -6.8692,  -1.9488, -12.0932,
         -4.6706, -10.1530, -11.0911, -15.6829,  -4.4285, -10.0649],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2459, -8.1528, -2.3062, -6.9192, -5.5810, -3.1975, -5.4989, -3.1151,
        -2.9030, -1.1978, -6.4516, -5.6063, -1.3346, -4.3896, -3.0674, -2.6287,
        -3.2536, -6.8898, -1.2427, -4.7635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2574, -6.1651, -1.2770, -2.5748, -3.2117, -2.8825, -1.9957, -5.7806,
        -5.1063, -2.8590, -4.2807, -2.6028, -1.6165, -5.7224, -5.6251, -2.2388,
        -3.8285, -2.6463, -3.1910, -1.9591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3361, -2.6447, -4.3213, -6.0864, -5.9616, -2.7544, -6.0824, -2.7693,
        -3.0244, -5.6763, -6.1885, -1.0755, -5.6139, -2.8632, -1.9110, -2.9212,
        -7.1401, -2.4526, -6.0535, -3.2989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0588, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7032, -3.0517, -2.7932, -3.3639, -1.9932, -5.9767, -5.8600, -2.9746,
        -2.6204, -2.3026, -2.2208, -2.0902, -5.9697, -2.2041, -5.6858, -2.6944,
        -3.7164, -1.6536, -6.5945, -4.7150], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1696, -3.2150, -2.6210, -6.4193, -6.3011, -2.8658, -4.0674, -2.4011,
        -2.4205, -3.6078, -7.2650, -6.9931, -3.5090, -9.4235, -3.1599, -2.2202,
        -5.0341, -6.0701, -2.9618, -4.8080], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9827,  -4.3638,  -6.7137,  -1.6741, -34.9643,  -4.4278,  -1.7599,
         -6.2695,  -6.4246,  -5.5403,  -5.5696,  -5.4892,  -3.0142,  -4.8275,
         -6.1409,  -6.8528,  -3.8829,  -8.4530,  -2.8631,  -3.3086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2399,  -6.1124,  -6.3167,  -3.1698, -14.4784,  -1.9636,  -2.1348,
         -5.2145,  -5.7456,  -2.0941,  -7.8346,  -2.4053,  -2.6394,  -2.1682,
         -6.1797,  -2.1516,  -4.4614,  -3.3827, -18.6988,  -7.3911],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4503,  -4.2637,  -2.4504,  -2.7696,  -2.2996,  -6.0481,  -2.7526,
         -4.0026,  -4.2345,  -2.7181,  -2.1766,  -7.0302,  -1.8579, -10.9717,
         -3.9653,  -2.0340,  -5.5309,  -6.0689,  -3.1237,  -6.9428],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3178,  -4.1251, -13.6559,  -6.6488,  -7.6200,  -7.9535,  -4.0089,
         -6.9649,  -1.2917,  -5.1934,  -3.7487,  -4.6714,  -2.0823,  -6.6957,
         -2.6754,  -2.6880,  -3.0582, -14.4525,  -5.5790,  -7.0066],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.3478,  -5.9765,  -6.0765,  -6.2259,  -7.4769,  -3.0129,  -7.9526,
         -3.2304,  -5.8633,  -3.5382,  -3.8012,  -5.3009,  -6.3052,  -2.3764,
         -8.7064,  -2.5183, -17.4726,  -7.9095, -11.2176,  -7.7963],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3502, -2.2875, -2.8026, -2.9641, -2.9425, -5.7224, -4.7490, -3.3438,
        -6.0044, -1.9305, -3.8693, -3.0009, -6.1841, -3.9750, -9.1344, -6.4499,
        -5.1897, -7.7652, -6.0240, -4.6301], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8890, -5.0989, -5.4442, -0.8285, -4.2019, -3.3878, -3.6354, -3.3156,
        -5.9367, -4.7716, -1.8483, -9.2099, -2.7754, -2.3608, -3.5086, -6.0894,
        -5.0578, -2.5838, -7.5894, -2.3605], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1813,  -3.3815,  -5.1669,  -2.3055,  -2.7728,  -4.0982,  -5.9862,
         -1.2040,  -3.1437,  -1.9807, -17.9694,  -4.1821,  -8.5162,  -3.7018,
         -7.3555,  -1.2397, -11.7514,  -4.5303,  -3.4443,  -6.1631],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0784, -5.2491, -6.3770, -0.8588, -6.0369, -4.2770, -3.2208, -0.8013,
        -5.9235, -4.4972, -3.0890, -4.5264, -2.9986, -2.2874, -1.1099, -5.6811,
        -5.0737, -2.8178, -3.1034, -2.9826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7564, -8.8040, -6.3592, -8.6365, -4.1870, -4.0082, -5.3091, -4.6600,
        -6.8155, -8.0714, -5.2796, -4.4829, -4.4899, -7.1646, -4.0086, -2.9589,
        -4.8988, -4.0377, -8.2659, -6.0966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.8145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5753, -3.1274, -7.6968, -6.1231, -2.1003, -3.4528, -4.7384, -9.6695,
        -4.6003, -6.0966, -5.6316, -7.6632, -5.7584, -5.1905, -5.3344, -2.7935,
        -9.1708, -4.2710, -3.7186, -4.9567], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2835, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5903,  -2.4334,  -4.8634,  -3.1519,  -1.9846,  -3.3120,  -6.1582,
         -2.3874,  -6.0543,  -2.9798,  -3.5338,  -5.4759,  -6.1521,  -2.6124,
         -3.4787,  -4.9355, -10.3152,  -6.5790,  -9.0166,  -7.5781],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5345, -2.1791, -6.0809, -2.2066, -3.2478, -3.8013, -2.7192, -2.5526,
        -6.5451, -5.3272, -2.6420, -5.1366, -1.6758, -1.2339, -4.8543, -5.1570,
        -2.4026, -4.0234, -3.0443, -2.6297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0846, -1.4557, -2.9926, -6.6097, -1.9058, -5.3447, -3.3460, -4.6805,
        -1.2148, -5.6606, -4.5949, -3.4046, -3.9906, -2.9865, -2.1130, -2.0200,
        -6.8259, -4.4929, -1.5698, -3.0474], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3953,  -5.8115,  -8.6425,  -3.8905, -12.0759,  -5.9103, -27.1033,
         -5.2633,  -3.8035,  -5.0117,  -7.1421,  -4.5152,  -8.1453, -11.2418,
         -8.7585,  -4.8499,  -5.9444,  -5.7539,  -7.2825, -12.0671],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4778, -5.3604, -2.3618, -3.7835, -3.4605, -3.5374, -3.5449, -5.5031,
        -5.2105, -1.9129, -4.0859, -2.3300, -2.6173, -5.5204, -5.1930, -2.2630,
        -7.1745, -2.3470, -4.1531, -2.8028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5708, -3.1415, -5.8182, -5.2617, -7.6049, -6.4490, -4.5404, -9.3309,
        -3.1637, -5.8318, -1.9016, -6.1503, -4.7581, -3.7865, -2.8420, -2.8921,
        -2.8828, -0.8150, -6.1740, -5.0776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6471, -1.3146, -7.0694, -3.2210, -3.5480, -1.7416, -5.8928, -4.9103,
        -1.7534, -4.6360, -4.0555, -4.1968, -0.7526, -6.3625, -4.9338, -1.1074,
        -5.1537, -4.1135, -2.3398, -1.9448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7031, -3.6102, -3.4993, -4.0745, -6.0160, -5.7439, -0.7727, -4.4627,
        -2.6686, -4.7160, -5.2027, -6.9437, -2.5028, -6.0251, -2.9536, -3.3893,
        -5.8822, -6.0437, -2.8975, -5.4655], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0545, -3.0758, -4.8405, -5.3001, -1.1537, -4.1523, -2.3237, -1.8098,
        -3.5579, -6.3724, -2.1266, -6.8098, -3.7155, -1.9807, -6.7179, -6.1879,
        -1.4534, -4.6546, -3.8276, -8.4403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4883,  -2.4177,  -0.9261,  -6.6407,  -6.6879,  -3.4358,  -2.7406,
         -7.4434,  -3.0685,  -6.2530,  -6.5132,  -4.3528,  -8.4335,  -2.5097,
        -15.4730,  -6.4410,  -9.2982,  -4.4443,  -6.4384,  -5.4642],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9818, -7.3678, -8.0657, -6.5030, -6.9402, -7.3470, -4.9060, -5.2279,
        -8.5191, -4.7724, -3.1347, -5.2523, -5.1747, -8.6363, -4.9017, -5.6472,
        -6.1294, -6.0424, -7.4080, -4.7332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2327, -2.4732, -1.5723, -3.1742, -6.7152, -1.2662, -6.2192, -3.3031,
        -3.6356, -0.8531, -6.3082, -4.5523, -1.5431, -3.7263, -2.3504, -1.9038,
        -3.0881, -6.8716, -5.4937, -3.4452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6775, -4.3816, -3.0948, -1.9037, -6.2354, -5.4864, -1.7079, -4.2120,
        -2.9692, -2.0560, -3.9081, -6.1103, -1.9325, -9.0854, -3.6938, -3.4166,
        -5.4734, -6.4308, -3.2282, -7.7634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5054, -13.1785,  -6.5923,  -3.9164,  -7.2266,  -4.6649,  -9.2094,
         -3.0864,  -7.1222,  -4.2512,  -4.6783,  -1.3942,  -4.3665,  -5.4768,
         -0.6083,  -6.5444,  -4.7550,  -3.3645,  -2.6009,  -6.0086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0314,  -7.1441,  -6.1142,  -3.3794,  -4.8094,  -3.5071, -22.3533,
         -4.8075,  -7.3898,  -4.8587,  -5.7558,  -5.3183,  -1.1254,  -6.1918,
         -3.2217,  -2.1751,  -2.7247,  -6.1415,  -4.9109,  -2.2696],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7294,  -4.2403,  -7.5058,  -6.0495,  -2.8917,  -6.3354,  -4.8251,
        -13.0717,  -6.2269,  -8.6404,  -7.5633,  -3.8400,  -8.9335,  -1.4608,
         -7.2241,  -4.9109,  -2.0228,  -2.3271,  -7.7583,  -1.9139],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9725, -7.5184, -2.5287, -3.0127, -6.8085, -6.2776, -2.1809, -7.1107,
        -2.7989, -2.4982, -5.8916, -5.5366, -1.2647, -3.3306, -2.2400, -2.3604,
        -4.6047, -5.5144, -2.4055, -2.7310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6833,  -3.8034,  -1.7768,  -5.4608,  -5.6310,  -4.5493,  -6.4834,
         -6.3551,  -3.6867,  -5.7129,  -3.0832,  -3.9018,  -7.2724,  -5.8729,
         -3.9273,  -5.3099,  -2.7483, -18.6520,  -6.6709,  -8.9406],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5805, -2.7268, -2.8684, -3.2429, -6.2287, -5.2770, -1.5380, -3.9862,
        -1.9807, -5.2451, -7.0919, -5.8885, -1.8094, -6.6424, -3.1629, -1.4240,
        -7.0455, -5.3423, -2.2398, -7.7276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8224, -2.1824, -4.3943, -2.3705, -4.3801, -8.0487, -5.1545, -4.9157,
        -2.5634, -4.4874, -2.8220, -2.2853, -1.0288, -6.2203, -4.4762, -2.0451,
        -5.0660, -2.4265, -2.5632, -5.4073], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0960, -3.9994, -3.6172, -2.1990, -3.2557, -1.7778, -5.6901, -4.9864,
        -1.5769, -4.8188, -3.3095, -3.4158, -3.4035, -5.7904, -5.3451, -1.5324,
        -3.6011, -2.6635, -2.2754, -5.6178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4397,  -6.4332,  -4.5429,  -2.6921,  -7.6950,  -6.9376,  -3.9186,
        -12.1789,  -4.7472,  -1.7588,  -3.3235,  -6.4727,  -2.7741,  -4.6759,
         -3.2581,  -3.1821,  -3.5430,  -5.9344,  -5.8175,  -3.4190],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5838,  -1.8912,  -6.2968,  -6.1780,  -5.9693,  -6.0841,  -8.3274,
        -10.2802,  -4.5206,  -8.1897,  -5.4116,  -5.8054,  -4.9823,  -1.4364,
         -4.2791,  -6.0538,  -6.5998,  -3.6249,  -2.4851,  -2.1447],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.5042,  -2.6273,  -4.3930,  -6.6507,  -6.9275,  -9.4120,  -8.4290,
         -4.1461,  -6.7283,  -5.1300, -24.7924,  -7.4765,  -8.9632,  -7.8684,
         -2.4259,  -8.1988,  -1.7594,  -7.6657,  -5.3310,  -3.5091],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0047,  -2.9506,  -3.4396,  -6.8150,  -6.4031,  -2.9755, -11.1048,
         -2.9245,  -6.1919,  -1.5184,  -6.6742,  -4.5285,  -1.6681,  -4.0660,
         -2.7875,  -3.0637,  -3.7196,  -6.3033,  -6.7758,  -3.5954],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7841, -3.8809, -4.7735, -6.6455, -6.7124, -3.7542, -8.8407, -3.0714,
        -3.8345, -4.6050, -5.4411, -1.2413, -6.1686, -2.7308, -2.0918, -2.5020,
        -7.0167, -1.9613, -7.7171, -2.8583], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1319, -11.4541,  -7.8771,  -7.3790,  -8.6277,  -4.6133,  -6.3575,
         -5.1269,  -4.6780,  -3.5482,  -2.1871,  -3.6238,  -6.2141,  -1.6924,
         -2.4291,  -2.5605, -17.9598,  -4.1743,  -8.6058,  -2.5385],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7053, -1.3218, -3.9735, -6.4056, -1.4513, -3.8841, -3.9516, -2.2703,
        -4.9900, -6.2540, -6.1802, -2.5832, -7.6221, -2.3245, -3.5770, -2.6493,
        -6.1714, -2.2834, -4.5157, -2.7879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5029, -1.5989, -4.6541, -2.4859, -2.8684, -5.6307, -6.3787, -2.5804,
        -6.1137, -3.1927, -4.1078, -4.4928, -5.8964, -5.1176, -1.5460, -2.6978,
        -2.3223, -1.4460, -2.1306, -6.0851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2802, -3.6014, -2.1073, -2.8875, -6.9411, -5.4009, -2.6601, -2.9446,
        -2.2732, -1.6644, -3.0652, -5.9025, -1.5492, -6.2209, -3.4304, -1.5821,
        -3.0308, -7.5173, -6.0516, -3.5367], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6218, -2.9445, -6.1573, -4.4406, -5.9539, -1.7061, -5.0577, -3.5444,
        -2.6591, -2.8381, -8.0159, -1.5941, -9.7727, -4.0914, -2.7709, -0.7376,
        -5.8747, -5.0660, -3.9787, -9.2814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6051, -1.7746, -5.8849, -3.2350, -5.3519, -0.5164, -6.6441, -5.4700,
        -1.7933, -3.6334, -3.4300, -2.5649, -3.9664, -5.6343, -6.0990, -2.7418,
        -4.1851, -3.8717, -2.1609, -5.5759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.5259,  -7.1511,  -4.3523,  -6.4845,  -2.3104,  -3.2166,  -4.3555,
        -10.0429,  -3.7924,  -3.2488,  -8.2015,  -2.2565,  -6.3508,  -2.8665,
         -2.5251,  -2.6397,  -6.4573,  -5.0988,  -1.7748,  -6.4008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7032, -1.3999, -3.5667, -2.6280, -4.9525, -6.6042, -6.9422, -2.3700,
        -9.9048, -3.8186, -3.0013, -3.9974, -6.6470, -5.2811, -3.7312, -3.5325,
        -3.4203, -5.4625, -2.7583, -5.5702], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3076, -7.4953, -1.1021, -4.7346, -3.8595, -1.9228, -3.4641, -6.1493,
        -5.7664, -1.5900, -6.4867, -3.6358, -3.0295, -3.3564, -5.7484, -3.0634,
        -4.8921, -2.2999, -2.0409, -3.7688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0720,  -3.8874,  -4.9161,  -6.9726,  -3.9269,  -6.7137,  -5.7854,
         -1.8165,  -6.2898,  -5.9133,  -1.1862,  -7.1017,  -2.8215,  -1.7211,
         -2.9179,  -6.9747,  -6.0233,  -3.7322,  -6.9041,  -1.9882],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8431, -1.9624, -2.6619, -6.8482, -2.1074, -3.4094, -2.6013, -4.5316,
        -6.8233, -7.0969, -3.7061, -3.6736, -2.8347, -1.9804, -2.4528, -7.2213,
        -6.2392, -3.0748, -4.1674, -3.3152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4887, -3.2431, -1.7845, -5.1460, -6.2882, -1.2311, -4.7309, -1.5049,
        -2.0972, -5.7026, -5.5813, -1.8323, -6.4100, -3.1205, -1.1298, -6.0170,
        -6.0122, -1.8422, -3.6694, -1.9742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8940,  -2.7701,  -5.1057, -12.5352,  -7.5931,  -8.3742,  -6.8916,
         -3.9710,  -6.2549,  -0.7182,  -5.4897,  -2.8314,  -3.1581,  -4.3332,
         -5.8468,  -1.9934,  -5.8135,  -1.9725,  -2.6258,  -4.5640],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8975,  -5.0656,  -4.3573, -21.1009,  -3.8562,  -7.5069,  -5.2017,
         -6.2328,  -1.9816,  -4.3640,  -3.7989,  -4.4008,  -2.2579,  -5.3140,
         -4.6502,  -0.9562,  -5.4488,  -3.3550,  -2.0336,  -4.2341],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6354, -2.1416, -6.1372, -2.3817, -1.1555, -5.5803, -5.9463, -1.2193,
        -3.8176, -3.1443, -9.7476, -7.9690, -9.8421, -7.4989, -4.7048, -6.7805,
        -4.3285, -5.4008, -4.8336, -1.8054], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4306, -4.4203, -5.1880, -3.5104, -3.3994, -6.1280, -3.5842, -4.8457,
        -4.0711, -8.6616, -7.9673, -5.7557, -8.1431, -3.8097, -7.3252, -1.0782,
        -8.8795, -4.5844, -2.0041, -5.0091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4244, -3.5425, -3.0985, -6.5421, -5.7344, -2.6446, -4.7401, -3.2867,
        -3.8018, -3.1794, -6.9949, -1.2888, -6.4004, -2.7961, -4.4017, -5.4522,
        -7.1650, -2.7887, -8.0341, -3.6463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.4781, -3.4544, -3.0856, -2.4021, -6.5193, -4.7833, -2.1398, -2.6844,
        -2.5390, -3.3448, -1.4488, -5.6940, -4.2396, -4.3012, -3.7165, -3.5746,
        -4.1604, -4.5888, -6.8761, -2.7879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.1097,  -7.1616,  -7.0711,  -6.2706,  -2.3801,  -7.0544,  -2.0405,
         -8.2515,  -4.8416,  -2.0745,  -6.0061,  -6.6439,  -4.0768,  -5.0558,
         -4.6066, -11.5884,  -6.6578,  -7.8965,  -6.8501,  -2.3422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7690, -2.9002, -4.8381, -2.8858, -8.9926, -6.5292, -6.1756, -8.0009,
        -2.9557, -8.2933, -1.3304, -5.2357, -3.2999, -3.7823, -5.0835, -6.1001,
        -2.0649, -5.3843, -2.0285, -3.2246], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0679,  -6.1555,  -3.5475,  -4.2791, -10.1115,  -3.0073,  -5.6228,
         -6.6788,  -2.1241,  -6.1104,  -3.4913,  -3.9206,  -5.0956,  -7.3833,
         -6.6258,  -3.6340,  -3.1185,  -2.0748,  -3.6287,  -2.0598],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7369, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4925, -2.5643, -1.9005, -3.6693, -6.0936, -3.8788, -4.5474, -3.3486,
        -3.2963, -1.6148, -6.6708, -4.5761, -2.2053, -3.0592, -2.7655, -2.3822,
        -3.3282, -5.8972, -2.7418, -4.6271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0233, -1.1351, -3.2074, -3.5005, -4.5007, -1.5797, -6.5626, -5.4907,
        -1.9429, -5.6322, -2.7099, -2.1865, -7.2632, -5.9580, -5.8251, -7.2083,
        -3.8006, -4.5641, -4.9472, -5.4704], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4254, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8728, -3.2352, -4.2208, -5.2076, -5.8874, -5.9856, -3.0449, -2.9655,
        -3.0809, -2.6615, -4.3504, -5.4043, -2.4875, -9.3528, -1.9219, -3.8259,
        -5.6502, -6.2620, -2.6299, -5.2850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0809, -2.6432, -6.7692, -4.7395, -6.8157, -4.6323, -5.9617, -3.4420,
        -2.9130, -3.9203, -6.2288, -1.1770, -9.7531, -4.7051, -4.4071, -3.0585,
        -5.1223, -5.4102, -2.7772, -3.5029], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9117,  -1.0945,  -6.1689,  -3.2357,  -5.2347,  -2.3815,  -5.8837,
         -4.8088,  -1.3365,  -2.7064,  -3.0002,  -2.5477,  -5.3921,  -5.9631,
         -2.8943,  -4.7462,  -4.2187, -14.6328,  -6.2006,  -9.8926],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6312,  -5.7003,  -5.2050,  -4.1464,  -4.3037, -12.6680,  -7.5650,
         -6.3564,  -7.9097,  -2.0618,  -7.3266,  -2.0120,  -7.1561,  -3.1334,
         -2.8349,  -3.5415,  -6.1878,  -2.5486,  -5.8758,  -3.4512],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0378, -7.9634, -6.4513, -2.7842, -4.8357, -3.3240, -5.8321, -8.6029,
        -7.0351, -8.0962, -4.4704, -7.9779, -4.2115, -6.0165, -4.5558, -4.2359,
        -3.7836, -6.4481, -2.9468, -6.2279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8184, -2.1526, -2.6600, -2.9503, -4.7295, -3.6319, -4.1332, -5.8689,
        -7.6827, -3.3710, -4.4472, -6.4119, -2.3764, -4.4415, -2.4158, -2.2831,
        -2.5837, -5.7882, -4.9325, -2.4373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4440,  -6.5908,  -5.8017,  -1.3986,  -1.9500,  -5.6723,  -7.2423,
        -12.3107,  -4.5081,  -7.3521,  -7.4002,  -8.1779,  -3.9571,  -9.6927,
         -3.7713,  -7.4215,  -5.3267,  -2.7522,  -5.0328,  -6.7028],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8989, -7.7685, -4.9413, -6.6994, -1.8440, -5.7869, -5.6707, -3.5210,
        -6.0431, -5.9381, -3.2181, -7.7044, -3.6482, -3.0364, -3.3362, -4.9941,
        -6.1527, -6.2448, -4.1657, -3.9968], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0459,  -8.3391,  -7.4381,  -7.9620,  -4.2745,  -8.0315,  -0.9624,
         -6.2597,  -3.6304,  -2.1028,  -6.4135,  -6.1811,  -3.1166,  -5.5917,
         -5.0154, -14.5012,  -6.0931,  -8.9977,  -5.0979,  -9.4214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4804, -5.5112, -1.5290, -7.2971, -5.1206, -3.4687, -4.0581, -4.3519,
        -2.7340, -3.1549, -7.2737, -2.2373, -6.5608, -3.7901, -3.3235, -1.0483,
        -6.4623, -5.1277, -1.6203, -4.6880], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3360, -3.0670, -6.8538, -1.7364, -3.7339, -3.2144, -3.1243, -3.6892,
        -6.4402, -5.0893, -1.7233, -4.7241, -3.5326, -2.2972, -5.6139, -6.2505,
        -2.5855, -6.1308, -2.3391, -1.9812], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7031, -2.8017, -2.2096, -2.0679, -6.3358, -1.1782, -5.4511, -3.0379,
        -2.2743, -1.8977, -6.5664, -1.7632, -5.8400, -2.2436, -2.4573, -2.6318,
        -5.7776, -4.9651, -3.1259, -3.9478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4032, -3.3397, -6.2797, -4.9162, -2.3105, -5.1422, -2.3553, -4.2247,
        -3.1810, -7.0336, -5.0634, -2.0669, -3.5791, -2.3453, -2.1959, -4.3669,
        -6.0658, -1.7609, -5.9544, -3.9366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1454,  -6.1035,  -6.4027,  -5.0959,  -4.9778,  -5.9227, -13.0167,
         -5.3952,  -6.8506,  -6.8290,  -9.6249,  -4.4172,  -7.5826,  -4.1474,
         -6.6883,  -4.9562,  -2.2353, -10.3619,  -6.2034,  -4.5590],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3737, -1.0764, -6.9866, -5.0112, -2.1533, -3.5135, -3.1009, -3.9687,
        -1.4024, -6.4122, -5.3185, -1.1641, -5.5335, -3.4358, -6.5317, -3.2618,
        -5.8308, -1.2393, -1.9385, -6.2243], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7274,  -5.1398,  -2.6905,  -2.9639,  -2.7081,  -2.1657,  -2.3682,
         -6.4701,  -2.5562,  -8.4731,  -4.0092,  -1.9335, -10.3705,  -5.6040,
         -3.4379,  -2.9111,  -4.2755, -16.7420,  -4.8514,  -8.6647],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9852,  -6.3473,  -2.5615,  -7.4635,  -2.1717,  -2.7705,  -5.7888,
         -6.0873,  -2.2968,  -3.0693,  -4.7720, -16.6741,  -4.1154,  -7.6031,
         -2.6820,  -6.7988,  -1.3311,  -4.3554,  -4.1072,  -7.6803],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8729, -11.5121,  -7.4434,  -6.9739,  -7.4701,  -3.7071,  -7.3705,
         -1.0598, -10.8021,  -4.4218,  -2.8342,  -6.2240,  -6.9315,  -4.2276,
         -6.4054,  -4.5077,  -1.8446,  -4.9628,  -5.3485,  -1.8613],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2359,  -3.1209,  -2.6587,  -8.6958,  -6.8570,  -2.3071,  -5.2573,
         -3.3907,  -3.1110,  -1.3504,  -6.5722,  -5.5852,  -0.9783,  -5.1191,
         -4.6819, -15.2437,  -6.3549,  -4.6826,  -7.9708,  -7.9634],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1925,  -4.6664,  -2.0347,  -1.9289,  -3.0672,  -6.3613,  -2.9040,
         -3.8684,  -4.8087,  -1.5635, -10.1187,  -6.4701,  -6.5775,  -7.5396,
         -1.4532,  -6.1338,  -5.6218,  -6.3884,  -6.4248,  -4.6947],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9744, -3.2049, -3.7739, -3.6092, -4.0712, -6.5008, -6.2447, -3.3962,
        -3.8377, -2.0663, -3.6730, -4.5720, -5.8473, -1.0147, -3.1887, -3.7955,
        -4.0317, -1.8627, -6.3768, -4.8103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9735, -6.6207, -4.5780, -1.7872, -3.2913, -3.0002, -3.2575, -2.9283,
        -5.7270, -5.3251, -3.1272, -3.2911, -3.7729, -9.9665, -7.6074, -5.2664,
        -8.0436, -8.0151, -6.7898, -3.1031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3099, -6.0390, -5.5570, -2.5993, -5.6807, -1.8982, -3.6344, -2.8311,
        -6.1320, -4.4543, -1.7404, -3.4235, -3.5088, -1.8358, -5.3419, -6.2024,
        -1.0595, -9.4867, -2.5012, -4.5111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0874, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7788,  -5.4091,  -1.7658,  -9.1502,  -3.8973, -13.3202,  -7.4668,
         -8.6522,  -7.3266,  -3.4830,  -6.7927,  -1.2756,  -6.9573,  -3.9898,
         -5.0488,  -3.6100,  -5.4951,  -4.2848,  -4.9500,  -7.5482],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4569, -4.1427, -3.1764, -3.9054, -4.8452, -6.9068, -4.1279, -6.1654,
        -2.4483, -2.0966, -6.1141, -6.0894, -1.1390, -8.6803, -3.5199, -1.4191,
        -4.3181, -6.4039, -1.9016, -4.3079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3096, -2.5262, -6.4312, -3.6256, -5.0278, -4.9406, -7.5659, -3.4407,
        -3.7226, -3.2251, -3.5706, -5.6520, -6.5772, -3.3127, -6.8659, -4.1267,
        -4.7977, -6.5178, -5.8709, -2.4584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6252,  -6.5977,  -6.9224,  -2.2985,  -5.5290,  -4.1863, -21.6153,
         -6.5785,  -9.5887,  -8.2532,  -4.0299,  -7.3352,  -2.4557,  -6.9894,
         -4.0289,  -2.3906,  -6.8409,  -6.2521,  -2.3759,  -3.9826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3025,  -6.3040,  -5.9238,  -3.3957,  -9.0040,  -4.3646,  -0.8613,
         -4.0487,  -5.7933,  -2.2080,  -3.5012,  -2.7405,  -2.9225,  -6.5376,
         -6.2709,  -2.3737, -11.5897,  -2.7950,  -2.5137,  -5.7656],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.2706,  -7.1996,  -7.3088,  -4.8603,  -6.5183,  -1.8888,  -4.8026,
         -3.5863,  -3.2971,  -7.8608,  -6.6802,  -1.6580, -10.1791,  -4.1041,
         -1.8570,  -3.9304,  -5.2552,  -1.4171,  -5.6776,  -2.1935],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4422,  -6.2187,  -3.3813,  -3.6886,  -7.3785,  -6.7679,  -2.0454,
         -4.4556,  -2.5812,  -2.1350,  -5.8225,  -6.4025,  -2.2541, -11.8279,
         -2.8086,  -1.9796,  -3.0556,  -6.2904,  -6.9677,  -3.6976],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5791, -1.1938, -3.9600, -3.2148, -3.5279, -4.6973, -6.7864, -2.0987,
        -6.3175, -2.6929, -2.5804, -4.0538, -6.6130, -5.7811, -2.8882, -2.8766,
        -3.5657, -5.1421, -2.4278, -6.4989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3865, -4.9531, -3.2998, -2.1170, -3.3128, -7.1018, -6.0905, -3.5721,
        -2.9999, -2.4714, -1.7580, -2.8101, -6.0936, -2.3823, -4.5748, -3.2055,
        -2.8264, -4.2141, -6.4929, -2.8686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9073, -3.3884, -6.4511, -6.2402, -4.3505, -2.8155, -3.1066, -9.8268,
        -5.4341, -7.2105, -6.9096, -7.7379, -7.1873, -9.4079, -3.1650, -7.9977,
        -5.3530, -1.7233, -1.8421, -6.9567], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2422, -2.8624, -3.2535, -5.4765, -5.9922, -1.3858, -8.3773, -4.1957,
        -1.5206, -1.9533, -5.7007, -5.2618, -1.4359, -3.4692, -2.2925, -3.3931,
        -0.9804, -6.4863, -5.0132, -1.9006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3777, -6.1800, -0.6843, -8.8157, -2.7631, -2.0700, -4.3797, -5.6934,
        -1.2278, -3.8279, -3.1542, -2.4243, -3.6409, -5.6803, -1.0761, -5.0734,
        -6.0039, -2.3397, -2.5492, -6.2826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0792, -6.4993, -6.4555, -3.0136, -2.3827, -2.9636, -3.5712, -1.8917,
        -7.1056, -2.9384, -3.3785, -4.4948, -3.0821, -2.8996, -6.9339, -2.1496,
        -2.9772, -3.2614, -2.7745, -3.3365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6844, -4.8168, -6.0367, -5.3433, -2.0655, -2.8206, -2.4670, -2.2471,
        -4.2761, -5.3582, -1.8174, -5.0009, -2.4522, -1.6134, -5.2292, -5.5981,
        -2.7393, -1.7562, -3.4820, -3.5732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2171, -2.5447, -2.5218, -5.8890, -4.6244, -5.5703, -4.3353, -2.9919,
        -3.9381, -4.6934, -4.6215, -5.2256, -1.2083, -4.2156, -1.6645, -1.7803,
        -3.4025, -5.7008, -1.4447, -3.9263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6058,  -2.6768,  -5.6608,  -6.4210,  -1.8188,  -5.7147,  -2.9115,
         -4.0675,  -6.6497,  -7.0209,  -3.3076, -10.0083,  -3.6767,  -4.1288,
         -2.9536,  -4.2569,  -3.4944,  -3.6342,  -5.7268,  -9.8862],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8714, -2.9552, -3.5074, -2.8309, -7.2530, -5.8509, -3.5559, -3.2213,
        -3.6259, -2.4799, -5.4749, -6.4336, -1.3567, -6.2063, -3.0874, -4.3172,
        -0.1617, -6.5750, -5.0199, -1.2941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3418, -7.3504, -3.7373, -1.2747, -5.9218, -5.9814, -2.3745, -3.9123,
        -3.1573, -2.5431, -2.8197, -6.9148, -1.9546, -5.6947, -3.7918, -3.7412,
        -0.9734, -6.6129, -4.8460, -1.3419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3641, -4.3089, -5.6750, -1.8352, -7.4004, -2.8906, -1.9570, -5.3280,
        -5.0771, -1.6726, -1.9994, -2.7843, -3.1829, -1.4420, -6.3814, -4.0873,
        -6.7214, -5.9508, -4.9801, -3.4848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9290,  -1.9515, -13.5948,  -3.8780,  -2.7592,  -7.1935,  -6.1595,
         -2.5612,  -5.0663,  -5.4837,  -8.8946,  -6.1231,  -7.5395,  -7.7794,
         -6.0515,  -6.5156,  -2.2456, -13.5156,  -4.9266,  -1.7576],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2028, -3.4166, -3.5338, -3.9975, -5.1123, -5.3345, -2.2106, -3.7714,
        -2.7843, -2.2223, -4.7139, -6.7796, -4.3580, -2.2526, -3.8398, -2.4589,
        -2.7551, -7.0598, -5.9435, -1.1947], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8971, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8864, -6.6419, -7.0596, -4.1316, -8.1158, -0.8656, -5.7654, -3.8291,
        -2.0695, -1.6758, -5.7295, -5.2791, -1.9668, -4.5706, -2.2362, -4.0361,
        -5.0673, -5.4162, -0.0643, -5.2464], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.7818, -3.1304, -1.6132, -4.4191, -5.9194, -1.7111, -4.5026, -3.2200,
        -1.5871, -3.3150, -6.1451, -1.7689, -4.6471, -2.5449, -4.9708, -0.0469,
        -6.3602, -4.7541, -0.9442, -2.8468], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8463,  -2.8942,  -4.5668, -15.4938,  -5.0094,  -6.4134,  -6.0884,
         -7.9330,  -4.2377,  -6.4555,  -1.1206,  -4.9907,  -2.7470,  -5.6216,
         -3.9312,  -7.6077,  -5.1771,  -1.8074,  -3.3704,  -4.3083],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5806,  -1.6497, -11.1745,  -5.0161,  -1.8122,  -4.4148,  -6.0620,
         -2.0999,  -5.6519,  -3.0114,  -1.5243,  -5.6600,  -5.5449,  -5.8854,
         -4.9152,  -2.5391,  -3.0214,  -3.8286,  -6.2688,  -2.8759],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6718,  -2.3501,  -9.1098,  -5.9298,  -3.7254,  -5.4843,  -3.7813,
        -12.9647,  -4.8359,  -7.7061,  -5.5022,  -8.0173,  -1.2008, -11.1418,
         -1.8763,  -8.2120,  -4.9211,  -2.9789,  -1.8953,  -5.1779],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2231,  -5.5975,  -2.2963,  -2.7589,  -3.8062,  -2.0373,  -6.3563,
         -6.3518,  -1.1569,  -3.9387,  -3.3012, -23.7827,  -7.1315,  -9.6848,
         -7.6495,  -4.0365,  -6.6202,  -0.3946,  -4.6964,  -3.8592],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9079, -7.9276, -3.0087, -8.3228, -1.1686, -9.6004, -3.3920, -4.5714,
        -3.0405, -6.5673, -4.6194, -1.0995, -6.3928, -2.6498, -4.8863, -6.2892,
        -6.3452, -1.3367, -4.6736, -3.7914], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1599,  -4.6559,  -2.2876,  -5.8068,  -5.3429,  -5.8221,  -5.8872,
         -3.8408,  -3.9263,  -4.7758,  -6.1063,  -6.3659,  -3.2748, -12.0393,
         -2.6707,  -4.4392,  -2.4870,  -6.4731,  -1.2550,  -8.0212],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6759, -3.6786, -3.1165, -3.7761, -5.3731, -2.5882, -5.8518, -3.2711,
        -1.9329, -7.6012, -5.0972, -1.1264, -4.0678, -3.5352, -3.8536, -6.2518,
        -6.2739, -2.4178, -7.3164, -1.9107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4978, -4.5506, -3.4072, -7.6042, -5.3567, -4.8027, -6.4301, -6.1034,
        -6.9657, -2.5192, -5.7770, -5.9623, -3.1050, -7.8413, -6.3961, -2.9159,
        -2.7509, -5.6011, -5.7800, -4.3149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6859, -3.6428, -5.3155, -1.4563, -6.5292, -5.3548, -0.8801, -4.7811,
        -4.0657, -4.2643, -2.8559, -6.5896, -5.3704, -3.1093, -6.2681, -2.4997,
        -4.3124, -2.4357, -6.5696, -4.3683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0385, -6.2008, -4.0913, -4.2395, -8.6404, -7.1104, -5.1866, -8.8775,
        -6.0311, -7.4420, -2.4759, -7.9628, -5.5719, -4.0621, -4.2155, -8.6082,
        -4.9103, -4.2254, -5.7218, -8.0081], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5221, -10.6672,  -3.3767,  -3.1763,  -6.5914,  -6.2873,  -1.7446,
         -4.6477,  -5.3098, -21.6601,  -7.0265, -10.0860,  -5.3333,  -6.6590,
         -2.0445,  -9.9115,  -2.4445,  -3.4820,  -5.6014,  -2.4593],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2296, -5.8728, -3.6983, -2.5305, -2.8360, -5.7955, -4.4148, -1.6684,
        -3.6172, -2.0358, -4.6113, -2.5605, -6.4716, -4.4720, -1.9569, -5.9509,
        -2.5309, -2.0800, -5.4733, -5.8723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4327, -3.2278, -7.0135, -1.1877, -7.0586, -4.5413, -2.7351, -3.8485,
        -6.3226, -5.5704, -3.3232, -4.7623, -3.0266, -2.2911, -4.9740, -5.1304,
        -2.4611, -2.8401, -3.2452, -3.0335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7610, -7.0173, -6.9770, -2.0085, -5.9643, -5.1762, -1.4331, -7.6508,
        -3.8962, -3.3677, -5.5081, -6.1051, -6.9094, -2.9924, -4.0257, -2.5824,
        -8.1067, -5.2900, -4.8102, -2.6745], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6635, -4.3890, -2.8578, -2.0924, -1.4335, -6.7602, -1.2583, -3.9524,
        -3.2685, -3.9796, -4.1807, -5.1946, -6.1440, -2.3576, -7.3836, -2.9959,
        -2.3785, -6.8770, -5.8981, -1.6005], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7820,  -8.4528,  -7.4980,  -2.1490,  -2.2181,  -6.0141,  -9.2715,
        -17.9211,  -5.1973,  -6.5325,  -7.8321,  -7.0570,  -8.0836,  -3.5581,
         -7.7423,  -0.8714,  -9.0807,  -2.7063,  -2.9449,  -7.3102],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6135,  -3.0901,  -4.7370,  -6.2684,  -6.6577,  -2.5199,  -3.7485,
         -3.7142,  -1.8417,  -6.6085,  -6.5699,  -5.0434,  -8.1293,  -9.9232,
        -15.0506,  -5.4906,  -7.4560,  -7.7613,  -7.6866,  -3.3233],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5963, -2.6036, -3.0696, -2.7898, -3.2863, -2.3506, -6.7944, -5.0583,
        -1.7322, -5.1950, -2.1707, -3.3973, -2.0767, -6.9517, -1.3801, -4.6136,
        -3.2598, -2.4636, -2.1234, -6.1899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8001, -6.7482, -2.3041, -3.7281, -4.1356, -2.5725, -1.5906, -6.7596,
        -4.3384, -1.7815, -2.3135, -3.3709, -2.6694, -6.1773, -6.1495, -4.2627,
        -5.1791, -4.1781, -2.8276, -5.3752], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0046, -3.2466, -6.2616, -6.1029, -1.3864, -8.1866, -2.1748, -2.0085,
        -6.1484, -5.0733, -1.6516, -2.9583, -2.7504, -2.4139, -4.4349, -5.8671,
        -2.5996, -2.5044, -2.8498, -1.5136], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2821,  -6.0178,  -3.0762, -10.2436,  -3.9751,  -4.3499,  -2.9190,
         -6.2824,  -5.8717,  -2.4133,  -3.4596,  -2.5767,  -3.3970,  -6.1301,
         -6.0558,  -2.9585,  -4.9939,  -2.9880,  -2.6105,  -3.6398],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8636, -4.8424, -1.5447, -4.0163, -2.2024, -5.3421, -4.1303, -3.6618,
        -3.7742, -2.3080, -2.9990, -1.8175, -7.0604, -4.7705, -2.1240, -2.1894,
        -3.4094, -2.9415, -4.5790, -6.0806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0529, -3.6170, -3.9344, -6.7953, -1.4243, -6.9681, -3.7064, -2.7657,
        -4.0097, -6.4902, -4.8821, -1.1643, -3.6487, -2.8705, -2.7547, -7.6351,
        -5.4439, -2.3047, -7.1966, -2.5831], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3804, -2.7932, -3.8773, -6.1303, -1.1805, -7.7107, -3.8662, -2.6542,
        -3.1568, -6.5449, -4.2069, -1.9051, -4.7546, -3.3107, -1.5295, -5.9736,
        -5.6677, -3.0601, -2.8798, -4.2026], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1867,  -3.8973,  -1.6546,  -2.1638,  -2.1795,  -5.8698,  -1.9991,
         -2.6070,  -4.9010,  -3.6405,  -3.7731,  -6.0120,  -6.5265,  -3.1441,
        -12.4245,  -3.2555,  -1.7532,  -2.7027,  -6.2198,  -1.8510],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2051, -6.4296, -2.0906, -6.7654, -7.5254, -7.4667, -4.9766, -4.0505,
        -8.1216, -2.6802, -5.0200, -3.8915, -6.0763, -7.1418, -6.8298, -1.9590,
        -3.4770, -2.9393, -0.7260, -4.0189], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8696, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1628, -4.6551, -5.6206, -2.2200, -3.5259, -3.1646, -2.1057, -4.9834,
        -5.5417, -0.6571, -5.6169, -3.0737, -2.3365, -3.2180, -5.2770, -2.7331,
        -5.3557, -3.4981, -3.9997, -0.8927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9507,  -7.2376, -10.1665,  -4.2332,  -5.5593,  -1.5034,  -5.2292,
         -2.8731,  -3.4013,  -4.4722,  -5.8759,  -0.3501,  -7.9237,  -3.4404,
         -3.2638,  -2.2565,  -6.0876,  -2.0693,  -4.5895,  -4.0200],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9696,  -4.1852,  -6.9446,  -6.5694,  -3.4971,  -1.7893,  -4.1860,
         -3.8732,  -4.2085,  -5.7238,  -2.8504,  -4.4517,  -2.8562, -14.5246,
         -4.6062,  -7.8437,  -2.8391,  -7.4257,  -1.6218, -16.0653],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7404, -7.3058, -3.2668, -3.0920, -6.4215, -3.5685, -2.6814, -6.5065,
        -5.8497, -2.0140, -4.7485, -2.5414, -1.6123, -5.5600, -5.8384, -1.5942,
        -4.6235, -3.8069, -4.2959, -2.6662], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3465, -2.9797, -6.2471, -5.3282, -2.5770, -3.8841, -2.1258, -1.4596,
        -4.3187, -4.8836, -2.1574, -4.3756, -3.1616, -6.4305, -1.6049, -5.5977,
        -5.1623, -0.5593, -4.1405, -2.0363], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6188, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3892, -7.2125, -2.6384, -6.1126, -3.2320, -3.5065, -4.8388, -6.4129,
        -1.6638, -4.0312, -2.3160, -3.5785, -4.2035, -6.6038, -4.7709, -1.9895,
        -3.0766, -2.9403, -4.4514, -1.7118], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9449, -5.0407, -5.4674, -6.9091, -3.8655, -9.4236, -2.1207, -3.0101,
        -3.5705, -5.7039, -3.8019, -5.9525, -1.7729, -2.2036, -4.9590, -5.8055,
        -2.3540, -3.8566, -3.2476, -2.7537], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3097, -2.2077, -4.5863, -2.0611, -2.5342, -3.5185, -5.5672, -1.4433,
        -2.4269, -2.6521, -1.4900, -5.0439, -5.9460, -3.3143, -3.4376, -3.9357,
        -4.6496, -5.1264, -5.3637, -6.9343], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1151, -2.5956, -6.6300, -4.7034, -6.6902, -4.5898, -6.2174, -3.3713,
        -2.9407, -3.6911, -6.0422, -0.8234, -9.8223, -4.6912, -4.3454, -2.9894,
        -5.1079, -5.2828, -2.6250, -3.5576], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2856, -1.1908, -8.6846, -4.5605, -2.7883, -2.3672, -5.8047, -5.5532,
        -1.4913, -4.5793, -2.4336, -2.9503, -4.4727, -5.7653, -1.0679, -4.2861,
        -1.9491, -1.9131, -3.2131, -5.7964], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9576, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7253,  -1.9730,  -3.7723,  -6.4732,  -1.9631,  -3.1988,  -3.9735,
         -5.1004,  -3.8602,  -5.0501,  -4.8681,  -1.4526,  -4.6885,  -3.2023,
         -2.8795,  -1.2535,  -7.3772,  -2.1512, -14.4025,  -2.3683],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8478, -1.8529, -2.6333, -5.7268, -5.6465, -1.1907, -3.9349, -2.6551,
        -2.2536, -1.8422, -6.1285, -4.9012, -1.6396, -4.5169, -2.3008, -1.5673,
        -4.2368, -5.3921, -2.7232, -5.8675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6564,  -6.4864,  -3.6535,  -2.2381,  -5.2400,  -6.6488,  -2.4876,
         -8.2818,  -3.9989,  -2.6543,  -6.8057,  -5.8380,  -3.4074,  -5.0666,
         -5.6941, -15.0094,  -8.4705,  -7.0000,  -6.4766,  -2.6856],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4400, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3707,  -1.1593,  -5.4622,  -3.2484,  -4.2372,  -4.9749,  -7.8513,
         -6.0086,  -2.9691,  -4.1913,  -2.1898,  -2.9526,  -3.8705,  -6.1338,
         -2.2861, -10.4643,  -2.6327,  -1.1489,  -6.7791,  -6.0708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0272, -2.0582, -4.9798, -6.9413, -4.8048, -2.6226, -2.4126, -3.2129,
        -1.7258, -5.4232, -5.1918, -1.7564, -3.9611, -2.9104, -3.0690, -2.6252,
        -6.7500, -5.4550, -1.7856, -8.1186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1864,  -5.0859, -22.2558,  -6.4801,  -6.5850,  -5.8369,  -6.0481,
         -5.1382,  -0.9620,  -5.7617,  -3.5980,  -1.7888,  -6.4604,  -6.3658,
         -1.5060,  -2.6350,  -3.5223,  -3.7730,  -2.3573,  -5.9583],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1455,  -7.5609,  -6.2025,  -1.0819,  -8.4901,  -4.9385, -22.5298,
         -5.2030,  -5.8506,  -7.4266,  -6.6745,  -7.9461,  -3.1019,  -8.9102,
         -1.1412,  -7.3526,  -4.1937,  -2.5057,  -2.5594,  -6.6257],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8190,  -6.2285,  -3.1694,  -3.8530,  -1.4163,  -6.5549,  -4.8783,
         -0.9836,  -2.8768,  -3.5403,  -2.1097,  -4.4842,  -6.0699,  -2.2424,
         -5.6978,  -2.2561, -21.5165,  -4.8916,  -8.9688,  -3.3583],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2266,  -4.2431,  -6.8297,  -5.4220,  -3.4802,  -6.9495,  -2.4064,
         -0.8515,  -5.5531,  -6.4242,  -5.1464,  -4.2902,  -3.0433, -18.7287,
         -3.2914,  -8.0769,  -3.2938,  -6.5866,  -0.9169,  -9.3321],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5165, -2.4605, -4.8456, -6.2911, -1.2996, -3.3118, -2.5795, -2.0401,
        -2.5685, -6.9293, -1.1627, -4.3357, -4.3906, -2.5093, -1.6456, -5.4431,
        -4.3448, -1.5246, -5.1062, -3.1109], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2448,  -6.3444,  -1.4192,  -2.0106,  -3.7843,  -6.0630,  -1.4380,
         -2.5998,  -3.6426, -10.6896,  -5.0635,  -5.6991,  -8.2409,  -4.0979,
         -6.9502,  -2.0512,  -4.2231,  -3.6695,  -4.8416,  -3.8137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6813,  -0.8669,  -7.2945,  -3.5820,  -2.2679,  -3.3527,  -6.6676,
         -2.0814,  -5.2291,  -3.5740, -11.2212,  -7.6203,  -6.4849,  -8.2204,
         -5.6616,  -7.4499,  -2.5303, -16.5138,  -5.1045, -16.0540],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3376, -6.6948, -1.8713, -5.4968, -3.4688, -3.5661, -0.9472, -5.8212,
        -5.0233, -1.0704, -3.7808, -3.2835, -2.8955, -2.1135, -5.5055, -4.5000,
        -1.2922, -3.1261, -2.1239, -2.0775], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6375,  -1.3299,  -3.3911,  -2.8670,  -2.2740,  -2.7460,  -5.9864,
         -1.5074, -12.3703,  -4.1052,  -3.6738,  -2.2758,  -6.3226,  -4.7110,
         -2.1755,  -5.2712,  -3.2671,  -3.1941,  -2.5514,  -6.2894],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.7903,  -4.3196,  -6.3800,  -6.3081,  -7.6712,  -8.7779,  -3.6303,
         -7.8154,  -2.9510,  -6.6500,  -4.9465,  -2.3440,  -8.3847,  -6.6036,
         -5.4658,  -5.9021,  -3.3761, -12.5386,  -7.4977,  -7.8969],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2906, -4.2450, -3.0344, -1.2409, -5.9276, -5.3978, -2.9569, -4.0310,
        -4.5918, -3.6299, -3.3424, -6.0911, -5.4423, -1.7010, -6.5126, -2.9300,
        -3.7159, -6.6714, -6.3903, -2.6001], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0703,  -3.5358,  -2.6969,  -2.5889,  -3.1524,  -5.8254,  -6.0360,
         -2.5201,  -6.1351,  -3.1672, -13.4393,  -6.9417,  -8.7788,  -6.7997,
         -3.5036,  -6.7516,  -0.4895,  -7.0077,  -2.7365,  -3.1296],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4734,  -4.5691,  -2.1740,  -4.3899,  -2.2466, -21.3242,  -4.9646,
         -9.0039,  -5.4168,  -7.0692,  -2.2593,  -6.8171,  -7.1664,  -8.3064,
         -2.8479,  -4.2695,  -6.3325,  -5.1847,  -2.2553,  -4.0250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1927, -2.8684, -4.8216, -5.0979, -0.9232, -4.1526, -2.3514, -1.7547,
        -3.4601, -6.2490, -1.8432, -6.1498, -3.6973, -1.7767, -6.8432, -6.0115,
        -1.1999, -4.8057, -3.7014, -8.0454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5250,  -1.9016,  -5.3634,  -4.3737, -17.7383,  -7.5857,  -8.6105,
         -4.2758,  -6.3067,  -0.9553,  -4.9608,  -2.7933,  -3.4397,  -4.0915,
         -6.2651,  -0.9455,  -3.7414,  -3.3075,  -6.4161,  -6.3248],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5846,  -3.1899,  -8.1253,  -6.5974,  -7.9531,  -8.1166,  -2.2978,
         -8.0207,  -1.1881,  -5.2955,  -3.4019,  -5.1260,  -2.5180,  -6.3861,
         -5.2818,  -1.0494,  -5.0855,  -3.5364, -15.5514,  -5.2586],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5355, -10.7894, -13.4358,  -7.9865,  -3.1890,  -6.9374,  -2.8222,
         -6.9057,  -4.0189,  -3.9031,  -5.2881,  -6.4276,  -2.2396,  -7.9276,
         -2.5499, -12.8630,  -4.2538,  -5.9465,  -8.1691,  -6.2482],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1716,  -6.5903,  -3.6254,  -1.1529,  -4.9318,  -6.2258,  -1.8668,
         -5.8108,  -2.6279,  -7.5850,  -6.0544,  -9.0668,  -8.3619,  -5.6773,
         -6.8502,  -1.8558, -11.4036,  -4.7355,  -3.6367,  -0.9516],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8302, -2.6106, -4.2781, -6.1584, -2.0918, -6.1330, -2.6658, -3.4777,
        -4.8116, -6.4145, -4.7555, -1.5763, -2.9405, -2.6703, -2.7378, -6.9665,
        -5.7472, -1.8185, -5.8965, -4.9717], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.9997,  -7.7972,  -9.2357,  -6.4228,  -2.8780,  -6.1218,  -5.7926,
         -2.0416,  -4.9791,  -3.1048,  -4.2307,  -1.5814,  -6.1489,  -5.1441,
         -1.8538,  -4.1819,  -2.1102,  -1.6520,  -2.5082,  -7.1284],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6988, -1.8341, -4.3188, -5.2510, -3.4860, -4.8511, -3.0588, -2.0461,
        -6.3848, -5.5568, -2.3644, -3.4500, -3.2124, -3.0541, -4.7204, -6.1595,
        -3.1260, -8.4722, -2.4139, -3.9041], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2355, -6.0628, -3.2866, -2.2328, -3.2438, -5.9812, -4.5758, -2.4208,
        -2.6513, -2.4525, -1.6388, -6.2301, -5.5852, -1.5111, -9.3910, -3.4646,
        -2.8283, -3.7264, -5.9388, -1.4663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7832, -1.5333, -6.0077, -3.6261, -3.4334, -2.4668, -6.7779, -5.4298,
        -2.3590, -4.2930, -2.9767, -2.3977, -3.8064, -5.7193, -1.8740, -3.0001,
        -2.7652, -2.1571, -4.6427, -5.6399], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4723, -2.0403, -5.2315, -2.8994, -2.8164, -3.9676, -7.2810, -5.9038,
        -2.6090, -3.7742, -2.0007, -2.3259, -3.1567, -5.4442, -1.4722, -2.3252,
        -4.3201, -3.2001, -3.1919, -6.0828], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2720, -4.6822, -1.5007, -1.7075, -3.2775, -4.3819, -2.3040, -5.0444,
        -5.2124, -1.0473, -5.7040, -3.0818, -3.4424, -4.7498, -6.6830, -3.2989,
        -4.1261, -3.7144, -3.2508, -5.5443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9866,  -4.4722,  -3.7948,  -6.5006,  -5.5444,  -2.9293,  -1.9842,
         -2.3004,  -2.8445,  -5.4848,  -5.3721,  -1.5065,  -5.9403,  -3.3992,
        -16.2331,  -6.1704,  -8.1718,  -3.1996,  -6.9585,  -0.4909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8181,  -7.8542,  -4.3717,  -2.7064,  -6.6655,  -6.4696,  -2.8018,
         -8.9318,  -3.5424, -17.9001,  -6.5797,  -7.5797,  -7.6964,  -2.1055,
         -5.1117,  -2.9377,  -3.6776,  -6.3536,  -3.4818,  -3.7111],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6749, -5.5697, -0.8893, -5.9664, -2.5705, -4.2073, -1.7913, -6.2528,
        -5.4960, -2.3268, -4.6285, -3.3000, -3.7751, -1.0333, -6.4543, -4.8205,
        -1.1759, -4.1833, -2.8866, -2.7982], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8400, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0425,  -2.9376, -16.6441,  -4.8396,  -8.1010,  -3.1042,  -7.0851,
         -0.8179,  -7.9263,  -3.9649,  -2.4179,  -3.3269,  -6.8139,  -1.5463,
         -3.6919,  -4.4055,  -3.2499,  -4.7411,  -5.9513,  -4.8638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8687, -5.1670, -1.7878, -3.7678, -2.5036, -2.0942, -2.0544, -6.7045,
        -1.6441, -5.7578, -2.6934, -3.6521, -1.8665, -6.1063, -5.0873, -1.1664,
        -4.1789, -2.7421, -2.1699, -3.7009], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1734,  -1.8360,  -4.7908,  -4.7140,  -3.0654,  -5.6598,  -8.0049,
         -3.2004,  -6.4594,  -4.0959, -10.7958,  -5.3689,  -6.8905,  -8.0341,
         -3.6609,  -7.7886,  -1.6018,  -8.2286,  -4.8783,  -4.2259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8830,  -6.0514,  -4.4466,  -1.4493,  -2.1392,  -3.6704,  -1.9949,
         -4.0965,  -6.0304,  -0.6516,  -3.7280,  -2.9514,  -2.9751,  -4.9029,
         -5.8320,  -1.8546,  -5.0535,  -2.0933, -11.0269,  -7.3836],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4384e+00, -6.0921e+00, -1.1370e+00, -3.9452e+00, -1.9597e+00,
        -1.8019e+00, -4.8391e+00, -6.3705e+00, -5.5956e+00, -2.0109e+00,
        -4.5473e+00, -2.0701e+00, -1.4779e+00, -3.6364e+00, -5.5542e+00,
        -1.6657e+00, -2.9228e+00, -2.7733e+00, -4.9032e+00,  4.4457e-03],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3907, -5.0209, -5.2115, -5.8913, -4.5620, -9.0470, -8.0725, -4.9848,
        -6.4138, -3.2695, -3.4680, -5.2628, -5.8822, -2.6187, -7.3517, -2.6466,
        -5.4012, -6.7174, -6.2274, -5.5164], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4978, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8382, -3.1290, -6.6494, -6.0852, -2.2919, -6.1111, -3.2541, -2.5238,
        -2.2667, -6.5986, -1.4537, -2.6959, -4.5465, -3.9175, -2.7617, -5.5572,
        -5.9729, -2.7352, -7.7911, -2.2165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5406,  -1.2759,  -8.6592,  -4.8075,  -2.3055,  -7.1904,  -6.6970,
         -6.0731, -12.3027,  -4.1735, -22.0993,  -7.8759,  -6.6359,  -5.8728,
         -2.0884,  -5.8693,  -4.7130,  -2.8011,  -4.2856,  -6.3974],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5909, -18.0858,  -8.1729,  -7.3446,  -7.4763,  -3.8659,  -6.8587,
         -0.5719,  -6.8382,  -3.1752,  -2.0285,  -4.9793,  -6.3058,  -1.7413,
         -4.2390,  -5.8095,  -5.0127,  -4.3569,  -6.0729,  -5.9535],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6740, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4535, -4.2200, -4.0668, -2.2509, -6.2382, -4.5814, -1.4692, -6.9224,
        -4.7992, -3.2434, -3.5373, -6.0664, -6.8665, -3.9453, -9.6527, -3.1449,
        -3.0604, -2.6717, -5.0443, -2.2766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0187, -2.1716, -2.1028, -6.9365, -5.1677, -1.8866, -5.3626, -1.9072,
        -2.1563, -2.2837, -6.4253, -4.8698, -2.0359, -3.5913, -2.7276, -2.8175,
        -4.6037, -5.4505, -3.1699, -9.0660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0179,  -4.1880,  -3.9039,  -2.4867,  -6.7201,  -1.2188, -12.3466,
         -3.7524,  -1.8915,  -3.1054,  -6.5138,  -1.4504,  -6.1253,  -3.1402,
         -2.9697,  -3.6469,  -6.5787,  -2.3441,  -3.8138,  -4.2391],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1048, -5.4004, -1.5279, -2.3554, -3.6852, -4.8440, -5.4401, -6.5621,
        -2.6174, -3.1837, -3.0178, -3.0411, -5.8480, -5.2383, -1.9643, -3.3668,
        -3.3509, -2.9016, -1.7953, -6.5335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8931,  -4.8977, -14.8513,  -9.2917,  -8.2715,  -8.2092,  -5.0281,
         -6.0799,  -2.4336,  -5.6598,  -5.9557,  -1.4452,  -3.2520,  -5.5713,
         -2.4501,  -3.3622,  -4.2106,  -3.4022,  -6.9794,  -6.3562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1721, -3.1913, -4.7035, -0.8234, -5.4570, -2.9140, -1.9820, -3.1341,
        -5.8433, -5.1879, -1.9585, -4.1171, -1.7925, -1.6725, -3.0728, -5.7227,
        -1.5688, -3.4040, -2.6739, -2.2702], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7474,  -5.9809,  -6.0589,  -6.7698,  -2.8005,  -3.3956,  -3.5875,
         -3.0776,  -6.3033,  -6.9987,  -2.9482,  -5.1886,  -3.9035, -10.8592,
         -8.4916,  -4.9331,  -7.6553,  -3.8858,  -3.4123,  -4.2079],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5522,  -0.8980,  -3.3507,  -3.1990,  -3.9012,  -4.0466,  -6.1938,
         -5.1357,  -1.7368,  -5.0779,  -3.7205, -20.2002,  -4.8395,  -6.8918,
         -3.2685,  -6.7766,  -1.1284,  -8.2546,  -3.9683,  -3.5605],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6410, -4.7469, -3.6017, -3.7948, -3.7299, -6.2673, -1.7949, -3.5719,
        -2.5628, -3.5652, -1.1763, -5.9152, -4.4769, -1.6154, -4.4569, -1.7700,
        -2.6951, -3.3661, -4.8309, -1.3928], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7028, -1.4953, -3.8528, -6.4961, -2.1658, -9.9078, -4.9437, -3.4773,
        -7.2514, -5.3957, -1.3369, -2.6928, -3.3529, -2.6745, -2.1928, -5.5584,
        -4.5460, -1.9762, -2.5078, -2.6289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9124,  -4.6648,  -5.0996,  -2.0226,  -5.7992,  -5.9004,  -2.6368,
         -3.6571,  -2.5722,  -5.5321,  -3.9070,  -6.8169,  -6.4252,  -3.0118,
        -10.5460,  -4.5516,  -1.8817,  -4.0592,  -5.2817,  -0.9356],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9127,  -2.0208,  -6.0147,  -4.7992,  -2.1825,  -4.4961,  -6.4459,
         -2.6956,  -2.5604,  -2.3898, -14.5345,  -6.4813,  -7.3180,  -7.5567,
         -5.9860,  -5.4656,  -1.1955,  -6.8012,  -2.9456,  -2.7182],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0265, -7.5341, -2.6161, -1.1688, -6.7440, -5.5226, -1.1640, -5.4498,
        -4.3075, -4.7671, -3.1030, -7.3849, -5.1386, -3.0299, -4.4313, -3.0727,
        -3.1558, -2.3067, -5.9778, -6.0562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7916,  -8.8862,  -0.8827,  -7.5178,  -4.6826,  -3.2999,  -5.9664,
         -6.5010,  -2.3579, -10.7686,  -2.9073,  -3.2750,  -2.0891,  -6.5002,
         -4.0351,  -3.1533,  -8.1815,  -2.3390,  -2.0399,  -6.0554],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7115, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2442, -3.7527, -7.2277, -3.7459, -3.8594, -6.1735, -5.9439, -3.0188,
        -4.0926, -4.5924, -5.4619, -5.2525, -6.2104, -8.3808, -6.0187, -7.0192,
        -3.3862, -4.5577, -4.6845, -4.5120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0407,  -6.1683,  -6.8358,  -4.7640,  -4.1955,  -5.6682,  -5.7643,
         -6.4940,  -4.1463, -10.4210,  -3.8448,  -2.4909,  -3.4664,  -5.9821,
         -2.5825,  -2.6192,  -4.6317, -18.1797,  -4.0544,  -7.2180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0884,  -7.0513,  -3.5702,  -6.6098,  -2.3973,  -7.7301,  -5.7447,
         -3.2132,  -6.5451,  -6.8905,  -3.5076,  -4.6708,  -3.5906, -10.2991,
         -6.6104,  -6.6626,  -6.5594,  -3.0576,  -6.5133,  -3.4916],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6711,  -8.2814,  -3.1611, -10.0698,  -6.5132,  -6.3183,  -5.4677,
         -4.1308,  -2.5884,  -3.3072,  -5.9181,  -1.8526,  -7.2988,  -4.0026,
         -5.5287,  -1.6774,  -5.6718,  -5.0188,  -4.3214,  -4.2759],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8389,  -4.7934,  -1.0425,  -6.1159,  -2.6240,  -3.8075,  -3.9147,
         -7.0560,  -2.7925, -13.6530,  -2.9625,  -3.7654,  -2.6669,  -6.6264,
         -4.3075,  -2.0490,  -3.2419,  -3.2725,  -1.4207,  -2.5072],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2729, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4006, -7.9455, -6.3859, -3.3620, -5.7439, -3.1278, -2.4959, -3.2813,
        -5.3475, -1.8396, -5.5389, -2.9231, -3.2440, -2.3837, -6.5746, -4.8655,
        -1.9182, -3.9422, -2.5857, -1.4841], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9699,  -6.9615,  -2.9673,  -2.9325,  -7.2560,  -5.9010,  -2.2752,
         -5.5917,  -2.6790,  -8.6045,  -8.1860,  -5.0579,  -7.7018,  -2.7330,
        -10.2271,  -1.4580,  -6.1955,  -5.5615,  -4.1296,  -1.4124],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0290,  -2.7455,  -3.4288,  -5.3850, -13.9052,  -6.6054, -10.1215,
         -3.1259,  -7.0975,  -0.6052,  -5.2885,  -5.4593,  -3.5834,  -3.6507,
         -3.8535,  -6.5371,  -2.7722, -10.2747,  -3.5163,  -2.5744],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6738, -3.0723, -2.5782, -2.9450, -5.7680, -5.6845, -0.6645, -3.7668,
        -2.6743, -3.1944, -3.0915, -6.2490, -4.7838, -3.1724, -3.1822, -1.8753,
        -3.3438, -6.4187, -5.4690, -0.6186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9182, -5.4080, -1.3318, -4.1522, -2.8086, -2.4583, -2.6867, -6.7035,
        -2.6731, -6.4516, -3.1567, -2.5388, -5.3822, -6.0119, -0.9954, -3.7240,
        -3.1517, -1.4539, -4.5001, -5.3398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3532, -3.7049, -7.5902, -3.7109, -2.3297, -6.4950, -6.3106, -2.7658,
        -7.2975, -2.9687, -6.1921, -1.2859, -6.0403, -4.7868, -0.9988, -4.1810,
        -2.9058, -3.4065, -2.2541, -5.1908], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0603, -3.5113, -6.6749, -4.1097, -1.6723, -4.0160, -4.2749, -2.9041,
        -4.1635, -6.4467, -4.3537, -1.7820, -4.5980, -2.3849, -1.8968, -2.3695,
        -6.1968, -1.4600, -6.1287, -3.0787], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8571, -5.3666, -3.8052, -4.9698, -6.9548, -2.2401, -9.1641, -3.9924,
        -2.7310, -6.4714, -5.2566, -2.6441, -4.7119, -3.9322, -2.7016, -5.8546,
        -5.6047, -2.1971, -5.7718, -3.3383], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8457,  -2.4719, -12.3855,  -3.3882,  -3.0472,  -5.4405,  -5.1854,
         -2.8824,  -3.0808,  -2.5854,  -1.7340,  -5.8878,  -6.1285,  -2.5482,
         -3.9602,  -3.8275,  -3.9436,  -4.8646,  -5.9247,  -3.5078],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1162,  -1.2671, -12.9903,  -3.6674,  -3.6861,  -7.5974,  -5.9789,
        -11.9234, -13.4557,  -8.6011,  -7.0099,  -8.3892,  -6.7392,  -7.6567,
         -4.5551,  -6.7137,  -0.4447,  -6.8601,  -3.8106,  -1.9523],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3344,  -3.7303,  -4.1908,  -4.5003, -14.7892,  -6.3212,  -8.5027,
         -6.8755,  -5.6002,  -7.3476,  -0.7591,  -5.5921,  -3.9070,  -4.1108,
         -6.6270,  -6.8801,  -2.1059,  -4.4258,  -3.6435,  -2.9929],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.9475, -3.2756, -3.2026, -1.4613, -5.5153, -5.5024, -1.3672, -7.5594,
        -3.4676, -1.9627, -5.3089, -6.0531, -1.7387, -5.4939, -1.6758, -1.5231,
        -3.3556, -6.1176, -2.2166, -7.4195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8517, -3.4903, -3.1108, -3.0528, -2.6816, -5.8127, -4.9962, -1.5114,
        -6.7284, -3.0956, -4.2014, -6.9652, -6.0583, -1.4578, -5.9421, -3.2023,
        -1.7213, -6.0306, -5.9948, -1.7881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7429,  -3.1776,  -6.4518,  -6.1141,  -2.5862,  -4.4536,  -6.6796,
         -4.7396, -13.5764,  -1.0780,  -7.8128,  -4.8680,  -6.9668,  -4.9327,
         -5.1086,  -3.0282, -10.7905,  -4.9357,  -9.5739,  -7.8597],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8557, -5.0728, -3.3473, -3.3861, -5.6661, -6.7972, -2.1896, -3.2854,
        -2.7016, -1.7909, -3.4438, -6.2396, -2.3648, -6.3612, -2.4027, -2.1593,
        -6.1285, -5.7150, -1.4113, -4.0892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2490,  -4.7474,  -3.2725,  -1.5098,  -5.5223,  -5.0537,  -4.6846,
         -4.0323,  -3.2249,  -6.9958,  -7.0261,  -8.2297,  -7.8995,  -3.9619,
         -6.8173,  -2.8698, -14.6501,  -3.3490,  -3.6145,  -2.8068],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2841, -3.2969, -3.5681, -2.8344, -2.9594, -4.8445, -6.1669, -1.3893,
        -4.6157, -3.4503, -3.7462, -2.9288, -6.0425, -5.9783, -3.1101, -2.3104,
        -2.5513, -1.6829, -4.9271, -5.8090], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0999, -1.4963, -5.1371, -4.6058, -3.4615, -5.2026, -1.6794, -4.3720,
        -6.7787, -6.1532, -2.8314, -7.3925, -2.2154, -3.2895, -4.1138, -6.0734,
        -3.3461, -3.8214, -3.7750, -4.3487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7949,  -0.8683,  -5.4097,  -5.4665,  -3.7666,  -3.5781,  -5.3382,
         -5.4548,  -1.5450,  -8.0075,  -2.0242,  -2.0463,  -5.8224,  -5.4224,
         -2.3043,  -7.5992,  -4.1000, -22.0093,  -4.7417,  -7.3345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0389, -5.9067, -2.5212, -4.9787, -7.1111, -1.8504, -5.8467, -6.3864,
        -2.4039, -3.0689, -2.5971, -3.5105, -3.6951, -4.7183, -4.9904, -2.5642,
        -3.3293, -1.9483, -1.3089, -6.6339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3138, -8.7097, -6.5164, -8.0575, -3.9153, -7.0928, -0.5325, -5.4087,
        -2.9725, -2.8388, -4.9055, -4.8819, -1.8470, -4.2743, -3.3398, -5.0750,
        -9.1171, -9.0878, -8.0679, -4.2854], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3698,  -3.4333,  -4.4791,  -2.7684,  -4.7172,  -6.1388,  -6.6397,
         -3.4469, -10.1671,  -4.3056,  -2.7431,  -4.9359,  -6.1041,  -2.6905,
         -9.0507,  -2.3154,  -1.8376,  -2.6452,  -5.8889,  -1.8349],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7672, -4.0224, -3.0582, -5.1565, -4.7503, -2.3569, -3.6839, -7.7338,
        -1.3655, -7.2958, -3.6168, -3.7689, -3.1142, -5.2240, -4.6228, -1.3770,
        -4.3359, -2.5486, -1.8591, -6.1803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8785, -2.3888, -4.3498, -5.2005, -6.4011, -3.6679, -7.5531, -2.4504,
        -2.8662, -2.7630, -6.0710, -1.0191, -4.2030, -3.3019, -2.6536, -2.5706,
        -5.9803, -4.6943, -1.7468, -5.6363], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4582, -4.0210, -4.4429, -5.2975, -5.3056, -6.4173, -7.9014, -9.8613,
        -6.4443, -7.0197, -7.5907, -4.5078, -7.4030, -7.5887, -6.5057, -7.1250,
        -4.4029, -7.2919, -3.4003, -4.0745], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.0030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4049,  -4.4278,  -3.2961,  -4.6411,  -2.0426,  -6.3899,  -1.0342,
         -4.5809,  -2.9845, -17.5884,  -6.4215, -10.3776,  -6.3046,  -5.2495,
         -5.6566,  -3.4847,  -8.1957,  -4.0892,  -3.0907,  -1.3434],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7360, -20.2354,  -3.6321,  -1.3171,  -4.8840,  -5.6937,  -1.3928,
         -3.1003,  -3.4509,  -3.7195,  -2.1428,  -5.7798,  -4.3410,  -2.0166,
         -4.1975,  -2.5348,  -2.6269,  -7.7174,  -5.6810,  -2.4229],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4914,  -5.4362,  -3.4993,  -3.6937,  -4.9177,  -6.2540,  -1.7891,
         -3.8560,  -3.0539, -12.4759,  -7.3068,  -8.4099,  -8.0385,  -6.2376,
         -6.6913,  -2.4349,  -8.3701,  -3.7112,  -2.0752,  -7.1354],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1450, -6.8269, -6.4985, -3.5592, -4.7538, -2.8223, -3.0157, -4.2138,
        -5.6058, -2.1680, -2.4802, -2.3501, -2.9010, -3.8484, -6.2954, -6.2497,
        -2.9353, -4.7722, -2.5122, -2.6523], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0303, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1320, -2.3702, -5.8647, -5.1124, -0.9950, -2.6664, -2.4292, -1.2814,
        -7.6045, -5.5347, -0.8547, -4.7227, -1.9902, -1.0279, -6.1844, -5.5105,
        -1.8198, -7.6561, -2.7260, -4.6225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4647, -3.2280, -4.1990, -3.5423, -6.7979, -7.3595, -3.3863, -3.2096,
        -3.2678, -3.2206, -4.0181, -6.4808, -2.5914, -4.7398, -3.2167, -3.7598,
        -6.3185, -6.3755, -1.5491, -6.4971], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7827, -5.7152, -3.2227, -4.5759, -2.1579, -5.8890, -4.9887, -0.9682,
        -2.5816, -2.0105, -1.9896, -2.4508, -5.7602, -4.1549, -1.3333, -5.2698,
        -1.9278, -1.8665, -4.4407, -4.9518], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7690, -1.8174, -3.1633, -5.7298,  0.0829, -4.9617, -2.4884, -4.1316,
        -2.8086, -5.4997, -4.6661, -1.7140, -4.2550, -2.9077, -3.2042, -2.1842,
        -8.0950, -2.0177, -6.8209, -3.8064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5554, -3.7238, -4.8960, -1.8187, -6.6047, -4.6754, -0.7564, -3.3035,
        -2.5163, -1.2546, -4.1067, -4.5760, -1.4969, -2.7715, -3.0767, -4.6571,
        -2.9944, -5.9204, -4.4362, -0.7396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4785, -2.4676, -6.9840, -2.6725, -3.3563, -2.3287, -5.6871, -4.4546,
        -0.7103, -6.9139, -2.6837, -1.6770, -3.1450, -5.9503, -0.5405, -4.4092,
        -4.1755, -5.8493, -8.6669, -8.0766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1707, -3.8598, -3.9692, -2.1965, -2.8452, -5.3825, -6.6613, -2.8501,
        -2.0524, -3.9940, -2.8272, -5.2798, -6.1404, -1.9449, -5.1538, -5.0565,
        -6.5987, -7.0699, -6.9359, -6.2577], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7492, -3.7120, -4.7632, -1.7631, -6.0738, -5.0664, -2.1351, -4.6809,
        -3.5198, -5.9644, -7.2809, -5.6334, -1.7104, -8.3512, -4.0515, -1.3853,
        -4.2677, -7.0026, -5.5294, -3.4411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0592,  -5.3245,  -2.1764,  -2.1637,  -4.9168, -11.1253,  -3.9427,
         -6.1221,  -9.2364,  -7.7973,  -2.5153,  -7.3178,  -2.0054, -10.0454,
         -2.5815,  -2.4847,  -4.8597,  -6.6848,  -1.8540,  -9.4155],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9551, -2.7228, -3.5179, -5.6074, -6.3461, -3.2993, -7.0746, -2.0394,
        -3.2269, -3.6275, -5.2294, -0.7453, -8.8714, -4.0772, -3.0943, -2.8786,
        -5.8964, -4.3843, -2.6873, -4.9562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5127,  -5.6584,  -6.8228,  -5.8251, -18.7376,  -4.5854,  -3.1778,
         -5.0734,  -5.2335,  -3.2690,  -3.1465,  -2.8532,  -2.3815,  -3.4338,
         -6.0466,  -0.6737,  -5.0304,  -3.3983,  -2.9976,  -2.5561],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0945,  -6.0115,  -1.8195,  -7.5894,  -3.5323,  -3.3420,  -7.3694,
         -5.7750,  -2.4053,  -3.3205,  -3.0342,  -3.8256,  -1.2185,  -7.2161,
         -4.4956,  -0.5880,  -3.2448,  -3.6056, -14.3612,  -4.0195],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8315,  -2.4583,  -1.3914,  -5.8899,  -5.0325,  -1.9719,  -4.2587,
         -2.4565,  -3.1535,  -4.3835,  -5.9082,  -4.3843,  -1.4552,  -5.6112,
         -2.3126,  -2.8142,  -2.6700,  -6.5204,  -2.2052, -11.8522],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8367, -2.3510, -3.2396, -2.7822, -3.2854, -5.0687, -3.4880, -2.8528,
        -6.4378, -6.6130, -3.3950, -3.0299, -5.9913, -2.2783, -2.5395, -3.4955,
        -2.9651, -2.7531, -4.9074, -5.4914], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2975, -2.2379, -4.5077, -6.3395, -3.9425, -2.1567, -3.1558, -1.9878,
        -3.0648, -4.1550, -6.4104, -5.5826, -2.2837, -5.1840, -2.8510, -2.7089,
        -4.5657, -4.8238, -2.7212, -6.0553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7959,  -7.2278,  -2.3293, -10.3764,  -4.5505,  -2.4792,  -5.5434,
         -6.2223,  -1.9177,  -5.2663,  -3.4033,  -3.1186,  -2.4848,  -6.6627,
         -5.5656,  -2.1103,  -2.0861,  -2.3875,  -2.5245,  -2.4037],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4181,  -4.9421,  -4.9985,  -5.3185, -18.9450,  -6.5190,  -4.8761,
         -8.7819,  -3.1852,  -7.2113,  -1.3849,  -6.4282,  -3.7458,  -2.6903,
         -7.9196,  -5.4123,  -2.2810,  -5.2891,  -2.4314, -17.9848],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8873, -0.7724, -2.9302, -2.1865, -2.0714, -4.1033, -5.3938, -2.0138,
        -5.0786, -1.9000, -2.4012, -3.5499, -6.6265, -4.6021, -2.3073, -2.9896,
        -1.8883, -2.3798, -5.7092, -4.9153], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5985, -5.1133, -0.8400, -7.1861, -2.8969, -2.3636, -4.9944, -5.5844,
        -0.9449, -2.8487, -2.1670, -2.3600, -2.4579, -6.7902, -5.3916, -2.6829,
        -6.0254, -2.5917, -4.7953, -1.1396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4017, -6.3946, -2.8974, -4.0095, -3.4512, -4.1353, -3.8309, -7.0322,
        -6.2786, -2.9240, -6.7557, -4.1686, -4.9173, -1.8657, -6.6287, -4.6552,
        -2.6675, -3.3431, -2.8561, -3.0699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0075,  -3.8437, -23.4372,  -4.4401,  -8.0569,  -3.4556,  -6.4821,
         -0.4475,  -6.3672,  -4.0679,  -6.4406,  -3.0630,  -4.5525,  -5.3334,
         -3.9411,  -8.7418,  -3.2477,  -3.0175,  -5.3923,  -6.4525],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3156,  -3.4187,  -3.5235,  -2.6285,  -6.0051,  -4.2948,  -1.4549,
         -3.7130,  -3.0827,  -2.2055,  -5.4924,  -4.7133,  -1.5856,  -3.4888,
         -2.6144, -15.1530,  -6.8220,  -9.0643,  -3.7854,  -7.6464],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4393, -1.6997, -2.7433, -6.0494, -5.7625, -3.0374, -7.1037, -2.0309,
        -1.4821, -3.1526, -5.4795, -1.8683, -3.9292, -4.2039, -4.5835, -3.8498,
        -5.7772, -4.5581, -1.3679, -5.2891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4481, -4.9322, -2.3840, -1.5632, -7.0181, -5.2893, -1.9999, -5.5019,
        -2.0620, -2.1523, -5.1608, -5.3646, -0.5444, -9.1260, -3.3861, -2.3509,
        -6.5715, -5.9786, -2.8912, -2.9493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6200, -7.8734, -5.6057, -3.1606, -6.3046, -2.7503, -4.0534, -3.8677,
        -4.7707, -1.4777, -5.1622, -4.2383, -3.6069, -4.6107, -6.3126, -5.7498,
        -4.0047, -8.5078, -3.0603, -4.9880], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9497, -5.1600, -1.2566, -6.7359, -2.5638, -3.0041, -4.3669, -5.6417,
        -0.2496, -3.7492, -2.2478, -2.5859, -4.2672, -5.2839, -4.3118, -1.7080,
        -3.1413, -2.7885, -1.5025, -6.2403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8287, -6.7479, -7.5433, -7.6576, -5.0320, -3.8489, -8.1975, -2.4752,
        -4.9249, -3.8392, -6.1462, -7.1753, -6.6907, -1.7950, -3.6318, -2.8124,
        -0.7203, -3.8125, -5.3472, -1.5501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9987, -1.5630, -7.2703, -2.7988, -3.3961, -2.3477, -6.6499, -4.4348,
        -2.7447, -3.5821, -3.4389, -2.1977, -4.6262, -6.2838, -2.0562, -4.5083,
        -1.8303, -3.5285, -4.2557, -5.3782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2357,  -1.9703,  -5.9062,  -4.3308, -15.2033,  -6.9628,  -8.0183,
         -5.8805,  -6.9884,  -1.5658,  -2.9710,  -5.0213,  -6.5956, -10.7014,
         -6.8497,  -9.8630,  -3.7456,  -6.0294,  -0.4334,  -5.1819],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9268,  -0.6716,  -4.2973,  -3.1234,  -6.7653,  -5.7014,  -4.0662,
         -5.1483,  -2.9802,  -4.6905,  -4.9682,  -8.2937, -14.0323,  -6.5501,
        -10.0180,  -4.0445,  -5.6211,  -0.7447,  -4.0274,  -2.1816],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3852, -6.0508, -0.8067, -3.8785, -2.4360, -4.5173, -3.7969, -5.8365,
        -4.9056, -2.2394, -3.1086, -3.5324, -2.2087, -4.2294, -6.1019, -2.9805,
        -3.3602, -4.0512, -4.5421, -3.2872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6606, -6.7310, -4.1098, -8.4433, -3.3198, -7.9639, -7.8633, -8.2367,
        -6.9995, -2.6573, -7.1935, -1.3519, -7.3202, -5.0423, -5.1242, -6.3474,
        -6.0201, -1.9101, -4.4077, -4.9278], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5815, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5536, -3.8819, -3.0749, -3.4004, -6.5911, -4.9717, -3.3259, -3.6301,
        -3.3221, -5.9275, -1.4345, -5.5791, -4.6303, -0.8628, -4.4971, -2.7426,
        -1.9030, -9.3286, -5.8244, -2.7244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1934,  -4.3660,  -2.9116,  -3.0298,  -3.1511,  -3.7180,  -0.6975,
         -6.6988,  -4.2027,  -3.1507,  -5.8017,  -2.4799,  -3.1909,  -1.1672,
         -5.8548,  -4.4018,  -1.7027,  -2.9855,  -2.2833, -13.6378],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8847, -2.5326, -3.2048, -4.0711, -1.6303, -6.0120, -6.2134, -2.7889,
        -5.0876, -2.7401, -3.7290, -1.4884, -6.5149, -4.4924, -1.6940, -3.9834,
        -2.2062, -1.6930, -5.1936, -7.0559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2263,  -6.2513,  -1.1423,  -7.6496,  -2.2097,  -2.3927,  -6.6180,
         -7.2109,  -2.6626, -17.5568,  -4.2375,  -3.2708,  -6.1457,  -5.0793,
         -0.9452,  -3.2914,  -2.0594,  -4.5176,  -2.0221,  -5.2781],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3272,  -5.2345,  -6.6521,  -6.2851,  -4.0060, -12.0921,  -4.3379,
         -3.7286,  -2.9980,  -7.0973,  -4.5521,  -3.0784,  -3.0609,  -2.9025,
         -1.4974,  -2.0144,  -7.0023,  -4.0699,  -0.7133,  -4.5941],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3284, -7.6656, -5.4467, -6.1755, -3.7263, -7.7496, -1.9062, -4.9462,
        -4.3682, -4.1258, -8.0309, -7.2683, -2.1983, -5.2443, -3.4790, -8.8122,
        -7.4537, -6.3834, -8.0846, -4.4276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0615,  -2.7182,  -4.0540,  -3.6918,  -5.7948,  -6.0745,  -2.3188,
         -5.8313,  -4.1257,  -8.0753,  -6.7175,  -6.0013,  -8.0187,  -2.6361,
         -7.4853,  -1.1696, -11.6660,  -4.3609,  -1.7914,  -5.9810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9787, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7683,  -5.8367,  -0.9289,  -5.7309,  -2.9111,  -3.3984,  -4.5721,
         -5.8528,  -1.3025,  -5.6289,  -2.7988,  -2.4944,  -5.0598,  -5.8933,
         -4.1811,  -7.0836,  -4.1829, -17.4790,  -6.9101,  -5.7725],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7969,  -6.3194,  -7.0713,  -2.4438,  -5.8726,  -3.7083,  -3.3431,
        -11.2853,  -5.6445,  -5.1926,  -4.5659,  -5.5704,  -8.4961,  -3.5218,
         -7.7199,  -6.2654,  -6.4453,  -4.3165,  -4.0066,  -4.4819],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9164, -4.6073, -6.3350, -1.2438, -3.6081, -3.0602, -1.6964, -4.0049,
        -5.7304, -0.8180, -7.1767, -2.5836, -2.2852, -3.3934, -5.6409, -1.0945,
        -5.1079, -2.9947, -3.3502, -1.4593], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4814, -3.6768, -5.4347, -1.2683, -4.7759, -2.5478, -2.6671, -4.7019,
        -6.2317, -0.4840, -5.2033, -2.0455, -3.9951, -3.3119, -6.1853, -5.0941,
        -1.8882, -2.6486, -3.0770, -3.6382], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2285, -1.1500, -5.7038, -3.2419, -3.0783, -3.8128, -5.6976, -5.5442,
        -3.3593, -3.2044, -1.7923, -1.8033, -5.6432, -5.1261, -1.2155, -4.3433,
        -2.3081, -3.1370, -6.2182, -5.9459], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7852, -2.4351, -5.2690, -2.5716, -2.7617, -5.8408, -5.5906, -2.8849,
        -5.4119, -6.1387, -3.2595, -2.7530, -5.6716, -5.2851, -2.4042, -6.1450,
        -1.6286, -1.4401, -4.9242, -4.9806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7785,  -1.3810,  -5.3304,  -2.4430,  -4.6129,  -5.3903,  -6.1185,
         -6.1795,  -3.2695,  -8.2066,  -3.1357,  -1.4291,  -6.8280,  -4.8936,
         -1.3653,  -6.1231,  -3.2270, -12.4125,  -8.8508,  -7.6046],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5252, -5.3378, -5.9740, -7.0407, -2.5483, -4.9962, -3.3317, -4.7823,
        -4.2007, -6.1417, -5.7736, -1.7892, -9.2652, -2.3354, -2.3379, -4.2430,
        -5.1173, -2.1478, -6.1156, -2.2860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2141,  -3.4491,  -6.7294,  -0.5106,  -8.3841,  -3.6214,  -3.3292,
         -2.4734,  -7.4883,  -1.4700,  -5.1637,  -3.5251, -11.7449,  -6.1211,
         -9.5004,  -4.0835,  -6.7972,  -0.9106,  -6.2453,  -4.3050],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6929, -3.7058, -5.1866, -6.1054, -1.4840, -3.0599, -1.6070, -6.4629,
        -4.2334, -2.2081, -3.5853, -2.5155, -1.8227, -3.5662, -5.9753, -4.4609,
        -1.4573, -3.1977, -3.0815, -1.8728], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5141, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7133, -2.8762, -2.6813, -6.5721, -2.2051, -7.4463, -2.2611, -3.0111,
        -3.1884, -5.9314, -5.8027, -2.7096, -6.4136, -2.8002, -2.7258, -2.9952,
        -5.9816, -1.6934, -3.0726, -2.3632], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6384, -3.6897, -3.4685, -6.3942, -5.6615, -2.4120, -7.0325, -5.9877,
        -3.4758, -2.5162, -6.2291, -6.2677, -2.8544, -5.9820, -4.0651, -1.3817,
        -7.0343, -5.7152, -0.8550, -5.1899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9782, -6.6020, -3.9434, -7.3367, -0.7720, -7.2231, -2.5622, -5.0854,
        -1.4781, -5.5070, -5.3169, -0.5934, -4.8221, -2.3685, -2.1197, -2.2954,
        -6.6185, -1.3312, -4.6091, -2.6925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7834,  -2.0811, -12.2702,  -4.7768,  -4.9801,  -5.8591,  -6.4268,
         -2.1997,  -8.9606,  -3.4389,  -2.5438,  -4.5064,  -5.3903,  -5.2370,
         -2.4281,  -3.1153,  -2.7458,  -1.3078,  -3.8511,  -5.8543],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4177,  -2.0754, -11.0933,  -4.9209,  -7.2661,  -8.0413,  -2.5956,
         -7.7125,  -4.8342,  -5.7197,  -5.8346,  -2.5835,  -6.1797,  -6.9151,
         -3.6634,  -8.4118,  -2.9571, -14.7441,  -4.5786,  -6.1516],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7736, -2.4281, -2.8808, -5.4054, -5.3332, -2.0775, -5.1698, -3.4060,
        -3.2461, -5.0497, -6.1772, -2.8842, -3.6601, -4.9069, -4.4439, -5.2246,
        -6.3262, -4.9861, -2.5980, -4.0038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5225,  -3.9209,  -2.8233,  -0.9796,  -6.2238,  -5.3061,  -3.0740,
         -3.5336,  -2.9836, -13.5279,  -8.3483,  -7.4383,  -8.3518,  -6.5705,
         -7.7743,  -1.8591, -11.1278,  -3.3820,  -4.0822,  -2.5992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6911, -4.6435, -4.9500, -1.9920, -3.5752, -2.1701, -3.8032, -3.6817,
        -6.5325, -5.5996, -1.9936, -3.9130, -3.5128, -3.1692, -3.0874, -5.8874,
        -1.1391, -3.6200, -3.2003, -3.0912], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8404, -2.0134, -6.3111, -0.2840, -3.5978, -4.5534, -4.3689, -3.6233,
        -5.9808, -6.0629, -2.8860, -7.4613, -4.2718, -2.9463, -4.4303, -5.0533,
        -3.0121, -3.6437, -3.0906, -1.8939], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4162, -4.1229, -9.1275, -2.2035, -2.8655, -6.5044, -5.0680, -1.7783,
        -2.5704, -2.6149, -2.8109, -2.8723, -6.1346, -3.7764, -2.1990, -4.2151,
        -3.4517, -2.9743, -6.7411, -5.5868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3848, -3.9414, -3.3927, -3.2993, -1.3328, -6.5802, -1.5483, -9.5843,
        -3.8536, -3.5856, -1.7669, -6.5951, -5.2813, -1.1874, -4.6292, -1.7405,
        -3.4391, -3.5177, -4.7127, -1.0324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3057, -4.5647, -1.9307, -3.3105, -3.0019, -3.3440, -6.4479, -6.0558,
        -1.7784, -4.3141, -3.1615, -2.3867, -2.3590, -5.6662, -3.1401, -2.6683,
        -5.2513, -6.2387, -3.6911, -6.2436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6828, -6.3569, -2.5464, -3.9381, -5.3007, -3.3355, -4.4341, -6.8180,
        -6.4192, -3.4060, -6.5431, -3.1746, -2.7590, -6.9246, -5.7153, -1.7116,
        -5.9845, -3.3485, -2.9912, -3.4827], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5452, -3.2344, -5.0984, -6.8278, -6.0987, -1.8264, -8.6846, -2.6802,
        -7.2793, -4.0261, -7.2839, -5.0696, -1.5994, -8.0130, -3.8743, -3.9629,
        -7.7850, -6.2251, -2.0555, -9.9351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7999, -3.0079, -1.6633, -4.0489, -5.2332, -1.1680, -4.4778, -4.2139,
        -3.5501, -1.8047, -6.9016, -4.4057, -1.6219, -2.6088, -3.7927, -3.8064,
        -5.8632, -6.3640, -1.8500, -5.3784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4966, -5.1124, -4.8049, -3.7376, -5.8384, -2.6594, -1.7872, -5.0180,
        -4.9341, -0.9944, -3.6925, -3.1397, -1.8454, -4.8809, -5.3127, -3.5507,
        -4.0199, -3.9048, -3.4803, -1.8459], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6474, -6.0478, -7.8598, -8.4966, -3.1429, -8.2490, -2.2607, -5.9583,
        -4.8915, -2.1919, -2.3936, -6.3827, -1.2640, -9.4710, -4.2832, -4.8120,
        -3.1945, -6.7201, -5.7995, -3.1780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3468, -2.8240, -2.8655, -3.9204, -3.5825, -2.7045, -4.8382, -4.0539,
        -0.9716, -3.3009, -3.4997, -2.1816, -4.0903, -4.5653, -5.2671, -1.8348,
        -3.8038, -2.4794, -2.7801, -4.0135], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8145,  -3.2287,  -2.6689,  -5.3678,  -3.9590,  -4.3908,  -3.0757,
         -6.5597,  -5.8562,  -2.7197, -13.7297,  -3.3068,  -1.6848,  -3.2738,
         -5.6297,  -0.1273,  -3.4273,  -3.9387,  -3.9385,  -1.4567],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8512,  -7.5815,  -5.2982,  -4.3242,  -6.6313,  -6.5382,  -3.7787,
         -5.3785,  -3.6152,  -2.0648,  -6.6744,  -6.0624,  -2.2628,  -7.6879,
         -2.8156, -25.6274,  -4.9794,  -7.3615,  -2.9096,  -6.4316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8133, -2.2077, -5.3704, -5.9294, -1.7693, -9.3116, -3.5570, -2.2112,
        -3.2834, -5.2369, -0.4488, -6.7177, -2.0617, -1.4268, -3.7273, -6.4687,
        -1.7724, -5.9662, -4.8812, -1.4419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4286, -4.2805, -6.5775, -6.5176, -8.3776, -7.0068, -5.6314, -3.5002,
        -6.5103, -8.9542, -5.1839, -7.3457, -8.8731, -6.7734, -4.3527, -6.1424,
        -2.5217, -8.8094, -5.0120, -4.9740], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0686, -2.0363, -4.3294, -5.1614, -1.6397, -5.1145, -3.0659, -2.4160,
        -2.1501, -6.3133, -4.8390, -2.3637, -2.2393, -2.4127, -3.4661, -5.2244,
        -4.9490, -0.0332, -8.2076, -3.3917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2137, -8.1208, -7.5864, -3.4496, -6.8394, -3.8034, -5.4132, -3.3166,
        -3.0043, -7.0388, -6.0003, -0.4001, -1.7379, -3.6420, -2.3329, -6.8742,
        -6.0944, -1.7426, -3.4339, -2.1177], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2233, -6.9338, -4.0180, -3.5786, -2.8836, -1.9710, -1.1645, -1.8774,
        -6.3068, -1.9241, -3.3478, -3.0966, -3.5711, -3.0324, -6.2281, -5.8142,
        -2.9222, -5.4342, -2.6074, -2.3819], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2520, -4.3200, -6.1782, -4.2461, -3.5287, -3.9418, -3.1482, -3.5573,
        -4.0893, -6.5586, -4.7562, -2.3769, -2.7713, -2.9020, -2.9027, -6.6261,
        -5.1937, -2.1013, -4.0627, -2.8753], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9849, -3.2201, -1.9046, -2.0220, -6.5027, -5.4424, -1.4644, -4.0466,
        -1.7916, -2.0334, -2.9287, -6.8605, -5.2588, -2.3625, -4.4378, -3.1462,
        -2.5789, -4.1627, -5.8367, -2.2934], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2659, -7.7574, -5.4342, -2.6593, -5.5741, -3.0145, -2.2857, -1.9521,
        -6.1997, -4.3413, -2.4163, -4.3570, -2.3680, -1.9137, -4.6145, -5.8977,
        -1.6804, -4.6515, -3.1766, -1.7439], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3257,  -5.9877,  -4.3904,  -1.6778,  -1.9576,  -2.3464,  -2.0550,
         -4.6485,  -5.6674,  -0.7019,  -4.4932,  -2.4860,  -2.4038,  -4.5678,
         -6.2049,  -5.8561,  -2.9088, -10.1010,  -2.7478,  -3.7906],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7924, -4.1570, -3.1302, -2.2432, -6.9637, -6.9017, -3.8995, -5.3422,
        -2.9966, -3.7178, -4.6680, -5.0222, -0.1419, -6.3129, -3.3418, -3.0836,
        -4.3814, -5.6879, -2.1634, -3.4697], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1946, -6.8124, -1.0846, -6.9520, -3.9779, -3.0588, -3.4244, -6.1312,
        -4.0179, -0.8192, -3.7234, -3.1005, -2.3568, -1.7330, -5.7202, -4.3602,
        -1.9772, -2.5242, -2.1339, -1.2080], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4151, -5.5376, -1.2706, -7.2465, -3.7678, -4.1558, -1.7644, -6.2863,
        -4.5137, -1.8384, -3.3177, -2.3862, -2.2237, -3.2627, -5.6417, -2.3450,
        -5.6475, -5.0754, -3.1017, -3.3372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0444,  -4.6240, -10.7996,  -8.1536,  -4.0714,  -7.7653,  -3.2664,
         -7.0866,  -0.8746,  -6.5716,  -3.2562,  -2.3171,  -8.0360,  -5.8098,
         -2.0248,  -5.3748,  -4.0743,  -6.1182,  -4.7304,  -6.6375],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1133,  -6.5831,  -1.5040,  -4.7906,  -5.2644,  -0.3051,  -3.2351,
         -5.6553, -20.1784, -12.1281,  -9.4656,  -4.4293,  -6.0649,  -0.4667,
         -4.2829,  -3.5517,  -5.1793,  -4.9834,  -5.1135,  -5.5942],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9692, -0.2136, -3.9688, -3.1895, -4.0914, -6.7283, -6.4304, -1.6545,
        -5.4345, -3.8867, -3.5169, -3.6196, -7.8298, -5.3889, -2.7962, -2.7431,
        -1.8156, -3.8827, -1.0776, -6.5542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9042,  -2.5326,  -5.5727,  -3.8840, -15.0627,  -5.3398,  -6.6438,
         -6.9229,  -7.1430,  -3.8753,  -6.5617,  -1.3397,  -6.5158,  -3.2803,
         -2.7437,  -5.7411,  -6.0259,  -4.0488,  -8.1198,  -2.5532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8353,  -6.7122, -25.3557,  -4.4403,  -7.2743,  -7.3116,  -7.6628,
         -1.3666,  -8.0898,  -1.2863,  -6.6582,  -3.2145,  -2.6060,  -2.9475,
         -6.2842,  -1.4567,  -5.3176,  -3.1116,  -4.5583,  -6.3741],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5912, -4.4487, -2.2837, -3.0447, -3.0892, -5.4927, -0.7858, -5.6969,
        -3.0037, -2.7979, -2.4567, -6.1520, -5.2819, -1.5720, -4.6821, -3.0797,
        -2.2585, -6.2881, -5.9349, -3.0531], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6434,  -2.8873,  -6.5486,  -0.9707,  -3.1874,  -4.1029,  -9.2311,
         -7.5269,  -8.3396,  -6.4044,  -8.1158,  -2.2509, -10.1423,  -0.7982,
         -6.8719,  -5.5129,  -2.0138,  -1.3086,  -6.0009,  -4.5549],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4327, -6.9581, -4.9846, -2.0934, -6.2094, -2.2756, -1.9923, -2.4402,
        -5.3458, -1.5048, -5.5417, -2.6800, -1.4041, -3.9477, -5.3326, -2.7913,
        -5.4932, -3.2907, -1.9243, -1.1372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2012, -2.8311, -5.4442, -5.8619, -1.9132, -5.4398, -2.6904, -5.6142,
        -5.9346, -5.7830, -6.4203, -3.2399, -3.1962, -4.1728, -2.1966, -3.7910,
        -5.3575, -0.2242, -9.4751, -3.3994], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0844,  -8.4197,  -1.7973,  -2.8991,  -4.9476, -14.6023,  -8.1153,
         -8.6151,  -7.7244,  -5.2371,  -4.8314,  -0.1680,  -3.8377,  -3.6406,
         -3.2164,  -4.4738,  -6.0346,  -0.1697,  -3.8078,  -4.0594],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3395, -2.0792, -6.2009, -6.0303, -2.9995, -5.1855, -3.6031, -4.0366,
        -2.1226, -5.1913, -5.6072, -2.7506, -3.1008, -2.7351, -2.4038, -6.2358,
        -5.5519, -1.5558, -3.9916, -3.8103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0318, -4.8588, -2.7310, -5.3693, -5.3697, -1.4914, -3.9694, -3.1570,
        -2.1927, -1.2240, -6.8326, -5.4356, -2.1156, -4.3244, -1.9440, -2.3918,
        -2.9298, -6.2372, -1.8457, -1.9376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.8628,  -7.3793,  -8.3729,  -6.7075,  -3.8524,  -6.0922,  -0.9955,
         -3.1735,  -3.0656,  -2.2627,  -5.0368,  -6.4623,  -1.0778,  -5.9148,
         -2.7156, -13.2112,  -7.5598,  -7.1703,  -7.6764,  -3.0367],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7749,  -6.6239,  -4.1461, -15.1005,  -5.0827,  -2.3384,  -5.9618,
         -6.2885,  -1.4848,  -5.4724,  -3.5843,  -1.8974,  -2.1520,  -6.1944,
         -2.4717,  -4.5842,  -2.7657,  -2.1304,  -4.9166,  -5.6682],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7821,  -5.1720,  -4.6036,  -1.8383,  -4.1896,  -2.2139,  -4.5124,
         -1.4375,  -5.5998,  -2.7207,  -3.5168,  -6.0128,  -2.3462,  -4.9860,
         -2.5909,  -6.3279,  -5.0935,  -1.0482, -11.1335,  -3.9541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9764,  -6.0094,  -3.4186,  -6.3760,  -5.1796,  -1.1163,  -3.8284,
         -2.7088,  -1.9017,  -7.4280,  -5.7601,  -2.6496,  -9.2295,  -2.6171,
         -4.7099,  -1.7310,  -7.3360,  -3.7102,  -1.7287,  -3.1350],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2493, -3.3740, -3.0172, -7.3586, -6.2494, -1.4429, -7.3958, -1.9324,
        -1.3710, -2.9145, -6.6010, -4.3567, -1.8221, -3.4615, -2.7145, -3.4958,
        -2.6596, -6.6995, -0.9850, -8.6649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2257,  -4.7458,  -5.9876,  -2.0841,  -6.6892,  -1.8436,  -1.6263,
         -7.0289,  -5.5799,  -1.3569,  -5.2333,  -2.5465,  -4.6915,  -3.5051,
         -5.2883,  -0.9949,  -4.7674,  -3.5307, -13.6162,  -7.1971],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2500, -4.3268, -7.4013, -2.4255, -6.9544, -2.1806, -3.3970, -3.6156,
        -3.3355, -4.9168, -6.1897, -3.0386, -3.8869, -2.5111, -3.5706, -4.6281,
        -7.1057, -2.9626, -5.4328, -3.6742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2132, -7.1094, -2.1931, -4.4927, -3.1262, -3.3853, -2.0969, -5.4889,
        -5.1376, -3.4401, -3.4759, -3.0196, -4.5001, -0.3476, -6.1930, -4.5839,
        -1.4377, -2.5242, -3.2013, -3.4107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5274, -4.2740, -3.4030, -4.2651, -2.5188, -3.3541, -2.0778, -6.5292,
        -3.9524, -1.2801, -3.8778, -2.5864, -2.1550, -1.7268, -6.8475, -1.6799,
        -6.4739, -3.2851, -3.3133, -2.3174], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2154,  -1.9807,  -6.8435,  -3.1053,  -1.9017,  -1.0627,  -6.1013,
         -4.8757,  -2.2433,  -4.1853,  -1.7452,  -2.6069,  -1.5199,  -6.1794,
         -0.9027,  -4.4041,  -3.4585, -14.6273,  -4.9037,  -8.9266],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1801, -1.4169, -4.1275, -5.4156, -2.2236, -2.8593, -3.4915, -2.8536,
        -1.3973, -6.5202, -4.2639, -2.0300, -2.7444, -2.5641, -2.7561, -1.8429,
        -6.6974, -1.3843, -4.7612, -3.0923], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.4424,  -4.5854,  -8.3680,  -3.3472,  -6.9492,  -2.4901, -15.4157,
         -5.1391,  -6.1036, -12.0334,  -6.3892, -12.5570,  -8.6009, -10.7836,
        -10.2193,  -6.7855,  -6.0315,  -6.0340,  -6.9679,  -5.9917],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8532,  -6.1190,  -1.3589, -10.0810,  -3.1248,  -3.4869,  -4.6692,
         -5.6972,  -4.6322,  -1.4874,  -4.7661,  -2.1821,  -2.7790,  -4.3599,
         -4.3654,  -0.6934,  -2.1104,  -4.1874,  -3.6445,  -5.5911],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5184, -3.8182, -6.1601, -4.7390, -3.0956, -6.3303, -6.1682, -2.3938,
        -3.7803, -3.2717, -3.1970, -5.5883, -5.5370, -2.3915, -3.9260, -4.2125,
        -8.6517, -6.3254, -4.8636, -8.3366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1589,  -1.8330,  -4.3199,  -2.4953,  -3.2418,  -5.1877,  -5.7606,
         -2.8411,  -3.7506,  -4.3355, -17.8396,  -4.7996,  -7.6771,  -3.3948,
         -5.8703,  -4.9847,  -2.2269,  -5.1877,  -3.0098,  -4.9988],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6783, -5.2232, -3.6032, -5.2454, -2.8159, -6.1965, -4.8437, -2.3476,
        -5.5744, -3.0283, -1.8586, -5.1339, -5.1283, -1.7186, -4.4002, -3.0619,
        -3.7637, -1.3507, -6.3372, -0.9112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5128, -5.6176, -2.1221, -4.0408, -3.7078, -2.4290, -6.4911, -5.7694,
        -1.2866, -2.9638, -2.1237, -5.0159, -4.0807, -5.6315, -1.5125, -2.7729,
        -3.6894, -3.9823, -2.8249, -6.4323], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1755, -2.2413, -6.7657, -5.5209, -1.5919, -4.7091, -1.6503, -2.3156,
        -3.3118, -4.7564, -0.9432, -3.3176, -3.8983, -2.6204, -6.1552, -6.6641,
        -2.2068, -8.0717, -2.7212, -2.3214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9419,  -5.7411,  -4.3955,  -1.4176,  -3.8408,  -3.1966, -16.2783,
         -7.5810,  -7.0666,  -7.5506,  -3.8721,  -6.9209,  -0.1502,  -7.1215,
         -3.1936,  -3.5117,  -4.5293,  -6.3776,  -2.5213,  -3.1132],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4762, -2.3730, -6.5152, -5.3400, -2.6644, -3.8786, -3.1468, -1.4202,
        -3.3820, -5.8181, -0.4348, -2.6285, -2.6773, -3.3938, -3.2775, -7.2226,
        -5.9031, -3.0609, -7.0756, -4.0940], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8919,  -4.8542,  -1.0559,  -3.6476,  -3.7941,  -3.9849,  -2.8909,
         -5.1559,  -5.2781,  -1.6762, -12.0332,  -2.7153,  -1.8247,  -6.5841,
         -5.2244,  -2.4789,  -5.1078,  -2.3508,  -2.4990,  -4.0308],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7156, -4.7081, -1.9330, -3.7814, -2.5591, -2.6787, -5.6895, -6.4016,
        -0.7026, -8.5489, -3.1380, -4.3950, -2.8068, -5.7958, -4.1290, -1.1798,
        -5.6525, -2.4273, -3.6537, -7.5808], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0557, -9.4317, -2.3720, -3.0125, -2.5074, -6.1493, -4.7709, -1.7301,
        -2.7540, -3.1616, -2.8155, -2.4123, -6.5689, -0.5851, -7.5135, -3.8672,
        -3.8776, -1.6164, -6.5749, -5.0949], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2991, -1.9929, -1.5447, -5.9070, -4.7426, -3.0728, -5.0567, -3.3707,
        -5.2959, -3.5944, -3.4506, -6.3305, -2.4548, -7.8597, -3.3184, -1.7894,
        -5.9104, -4.8500, -0.9837, -3.2485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8798,  -6.8584,  -5.2045,  -4.2365,  -8.1205,  -3.8975,  -2.8932,
         -2.3013,  -5.7530,  -2.0054, -11.0793,  -3.6865,  -5.8008,  -2.6370,
         -4.7482,  -4.2130,  -2.9579,  -4.8902,  -2.6476, -19.8039],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8281,  -7.1219,  -6.8848,  -1.3605,  -3.2626,  -2.7799,  -2.1770,
         -5.3010,  -5.4344,  -2.1699,  -5.4420,  -4.1290,  -2.7110,  -2.7285,
         -6.6366,  -6.2809,  -3.1168, -10.1960,  -3.0062,  -3.5899],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0307, -2.4311, -1.7332, -6.2703, -5.7412, -3.6253, -4.7449, -2.0869,
        -1.9916, -4.5852, -5.2995, -0.8159, -2.8742, -3.5842, -2.6367, -2.3831,
        -6.6437, -5.8878, -2.6956, -2.9944], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0254, -5.4110, -2.4678, -6.0154, -3.4456, -2.0423, -5.4701, -5.1868,
        -1.4753, -7.4351, -2.4188, -1.1804, -3.1780, -5.7561, -0.8877, -4.9459,
        -3.1011, -3.9475, -2.8331, -6.2444], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9093,  -2.5568,  -7.1270,  -6.0048,  -2.5047,  -2.7571,  -3.4218,
         -0.9805,  -4.0466,  -5.9393,  -1.9524,  -4.4682,  -4.3601,  -2.9661,
        -10.9063,  -8.6821,  -7.8722,  -3.2042,  -6.7981,  -2.4437],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0067, -4.4965, -2.8188, -1.1235, -3.9068, -5.3132, -1.2447, -4.2351,
        -2.3681, -1.9231, -5.0563, -5.3595, -1.3746, -4.1858, -2.8285, -1.5763,
        -3.2980, -5.2934, -3.3212, -4.9983], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2259, -2.6808, -1.3582, -3.9051, -6.3965, -1.8970, -2.1815, -3.8333,
        -3.3401, -1.1201, -6.6443, -1.0055, -3.3993, -4.6045, -2.8479, -1.8352,
        -4.9923, -5.3840, -6.8271, -3.8530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0812,  -5.0455,  -2.7179,  -6.1206,  -4.5110,  -1.5463,  -3.6078,
         -2.9779,  -2.4824,  -5.4499,  -6.1750,  -1.9174,  -3.2712,  -2.0235,
         -1.5193,  -2.8852,  -5.8359,  -2.0107, -14.0971,  -2.7182],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9275,  -1.0398,  -7.1834,  -3.3353,  -2.3284,  -6.0148,  -5.2122,
         -0.8063,  -5.1372,  -2.3015, -11.9526,  -4.7936,  -8.7009,  -5.5913,
         -8.4042,  -2.5446,  -8.8835,  -0.4377,  -5.2748,  -4.8721],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2045, -1.1939, -7.3087, -4.9052, -1.8861, -4.1575, -2.6286, -2.2679,
        -2.4153, -7.6688, -4.6353, -3.0603, -2.4467, -2.0127, -0.9454, -3.9959,
        -5.4713, -0.0814, -8.0679, -4.4707], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9410,  -4.4264,  -3.3842, -13.8990,  -4.9380,  -4.6747,  -6.5448,
         -7.2077,  -7.4498,  -7.6608,  -1.6521, -12.7047,  -4.2320,  -4.5831,
         -2.3456,  -4.1364,  -3.7057,  -2.6405, -10.1318,  -2.3475],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4946, -2.3590, -1.4565, -5.8894, -4.5864, -0.8328, -3.0109, -3.3227,
        -3.2271, -6.0564, -4.9237, -2.3340, -3.9868, -2.7754, -1.9807, -3.5306,
        -6.3662, -4.0706, -1.7186, -2.3290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2171,  -1.3623,  -9.6951,  -5.6270,  -1.1251,  -8.3122,  -4.5851,
        -21.6322,  -5.6512,  -7.3524,  -8.7577,  -7.9766,  -8.9211,  -4.2962,
         -6.1869,  -4.8073,  -0.7825, -11.6989,  -2.0959,  -2.3486],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1124, -0.9371, -6.0508, -2.1118, -6.2662, -3.2867, -5.9380, -4.4267,
        -1.8544, -3.2963, -2.8070, -2.9149, -0.4535, -6.1910, -3.6850, -1.2949,
        -2.7437, -3.0868, -2.5072, -4.0768], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8860, -3.4405, -6.4645, -5.1817, -2.6404, -2.5530, -2.1238, -3.3768,
        -4.2982, -5.4029, -1.5121, -4.8290, -4.0701, -2.8909, -5.4631, -6.6157,
        -5.9036, -3.1190, -7.1841, -3.0913], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1546,  -2.3966,  -4.8918,  -6.4375, -17.0269,  -6.6701,  -8.4096,
         -3.3457,  -5.4636,  -0.6397,  -6.8232,  -2.9296,  -1.7610,  -6.2838,
         -5.8211,  -0.4447,  -5.3429,  -2.3745,  -1.3328,  -6.7678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3077,  -4.1119,  -4.1951,  -0.7141,  -6.3093,  -4.3659,  -1.6242,
         -4.9508,  -3.5310,  -1.6272,  -9.0999,  -5.9684,  -1.7917, -10.8995,
         -4.9690, -26.0131,  -5.2581,  -9.8185,  -6.7410,  -7.1157],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9517,  -3.8517,  -2.8674,  -1.7438,  -5.3911,  -5.7881,  -0.7640,
         -5.4042,  -3.1210, -18.2616,  -8.8126,  -9.4342,  -7.7581,  -2.6493,
         -7.5948,  -0.4814,  -3.3863,  -2.7980,  -1.7999,  -3.6043],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8295, -2.5328, -6.0039, -5.6419, -3.2668, -4.3178, -2.1788, -2.5341,
        -2.5015, -5.5759, -4.5430, -1.9284, -3.4345, -1.7943, -4.8600, -3.9888,
        -5.1983, -1.0211, -5.0477, -2.5521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4031, -3.1321, -2.7100, -6.3147, -0.4152, -4.4268, -3.1873, -4.0660,
        -2.6501, -5.4256, -4.3706, -0.9983, -7.8140, -2.4149, -1.8242, -4.3995,
        -6.6219, -3.9406, -1.2445, -5.4847], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6974,  -4.4781,  -8.0140,  -7.8042,  -7.1273,  -1.9723, -19.9087,
         -3.9168,  -1.4608,  -5.0391,  -6.5952, -24.4844, -23.1492,  -4.8925,
         -4.1517,  -4.3089,  -5.1613,  -6.8855,  -6.4692,  -4.7660],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9357, -5.6218, -3.4413, -1.7730, -2.9601, -2.3224, -5.6274, -5.4985,
        -2.0639, -3.6305, -2.3555, -2.2243, -2.7623, -4.8031, -0.9100, -4.0637,
        -2.9785, -1.9073, -5.1215, -5.6381], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1549, -2.0627, -2.2242, -6.6225, -4.3553, -1.6975, -2.5917, -3.2922,
        -2.7382, -5.9614, -6.1979, -1.7377, -9.5651, -3.0978, -4.1149, -2.3872,
        -6.8571, -5.0742, -1.9122, -3.2770], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9790, -1.8310, -4.2208, -4.1479, -5.0007, -5.4661, -7.4473, -2.8351,
        -5.2973, -2.5454, -3.8050, -3.9671, -6.1317, -1.8218, -4.7066, -2.0504,
        -3.3522, -0.5666, -6.1758, -4.7540], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3328,  -1.6946,  -6.6951,  -5.4829,  -2.0819,  -4.5782,  -2.1941,
        -19.3144,  -4.6546,  -8.7335,  -2.6876,  -7.8340,  -0.4185, -10.4456,
         -4.0115,  -3.1543,  -6.1798,  -7.0030,  -1.4112,  -5.6538],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6373,  -4.5310, -13.9800, -12.4605,  -8.8337,  -6.1243,  -2.6826,
         -6.2853,  -2.4575,  -3.0785,  -5.1184,  -2.5352,  -3.6084,  -6.6582,
         -2.8496,  -9.1067,  -4.5024, -11.8275,  -5.8693,  -5.5608],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1266, -5.1040, -2.2590, -4.6905, -2.2266, -2.4047, -4.4530, -5.8189,
        -1.6110, -7.5709, -2.6115, -2.3089, -4.3549, -4.7423, -0.4794, -3.2439,
        -2.7519, -2.2285, -6.2892, -6.2943], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1219,  -3.4393,  -2.3027,  -1.9808,  -2.2670,  -5.4056,  -0.1270,
         -6.3001,  -3.0427,  -3.9223,  -6.7174,  -5.9597,  -1.6423, -11.9655,
         -2.4106,  -1.1075,  -3.4649,  -5.4599,  -0.9033,  -4.5436],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1618, -1.6683, -1.7036, -3.7075, -6.2938, -0.7957, -6.2672, -2.7476,
        -4.3074, -6.4575, -6.3136, -2.3575, -5.7601, -1.8539, -2.4300, -4.0681,
        -5.0401, -1.0452, -4.2571, -3.3226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6214,  -3.5269,  -4.2547, -11.8622,  -5.4546,  -7.3267,  -7.8061,
         -8.1328,  -6.6029,  -1.5559,  -8.3009,  -5.1615,  -4.0636,  -4.8232,
         -6.9147,  -2.1354, -13.0891,  -9.0093,  -8.9046,  -4.0758],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9149,  -5.1749,  -4.4323,  -3.6435,  -6.9257,  -3.3948,  -3.6072,
         -3.0155,  -3.6337,  -5.6663,  -2.7790, -18.4102,  -2.5436,  -2.6993,
         -6.1972,  -5.5683,  -2.0301,  -7.2603,  -2.7546,  -8.5984],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7540,  -3.2178,  -4.7088,  -3.7787,  -1.2735,  -3.8195,  -3.6371,
         -1.2559,  -2.8327,  -6.1775,  -0.4039, -12.7404,  -2.5400,  -3.7975,
         -6.5177,  -6.5377,  -3.0913,  -6.1919,  -4.7593, -25.7908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8646,  -6.8141,  -1.0629,  -1.4639,  -3.6918, -17.2726,  -8.6528,
         -8.8963,  -7.0158,  -5.5858,  -5.0856,  -1.0494, -11.7326,  -8.6722,
         -6.8919,  -9.3361,  -4.7070, -10.1553,  -3.8723,  -6.2081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9554,  -3.2988,  -5.5418,  -6.2143,  -1.9836,  -8.8972,  -5.9473,
        -21.1247,  -6.6343,  -6.8675,  -6.8638,  -7.2663,  -8.5256,  -2.8525,
        -11.1017,  -0.5482,  -6.0845,  -4.2306,  -4.2251,  -1.4640],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3713, -1.2048, -2.5640, -2.9487, -2.1711, -6.2876, -5.2269, -0.6437,
        -3.9691, -3.7714, -2.4018, -7.1762, -5.5934, -3.4215, -4.2464, -2.7699,
        -2.7830, -7.0440, -4.9518, -0.1971], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0311,  -2.5408,  -3.8845,  -3.0087,  -5.6655,  -2.3609,  -4.3776,
         -5.7658,  -2.5252, -15.3988,  -3.7766,  -7.2002,  -6.1978, -17.1731,
         -5.7326,  -7.8233,  -7.7521,  -7.2457,  -2.0511,  -4.7132],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9001,  -3.7584,  -3.6633,  -5.7164,  -6.2003,  -2.7577, -11.0793,
         -3.1159,  -1.7725,  -2.3244,  -5.9373,  -1.1989, -19.1909,  -2.7875,
         -2.4477, -13.9892,  -4.8708,  -1.0693,  -5.0021,  -9.1287],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6725, -4.8705, -4.9754, -1.3695, -3.8082, -2.3372, -3.8449, -6.9083,
        -5.7149, -1.2228, -5.3942, -3.9188, -3.5427, -3.9730, -7.1253, -4.9545,
        -3.1317, -7.3133, -2.6856, -3.3162], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7888, -6.5924, -4.0124, -2.6229, -3.1059, -2.1486, -1.5566, -7.4405,
        -4.6965, -1.6957, -6.6489, -1.7123, -1.5984, -2.1955, -6.6024, -1.9047,
        -4.0669, -3.6003, -3.9107, -4.3008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9813,  -5.5451,  -2.9419,  -1.5821,  -5.8640,  -6.2248,  -0.5408,
         -3.3813,  -3.0741,  -2.8297,  -2.1833,  -5.5949,  -5.6069,  -2.6820,
         -2.4852,  -2.6684,  -7.7931, -11.3120,  -4.8607, -11.1852],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5346,  -2.3002,  -2.4800,  -2.8487,  -1.4038,  -7.3437,  -5.5421,
         -0.5303,  -8.7816,  -3.7779,  -5.1762,  -2.7767,  -6.5567,  -4.3791,
         -3.5470, -12.2066,  -2.6034,  -2.6542,  -9.3035,  -5.2277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9362, -1.0681, -5.3907, -3.1990, -5.0506, -2.8574, -5.6525, -6.1798,
        -2.8482, -7.5487, -2.9219, -3.0202, -6.2616, -5.5851, -1.4862, -3.5744,
        -2.8348, -2.3434, -3.7558, -6.3310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8389, -2.0192, -2.2289, -6.0752, -5.1830, -2.1316, -3.9602, -2.3005,
        -0.8768, -7.2514, -5.1204, -0.9649, -4.0634, -2.1439, -3.9151, -1.4767,
        -6.2963, -4.0885, -2.3047, -1.9778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8939,  -5.0780,  -4.2935,  -5.9903,  -2.9900, -11.5304,  -6.8437,
         -5.5930,  -7.3266,  -3.5833,  -6.2517,  -0.8490,  -6.6082,  -2.1886,
         -2.3643,  -6.8996,  -4.7158,  -1.7390,  -3.4954,  -2.0105],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8398, -2.1612, -5.8868, -4.4192, -0.8723, -3.3602, -3.0750, -3.2802,
        -3.0633, -5.4699, -0.5286, -4.5423, -3.3232, -2.3392, -2.3721, -6.3780,
        -4.7198, -1.8962, -3.0923, -2.3960], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3590,  -1.2641,  -3.6890,  -5.9776,  -1.2944,  -5.3449,  -3.0489,
         -3.5400,  -8.9485,  -4.8608,  -1.2061,  -7.5149,  -1.7856,  -2.0196,
         -1.8358,  -6.5120,  -1.9568, -12.4978,  -3.3835,  -2.7439],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5353, -2.7224, -7.1188, -1.4546, -5.8516, -4.6060, -2.6294, -2.2441,
        -6.7605, -1.6468, -6.3831, -2.2317, -3.2781, -2.1446, -5.3524, -5.2262,
        -2.2651, -3.0360, -2.0313, -2.4302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8474, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0438, -2.4289, -6.3872, -4.6677, -2.3243, -2.9845, -3.6411, -2.1320,
        -5.4976, -5.3970, -0.2393, -3.8433, -2.4568, -1.4441, -2.6449, -6.9253,
        -4.0764, -1.2816, -3.2186, -2.4599], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9465, -5.6812, -6.1115, -6.3424, -7.6638, -5.4686, -8.2730, -5.8292,
        -4.1203, -8.9111, -6.1625, -5.8145, -6.9952, -7.3998, -4.8975, -7.0019,
        -5.1447, -5.5843, -5.5558, -5.0224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-6.2463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4539,  -3.0920,  -2.9595,  -2.5327,  -5.9339,  -5.5643,  -1.6068,
         -5.3155,  -5.4403,  -8.7230,  -6.4782,  -3.7592, -10.2724,  -5.6194,
         -8.1826,  -1.7234, -13.4812,  -5.9380,  -2.5762, -17.5848],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7428, -6.3984, -3.7641, -2.6621, -2.1959, -5.6872, -5.2334, -1.7531,
        -6.2019, -1.6635, -2.0072, -3.6024, -5.7845, -0.2846, -3.6087, -2.7947,
        -1.2980, -7.6530, -5.0918, -0.7940], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7635e+00, -6.9430e+00, -5.7748e+00, -2.7287e+00, -6.4010e+00,
        -1.9445e+00, -3.6404e+00, -4.6721e+00, -6.8217e+00, -6.4228e+00,
        -4.0925e+00, -8.9903e+00, -3.0590e+00, -2.4647e+00, -3.6276e+00,
        -5.4201e+00, -6.7637e-03, -6.2925e+00, -2.9853e+00, -2.1925e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9713, -2.1320, -0.6687, -3.8906, -3.3616, -2.7676, -6.1042, -1.6631,
        -5.4035, -2.5581, -1.8375, -2.4628, -6.6214, -4.6017, -1.8025, -4.4095,
        -1.8645, -2.3672, -8.5534, -4.3133], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0340, -1.2780, -4.9439, -6.0520, -0.8687, -4.0131, -2.2674, -4.1925,
        -0.9031, -6.5355, -4.8277, -0.5340, -6.0546, -4.2473, -2.2547, -4.4973,
        -6.5675, -2.1867, -6.8261, -4.5101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8174,  -6.5894,  -5.5597,  -1.3499,  -5.0290,  -2.9840, -30.5115,
         -7.3986,  -9.2383,  -1.8147,  -7.9501,  -0.9411,  -4.8630,  -5.4945,
         -6.2855,  -4.9885,  -3.5539,  -6.3010,  -1.9927,  -4.0240],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3154, -5.2237, -1.5435, -4.2367, -2.4445, -2.5316, -6.8682, -6.1935,
        -0.9558, -8.1513, -4.1124, -3.2646, -3.2563, -6.5463, -5.1420, -7.6918,
        -8.8541, -4.0229, -4.5987, -7.1224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0227, -5.4936, -3.1848, -7.6047, -2.1313, -2.6170, -3.6818, -6.5498,
        -4.0681, -2.0305, -4.0652, -2.2387, -1.4089, -3.7195, -5.9014, -0.5977,
        -3.4273, -3.8155, -2.4049, -4.8584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2205, -8.1972, -6.4536, -5.3821, -4.9099, -1.9324, -6.5951, -3.1411,
        -2.2939, -1.7387, -6.5578, -4.5195, -2.2167, -3.3317, -2.5087, -2.0011,
        -2.6803, -6.2816, -1.7427, -6.5102], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4786, -6.0926, -2.4519, -5.2847, -3.0294, -2.3365, -4.6737, -6.0832,
        -0.4118, -7.5419, -2.6371, -2.4074, -3.2685, -4.9281, -0.9808, -6.2866,
        -3.0519, -3.3503, -4.1287, -5.5335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1271, -2.9684, -2.1736, -6.6535, -4.1255, -1.8436, -2.9819, -1.8306,
        -3.9255, -4.3640, -5.5788, -0.1422, -3.6969, -2.7340, -2.1661, -4.2946,
        -5.7635, -1.6385, -2.6108, -3.7742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.9092,  -8.2819,  -6.2746,  -7.7462,  -2.6555,  -7.3538,  -1.1169,
         -6.4995,  -3.0499,  -2.1489,  -6.5795,  -5.6749,  -1.9062,  -4.9602,
         -4.5992, -22.4003,  -7.0029, -10.0993,  -8.6932,  -6.6530],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2337, -3.9887, -4.5751, -2.4904, -2.4530, -5.7699, -3.9126, -3.7758,
        -5.6512, -2.6889, -5.5519, -6.0747, -6.1517, -2.1596, -3.5293, -3.2433,
        -1.6036, -5.8351, -5.5191, -3.5947], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2976, -5.4790, -9.6162, -9.1568, -4.4455, -6.3467, -6.4411, -8.3875,
        -9.4945, -6.3060, -4.1912, -3.8207, -5.0399, -3.4576, -3.4639, -5.3917,
        -1.4168, -3.8092, -4.4082, -3.6135], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3486, -2.7297, -0.8284, -6.4730, -4.9670, -1.0481, -6.3718, -2.0722,
        -1.3940, -5.8566, -4.7389, -0.3990, -4.2049, -2.4028, -4.5152, -0.0522,
        -6.2815, -1.0395, -5.1572, -3.9874], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6473, -6.2091, -4.1177, -2.9203, -5.9518, -2.7709, -2.8522, -4.4314,
        -5.2698, -1.1324, -3.6280, -3.7946, -2.2108, -3.1145, -6.2870, -4.7516,
        -1.7735, -4.9960, -2.0053, -3.5188], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7519, -3.1503, -4.4716, -2.0002, -5.4439, -3.7707, -2.4645, -4.5152,
        -4.6373, -2.6422, -1.4381, -5.1123, -3.6737, -1.2220, -4.0311, -2.3863,
        -2.5209, -1.3360, -5.9880, -0.9247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1740, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2300, -7.6162, -1.2868, -9.0891, -3.9305, -3.7050, -1.7067, -5.8180,
        -5.0321, -1.0386, -5.6524, -2.9319, -1.5730, -4.0496, -5.5461, -0.4122,
        -5.0647, -2.5110, -3.1229, -3.3714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6168, -5.2854, -5.8036, -1.6516, -6.3092, -3.0331, -2.0469, -2.3193,
        -6.2204, -1.7332, -1.2891, -3.6303, -1.4033, -5.8145, -6.1997, -1.3808,
        -4.8420, -1.2531, -6.2458, -4.0503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3875, -9.4945, -6.3060, -4.1912, -3.8207, -5.0399, -3.4576, -3.4639,
        -5.3917, -1.4168, -3.8092, -4.4082, -3.6135, -3.5234, -7.7945, -5.8033,
        -3.5329, -3.8666, -3.0996, -3.6232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7844,  -2.9981,  -2.5477,  -1.6038,  -5.7412,  -4.5260,  -0.4074,
         -2.4312,  -3.2406,  -1.9981,  -1.9943,  -6.0855,  -4.6398,  -1.8380,
         -4.1274,  -2.6259, -16.0006,  -6.2177,  -7.3732,  -8.1612],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6129, -3.9089, -5.8691, -0.2525, -4.5352, -2.2164, -4.6923, -6.7272,
        -6.4360, -2.1700, -6.2147, -3.6287, -1.5529, -3.7760, -4.9081,  0.3274,
        -3.6615, -2.9381, -3.8672, -0.6450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5177, -5.5355, -3.8169, -3.4391, -3.4407, -7.7733, -5.1027, -2.6868,
        -2.6877, -1.7374, -3.8976, -0.7010, -6.5717, -4.3360, -1.7922, -5.8560,
        -3.2927, -3.6332, -5.5931, -6.0784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5696, -3.6610, -4.8772, -7.2304, -5.9901, -3.4520, -6.5246, -5.0589,
        -9.6376, -4.8867, -6.4476, -5.7176, -3.1079, -2.2036, -2.4312, -2.3938,
        -2.0330, -6.8787, -0.9215, -4.3939], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6997,  -4.2598,  -6.1451,  -1.4358,  -9.7544,  -2.4601, -14.8016,
         -6.0654,  -7.7838,  -8.0650,  -7.9721,  -3.4500,  -6.9396,  -0.6453,
         -5.1018,  -4.1469,  -4.2081,  -1.8223,  -5.5003,  -4.5366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4132, -3.8992, -3.9185, -3.5451, -2.5844, -5.4658, -4.0805, -1.1228,
        -2.6608, -2.7733, -1.2153, -8.7518, -5.3524, -0.3871, -8.2959, -1.9890,
        -0.5534, -4.1709, -4.8877, -1.6975], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3801,  -4.9545,  -1.2279,  -4.6862,  -4.3784,  -8.2581,  -4.9916,
         -3.2513, -13.7541,  -2.9667,  -2.3847,  -4.2934,  -7.2385,  -2.3580,
         -4.6999,  -2.5636,  -3.3863,  -1.8046,  -5.2243,  -5.5081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3557, -6.1490, -0.6978, -4.4767, -2.7920, -5.2761, -6.8230, -5.1408,
        -1.2693, -7.8336, -3.8734, -1.3088, -4.1745, -6.7542, -5.0037, -1.7901,
        -3.7724, -1.8783, -1.6226, -5.2138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2275,  -8.9281,  -3.7086,  -5.5653,  -0.3974,  -6.3239,  -3.9742,
         -2.7534,  -6.2049,  -5.8309,  -0.6759,  -4.4356,  -2.6164,  -2.6333,
         -2.7063,  -4.6482,  -2.2251,  -6.6064,  -3.2967,  -2.3830],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2753, -6.4637, -4.5880, -1.3118, -2.5264, -2.2954, -1.3763, -4.0944,
        -5.5597, -0.8013, -6.0054, -3.1309, -1.8920, -3.1381, -5.8155, -0.5115,
        -6.0912, -4.0935, -4.0387, -1.7267], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5348,  -4.3803,  -0.8285,  -4.2343,  -2.8523,  -3.4107,  -1.6277,
         -5.5622,  -0.9735,  -3.9369,  -2.1510, -13.4919,  -5.2246,  -6.0521,
         -8.6223,  -2.2041,  -6.9593,  -1.3261,  -6.2548,  -4.1151],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2375, -2.8284, -3.2523, -0.7962, -6.5236, -4.2813, -1.8360, -3.0450,
        -3.4945, -1.7695, -1.1744, -5.4690, -1.4394, -6.7728, -3.9756, -4.1061,
        -4.6812, -7.4164, -6.1569, -3.8522], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3848,  -2.7112,  -2.5788,  -2.0843,  -1.9812,  -9.0956,  -4.6684,
         -1.0666,  -5.3358,  -3.9360, -13.8783,  -7.7472,  -5.9737,  -8.5051,
         -2.0337,  -7.1281,  -1.2606,  -7.7332,  -3.1409,  -1.8791],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6420,  -6.6142,  -2.0027,  -7.3430,  -3.0093,  -4.1812,  -3.6893,
         -6.1981,  -4.1912,  -1.8315,  -4.1930,  -3.1125,  -3.7807,  -3.0359,
         -5.7334,  -0.3976,  -4.2859,  -2.8091, -26.2108,  -2.3180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9290, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4397, -2.5389, -5.7601, -4.4507, -0.8991, -5.3290, -2.2977, -2.6993,
        -1.8204, -6.0016, -0.5108, -4.2031, -3.8799, -3.5198, -3.9176, -6.6217,
        -5.7760, -3.0723, -4.9248, -2.9523], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5556,  -6.9808,  -5.3979,  -0.6534,  -8.8787,  -2.1987,  -1.0226,
         -6.1781,  -5.0194,  -1.7177,  -7.9702,  -1.5822,  -4.8133,  -6.3069,
         -5.0486,  -1.7952,  -5.0803,  -7.4467, -12.3103, -10.4042],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4933,  -1.0161, -11.4887,  -3.9334,  -4.1440,  -2.1657,  -6.3668,
         -4.1062,  -2.2175,  -4.4866,  -2.3898,  -4.2209,  -0.0247,  -6.8426,
         -4.3324,  -0.6198,  -5.7348,  -2.5717,  -3.9094,  -3.5758],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6402,  -1.9948,  -5.9971,  -2.5029,  -2.2306,  -7.8900,  -5.6963,
         -1.6614,  -3.9780,  -4.7672, -12.1935,  -7.2763,  -5.0832,  -7.9047,
         -6.9756,  -8.6034,  -2.9772,  -7.8349,  -1.0338,  -5.5313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0496,  -7.9892,  -2.6938,  -6.3919,  -0.6234,  -9.8168,  -2.5541,
         -3.8018,  -6.0376,  -6.1593,  -1.2023,  -4.7405,  -3.9687, -15.5154,
        -10.1403,  -8.2967,  -4.5137,  -4.7638,  -1.1962,  -4.4414],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7066, -4.4613, -7.1329, -6.5438, -6.4823, -7.6474, -4.6088, -7.4497,
        -3.8677, -5.4618, -3.6779, -2.9174, -7.1036, -5.7699, -1.8197, -5.3898,
        -2.8803, -3.7163, -6.5603, -5.4512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1824, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5974,  -4.0275,  -6.1144,  -4.3592,  -1.3085,  -5.2765,  -3.5877,
         -2.3421,  -2.3948,  -5.4808,  -0.1846,  -3.9906,  -3.3943,  -3.0410,
         -6.5383,  -5.4459,  -1.1718,  -4.8504,  -4.4951, -22.5695],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6213, -2.9065, -2.9513, -4.7925, -4.5271, -2.1248, -2.6574, -3.6342,
        -4.5635, -3.2330, -6.3472, -3.5932, -3.2371, -4.4630, -3.6686, -1.2534,
        -6.1340, -4.5172, -0.9793, -5.5395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4652, -2.4904, -3.1296, -2.4157, -8.9424, -5.8702, -0.7267, -5.8871,
        -2.5159, -3.5753, -5.9387, -6.4251, -4.5025, -1.8266, -3.2405, -3.7987,
        -5.5169, -1.9105, -7.0094, -4.6551], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4383,  -4.2067,  -3.8176,  -0.5102,  -5.6430,  -4.8991,  -0.4793,
         -4.3752,  -2.1412,  -1.3681,  -2.7081,  -5.7938,  -1.1396,  -4.4907,
         -3.1688, -14.2623,  -2.7222,  -9.3077,  -3.3121,  -8.0099],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2397, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8147, -3.5155, -4.7439, -0.1784, -2.6276, -4.5821, -4.1319, -2.5279,
        -4.7950, -4.4844, -0.4520, -5.7424, -1.7430, -1.6881, -5.2391, -5.5406,
        -1.1000, -3.2883, -1.8419, -2.0001], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9970,  -1.5331,  -4.6492,  -3.4923, -12.1989,  -6.6368,  -7.5264,
         -8.4779,  -5.9668,  -5.9971,  -1.4019, -11.5515,  -5.0235,  -3.9649,
         -4.4209,  -5.3412,  -6.5223,  -4.0455,  -4.0240,  -2.9113],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6430,  -6.8107,  -6.3087,  -1.5942, -15.7733,  -2.3696,  -3.7319,
         -3.0592,  -5.9126,  -4.4150,  -1.8801,  -4.8399,  -3.3401,  -2.9602,
         -5.8815,  -5.6887,  -1.1611,  -3.3658,  -1.9939,  -2.4636],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8944,  -5.4188,  -1.8142,  -4.1522,  -5.4265,  -3.7673, -10.3019,
         -2.6969,  -6.7168,  -6.2192,  -8.4042,  -9.1061,  -7.2971,  -5.6475,
        -15.3516,  -5.8902, -23.1937,  -6.9705,  -7.6720,  -6.4455],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7614,  -3.1951,  -2.8026,  -1.1397,  -5.9483,  -0.4768,  -4.7261,
         -2.5049, -12.0800,  -4.6421,  -5.8915,  -6.1173,  -8.5538,  -6.8630,
         -7.7890,  -1.7123,  -6.1420,  -3.8681,  -2.1591,  -0.4788],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5660, -5.2858, -4.5243, -7.0378, -4.2945, -1.6774, -4.2091, -3.7123,
        -4.1337, -1.1719, -5.5361, -4.1352, -2.3362, -3.9800, -2.1967, -2.9784,
        -0.9579, -5.3424, -4.2403, -3.4776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4767, -12.6756,  -5.7796,  -8.8979,  -2.1739,  -6.3391,  -0.1350,
         -9.3444,  -3.3038,  -2.7855,  -1.2051,  -5.6271,  -4.1450,  -0.6119,
         -5.0392,  -2.6753,  -1.7843,  -1.7207,  -6.3455,  -4.2592],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9875,  -5.3548,  -1.6269,  -3.4077,  -2.4941, -21.4773,  -5.5078,
         -7.2677,  -7.3205,  -1.9902,  -7.0414,  -0.9036,  -6.1239,  -3.0520,
         -3.3769,  -6.7068,  -5.3743,  -4.7020,  -3.5114,  -3.3422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7104, -5.9463, -7.1047, -5.6308, -7.2420, -5.6078, -6.0136, -3.2545,
        -8.9973, -6.8336, -6.5080, -6.5751, -4.1661, -5.4770, -6.0796, -2.2088,
        -8.8024, -4.0131, -5.0363, -5.7265], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2422,  -4.2281, -13.4411,  -6.3158,  -6.9975,  -2.1902,  -7.8899,
         -2.1581,  -6.1746,  -4.7256,  -3.0966,  -9.6310,  -8.2049,  -2.4019,
         -5.0789,  -4.3386, -11.3141,  -5.9154,  -7.3702,  -7.8960],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1386, -5.2378, -3.5792, -4.8675, -5.7422, -2.5406, -2.9662, -4.6851,
        -2.3838, -7.1025, -5.6180, -4.1122, -5.6364, -3.5487, -8.5737, -5.8948,
        -6.7421, -7.3010, -1.7593, -5.4742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2786, -3.0438, -2.7778, -4.2349, -6.2197, -1.1033, -7.1696, -3.2680,
        -2.9877, -1.7899, -6.7695, -5.7649, -2.7688, -3.5363, -2.1484, -3.3164,
        -6.1601, -5.4482, -0.5325, -7.6428], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2641, -8.3371, -4.7484, -6.7687, -6.5407, -6.3644, -4.7197, -3.2668,
        -3.7093, -5.6027, -1.3470, -5.2823, -3.8116, -7.1966, -7.2000, -6.5382,
        -7.6070, -6.6001, -6.5545, -2.5486], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4669,  -3.7350, -11.8230,  -4.2749,  -6.8421,  -6.5586,  -5.6489,
         -6.6126,  -1.6775,  -6.2141,  -3.9277,  -2.0295,  -6.4348,  -5.0767,
         -2.0216,  -5.5426,  -4.5795,  -2.9581,  -3.8783,  -5.8638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6037, -1.3682, -3.2845, -2.6299, -5.1604, -3.1565, -6.2293, -5.2351,
        -2.5441, -5.7461, -2.2801, -2.7317, -0.9567, -7.0161, -2.0316, -4.0517,
        -3.3426, -3.5853, -7.2203, -5.8843], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9587, -3.9755, -0.8627, -5.9832, -4.1709, -2.6814, -4.2053, -2.8021,
        -1.6861, -3.1267, -6.2111, -3.8942, -1.9447, -2.8020, -2.0518, -2.7597,
        -2.5314, -5.4929, -1.7963, -2.6609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9949,  -4.8253,  -3.1599,  -3.1934,  -3.8639, -17.8149, -12.0804,
         -8.1609,  -2.9124,  -6.3838,  -1.4698,  -8.6533,  -2.4639,  -2.7570,
         -4.1911,  -6.3435,  -1.0892,  -4.9815,  -4.3998,  -3.4483],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7454, -8.1270, -3.4015, -6.7414, -1.1931, -6.6445, -3.7820, -2.9467,
        -1.2738, -6.5826, -5.3126, -1.0299, -8.5117, -3.0360, -3.5237, -3.2237,
        -6.0877, -4.5757, -1.1758, -3.8788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9392, -2.6926, -5.9251, -9.2359, -4.5795, -8.4605, -2.7986, -3.7888,
        -4.3164, -3.1069, -4.8080, -5.8678, -0.4644, -4.9580, -5.8782, -3.8499,
        -3.0837, -4.6262, -6.3302, -3.0466], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8657, -4.2046, -4.9197, -1.8758, -4.6132, -3.1233, -3.6695, -4.1853,
        -4.9474, -6.1975, -4.1109, -9.5994, -2.7882, -6.5276, -6.7254, -1.5702,
        -5.5449, -3.5705, -3.2639, -4.8365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.0690,  -3.6662,  -1.7323,  -5.0386,  -5.9612,  -1.5761,  -4.7046,
         -4.7213, -13.0112,  -4.2060,  -8.2440,  -2.3672,  -6.3946,  -0.6693,
        -13.2299,  -3.7656,  -3.6621,  -7.5348,  -8.5330,  -8.2827],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5976, -2.4555, -4.9155, -5.7915, -4.8861, -3.6523, -4.0343, -1.5179,
        -2.8735, -4.2361, -4.8750, -0.3341, -5.7570, -3.2697, -2.4233, -1.0436,
        -6.8230, -4.6497, -1.6505, -5.7871], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8083, -6.9468, -3.7684, -2.9378, -0.8171, -5.4301, -4.0146, -2.6146,
        -4.8766, -1.5252, -1.9734, -3.3580, -5.7822, -0.8977, -4.3966, -3.9438,
        -9.8401, -6.4112, -9.3644, -4.7453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6473, -1.6798, -1.6475, -3.2390, -6.0986, -4.9884, -2.8130, -4.1419,
        -2.2968, -1.9041, -3.2728, -5.5109, -4.7260, -3.7584, -2.9738, -2.7538,
        -5.4536, -5.2859, -5.8788, -2.7495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5695,  -4.9907,  -3.5041,  -4.1779,  -3.7482,  -3.7618,  -1.9827,
         -5.7148,  -6.0354,  -2.2220, -10.7225,  -1.9875,  -3.0979,  -1.2231,
         -5.5151,  -0.7290,  -4.7246,  -2.9491,  -2.1533,  -5.4151],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2573, -5.6809, -1.3649, -4.3956, -1.9520, -1.5796, -7.9122, -4.9662,
        -0.8454, -2.2526, -2.7628, -1.4039, -6.6577, -5.6662, -2.0846, -4.9895,
        -3.1690, -2.4593, -2.0335, -6.4441], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0867,  -6.1833,  -5.0815,  -0.1522,  -2.7692,  -3.0950,  -2.3557,
         -0.6969,  -7.2079,  -0.8573,  -3.9097,  -3.4792, -23.6192,  -5.5290,
         -8.7055,  -2.3695,  -7.3475,  -0.5523, -19.1399,  -4.0753],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9808,  -1.4647,  -6.5049,  -5.8363,  -1.0591,  -4.2021,  -2.6423,
        -14.0199,  -7.9508,  -9.4365,  -8.4097,  -3.0181,  -6.6503,  -1.2126,
         -8.5313,  -3.9219,  -2.4280,  -2.3053,  -6.5405,  -4.2091],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3877, -4.9011, -3.9418, -5.6459, -3.8124, -3.0928, -4.7547, -2.7973,
        -1.8827, -3.5593, -5.4668, -0.5181, -6.9715, -3.4439, -1.3263, -1.8983,
        -5.3463, -1.4321, -3.8915, -3.3423], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9955,  -1.9642,  -5.2827,  -3.5905, -25.2640,  -5.4639,  -9.0179,
         -6.8357,  -7.6376,  -2.8470,  -7.1767,  -0.5397,  -6.1444,  -4.3322,
         -2.1085,  -1.7832,  -6.5201,  -1.3712,  -6.6043,  -3.8994],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6155,  -4.0358,  -3.8594, -11.4821,  -6.2414,  -6.2251,  -7.1877,
         -5.3811,  -7.5954,  -1.3064,  -5.4948,  -4.4697,  -2.9795,  -2.1397,
         -6.4012,  -5.2354,  -1.3582,  -6.2111,  -2.0088,  -2.2377],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8112, -2.9739, -2.6712, -4.5363, -3.6828, -4.1820, -5.8224, -5.9697,
        -5.6629, -5.1312, -3.7080, -1.4849, -4.7857, -5.2863, -3.2930, -3.9954,
        -3.5680, -4.2882, -6.3407, -5.3339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4858, -1.3732, -2.0823, -6.3954, -5.7843, -1.4600, -2.9275, -1.7812,
        -2.4037, -4.1926, -4.9186, -0.8910, -2.9888, -2.5882, -2.0523, -5.1455,
        -4.7440, -1.4151, -5.2768, -2.5980], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2773, -3.5667, -3.3696, -2.3555, -3.9444, -5.3041, -0.5093, -6.7035,
        -2.4858, -2.2664, -3.3151, -6.9732, -0.8463, -6.5974, -3.0459, -2.3989,
        -2.2027, -6.5379, -0.7296, -3.0896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4318, -4.9318, -8.5349, -2.2484, -5.4208, -3.2746, -4.4447, -6.9038,
        -6.0590, -2.6023, -5.5468, -2.8756, -4.1469, -7.4584, -5.7472, -1.7357,
        -5.0097, -3.0985, -3.1774, -2.4830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4385, -6.2980, -4.1957, -1.4393, -3.4099, -3.3811, -4.3296, -1.5345,
        -5.1574, -4.6814, -1.3829, -4.3755, -1.9955, -3.3083, -1.7400, -5.2008,
        -1.6206, -2.6358, -3.9618, -2.8075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0027, -5.1257, -3.4641, -2.3489, -3.1379, -5.1316,  0.0709, -3.5146,
        -3.1248, -3.9284, -1.5009, -6.9125, -4.5358, -0.5408, -3.6749, -2.1698,
        -4.4812, -3.2967, -7.0297, -5.8663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3659, -6.0016, -5.4907, -4.7958, -3.0804, -2.6926, -5.2348, -4.3624,
        -0.2470, -5.3091, -3.6768, -3.2396, -2.4301, -5.1033, -0.8286, -4.5062,
        -2.1986, -5.4061, -6.6498, -6.4852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4128,  -7.0130,  -3.5932, -11.1624,  -5.8187,  -2.7378,  -9.2420,
         -5.7550,  -1.1546,  -9.1040,  -5.6564,  -8.1143,  -6.9540,  -7.3212,
         -7.5521,  -6.7196,  -9.7835,  -8.0915,  -6.6312,  -3.7570],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0678, -6.7095, -7.7029, -6.7852, -6.7033, -4.3199, -4.7523, -0.4557,
        -4.4466, -5.4208, -3.2577, -4.7819, -5.5309, -1.1069, -2.4303, -3.8585,
        -4.4141, -1.8732, -6.5907, -4.0903], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5781, -4.9207, -1.9478, -2.8586, -4.3025, -5.4936, -1.5384, -6.1485,
        -3.3808, -2.1218, -3.9056, -6.3245, -2.9120, -9.7762, -2.8179, -4.9647,
        -1.8198, -6.8893, -3.9721, -1.3924], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1749, -4.2601, -3.2733, -2.0734, -5.2598, -4.0656, -0.6828, -5.9348,
        -2.0794, -2.2108, -2.6339, -6.7421, -1.9527, -6.8408, -2.8725, -2.6793,
        -2.6063, -6.7575, -3.6777, -1.6454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0915, -15.8900,  -7.1934,  -7.6380,  -6.8809,  -2.4537,  -7.5813,
         -3.7247,  -7.6307,  -3.9763,  -2.7683,  -4.3415,  -5.7218,  -3.4495,
        -12.7890,  -4.6035, -12.7633,  -4.3031,  -5.7863,  -7.5638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8111, -0.6496, -5.3333, -5.2321, -1.8404, -1.8815, -7.0738, -5.8354,
        -4.2023, -4.3596, -4.9222, -2.2968, -4.5580, -2.9322, -1.4776, -5.8470,
        -4.9915, -1.7858, -3.4582, -1.6069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7854, -5.5875, -2.0524, -5.7368, -4.5181, -3.6272, -0.8965, -6.1214,
        -0.9145, -3.1653, -3.2670, -3.3241, -3.1723, -3.9373, -6.0986, -2.1442,
        -4.1674, -3.6710, -3.5255, -2.9849], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9144, -5.5577, -2.2724, -1.7752, -5.9346, -4.8396,  0.0497, -6.3222,
        -2.4127, -1.9005, -4.0295, -4.0075, -1.4797, -2.8844, -2.8031, -2.6969,
        -1.3831, -6.2368, -4.4410, -1.7516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2581,  -5.2657,  -0.9256,  -4.4653,  -2.4341,  -3.9562,  -4.8953,
         -5.7248,  -0.9765,  -4.0455,  -1.5607,  -2.9466,  -3.9214,  -6.9997,
         -6.2344,  -3.1342,  -6.3659,  -4.7445,  -2.5249, -10.1119],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5658, -5.8549, -3.5402, -1.6948, -3.2598, -4.9370, -0.1819, -7.2176,
        -5.0853, -2.8958, -1.7992, -6.4922, -3.8205, -0.7288, -5.8887, -2.0467,
        -1.5735, -1.7604, -5.4200, -0.5274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3885,  -5.8798,  -1.2492,  -3.0622,  -2.9074,  -2.6572,  -0.9827,
         -6.4510,  -0.4606,  -3.0533,  -2.8138,  -5.3768,  -2.0536,  -5.9329,
         -5.1871,  -3.8050,  -3.7230,  -5.2474, -10.1988, -12.9015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6535, -7.4740, -8.2222, -6.2380, -8.3355, -3.2548, -6.2725, -2.4563,
        -3.8741, -3.0969, -2.7764, -2.0276, -6.1479,  0.0565, -5.5264, -2.7342,
        -4.7744, -6.3949, -6.1766, -2.8340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9178,  -0.6627,  -9.1287,  -4.6726,  -1.3049,  -4.2677,  -5.3784,
         -5.8761,  -2.4977,  -5.6601,  -3.6918,  -0.9845, -15.4811,  -5.0016,
         -6.4976,  -6.8470,  -4.9083, -21.6211,  -8.1005,  -7.8640],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0736, -2.0505, -2.1412, -6.5454, -4.0761, -1.5411, -2.6747, -3.2064,
        -2.7293, -5.8915, -5.9777, -1.5578, -9.3312, -2.9862, -4.1245, -2.3437,
        -6.8058, -4.8528, -1.6575, -3.4245], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6453, -1.2682, -4.2235, -4.0484, -4.8535, -5.3663, -7.3335, -2.6971,
        -4.8680, -2.4754, -3.7791, -3.8861, -5.9666, -1.3035, -4.5718, -1.9596,
        -3.3840, -0.4043, -6.1247, -4.4972], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1536,  -1.0548,  -8.5205,  -5.4272, -21.8879,  -7.8060,  -8.4376,
         -7.6619,  -3.2110,  -6.2647,  -0.2318, -10.5242,  -3.7381,  -4.6769,
         -6.9516,  -7.6175,  -0.8544,  -6.7532,  -3.7042,  -2.0678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1272, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0541,  -5.2756, -28.4276,  -5.7551,  -7.0099,  -6.0991,  -8.6064,
         -5.4029,  -7.5854,  -2.8843,  -2.0432,  -4.6666,  -2.2029,  -1.5743,
         -3.5898,  -5.0605,  -1.2430,  -4.9698,  -2.6102,  -3.3321],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8196, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3706, -3.9766, -4.5640, -0.2286, -3.3287, -2.6553, -4.7400, -0.9871,
        -5.1965, -4.2604, -1.2953, -3.5918, -2.9612, -3.2244, -2.4308, -6.4232,
        -4.0684, -1.8328, -4.0406, -2.4508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7600, -3.0520, -2.6120, -5.9158, -4.6676, -1.6685, -3.2459, -2.2981,
        -1.3266, -5.0399, -5.2447, -0.8627, -2.8023, -2.0541, -3.1635, -1.9383,
        -5.9031, -4.2362, -2.1958, -3.2214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0677, -2.5394, -3.1243, -3.9104, -5.8491, -1.4436, -5.3459, -3.6145,
        -5.3477, -0.9017, -5.3530, -4.1239, -1.0435, -4.8765, -2.6141, -2.3978,
        -5.5730, -5.3398, -1.3895, -4.3938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0371,  -6.2912,  -8.5946,  -3.6208,  -5.0832,  -9.1877,  -1.4409,
         -7.9626,  -2.6299,  -3.4099,  -3.5945,  -5.0820,  -0.6262,  -2.3428,
         -5.5632,  -8.5448, -13.5811,  -8.5219,  -6.6411,  -6.2359],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4471, -6.0556, -3.3754, -2.1721, -6.7377, -5.1540, -0.5147, -6.8947,
        -2.5341, -3.4507, -4.8551, -6.0277, -4.9303, -1.4017, -4.8664, -3.4145,
        -3.4866, -1.0246, -5.8265, -4.2482], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7092,  -5.0645,  -0.6982,  -3.7783,  -2.1783,  -2.6942,  -2.6436,
         -5.1065,   0.1675,  -4.7430,  -5.1142,  -5.0178,  -6.7285,  -6.7643,
         -6.9933,  -6.4140,  -3.6881,  -5.6608,  -5.4474, -15.9897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1239, -3.4031, -3.0039, -2.4808, -5.6048, -5.5929, -5.0054, -3.6350,
        -4.0856, -1.4294, -5.2677, -5.4181, -1.9749, -4.7727, -2.0100, -3.9984,
        -3.4291, -6.0273, -3.9084, -1.0260], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0256,  -3.4168,  -1.9621,  -1.1927,  -6.6061,  -4.8757,  -0.1777,
         -5.9534,  -2.5463,  -3.0271,  -2.8745,  -5.2299,  -0.7227,  -7.4125,
         -2.0336, -12.4153,  -7.7852,  -7.3113,  -7.8425,  -3.0799],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9849,  -5.9091,  -2.4104, -15.7160,  -6.7499,  -6.4117,  -8.2616,
         -2.2541,  -8.4610,  -0.5760,  -8.7934,  -3.8247,  -2.6553,  -1.6500,
         -6.0768,  -4.4450,  -0.6205,  -3.4932,  -3.2090, -25.5194],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7062, -0.4536, -4.3744, -3.0011, -3.5131, -2.0036, -6.0627, -3.6407,
        -1.1875, -3.4507, -3.4081, -3.2792, -4.9820, -5.5227, -4.1074, -1.5421,
        -4.8191, -1.8282, -2.5086, -6.9435], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.6485,  -4.2726,  -2.5457, -12.0391,  -6.8327, -20.0851,  -5.9614,
         -7.3708,  -3.8659,  -3.5876,  -5.1493,  -1.8539,  -2.5996,  -4.0133,
         -4.3670,  -1.3513,  -5.5847,  -4.2469,  -0.8117,  -4.9180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3947,  -3.3798,  -5.7332,  -2.6474,  -2.5997,  -3.7710,  -3.8863,
        -11.8787,  -6.4546,  -3.0258,  -5.8436, -16.4087,  -7.4615,  -6.6771,
         -6.3507,  -8.1396,  -2.6577,  -7.3074,  -1.1543,  -5.4902],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1272,  -4.0047,  -2.7266,  -3.4713,  -5.6016,  -6.0908,  -5.1213,
         -3.5734,  -4.0006,  -6.1319,  -3.8765,  -2.4715,  -4.8376,  -6.8750,
         -3.5133, -12.1127,  -8.0119,  -8.8432,  -6.6343, -11.2014],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7571, -0.9794, -2.4860, -5.6042, -0.2361, -3.9881, -4.1105, -3.6687,
        -2.2499, -5.7047, -5.0831, -2.5642, -4.5336, -2.8220, -1.7276, -5.0631,
        -4.9127, -1.0244, -3.2118, -2.4869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2376,  -3.2054,  -4.5266,  -4.8998,  -5.8895,  -1.2383,  -3.7104,
         -2.9152,  -3.3147,  -3.9036,  -5.7621,  -0.5130,  -4.8211,  -3.2142,
        -12.4231,  -5.0289,  -9.9752,  -2.9668,  -8.2121,  -1.0937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1598,  -1.4616,  -2.3398,  -2.8538, -15.2000, -10.2018,  -2.8270,
         -4.3902,  -2.9816,  -4.8558,  -5.1574,  -5.7494,  -4.4529,  -4.4979,
         -5.2069,  -2.4333,  -9.3184,  -5.8321,  -2.4813,  -5.1739],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6098,  -3.7127,  -2.2253,  -3.8471,  -6.0601,  -1.7598,  -2.9526,
         -3.8590,  -3.3585, -15.0256,  -5.6192,  -5.9164,  -4.9198,  -2.2852,
         -9.2568,  -5.1425,  -7.3713,  -2.0904,  -6.6513,  -1.1734],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7602, -4.0523, -1.7533, -3.1522, -2.5953, -2.1864, -4.1643, -5.2007,
        -0.6585, -6.5059, -2.1378, -3.0721, -3.0917, -6.0321, -4.8075, -1.6637,
        -3.5290, -3.2650, -1.3094, -4.5249], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6145, -2.0543, -3.6330, -7.3198, -5.4974, -1.0780, -9.7063, -4.7064,
        -1.8421, -6.9940, -5.0608, -1.3049, -3.9794, -2.0664, -2.6844, -9.8609,
        -5.8147, -0.5940, -5.6107, -2.0905], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7270, -3.4244, -3.8699, -4.7984, -5.1439, -5.3915, -2.1050, -2.5252,
        -2.9372, -3.2279, -1.6774, -5.8807,  0.1755, -4.0602, -3.2118, -2.6014,
        -2.6975, -5.2934, -6.0928, -2.5633], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5838, -6.5466, -5.1448, -6.4082, -6.3725, -2.2046, -8.5519, -3.4142,
        -3.1727, -5.7468, -5.4295, -2.0360, -5.7452, -2.6978, -2.0693, -6.8406,
        -5.3493, -1.1375, -8.1395, -1.6754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2566, -8.5093, -3.1226, -5.5905, -5.7163, -1.5163, -6.3168, -2.7238,
        -4.2527, -4.8923, -6.4254, -4.4543, -2.0294, -3.9390, -2.1583, -2.3193,
        -6.7388, -4.8385, -0.7917, -3.4231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8788, -5.1525, -0.9522, -2.6236, -3.4629, -2.6631, -5.8619, -7.9859,
        -0.8467, -3.6578, -2.7183, -2.7913, -0.8662, -5.4879, -4.1853, -1.9047,
        -4.3471, -2.1142, -3.0032, -2.4377], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6543,  -4.9411,  -1.1679,  -4.0965,  -2.9569,  -4.7650,  -5.2668,
         -6.5978,  -2.9597,  -3.5049,  -3.2618,  -4.1465,  -6.6268,  -5.5820,
         -1.5155, -11.6969,  -3.0944,  -3.9835,  -6.4420,  -5.0726],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5139, -3.2199, -3.4590, -2.2887, -4.6330, -5.8661, -1.7323, -3.1398,
        -4.0189, -4.8450, -2.1232, -6.2800, -0.7308, -5.8523, -2.5129, -4.1059,
        -0.1647, -5.9823, -3.7601, -1.2815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3467,  -2.9158,  -6.8986,  -6.1555,  -2.3423,  -7.4907,  -4.4769,
         -2.6044,  -3.4708,  -5.4310,  -1.4372,  -4.6238,  -2.4388,  -2.5436,
         -2.3423,  -6.9758,  -5.5550,  -3.0491, -10.4393,  -3.2167],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7646, -2.6586, -1.9162, -7.6128, -5.0467, -1.1297, -7.6802, -1.6532,
        -3.3693, -2.7535, -5.3208, -1.0814, -2.8953, -3.9595, -2.7603, -3.6707,
        -5.5995, -1.0906, -4.7606, -2.7894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2238, -1.8703, -2.8019, -3.8339, -5.1283, -1.0669, -7.0766, -1.9014,
        -8.9932, -6.7112, -8.5603, -8.2917, -2.9429, -5.9325, -2.0177, -6.3739,
        -4.5241, -2.3771, -5.9791, -5.5852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4418,  -2.2832,  -7.2082,  -0.7738,  -6.6683,  -3.0985,  -2.6792,
         -2.9872,  -5.4030,  -1.0209,  -7.4674,  -2.6219,  -2.0658,  -2.8954,
         -6.8365,  -5.4817,  -3.3089, -11.1885,  -2.1429,  -3.5332],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6237, -7.4815, -8.5528, -7.0850, -5.0260, -5.6799, -6.2539, -0.3377,
        -4.5448, -2.5887, -2.6180, -2.4956, -6.0430, -5.1503, -2.4353, -6.5058,
        -2.7752, -3.9654, -1.7931, -7.5404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1432, -7.0455, -3.4206, -6.5612, -0.2052, -4.7996, -3.6613, -4.2296,
        -3.9250, -6.7175, -6.0857, -3.5761, -3.8238, -2.6982, -3.0271, -4.0720,
        -5.0709, -1.1568, -2.4182, -2.1987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3071,  -0.6522,  -6.2730,  -4.2833, -18.8140,  -6.2542,  -8.3250,
         -4.9637,  -6.1119,  -4.8549,  -0.7959,  -5.0305,  -3.1352,  -3.8586,
         -0.2927,  -5.6888,  -3.6592,  -3.1088,  -4.6308,  -2.3313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8617, -4.7620, -0.7208, -5.1236, -3.1989, -2.3581, -2.4317, -6.5150,
        -3.3842, -2.2350, -6.1281, -1.0455, -3.1206, -3.0242, -5.5223, -0.7523,
        -3.7996, -4.5006, -3.2607, -2.6836], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0508,  -1.5931,  -6.1938,  -4.3620,  -0.7890,  -4.4136,  -2.5868,
         -0.7742,  -6.7759,  -5.0691,  -1.7632,  -5.0986,  -3.3493, -13.4565,
         -7.9936,  -7.4971,  -6.7288,  -1.9781,  -7.3162,  -0.8381],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1357,  -3.8007,  -1.5743,  -9.5319,  -2.9957,  -4.0037, -15.1718,
         -4.0875,  -3.4943,  -5.0287, -10.0584, -18.4114,  -8.8936,  -8.4210,
         -2.7608,  -7.1284,  -1.1638, -18.2028,  -2.7849,  -4.7983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5260, -4.5951, -4.9647, -0.1601, -2.4493, -4.2830, -6.3349, -0.5479,
        -6.6153, -4.4628, -1.1731, -5.4921, -3.0978, -3.9853, -3.1322, -6.2477,
        -5.1618, -2.3409, -2.8354, -2.3216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1153,  -4.6058,  -5.4749,  -0.8541,  -4.7916,  -3.0507, -11.7351,
         -6.8562,  -7.6832,  -7.4491,  -3.8486,  -5.4761,  -2.5705,  -4.2575,
         -4.1580,  -3.2345,  -4.3244,  -5.7594,  -1.5929,  -8.2623],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0050, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7906, -3.8066, -1.8774, -2.8775, -6.4826, -4.5050, -1.6184, -2.6079,
        -2.1696, -1.0892, -5.3867, -5.0829, -0.4930, -4.1219, -2.7883, -1.7413,
        -1.4178, -6.0615, -0.7337, -3.6280], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2140, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1998,  -6.6688,  -5.0161,  -3.6422,  -5.0973,  -4.1718, -13.8103,
         -6.2888,  -7.7895,  -7.0956,  -1.1586,  -4.9232,  -4.3142,  -2.2613,
         -5.1999,  -1.3110,  -1.7138,  -6.2061,  -5.0217,  -1.9998],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4182, -4.9340, -8.3700, -6.8477, -4.2047, -7.0357, -4.4295, -9.0991,
        -5.5006, -7.9757, -4.2185, -6.1891, -3.2146, -4.8639, -4.3690, -4.6352,
        -5.5211, -7.1322, -1.7459, -5.3734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1915, -2.1615, -5.8100, -3.7881, -2.3552, -2.9032, -1.8485, -2.0292,
        -6.0788, -4.8061, -1.2219, -4.5372, -2.4053, -2.1897, -3.3879, -5.1576,
        -0.2186, -4.9910, -2.3976, -1.6451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1062, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-24.7267,  -2.8817,  -7.7735,  -1.7586,  -5.6784,  -0.3652,  -4.8652,
         -4.0718,  -5.5438,  -0.2956,  -6.4569,  -4.5272,  -0.5683,  -5.4311,
         -2.2563,  -2.3366,  -2.7941,  -6.1822,  -1.0608,  -5.9643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8146,  -5.3762,  -3.5504,  -3.3876,  -2.6571,  -5.8269,  -4.7562,
         -1.9957,  -2.3218,  -3.5179,  -2.8771,  -6.8789,  -5.1839,  -1.8536,
         -7.0675,  -1.8405, -13.7072,  -6.8507, -10.5907,  -7.3885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8635, -2.9633, -6.0497, -5.0679, -1.7199, -4.0275, -1.9453, -6.1934,
        -6.0344, -3.4514, -4.3192, -2.2445, -2.6793, -2.5840, -6.6187, -4.2194,
        -1.6278, -2.6270, -2.7883, -0.6126], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8948,  -8.1540,  -9.0401,  -6.3137,  -3.5221, -12.8925,  -5.8826,
         -2.0744,  -0.6826,  -6.6180,  -4.9230,  -1.5208,  -3.4760,  -2.3200,
         -1.9463,  -3.2953,  -4.9579,  -0.6930,  -9.4355,  -2.2839],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0415, -5.9903, -2.2669, -2.2282, -5.3254, -4.5640, -0.8107, -3.6518,
        -1.8583, -2.4138, -3.5569, -5.2282, -1.2172, -4.5302, -2.7581, -2.0731,
        -4.8360, -5.2256, -0.8636, -3.4290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4423,  -5.4227,  -6.4292,  -1.0987,  -4.9594,  -4.9361,  -7.5250,
         -3.8487,  -4.3660,  -5.6820,  -5.5301,  -2.6096,  -5.3283,  -3.5115,
         -2.0313,  -6.4363,  -5.4647,  -1.1776,  -5.5992,  -3.4431],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1941,  -4.1917, -22.7121, -10.1079, -11.5571,  -2.3402,  -4.8568,
         -0.6629,  -3.8071,  -3.2775,  -4.2140,  -1.8243,  -5.6722,  -4.2938,
         -0.9386,  -5.2542,  -3.7879,  -2.0961,  -2.1412,  -5.9134],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9918, -5.1769, -1.4511, -9.9131, -4.0362, -9.9682, -6.7700, -7.1648,
        -9.9514, -2.1052, -8.8853, -0.9507, -9.8383, -3.6108, -6.2009, -0.8351,
        -4.4248, -5.3241, -0.0849, -3.8864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1342,  -4.3640,  -1.2695,  -3.4716,  -3.8099,  -5.9507,  -2.6435,
         -6.5855,  -4.9207,  -1.4919,  -7.8075,  -4.3137,  -1.2129, -11.5558,
         -3.8352,  -2.6384,  -3.4848,  -9.6475, -10.5784,  -7.9884],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3501, -8.6607, -8.5343, -8.8551, -7.9143, -6.4880, -9.3639, -2.5769,
        -1.7193, -5.3621, -4.8114, -2.4094, -2.8653, -5.1313, -0.8195, -1.7910,
        -2.5658, -2.7070, -3.8294, -5.4141], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8136,  -3.2939,  -2.2564,  -1.5557,  -7.2904,  -0.7309,  -8.0396,
         -3.1008,  -1.6002,  -2.7885,  -6.4931,  -0.4086,  -1.8537,  -5.8218,
        -19.1173, -12.0728,  -6.6585,  -7.2716,  -6.3280,  -8.8330],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6882,  -6.7491,  -2.8673,  -6.7344,  -0.4831,  -3.1236,  -3.3592,
         -3.1578,  -3.9011,  -6.2542,  -3.2721,  -4.7795,  -2.6120, -15.2474,
         -6.6751,  -8.4492,  -2.7089,  -5.0214,  -4.8733,  -2.5909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7655e+00, -4.6693e+00, -1.0302e+00, -4.0035e+00, -4.9351e+00,
        -4.4778e+00, -2.6980e+00, -5.1510e+00, -4.8804e+00, -1.7107e+00,
        -3.5186e+00, -2.0880e+00, -4.0015e+00, -1.9250e+00, -5.1837e+00,
        -2.4144e-03, -2.8909e+00, -3.0297e+00, -4.7719e+00, -2.7884e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2814, -3.5243, -1.0187, -6.0388, -3.0353, -2.2965, -6.7950, -2.3864,
        -2.0181, -3.4155, -5.8610, -0.0961, -5.4644, -2.1134, -3.8247, -1.8418,
        -5.0282, -4.3119, -1.7063, -2.5628], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2489,  -5.2275,  -0.5213,  -8.5818,  -3.3618,  -2.8159,  -3.3197,
         -6.0425,  -3.3871,  -3.4617,  -3.4026, -11.4388,  -7.9025,  -4.7822,
         -6.6233,  -2.5969,  -6.7957,  -1.1698,  -8.7496,  -2.3229],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2790, -1.9309, -4.3229, -4.4063, -5.6119, -1.3580, -1.8491, -2.7318,
        -2.1243, -4.9233, -5.7169, -1.3782, -3.1607, -2.6726, -1.8365, -1.4601,
        -6.8679, -5.2171, -2.8242, -5.3740], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8204, -3.7968, -2.8487, -3.0404, -5.0450, -4.7915, -1.9332, -4.8516,
        -2.4486, -2.0183, -4.2299, -5.3534, -0.4960, -2.0803, -3.2284, -3.0534,
        -6.0569, -5.4347, -0.4145, -5.6918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4538,  -6.7197,  -0.6987,  -5.6151,  -4.2918,  -1.5503,  -5.9284,
         -6.0038,  -1.3414,  -3.8551,  -1.8050, -11.6633,  -7.9307,  -8.4329,
         -8.0782,  -2.9856,  -6.1581,  -0.4824,  -5.3761,  -3.5785],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1312, -3.0607, -2.7142, -2.5538, -6.5720, -4.4553, -1.7275, -4.6761,
        -2.5715, -3.5149, -1.2693, -5.9399, -3.6856, -0.5580, -2.7225, -2.6998,
        -1.7117, -6.2270, -4.0048, -1.4122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1003,  -5.3740,  -1.0635,  -5.5867,  -2.7790, -12.7895,  -5.9331,
         -6.7067,  -5.8002,  -7.2634,  -2.5791, -12.2980,  -1.6502,  -5.4778,
         -4.6776,  -6.0065,  -4.1016,  -3.6755,  -5.1640,  -5.3428],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7545,  -4.6836,  -3.0077,  -2.5462,  -4.3926,  -6.1457,  -0.4730,
         -2.8348,  -2.8909,  -3.6231,  -2.1258,  -7.2350,  -5.0056,  -1.0518,
         -3.2936,  -4.3005, -29.2146, -15.1353,  -7.7457,  -7.7865],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.3596,  -6.9953,  -9.8460,  -7.6261,  -7.0115,  -2.5556,  -5.1542,
         -3.1734,  -1.5622,  -5.5058,  -3.8587,  -3.0612,  -2.4314,  -6.1345,
         -4.0137,  -2.1715,  -4.0436,  -2.4311,  -1.1261,  -4.9786],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2020, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8140,  -7.0869,  -5.6294,  -3.4031,  -3.5810,  -2.7623,  -2.6312,
         -8.0471,  -5.3895,  -2.5485,  -9.0744,  -3.5074, -16.8992,  -5.3686,
         -8.3253,  -6.3378,  -7.1575,  -5.7926,  -4.6942,  -0.7691],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1703, -1.1741, -4.3037, -2.6247, -3.5495, -5.9897, -6.5648, -2.3192,
        -5.5421, -2.9721, -2.4537, -6.8333, -5.3382, -5.0101, -5.3617, -2.1242,
        -5.9588, -3.7433, -6.9471, -3.8174], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3670,  -4.3176,  -2.9673,  -5.7082,  -5.4894,  -2.9596, -18.9477,
         -2.0176,  -5.1809,  -3.0861,  -5.0420,   0.4250,  -4.1627,  -3.2510,
         -4.9126,  -8.9120,  -6.2646,  -8.4885,  -2.0448,  -7.1196],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0665,  -2.7835,  -6.6925,  -1.3357,  -4.1393,  -4.8724,  -2.6956,
        -11.6446,  -5.4643,  -3.9714,  -4.9363,  -1.8831, -20.7927,  -4.0348,
         -7.6974,  -1.6586,  -5.0772,  -5.0494,  -0.8636,  -5.0407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7678, -5.8655, -5.2295, -4.0271, -0.7121, -4.1690, -2.7404, -3.1146,
        -2.4126, -6.8279, -3.9516, -1.3693, -2.8907, -2.3443, -3.2581, -3.6650,
        -5.9041,  0.1320, -4.1938, -3.1171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1210, -1.3794, -5.9420, -4.2780, -5.2145, -4.7098, -3.4428, -2.7252,
        -1.5715, -5.8763, -0.5220, -4.7795, -4.0737, -1.7954, -6.3146, -5.4079,
        -1.2367, -6.4289, -3.4108, -2.3166], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8200,  -2.4407,  -3.0496,  -5.6172,  -1.4645,  -1.7168,  -4.0738,
         -7.7669,  -9.5165,  -5.2571,  -8.8716,  -2.7812,  -6.6137,  -1.6260,
        -12.5087,  -3.3573,  -2.6577,  -7.5760,  -5.3285,  -0.9215],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7483, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6924,  -5.3798,  -2.5214,  -2.8614,  -5.0036,  -5.3234,  -2.2934,
         -4.9800,  -2.8592,  -3.5599,  -2.3307,  -6.4270,  -6.2655,  -3.6444,
         -8.5166,  -4.2521, -10.2207,  -5.7850,  -6.0272,  -4.6108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0697, -3.7935, -1.7272, -3.4977, -1.5407, -7.1717, -4.7593, -2.8318,
        -2.2524, -2.7138, -1.1625, -3.9680, -5.2371, -2.0526, -5.9712, -2.8706,
        -1.6122, -3.2255, -6.3014, -5.8911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1645,  -0.2542,  -8.7134,  -5.9415,  -3.7660,  -7.3222,  -5.4750,
         -1.6660, -16.4090,  -4.2015,  -0.9280,  -4.5961,  -5.3796,  -1.8833,
         -7.5549,  -4.1994,  -4.1952,  -2.7882,  -5.1459,  -5.2090],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1338, -1.5728, -3.3723, -1.8390, -1.7255, -3.5517, -4.9929, -0.9060,
        -3.6302, -2.9664, -3.3545, -3.5228, -4.9850, -1.1060, -3.3732, -3.9302,
        -3.0203, -6.4628, -4.3363, -1.2062], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1953, -4.9590, -4.3295, -3.9931, -6.8260, -6.0164, -2.1141, -5.2326,
        -3.9478, -3.1914, -4.2925, -5.8207, -1.7198, -4.7805, -3.2275, -4.5296,
        -7.0401, -5.7337, -4.7256, -6.1396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5862, -9.0502, -4.9184, -6.9275, -1.5148, -5.5841, -4.4166, -9.3031,
        -3.7866, -6.1027, -5.9244, -3.2959, -5.8568, -2.8810, -1.4735, -3.6433,
        -4.6948, -0.6747, -1.4790, -3.1716], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.7263,  -7.1867,  -6.7240,  -7.2762,  -4.9009,  -1.0097,  -9.4192,
         -6.0408, -10.5123, -13.8043, -18.9599,  -9.9501,  -5.8022,  -1.3922,
         -2.7778,  -4.9062,  -8.2137,  -6.6227,  -7.9988,  -7.8659],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0611, -1.0237, -6.5769, -3.3195, -1.5116, -3.6735, -2.8063, -2.8343,
        -1.8584, -5.7099, -4.6160, -1.5407, -5.6870, -1.6612, -1.1428, -4.1012,
        -4.9332, -0.5140, -4.3249, -3.6193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5154,  -0.9085,  -6.7801,  -2.4747,  -2.8140,  -1.6716,  -6.4072,
         -4.0422,  -2.6685,  -4.8899,  -3.6370,  -6.3578, -22.9238,  -4.8923,
        -12.9576,  -4.3587, -18.2081,  -7.8873,  -8.2687,  -5.8753],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8549, -0.7959, -5.0305, -3.1352, -3.8586, -0.2927, -5.6888, -3.6592,
        -3.1088, -4.6308, -2.3313, -6.8450, -4.3104, -7.1270, -1.1505, -6.2767,
        -2.7274, -4.1840, -2.2489, -6.2186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1312, -3.0607, -2.7142, -2.5538, -6.5720, -4.4553, -1.7275, -4.6761,
        -2.5715, -3.5149, -1.2693, -5.9399, -3.6856, -0.5580, -2.7225, -2.6998,
        -1.7117, -6.2270, -4.0048, -1.4122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3860, -6.0830, -0.2573, -4.1642, -3.7085, -4.5737, -4.0007, -4.6574,
        -5.0022, -0.2033, -5.6859, -3.8526, -4.6674, -3.3060, -4.7689, -5.6740,
        -2.0655, -2.4668, -3.4805, -1.2696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7582, -1.4484, -6.9039, -0.0433, -3.5718, -3.8655, -3.6984, -1.7420,
        -5.2026, -3.7261, -0.4211, -3.5624, -2.5945, -2.8847, -0.7679, -6.6384,
        -3.3451, -1.8501, -3.7583, -3.1811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1201,  -3.7212,  -6.4493,  -5.2826,  -3.3367,  -5.8252,  -4.7239,
         -0.7258,  -5.3350,  -3.1326,  -3.6055,  -1.3991,  -6.8026,  -5.0779,
         -3.0003,  -4.4362,  -3.1034,  -4.9627,  -7.2352,  -5.0400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5161, -4.4745, -4.0717, -6.0073, -4.8844, -8.0018, -5.9738, -6.1415,
        -6.5364, -5.5125, -4.9251, -4.3378, -4.5080, -4.6131, -5.6881, -8.1161,
        -3.6489, -4.6424, -3.4621, -4.5600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7585,  -4.1563,  -6.0583,  -1.1980,  -8.2914,  -2.9898, -16.0708,
         -2.4661,  -7.4337,  -1.5019,  -6.2160,  -1.3412, -10.0592,  -3.1325,
         -2.2387,  -7.2769,  -5.5852,  -1.9010,  -8.0350,  -8.1515],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3927, -2.2579, -4.6674, -1.3376, -1.9437, -2.9271, -5.3753, -0.3475,
        -4.0412, -2.8953, -4.0020, -2.0754, -6.0852, -4.3947, -2.0322, -9.1053,
        -3.2459, -2.4795, -6.8092, -5.3456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1310, -5.2140, -2.1069, -6.5919, -3.6980, -5.8593, -4.2683, -7.1187,
        -4.0409, -3.5495, -3.0242, -3.2923, -2.8386, -0.2406, -6.6656, -1.6266,
        -5.3991, -2.6791, -3.7915, -2.1189], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2909,  -2.3570,  -6.7463,  -1.5815, -10.2181,  -3.5662,  -3.0560,
         -2.5740,  -4.2846,  -5.1126,  -0.5584,  -4.6681,  -1.8264,  -2.2028,
         -6.5016,  -5.1885,  -1.1399,  -3.3362,  -1.8155,  -3.5638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3499, -6.1035, -4.6874, -0.6390, -3.9390, -2.7913, -2.4862, -3.5132,
        -5.6808, -3.4823, -1.2203, -2.5088, -2.2577, -2.4348, -4.5418, -5.0755,
        -1.1128, -4.0503, -2.7417, -3.0120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2218, -3.1752, -5.8172, -1.4458, -3.6425, -4.5328, -2.7477, -3.1343,
        -5.8611, -4.6796, -2.1593, -7.0274, -2.2790, -3.4593, -2.6565, -5.0617,
        -0.5106, -3.0942, -2.6441, -5.0320], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8900, -2.7873, -4.7667, -2.3943, -2.1255, -3.3059, -6.2689, -5.0768,
        -2.2129, -3.4704, -2.2327, -2.5161, -6.3028, -5.9923, -3.9132, -6.6196,
        -2.0819, -1.7513, -3.9990, -5.7252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0827, -3.6375, -1.7083, -6.2386, -4.5192, -0.9330, -2.0226, -2.4761,
        -4.2959, -4.1454, -5.5002,  0.3339, -5.5958, -3.2817, -3.9496, -0.1386,
        -5.7757, -4.0569, -1.2262, -6.4738], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9286, -1.6458, -4.9269, -3.5244, -5.9609, -3.8453, -1.5761, -3.7893,
        -2.9958, -3.1164, -3.6670, -6.4531, -2.3189, -3.9548, -3.7784, -3.3215,
        -6.0666, -5.0793, -0.5236, -6.9826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4330,  -2.1489,  -5.8130,  -9.1254,  -6.5221,  -9.2724,  -6.2245,
         -7.0818,  -1.8237, -16.6819,  -4.4792,  -2.4007, -11.7120,  -6.8944,
         -0.9030,  -6.5588,  -7.0955, -26.0038,  -6.5430,  -7.1562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7706,  -7.2315,  -7.7555,  -3.0186,  -6.1184,  -0.8084,  -4.1224,
         -2.7178,  -3.7257,  -2.3799,  -5.5655,  -0.5823,  -5.2580,  -5.4655,
        -19.4183,  -7.3956,  -7.0751,  -7.8056,  -2.5071,  -6.3567],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6400, -3.2292, -5.4082, -1.2087, -3.9242, -3.7890, -3.9568, -3.9519,
        -5.2712, -3.5288, -1.3689, -3.7954, -3.5835, -3.4054, -3.5196, -5.8757,
        -1.3167, -3.5235, -3.2508, -3.1851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4866, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2172, -8.4078, -2.5094, -2.3962, -6.8110, -4.9912, -1.2353, -4.1595,
        -1.8898, -7.7482, -4.0328, -6.8818, -5.3827, -3.2248, -5.8732, -4.6699,
        -1.5599, -6.0303, -5.6516, -0.7420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2445, -1.8784, -2.3238, -4.4600, -2.1835, -2.7323, -1.4824, -5.8080,
        -4.0977, -0.8912, -2.6940, -2.1596, -3.0901, -2.9411, -5.4968, -0.7023,
        -3.2925, -3.0987, -3.7630, -2.5726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1386,  -3.6190,  -3.3642,  -6.5371,  -4.9630,  -1.7526,  -2.3043,
         -5.1600,  -3.4483,  -5.0616,  -5.7761,  -2.1581,  -9.0256,  -1.7854,
        -18.3569,  -5.3471,  -9.1843,  -0.9609,  -7.7253,  -0.9904],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7171,  -3.1461,  -5.2665,   0.3139,  -5.1027,  -2.4503,  -1.4346,
         -6.8619,  -5.3782,  -1.0058,  -5.4772,  -8.0016, -27.1519,  -6.1694,
         -3.3137,  -7.0268,  -6.4230,  -8.2788, -10.3004, -11.6871],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6712,  -6.4611,  -3.5474,  -4.4896,  -4.5849,  -8.7376,  -9.6272,
         -8.2789,  -2.5394,  -6.6300,  -3.1327,  -7.2298,  -3.6002,  -4.0135,
         -4.8454,  -5.6649,  -1.8932,  -5.3844,  -3.6683, -10.5678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8574,  -4.2314,  -1.0532,  -6.2815,  -5.0247,  -1.6923,  -6.4673,
         -2.8291,  -2.6526,  -0.7170,  -6.9079,  -0.7700,  -5.1825,  -5.0354,
        -18.2751, -29.0631,  -8.0021,  -3.0907,  -5.2030,  -4.7792],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4794,  -3.5430,  -2.1061,  -4.4545,  -5.2888,  -0.9153,  -2.2399,
         -3.0625,  -1.6322,  -6.1748,  -5.5095,   0.2137,  -2.4397,  -2.2530,
         -1.1972,  -1.6476,  -6.4725,  -0.5222, -11.1893,  -2.1319],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2272, -4.5734, -1.1044, -4.1233, -2.9061, -2.5828, -2.8324, -6.2478,
        -0.1954, -4.4345, -3.2680, -3.1064, -1.9224, -6.4874, -2.5489, -7.1530,
        -3.7171, -4.0948, -1.1321, -6.2756], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7467, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3775, -17.5782,  -2.5360,  -8.7396,  -3.2973,  -5.7217,  -4.9461,
         -4.8031,  -5.6274,  -4.7159,  -9.2803,  -7.8351,  -4.5694,  -6.1164,
         -2.9446,  -3.1761,  -5.2328,  -5.2617,  -2.3361,  -6.5430],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9454, -2.5662, -4.5829, -6.1634, -5.7625, -2.9582, -3.2614, -2.9781,
        -3.0555, -1.2406, -6.3024, -4.2629, -0.9375, -5.8911, -1.7361, -2.1117,
        -6.7236, -5.1418, -1.7544, -5.6024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6260,  -5.5143,  -2.0245,  -2.5675,  -5.2849,  -5.3436,  -1.3734,
         -6.0675,  -3.7575, -18.6938,  -8.4299,  -9.6218,  -8.4506,  -8.8227,
         -6.1302,  -2.6205,  -7.9776,  -4.5136,  -2.6013,  -3.3715],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8814, -20.8211, -12.2993,  -7.9105,  -7.7931,  -5.0991,  -7.5240,
         -2.2419,  -9.5398,  -1.0453,  -6.3637,  -4.9237,  -3.7880,  -3.5630,
         -5.9178,  -5.2024,  -1.8753,  -6.3975,  -1.7902,  -2.5542],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3264,  -4.8578,  -4.0356,  -8.9784,  -6.0503,  -6.1542, -14.2278,
         -4.9981,  -3.2893,  -7.1936,  -3.7778,  -2.8611,  -3.4957,  -5.6119,
         -4.4066,  -0.3480,  -4.3329,  -2.0688,  -3.5960,  -5.9446],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1234,  -6.3098,  -8.2701,  -8.9751,  -6.4625,  -1.7501, -12.5503,
         -4.6213,  -1.2028,  -5.7498,  -5.7132,  -3.1503,  -9.2678,  -7.7613,
         -8.5362,  -5.4985,  -8.2236,  -8.1442,  -5.1192,  -1.1126],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0658,  -3.6532,  -4.0442,  -4.2995,  -5.7236,  -4.8208,  -2.1038,
         -6.5197,  -5.0384, -10.1692,  -9.3751,  -3.4933,  -8.0697,  -2.8358,
         -6.5173,  -5.0118,  -4.9175,  -4.8479,  -2.0799,  -8.2623],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1377, -4.1503, -3.2989, -3.4151, -2.0653, -5.5117,  0.1225, -4.6830,
        -3.7831, -7.8467, -6.2051, -5.9346, -8.6155, -2.9998, -7.3822, -0.9316,
        -6.7365, -3.2561, -2.8148, -6.4213], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3839, -4.2509, -0.5550, -4.7800, -4.1020, -3.9351, -3.3592, -6.5445,
        -4.5270, -3.0771, -5.0884, -4.8181, -4.4901, -3.3704, -6.4279, -1.0105,
        -4.9918, -2.9881, -6.3958, -5.0213], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2929, -4.5538, -2.7284, -5.7352, -2.4896, -4.7589, -2.2554, -7.1577,
        -4.0897, -1.5309, -6.0127, -4.4786, -3.1227, -4.3527, -4.7752, -3.9096,
        -2.0209, -3.5510, -1.4872, -2.4146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6685,  -1.9355,  -2.7615,  -6.4240,  -4.6869,  -0.5896,  -5.7936,
         -3.4064, -16.7411,  -7.1361,  -8.5091,  -2.1081,  -6.5724,   0.2071,
         -7.1356,  -3.4304,  -3.7112,  -6.0813,  -5.3588,  -3.5913],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6729,  -6.2483,  -5.2807,  -0.8632,  -4.2342,  -2.7640, -14.8376,
         -6.0102,  -6.4931,  -7.2971,  -7.2006,  -6.0947,  -7.7673,  -1.9640,
         -8.0359,  -1.1228,  -9.1723,  -3.2148,  -4.0743,  -4.1818],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4088,  -3.4793,  -1.0573,  -4.4461,  -2.0733, -21.5699,  -4.6949,
         -9.2126,  -4.6303,  -6.5024,  -1.3327,  -8.0075,  -7.1102,  -9.9626,
         -2.8810,  -4.0647,  -5.8124,  -4.2081,  -2.0935,  -3.6449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2054,  -3.2557,  -4.6394,  -0.3186,  -3.9215,  -2.6913,  -3.3729,
         -1.6978,  -6.0742,  -0.5569,  -4.9203,  -3.5375, -11.3351,  -6.0018,
         -6.1380,  -8.3406,  -2.9093,  -5.1175,  -2.5393,  -7.9373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8533, -0.7991, -4.5674, -2.5788, -2.1382, -7.1612, -5.6836, -0.5850,
        -7.9730, -1.7977, -1.3905, -3.7975, -4.3921, -0.0712, -3.5432, -1.7147,
        -2.2315, -2.8428, -6.2032, -3.5634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3146, -1.6687, -5.1867, -2.3115, -2.3958, -0.9993, -5.5694,  0.0405,
        -5.6343, -2.3159, -1.6905, -1.7381, -6.2122, -3.8419, -1.2375, -2.3106,
        -2.4656, -4.6652, -4.8757, -6.1857], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7195,  -6.7577,  -1.7086,  -5.8066,  -1.3400, -11.8132,  -5.3383,
         -2.6394, -13.3201,  -5.8888, -14.0353,  -7.0983,  -3.6997, -18.5329,
         -3.0556,  -8.9063,  -8.2718,  -2.2748, -12.0672,  -1.1386],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8898,  -3.8782,  -3.4989,  -3.9128,  -8.1826,  -5.0938,  -2.7470,
         -2.6557,  -3.8231, -12.8957,  -7.4887,  -4.1555,  -6.7361,  -1.7965,
         -4.3199,  -4.0674,  -1.8266,  -4.2898,  -3.6257,  -1.4337],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0084, -1.8335, -2.5558, -6.6893, -4.5579, -0.5930, -2.7013, -3.4939,
        -2.4969, -4.1488, -5.6992, -0.6953, -6.3752, -3.8844, -3.9494, -1.1930,
        -4.8958, -4.2393, -1.4949, -5.6919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1083,  -8.7472,  -8.5579,  -6.8628,  -4.1492, -13.2360,  -5.0559,
         -2.0502,  -4.0202,  -5.1376,  -1.9939,  -2.4565,  -2.6732,  -5.3745,
         -1.0663,  -5.7772,  -3.5235,  -0.6686,  -3.5100,  -2.3872],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2833, -6.1952, -9.6035, -8.9295, -5.8486, -9.7625, -2.9101, -6.9220,
        -1.7214, -4.6998, -3.2008, -2.4087, -4.8674, -5.1252, -0.6550, -4.4179,
        -3.2412, -5.2418, -3.4813, -6.3589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1340,  -3.5465,  -9.4778,  -4.6808,  -5.9235,  -8.1894,  -2.4273,
         -7.6132,  -1.7641,  -7.8940,  -3.8916,  -2.0872,  -5.7211,  -5.1150,
         -3.0804,  -2.7848,  -5.2323, -11.9902,  -8.5633,  -4.8041],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9150, -3.1168, -2.5748, -4.5981, -5.5409, -0.1742, -5.9418, -3.1617,
        -3.7712, -0.7777, -5.7030, -4.1894, -1.6048, -3.4768, -2.3070, -5.1257,
        -3.0295, -5.5858, -4.0264, -1.3946], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2509,  -4.3964,  -5.2553,  -0.2763,  -2.6700,  -3.7758, -10.9087,
         -7.2210,  -9.5392,  -8.1215,  -3.2671,  -5.7837,  -1.2224, -18.3140,
         -3.4770,  -2.0501,  -5.9153,  -3.8223,  -7.9952,  -7.7726],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8017, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2877,  -6.4096,  -3.8204,  -2.8857,  -3.7482,  -2.0822,  -7.4519,
         -5.3997,  -2.8553,  -7.8770,  -4.8315, -20.7790,  -7.8101,  -8.3703,
         -7.2768,  -0.9153,  -4.6289,  -4.2444,  -2.4831,  -4.1859],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7946, -5.0019, -2.3276, -1.5017, -2.7706, -5.3852, -1.1119, -7.5702,
        -3.1476, -1.5906, -5.7784, -6.1834, -0.9525, -3.0488, -3.8903, -3.6638,
        -4.3414, -6.1153, -1.2885, -3.3862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8412,  -1.6345,  -8.1223,  -5.1312,  -1.1254,  -6.3005,  -4.8953,
        -24.3381,  -5.7104,  -6.5656,  -5.7322,  -5.6612,  -3.9158,  -0.9227,
         -8.0511,  -3.2706,  -2.6117,  -4.9671,  -6.9364,  -2.3499],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4751, -3.0868, -2.5275, -2.3049, -6.3117, -4.7358, -0.9582, -4.3597,
        -1.9428, -2.2295, -4.8697, -5.2130, -0.0543, -7.4252, -2.4892, -2.8396,
        -3.9006, -5.4586, -0.8988, -3.7778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9168, -2.9897, -2.3537, -5.8863, -0.7178, -4.3438, -2.3674, -2.0664,
        -1.6237, -6.5219, -1.6977, -5.4296, -2.9276, -2.7376, -2.5593, -5.4535,
        -3.4306, -1.1133, -2.4782, -2.4991], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5140,  -6.0217,  -1.5084,  -2.6550,  -5.4372,  -0.6964,  -6.8879,
         -3.6156,  -0.1728,  -6.3905,  -4.2294, -19.5530,  -7.3979,  -8.7818,
         -6.9088,  -1.8956,  -7.1589,  -1.0120,  -8.6950,  -3.6571],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8383, -6.5143, -2.3464, -9.0372, -3.6713, -4.1761, -2.9030, -4.1500,
        -0.9191, -5.4845, -3.6493, -4.2394, -2.3310, -5.9466, -0.3301, -4.4778,
        -2.0603, -3.3652, -7.5122, -5.5263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7415, -1.0097, -3.6681, -3.2182, -3.1520, -3.3018, -5.3665, -5.2785,
        -1.8451, -6.1610, -3.8975, -3.0461, -7.0494, -4.7150, -0.6858, -7.8643,
        -3.1790, -3.3779, -3.1534, -5.1734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5491, -2.7973, -3.5730, -4.4947, -6.7487, -5.3570, -8.1788, -3.3712,
        -1.9785, -7.9639, -5.3621, -2.2130, -7.3370, -4.6044, -3.6596, -5.4114,
        -4.8437, -2.0047, -2.7972, -3.6776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3092, -4.6370, -5.7511, -2.7675, -3.4691, -7.4572, -5.1315, -4.9263,
        -7.5075, -7.2103, -5.0396, -5.0889, -3.7719, -3.3134, -5.0015, -6.0772,
        -6.3171, -1.6810, -5.3437, -5.0922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2663, -4.5416, -3.3088, -4.4433, -3.2678, -0.6216, -3.8072, -2.3048,
        -3.6735, -1.3202, -6.2746, -3.4197, -1.0445, -9.4149, -1.7523, -1.8367,
        -6.4640, -5.0685, -3.1906, -4.2277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0305, -5.9331, -3.1308, -1.9296, -3.0961, -5.9487, -0.2629, -4.4319,
        -2.3218, -3.2443, -0.1814, -5.8347, -3.2998, -0.7713, -2.9376, -2.6402,
        -2.5851, -2.0628, -5.8059, -0.8405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1485,  -5.8212, -11.1736,  -4.4373,  -6.4017,  -5.5928,  -4.9298,
         -7.3437,  -5.6339,  -4.0454,  -6.8713,  -5.0315,  -1.4076,  -5.0644,
         -2.7725,  -1.8787,  -3.6057,  -5.5036,  -1.7414,  -3.8948],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5346,  -6.9640, -10.6674,  -6.2183,  -8.3718,  -3.6134,  -4.2565,
         -5.3607,  -6.6780,  -5.6805,  -5.1353,  -5.9092,  -7.2275,  -2.5855,
         -6.1658,  -4.9202,  -6.6797,  -8.7477,  -5.5063,  -5.3952],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2257, -6.3903, -6.9102, -4.2208, -4.7495, -8.7114, -2.7226, -3.2897,
        -1.7352, -2.7374, -5.6212, -3.7794, -0.1992, -4.0500, -3.5736, -7.0760,
        -8.1642, -8.1558, -8.1918, -8.4022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3010,  -5.7902,  -3.0121, -15.2835,  -5.9804,  -8.1708,  -1.7792,
         -5.0320,  -1.1662,  -1.8693,  -5.0936,  -6.4790,  -3.8568,  -3.6764,
         -8.5772,  -1.5053, -11.6093,  -3.8297,  -3.5127,  -6.8727],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2199, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4909,  -4.7825,  -3.0786,  -3.6559,  -2.9362,  -5.6735,  -3.9883,
         -2.3396,  -2.2216,  -2.5427,  -1.8304,  -1.6349,  -6.7657,  -1.0965,
        -19.1542,  -2.6785,  -3.6446,  -6.4300,  -6.1510,  -0.8888],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2965, -25.0882,  -7.5338,  -8.4102,  -7.7320,  -1.5469,  -7.1249,
         -1.9748,  -6.9750,  -4.4702,  -1.6320,  -1.6702,  -6.6166,  -4.3470,
         -1.5259,  -3.1233,  -2.3281,  -0.7089,  -4.7244,  -5.4101],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2647, -1.4749, -9.8780, -3.8099, -1.7103, -4.5924, -6.6696, -0.4343,
        -3.8765, -2.3223, -2.1899, -2.2842, -5.7622, -4.2940, -2.0441, -2.7273,
        -1.8612, -0.9550, -6.5641, -5.4300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6610, -5.2701, -1.3341, -7.2811, -3.1206, -2.2183, -0.6798, -5.8306,
        -3.9169, -1.8685, -4.5224, -3.6153, -4.1579, -3.7454, -5.0739, -5.3895,
        -2.9014, -4.6141, -2.4027, -4.2788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8941, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4281, -6.8257, -8.1140, -7.3201, -7.2654, -2.9232, -5.6051, -4.0611,
        -2.1164, -5.0764, -3.8654, -4.4716, -1.2624, -4.6937, -3.6929, -1.0102,
        -5.1956, -2.4483, -6.4799, -4.8883], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4939,  -1.4459,  -5.7739,  -4.3835, -28.3018,  -7.4457, -10.0436,
         -6.1289,  -5.6798,  -9.4051,  -7.9092,  -5.7286,  -1.8403, -11.7953,
         -6.8533,  -9.6013,  -5.7182,  -6.6695,  -4.7632,  -2.5166],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8888, -5.4172, -4.3103, -3.3168, -6.7360, -1.8693, -2.2649, -5.8775,
        -4.8961, -1.4066, -4.5206, -2.8623, -2.2299, -2.2143, -6.8176, -4.6796,
        -3.2222, -7.0998, -2.3070, -2.0860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9020, -2.7652, -6.1726, -3.4161, -2.7415, -5.9455, -4.8729, -0.8151,
        -4.0133, -2.5698, -3.4082, -1.8782, -5.9616, -3.7761, -0.5642, -6.5482,
        -1.6955, -1.8521, -4.3773, -4.6359], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6031,  -0.2963,  -7.1814,  -3.5118,  -1.4117,  -3.1889,  -6.2872,
         -4.8095,  -2.6342,  -2.7927,  -3.0256,  -8.7836,  -3.6371,  -6.9285,
         -3.7703,  -1.5379, -11.8289,  -2.1595,  -2.1642,  -0.8828],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9451,  -9.7431,  -4.3150,  -3.3007,  -3.9997,  -4.7266,  -4.7027,
         -4.8355,  -4.8267, -19.4343,  -8.0254,  -8.2449,  -6.8528,  -7.9551,
         -2.4102,  -6.2952,  -4.6246,  -1.6402,  -5.0246,  -2.7353],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5324,  -1.5805,  -5.7219,  -1.2578,  -3.4889,  -5.4784,  -3.7368,
        -12.3498,  -6.6550,  -3.0612,  -5.1098,  -2.3048,  -1.7850,  -2.6835,
         -6.7521,  -5.2952,  -3.4223,  -5.7438,  -4.1314,  -2.3136],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5202, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6512, -5.8479, -4.8854, -8.0050, -3.0053, -0.9903, -5.0536, -4.0600,
        -0.4111, -6.3721, -1.5426, -2.4066, -2.8186, -5.2880, -1.6797, -3.2977,
        -3.5708, -1.8598, -4.7427, -5.3129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4373, -4.2698, -0.0863, -5.7241, -3.2727, -3.0340, -1.9826, -6.4287,
        -3.2195, -1.1929, -5.8243, -3.2959, -3.9493, -1.7727, -5.4957, -3.6547,
        -0.3718, -3.6686, -2.3778, -1.6345], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6014e+00,  1.1277e-02, -3.2959e+00, -2.3727e+00, -1.1330e+01,
        -7.0555e+00, -6.9272e+00, -7.2895e+00, -2.8578e+00, -6.5608e+00,
        -1.8582e+00, -4.8123e+00, -4.3632e+00, -2.6064e+00, -6.8083e+00,
        -5.3436e+00, -1.2208e+00, -3.1453e+00, -4.2078e+00, -2.2471e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8356, -4.8364, -2.1415, -4.6676, -4.7617, -2.8719, -4.2751, -4.1500,
        -1.9495, -3.1873, -4.7053, -0.5908, -6.6296, -1.7309, -2.2114, -4.4630,
        -4.5901, -2.0799, -4.0960, -4.6512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3724, -5.9343, -3.4146, -1.4698, -6.7868, -1.5062, -1.8470, -4.5565,
        -5.1888, -0.9304, -3.8816, -3.8076, -1.9904, -3.1490, -5.5403, -3.9677,
        -2.7036, -2.2485, -3.5764, -1.8043], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6085,  -4.3863,  -4.6756,  -0.1187,  -6.7210,  -2.3507,  -1.8843,
         -1.5451,  -6.8302,  -1.7682, -11.9069,  -2.8524,  -4.7980,  -0.9350,
         -6.0350,  -4.4385,  -0.3764,  -5.7630,  -3.4713,  -4.0083],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3712, -3.2457, -7.8605, -3.0962, -2.0612, -9.8634, -4.8195,  0.4493,
        -4.0677, -2.1193, -6.0420, -9.9443, -7.1638, -7.8077, -1.9032, -8.3081,
         0.0390, -6.0909, -3.9391, -1.3288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7772, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6497, -2.3941, -2.6041, -5.2702, -5.2752, -0.4504, -2.5937, -2.9098,
        -2.5559, -3.7013, -5.1805, -1.5316, -6.1707, -4.7677, -4.9087, -1.8116,
        -5.7347, -4.2852, -0.9224, -3.9008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9959,  -3.1851,  -5.9888,  -3.7558,  -1.5977,  -9.0363,  -2.5778,
         -3.1786,  -5.6761,  -4.9609,  -1.9128,  -3.7427,  -3.1112, -14.2439,
         -7.0740,  -6.4215,  -7.6038,  -2.9092,  -6.6107,  -1.1898],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6992, -3.2670, -3.6015, -1.1631, -6.4243, -3.6195, -1.1731, -2.6119,
        -2.3514, -0.9430, -4.2908, -5.4291, -2.0747, -4.8130, -3.9534, -4.1262,
        -3.3880, -6.1659, -3.9477, -5.5398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4260, -10.9928,  -6.5809, -33.7719,  -7.3405,  -8.9067,  -6.6952,
         -7.1825,  -2.6646,  -7.5707,  -5.5000,  -4.8848,  -2.3854,  -3.2283,
         -3.2852,  -5.7581,  -4.9745,  -2.4689,  -2.1471,  -2.3969],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6000, -4.7706, -1.1548, -4.6465, -2.1431, -3.5050, -3.1841, -5.9104,
        -5.5744, -2.3510, -4.5933, -1.7318, -1.4417, -6.5249, -4.1450, -0.7033,
        -5.1585, -2.5711, -1.8988, -3.5277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9386, -5.5720, -0.2508, -9.0648, -3.7286, -3.5109, -3.7957, -5.9742,
        -0.4688, -3.0115, -3.6811, -2.2050, -2.4078, -6.9328, -5.4276, -3.0798,
        -8.1990, -3.6186, -1.4998, -5.9945], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1181, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9648, -5.4682, -3.6306, -4.8635, -9.3365, -6.5877, -9.1672, -3.4058,
        -7.1902, -3.1050, -4.2577, -3.5731, -4.2222, -3.8490, -6.5247, -3.0162,
        -6.8174, -3.5940, -4.1184, -3.8854], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7064,  -1.8657,  -2.2980,  -2.6831,  -2.4453,  -6.1542,  -5.3192,
         -1.1331,  -5.9115,  -1.7250, -15.6759,  -5.2612,  -9.6705,  -6.3658,
         -6.6761,  -4.5973,  -4.7309,  -0.2444,  -4.1151,  -3.9937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4508,  -7.3746,  -7.8148,  -2.7253,  -6.4053,  -3.9633, -14.4344,
         -2.3280,  -2.1307, -16.9726,  -3.0919,  -2.7103,  -5.3515,  -2.2778,
        -11.5094,  -3.8618,  -5.9244,  -7.7699,  -7.7755,  -9.7464],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0674,  -3.2872,  -2.9721,  -0.6724,  -6.3077,  -3.4956,  -1.3761,
         -5.6737,  -2.3486,  -1.5356,  -1.9504,  -6.6007,  -0.1054, -15.3103,
         -2.0522,  -2.7868,  -2.7580,  -5.6185,  -4.3639,  -1.6574],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7400,  -7.8288,  -6.7507,  -6.4077,  -2.7334,  -6.3943,  -0.4889,
         -5.4312,  -3.2969,  -3.3050,  -5.5486,  -5.5946,  -4.0593,  -5.3022,
         -2.2659, -14.6306,  -3.3337,  -8.2649,  -1.8268,  -6.6558],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0362,  -4.5656,  -1.3788,  -3.5651,  -4.9259,  -0.3159,  -3.4062,
         -2.8705, -16.3557,  -6.0394,  -8.6204,  -1.5875,  -7.2792,  -0.7806,
        -15.3650,  -4.1242,  -2.5739,  -1.0609,  -6.7168,  -4.1608],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8687,  -4.7884,  -5.3348,  -0.6418,  -3.8918,  -3.0606,  -3.5417,
         -2.6508,  -5.2556,  -0.7347,  -4.7688,  -2.7659, -11.1765,  -5.2981,
        -10.6336,  -5.0946,  -8.5466,  -1.0049,  -3.1445,  -5.4479],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.0477,  -5.5984,  -7.5321,  -1.2448,  -4.7167,  -2.7515,  -3.0677,
         -5.7592,  -4.9563,  -1.2928,  -4.3957,  -5.3347,  -0.4621,  -4.4946,
         -2.9139,  -3.3893,  -1.4750,  -6.1843,  -1.6739,  -6.0384],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7204,  -1.7830,  -2.2933,  -1.0810,  -8.1014,  -4.9756,  -1.8859,
         -3.4544,  -3.4735, -14.0273,  -3.6399,  -6.5691,  -8.7400,  -7.4663,
         -2.5825,  -5.7060,  -0.9170,  -5.0158,  -3.9685,  -2.3413],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6173,  -4.8600,  -3.8737,  -0.8830,  -4.2950,  -1.3185,  -1.8536,
         -6.0384,  -3.9713,  -0.3287,  -3.9860,  -2.1863, -20.2413,  -6.0741,
         -8.5399,  -7.4825,  -1.4779,  -7.0362,  -0.3955,  -4.0681],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8461,  -4.6539,  -6.0181,  -1.4327,  -1.5345,  -2.1711,  -3.6385,
         -0.5199,  -5.9967,  -0.6453,  -9.7898,  -4.4511,  -5.0027, -10.6229,
         -3.6975,  -7.9717,  -6.2763,  -5.3812,  -7.6527,  -5.7420],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5502,  -4.9999,  -5.0393,  -1.7006,  -5.9463,  -1.5589,  -3.5488,
         -6.0036,  -5.4121,  -1.4834,  -5.3509,  -2.8687,  -1.3813,  -4.3995,
         -4.3602,  -0.7818,  -7.1368,  -1.5663, -10.0820,  -5.1461],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0727, -5.6313, -0.8418, -7.5554, -3.5906, -2.2574, -3.1281, -4.3063,
        -0.3135, -3.7656, -1.7858, -2.2120, -7.0590, -5.5285, -1.0079, -5.2235,
        -2.8272, -3.6518, -0.9529, -7.1401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4766, -4.6331, -0.8607, -5.3465, -3.2712, -2.3462, -2.5956, -5.8205,
        -0.7955, -2.5429, -3.4999, -3.9855, -1.9096, -5.7644, -4.1233, -1.4674,
        -4.0330, -2.8494, -2.0249, -4.1716], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9486,  -5.0485,  -0.6696,  -3.8662,  -2.1545,  -3.2463,  -2.8181,
         -6.1756,  -3.3198,  -1.0792,  -6.1746,  -2.7911,  -0.9161,  -3.4571,
         -5.8410,  -1.2141, -10.4893,  -5.1846,  -5.9247,  -1.8571],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9524, -3.1143, -7.2376, -7.5744, -2.3957, -8.7353, -2.0200, -2.8388,
        -3.0879, -6.3663, -3.3926, -1.0843, -4.5768, -2.7268, -0.9699, -7.5996,
        -5.3445, -0.0875, -6.0855, -3.2532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9383,  -5.0174,  -1.9985,  -3.3318,  -4.4511,  -5.6617,  -2.4262,
         -3.7952,  -5.0491,  -0.3657,  -7.4144,  -3.8273,  -3.8547,  -1.4932,
         -5.7295,  -0.8845,  -2.2772,  -4.2884, -10.5610,  -8.3996],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1978,  -7.1764,  -3.9256,  -0.3232,  -4.1574,  -2.6324,  -1.9563,
         -2.9174,  -5.0542,  -1.5401,  -2.0561,  -2.9440,  -2.2192,  -6.0152,
         -5.2174,  -2.1351,  -3.7337,  -3.0763, -10.7834,  -6.9477],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0293, -3.8941, -5.1833, -2.3383, -4.9987, -3.6398, -0.9715, -3.9951,
        -1.8907, -2.0912, -3.3556, -5.3194, -0.9182, -4.2920, -2.6227, -2.0109,
        -1.4820, -6.6018, -3.5142, -1.5452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9957, -1.3531, -2.7753, -4.4916, -0.9031, -3.5297, -1.8851, -1.7736,
        -4.1500, -5.4920, -0.2766, -1.4636, -3.1273, -4.1763, -5.0128, -5.7546,
        -1.9959, -5.0930, -2.4010, -2.4784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0064, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2374,  -4.6031,  -5.1036,  -6.1757,  -6.3045,  -5.5077,  -7.7537,
         -5.1405,  -6.4165,  -4.8188,  -5.2916, -10.8093,  -4.3062,  -5.6058,
         -8.4081,  -4.2904,  -4.9033,  -4.9819,  -7.2140,  -8.1271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5365, -1.8938, -3.6450, -1.3653, -5.0721, -3.9088, -0.6714, -3.8305,
        -3.5030, -5.3779, -2.4283, -5.3025, -4.9582, -1.7420, -3.4551, -4.2677,
        -7.2067, -4.4421, -3.7377, -5.8345], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6311, -6.4636, -5.8886, -0.9793, -7.5190, -5.0982, -2.1292, -1.5367,
        -7.8260, -6.0294, -2.9930, -5.3733, -3.0413, -2.9804, -3.8509, -6.7816,
        -3.2202, -1.0303, -2.9314, -1.8674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6843, -3.4884, -3.5616, -5.7509, -0.6127, -7.5160, -5.3430, -4.0653,
        -3.5986, -3.7865, -4.5467, -1.6443, -5.9460, -3.8765, -2.1511, -3.1158,
        -4.9548, -5.7410, -3.6956, -4.7277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2670, -5.3977, -3.0736, -3.5934, -3.8276, -5.9584, -5.1299, -4.6517,
        -7.2849, -5.6160, -7.1767, -3.6593, -6.6617, -0.8911, -6.5336, -3.4677,
        -3.3323, -2.8053, -5.8933, -4.5452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9551, -3.3369, -1.5772, -6.6712, -4.0127, -3.5007, -2.4409, -4.9934,
        -4.3249, -1.3675, -4.3636, -2.4197, -0.8129, -2.3383, -5.2177, -0.6363,
        -5.2509, -2.5530, -4.6542, -1.5214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8960,  -5.7953,  -3.3001,  -4.9968,  -2.8423,  -2.7203,  -4.1940,
         -4.8137,  -0.2999,  -6.5330,  -2.2436,  -2.9155,  -4.0080,  -6.0025,
         -5.7319,  -3.3762, -11.2057,  -3.8525,  -3.0933,  -4.4708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7757, -5.4239, -0.7811, -2.9424, -4.1822, -2.7794, -2.1754, -6.1631,
        -4.1046, -2.0809, -3.0801, -1.7823, -2.5524, -4.2585, -4.0998, -0.5349,
        -4.5105, -4.2103, -4.8484, -8.9324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8283, -6.8760, -3.0281, -7.2736, -0.8181, -6.3021, -3.6374, -2.4953,
        -0.6627, -6.7759, -3.9680, -0.8396, -4.8711, -2.5161, -5.1941, -5.9195,
        -6.9539, -6.4122, -6.3194, -2.0525], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9078,  -9.1366,  -5.7666,  -1.8707,  -6.7471,  -2.9782,  -3.9128,
         -2.6257,  -5.3989,  -4.0688,  -0.4018,  -4.8431,  -1.8882,  -1.2198,
         -6.5823,  -4.9525,  -6.4557,  -8.1434,  -1.5307, -27.3327],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3574,  -3.5790,   0.2010,  -5.2505,  -4.1546,  -1.4867,  -3.9479,
         -3.3244, -14.5647,  -5.9796,  -6.1796,  -8.0723,  -2.1895,  -7.0555,
         -1.1298,  -5.6628,  -5.1036,  -4.8573,  -1.6491,  -6.0955],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5487, -2.7820, -4.9726, -3.9114, -6.5909, -4.8346, -2.3103, -4.9852,
        -2.9396, -2.0677, -1.3269, -5.9521, -0.2114, -6.8841, -2.5013, -2.8075,
        -3.8382, -5.9134, -0.5061, -5.6979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6139, -3.5263, -3.4946, -3.7232, -3.0495, -5.8130, -6.2746, -5.3344,
        -3.2980, -7.0754, -3.3157, -2.1192, -2.3732, -5.4783, -0.7022, -4.9729,
        -3.5111, -3.3715, -6.3847, -4.2487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0065, -3.7072, -6.4479, -1.8050, -9.2737, -3.8495, -3.1542, -5.2295,
        -6.7468, -0.4490, -5.5390, -3.6780, -3.0978, -6.0341, -5.4136, -2.1112,
        -3.1939, -3.7803, -3.4680, -2.6309], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9308, -7.9840, -6.9020, -8.9929, -3.3185, -7.3395, -3.1545, -4.1561,
        -4.6891, -2.8356, -1.9661, -6.6920, -4.5933, -1.5625, -3.9321, -3.4713,
        -3.4585, -0.1170, -6.4776, -3.5499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9159,  -1.6426,  -8.7940,  -2.1627,  -2.4986,  -4.6401,  -6.4399,
         -1.2677, -11.5877,  -2.7596,  -2.4158,  -0.4171,  -5.5828,  -3.9380,
         -1.2813,  -4.6095,  -3.1993,  -1.6646,  -3.7895,  -4.5604],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7617,  -3.1828,  -2.1095,  -5.3327,  -3.3355,  -1.0574,  -5.2746,
         -5.3976, -15.1795, -11.9383,  -9.0583,  -3.2282,  -6.3002,  -0.4854,
         -8.1248,  -3.0817,  -2.1896,  -3.0057,  -5.4116,  -0.6832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2494,  -5.3318,  -7.7176, -10.5106,  -5.4928,  -8.8396,  -1.9737,
         -5.9604,  -0.8461,  -9.1247,  -3.0500,  -2.0608,  -9.5485,  -5.5140,
         -2.3951,  -7.8949,  -4.7275, -17.3264,  -6.9129,  -5.4942],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7137, -3.2094, -2.8371, -2.7345, -6.6894, -1.7910, -7.6493, -2.8343,
        -2.9452, -3.9771, -6.3376, -3.8803, -0.3594, -3.3509, -1.9829, -2.7299,
        -3.0226, -6.3390, -3.2546, -1.2903], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1028,  -0.9482,  -8.3635,  -3.0529,  -2.7636,  -4.6557,  -6.0866,
         -0.8350,  -3.2485,  -3.8248, -10.6954,  -6.0303,  -6.6846,  -8.2579,
         -3.0023,  -6.1470,  -1.7728,  -5.0977,  -2.7101,  -4.5870],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1090, -5.2828, -0.8775, -5.4969, -1.5917, -2.9081, -1.4123, -6.0368,
        -3.5139, -1.4866, -2.3681, -2.2548, -2.9605, -0.4686, -4.5753, -2.3795,
        -1.4617, -6.0976, -1.6525, -3.5547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0352, -5.1375, -3.9206, -2.3802, -5.5977, -3.2573, -2.1000, -3.9331,
        -5.3717, -1.7560, -3.7848, -4.1883, -3.4215, -4.2710, -5.5859,  0.1186,
        -4.4468, -3.4280, -3.8012, -2.9015], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5100, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9731, -5.3016, -1.1637, -4.8470, -1.3459, -1.9023, -5.7817, -5.4378,
        -1.5529, -5.1429, -2.2491, -3.6170, -5.5313, -4.7033, -0.4742, -4.5752,
        -4.6248, -3.6304, -2.1484, -4.5070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2871, -4.3946, -5.6693, -4.4961, -4.6282, -6.1478, -1.7434, -6.7246,
        -5.2964, -0.1923, -3.7026, -2.2796, -5.1918, -1.5763, -6.0018, -3.5490,
        -1.3002, -5.1300, -2.0845, -2.5619], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6689, -1.6194, -2.8337, -4.8842, -4.3985, -0.7880, -3.8876, -2.3259,
        -2.3678, -3.3254, -5.9489, -4.5049, -1.8381, -3.5496, -1.8181, -1.3972,
        -1.1527, -5.3840, -2.0834, -6.6650], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6858, -0.6480, -4.7757, -2.7618, -3.0337, -2.1114, -6.1963, -4.6316,
        -2.1717, -6.0007, -2.8872, -2.3953, -1.9297, -6.8141, -3.7697, -2.1829,
        -3.3073, -3.0346, -1.6069, -7.3272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6722,  -2.6228,  -3.1694,  -3.8765,  -5.6114,  -4.7305,  -2.7204,
         -1.7830,  -2.2933,  -1.0810,  -8.1014,  -4.9756,  -1.8859,  -3.4544,
         -3.4735, -14.0273,  -3.6399,  -6.5691,  -8.7400,  -7.4663],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5239,  -4.1149,  -4.0826,  -7.2864,  -6.3043,  -2.1302,  -7.3828,
         -4.2383, -10.0379,  -6.7425,  -7.5001,  -6.5891,  -4.0272,  -5.9777,
         -1.7002, -19.3315,  -4.0508,  -3.8060,  -3.4132,  -7.1655],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3895,  -7.6182,  -3.9805,  -1.2172, -11.3525,  -3.8011,  -2.5449,
         -3.4435,  -9.6572, -10.8861,  -7.9722,  -4.7409,  -5.5780,  -7.0212,
         -3.1375,  -2.8643,  -2.8358,  -4.1800,  -6.8372,  -3.6927],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0092, -6.0705, -4.4729, -1.4704, -3.6699, -2.7848, -1.1601, -1.6604,
        -6.2665, -0.3350, -6.7305, -3.1027, -1.8388, -2.2458, -6.0104, -0.0208,
        -4.8031, -1.7769, -3.0429, -5.2734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0460,  -5.2480,  -4.6636,  -0.6624,  -6.0599,  -2.3635,  -6.2786,
         -1.9825,  -7.2784,  -4.6815,  -3.7367, -15.0388,  -3.5421,  -3.9445,
         -4.8445,  -6.2266,  -0.4927,  -6.3822,  -4.7202,  -2.1268],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4543,  -2.6494,  -2.3516,  -5.0095,  -4.7889,   0.2448,  -3.3831,
         -2.7640,  -3.6326,  -7.0309,  -5.6197,  -2.0922, -12.3143,  -5.3774,
         -0.5878,  -1.5987,  -6.1940,  -5.9451,  -3.6736,  -3.0299],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9581,  -1.3620,  -5.4100,  -4.1011,  -2.2631,  -5.5974,  -5.2376,
         -2.7848,  -4.3792,  -1.9549, -15.1546,  -6.6622,  -3.6254,  -5.6500,
         -6.1135,  -5.5695,  -6.0978, -15.0147,  -2.5545,  -6.9823],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1962, -3.2415, -5.3576, -4.5030, -6.4791, -1.6216, -4.3871, -4.0408,
        -3.5064, -4.8663, -4.7830, -0.2541, -5.4850, -3.0965, -2.4406, -1.8311,
        -5.2271, -3.8085, -0.6974, -3.9010], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5006,  -5.4643,  -1.9261,  -3.4651,  -3.2492,  -5.4228,  -1.0182,
         -4.6699,  -3.3879, -16.4145,  -7.7509,  -5.7267,  -6.9981,  -2.2436,
         -6.3889,  -1.3157,  -6.1475,  -3.1833,  -3.6596,  -5.0277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7799, -4.5940, -3.0987, -2.8271, -1.5146, -6.4615, -4.2402, -2.0752,
        -7.6085, -2.2476, -1.5624, -2.0902, -5.1972, -0.7980, -6.7401, -4.0745,
        -2.9274, -6.2558, -4.5587, -3.7242], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9311, -1.6485, -6.3989, -4.1539, -0.9501, -7.6282, -2.5326, -1.5316,
        -1.7968, -6.1894, -1.1564, -3.1154, -3.1360, -4.4657, -3.0746, -7.9872,
        -0.6849, -6.9084, -4.4838, -8.6835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9171, -1.2060, -3.9088, -4.5728, -0.6393, -2.7918, -3.2389, -2.8365,
        -0.7872, -6.3294, -3.4745, -1.2721, -2.8203, -2.2743, -2.7583, -1.3524,
        -6.5260, -0.4421, -4.3943, -2.9934], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8268, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.0263,  -5.0058,  -8.5186,  -2.4844,  -6.4909,  -4.1840, -19.9582,
         -4.8513,  -5.9455, -11.8182,  -6.0306, -14.4859,  -8.7703, -12.5796,
        -11.8626,  -6.9101,  -5.7822,  -5.9175,  -7.2261,  -5.3968],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4367, -2.6316, -6.6083, -6.6349, -5.0391, -6.3285, -6.1485, -2.0391,
        -7.9546, -3.4310, -3.0688, -5.5662, -5.2013, -1.9984, -5.3692, -2.6354,
        -2.0739, -6.7445, -5.0421, -0.8140], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3377,  -2.0997,  -2.6047,  -4.0244,  -4.9099,   0.3225,  -2.4228,
         -2.2185,  -3.1660,  -4.7225,  -5.1005,  -1.0864,  -3.4269,  -2.4927,
         -4.2709,  -7.2866,  -5.4168,  -0.7999, -12.8409,  -5.9587],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1887, -3.1502, -2.9081, -6.4156, -2.7471, -4.4004, -3.4034, -3.6634,
        -2.7046, -6.9662, -4.4337, -1.4946, -5.4766, -2.1673, -1.7991, -5.4465,
        -4.8639, -4.9671, -3.7133, -2.8395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2294, -2.7250, -5.7037, -2.9542, -3.6247, -1.8315, -5.5411, -3.7370,
        -0.9618, -3.4818, -1.6464, -2.3639, -5.3414, -4.0373, -1.5062, -4.3107,
        -1.4837, -2.7793, -1.7709, -6.6357], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7196,  -6.5332,  -5.2229,  -3.6437,  -3.5066,  -3.1665, -11.1149,
         -7.7974, -11.4246,  -3.9182,  -5.2717,  -2.2080,  -3.4891,  -6.0715,
         -2.2244,  -3.3774,  -2.8310,  -5.5518,  -3.0331,  -2.4079],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9793,  -3.9316,  -1.9069, -21.9899,  -3.2892,  -8.7145,  -3.0186,
         -5.9906,  -0.1122,  -7.4469,  -4.5712,  -6.8539,  -1.6750,  -5.0536,
         -4.3359,  -0.6125,  -4.8673,  -1.8195,  -3.9101,  -1.1059],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1478, -4.2875, -5.4034, -3.7827, -2.8062, -3.1043, -2.8647, -3.0814,
        -3.1988, -6.7942, -3.5742, -1.0443, -3.5533, -2.5130, -3.3722, -4.5350,
        -4.3164, -1.9405, -2.6392, -3.4424], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0791, -6.2026, -1.2337, -9.8883, -3.0853, -2.9946, -4.0123, -5.1950,
        -4.1495, -1.9537, -3.6212, -2.4824, -2.6296, -3.6471, -5.1381, -0.3098,
        -4.3878, -3.4639, -2.6869, -7.2902], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4171,  -8.1180,  -2.9506,  -6.0986,  -0.5114,  -6.1430,  -3.5989,
         -2.2383,  -1.3650,  -5.9077,  -4.5339,  -0.7770,  -5.4101,  -2.0493,
         -2.6683,  -3.7278,  -4.4323,  -1.0074, -20.4766,  -1.1065],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1858, -3.1889, -3.8227, -2.2917, -1.8689, -5.9427, -4.7185, -1.9498,
        -4.0242, -2.6728, -2.1764, -5.9049, -5.0381, -1.6487, -2.9209, -2.2767,
        -2.9097, -4.9295, -4.3634, -0.5438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5616, -1.1488, -1.9458, -2.8269, -1.4011, -1.0852, -5.5991, -1.0054,
        -2.5934, -5.0406, -9.4792, -9.6808, -7.2357, -8.0756, -6.8559, -3.8771,
        -6.9716, -1.4004, -7.1443, -4.7448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2039, -10.6528,  -8.1683,  -8.7560,  -7.0658,  -2.4167,  -6.4566,
         -1.3228,  -4.0118,  -3.1043,  -2.6640,  -7.6479,  -5.2344,  -0.7212,
         -7.0205,  -3.6612,  -4.3141, -10.7518,  -9.6509,  -8.1790],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2360, -1.8790, -6.2298, -3.4197, -0.7009, -5.9030, -3.0067, -3.0390,
        -1.2174, -4.9737, -1.4430, -4.6422, -3.3897, -3.0951, -5.6746, -5.5601,
        -0.9328, -3.5827, -2.9962, -1.5075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9412, -4.6405, -4.6503, -1.3082, -3.3265, -1.9779, -2.0738, -4.3912,
        -4.5838, -1.2497, -3.8024, -2.5054, -1.3320, -4.7817, -5.2578,  0.2394,
        -4.5146, -2.0883, -1.7264, -3.7462], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2330, -2.7633, -2.2736, -5.1605, -1.5247, -6.4451, -4.4504, -3.0316,
        -2.9635, -6.2493, -4.2933, -2.0334, -3.6336, -2.3607, -3.1060, -3.4733,
        -5.6979, -2.4343, -4.5090, -3.7257], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6576, -2.9068, -5.5301, -1.4288, -4.6646, -2.9091, -1.9307, -3.5095,
        -5.7620, -1.0701, -3.4103, -3.2687, -3.0183, -1.5251, -5.8341, -4.4291,
        -1.8854, -6.2077, -3.8307, -1.8149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4529,  -2.0674,  -4.5248,  -1.8146,  -7.0041,  -4.0400,  -1.0820,
        -11.6851,  -2.7363,  -3.8667,  -1.9524,  -5.7404,  -3.8581,  -1.0624,
         -3.6508,  -1.4162,  -2.1048,  -2.0155,  -4.1181,  -8.8486],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0562e+00, -6.9055e+00, -4.2324e+00, -2.5469e+00, -4.0288e+00,
        -1.6043e+00, -4.7277e+00, -1.1882e-03, -1.6508e+00, -3.8294e+00,
        -2.5114e+01, -1.8588e+01, -6.8497e+00, -8.3755e+00, -2.2919e+00,
        -5.3122e+00, -1.2086e-01, -4.8911e+00, -3.4716e+00, -2.6292e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5614, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1352, -2.4739, -7.1781, -4.7866, -2.2248, -3.9433, -3.6605, -2.0651,
        -1.3625, -5.2048, -0.1226, -6.0948, -3.4668, -3.0630, -1.0227, -6.6889,
        -2.9669, -0.8393, -3.7252, -2.5511], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9132, -4.2980, -5.1368, -4.8168, -1.5444, -1.5926, -3.0361, -3.0842,
        -6.0610, -5.0251, -0.3279, -3.2625, -2.4382, -2.7516, -1.9017, -5.0232,
        -2.9953, -1.5017, -3.2380, -1.4318], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3768,  -1.6759,  -2.3026,  -4.1964,  -4.0253,  -0.7715,  -5.0926,
         -1.7971, -12.0501,  -5.1356, -10.6173,  -1.8316,  -6.9055,  -2.1581,
         -8.2946,  -3.6356,  -2.4508,  -3.1923,  -5.6723,  -1.0835],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4568,  -8.8496,  -4.1820,  -6.6515,  -3.8702,  -4.7985,  -1.5935,
         -3.5748,  -4.6799,  -2.3161, -11.6268,  -3.5911,  -3.1677,  -6.2596,
         -4.9781,  -1.1038,  -4.4409,  -5.1692, -30.0867,  -6.7779],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2587, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9842,  -2.1587,  -4.8101,  -1.5637,  -6.6089,  -4.5599,  -5.2591,
         -4.3488,  -1.2728,  -3.4368,  -2.4394,  -1.9136,  -6.6560,  -5.0329,
         -0.9538,  -5.3945,  -2.1559, -25.7875,  -6.9844,  -9.5164],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0393, -2.6150, -2.7733, -5.7285, -3.5562, -1.7047, -3.1120, -1.6955,
        -2.2242, -5.1694, -4.0600, -0.7286, -3.6555, -2.2534, -1.6957, -3.4411,
        -5.5600, -1.1702, -2.1868, -3.6226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6072,  -0.6936,  -2.3140,  -2.6338, -15.0868,  -5.3514,  -7.9798,
         -7.5009,  -2.9090,  -5.0621,  -2.9233,  -6.2021,  -3.2974,  -3.1021,
         -4.5844,  -4.9940,  -0.1288,  -8.6007,  -1.9637,  -2.9949],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6032,  -5.0883,  -0.3814,  -9.9552,  -1.7209,  -4.0073,  -7.5618,
         -5.1939, -13.4030,  -6.4852,  -3.5239,  -1.6260,  -4.6745,  -4.8892,
         -1.9809,  -7.0011,  -3.6881,  -2.4436,  -2.6988,  -4.5160],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5792,  -0.3089,  -4.3114,  -1.5625,  -4.3768,  -3.6039,  -6.1013,
         -4.6649,  -2.1151,  -3.9218,  -2.1437,  -2.2160, -10.7115,  -5.0165,
         -1.2988,  -8.0220,  -3.8486,  -3.1587,  -2.4278,  -6.5793],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9838, -6.3945, -5.1476, -3.1667, -6.4464, -2.2818, -1.5784, -4.9046,
        -4.3633, -0.5905, -4.0070, -2.5256, -3.7389, -2.9864, -4.9826,  0.6926,
        -4.3149, -3.4366, -5.6220, -6.7566], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0843,  -0.2413,  -6.4205,  -2.7885,  -3.2563,  -3.2515,  -3.7934,
         -2.6822,  -3.6086,  -5.2505,  -0.8804,  -6.2857,  -5.1107,  -4.7476,
         -5.8876,  -7.6418,  -7.2125, -11.1693,  -4.7085,  -4.7784],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7400, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6997,  -5.0005,  -4.3736,  -5.2195,  -4.4297,  -8.5897,  -3.7386,
         -9.5367,  -2.3531, -14.7629,  -4.5093,  -2.8396,  -3.9860,  -6.6277,
         -2.6805,  -6.1432,  -3.7488,  -4.5003,  -5.0256,  -5.7938],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3660,  -2.7041,  -4.1250,  -9.8554,  -8.9152,  -6.2905,  -6.6679,
         -6.6372,  -6.2150,  -4.6052,  -3.9572,  -2.9304, -15.8287,  -4.7155,
         -3.3677,  -4.7389,  -5.5763,  -0.0244,  -6.5384,  -3.5808],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4249,  -1.8740,  -8.0424,  -2.9059,  -3.5702,  -1.9035,  -5.7090,
         -0.4058,  -3.1611,  -4.7129, -25.3890,  -8.2429,  -9.1079,  -2.0323,
         -5.2775,  -0.0592,  -6.5155,  -3.6636,  -5.8804,  -6.8107],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.9723, -3.3786, -5.1283, -8.1296, -5.2962, -3.6866, -2.9527, -5.7803,
        -6.7446, -1.4837, -3.6196, -2.6069, -3.0782, -2.0025, -5.1115, -3.6792,
        -1.8619, -5.6524, -5.7034, -3.0692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3479, -7.0753, -4.2700, -7.5520, -4.2077, -1.0942, -9.3290, -3.7303,
        -3.5079, -7.2048, -5.3793, -0.9771, -7.0206, -2.3352, -1.8225, -4.5206,
        -3.9823, -0.4484, -2.9752, -2.8366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5842, -1.8907, -4.3447, -3.1091, -2.1037, -2.5411, -4.9890, -0.9445,
        -2.9459, -2.8382, -2.8084, -0.4411, -6.3644, -3.1387, -3.0452, -3.4883,
        -2.9376, -2.0855, -6.0212, -5.4356], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7137, -10.9395,  -3.3995,  -3.8967,  -5.8721,  -2.7601,  -1.5103,
         -2.1640,  -3.1202,  -3.8113,  -4.9467,  -0.4943,  -3.3580,  -2.3238,
         -3.2200,  -5.3135,  -4.8403,  -1.5204,  -3.8676,  -3.9174],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3434, -4.0170, -2.4648, -3.8770, -1.1211, -4.6474, -4.6794, -1.4666,
        -6.9427, -2.7701, -4.0582, -1.8762, -5.1385, -3.0952, -0.4331, -4.0406,
        -1.9666, -2.6026, -1.0659, -6.4410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3004, -2.0628, -4.3276, -1.5158, -5.6925, -3.8028, -0.5544, -4.9656,
        -3.6067, -3.5086, -3.7385, -6.7894, -4.1359, -1.2560, -3.3822, -1.7302,
        -1.8063, -3.0832, -4.2769, -0.1449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0784, -3.1821, -4.0268, -1.9401, -6.3841, -1.5458, -7.1476, -2.6334,
        -3.4646, -0.4646, -5.8328, -3.7356, -0.4307, -4.2949, -1.5304, -1.3792,
        -3.6127, -5.9278, -3.8463, -1.5941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8286,  -1.0105,  -9.6573,  -4.9397, -12.7863, -10.2489, -11.2006,
         -5.9409,  -7.6624,  -9.2536,  -6.3417,  -4.6144,  -5.4714,  -5.5015,
         -4.0857,  -3.8074,  -5.4805,  -0.3861,  -2.9075,  -3.9551],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.8281, -2.7688, -2.5399, -4.8077, -3.6359, -0.0695, -3.0841, -2.3672,
        -3.3418, -2.5822, -5.5587, -4.4250, -2.5694, -3.4717, -2.6612, -2.8277,
        -2.5646, -6.3226, -3.8617, -1.0810], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2555, -4.4725, -2.9157, -1.7986, -4.8658, -0.4939, -3.9844, -2.4757,
        -4.6411, -0.4921, -6.5440, -3.6005, -0.7787, -3.7128, -3.0366, -2.4659,
        -1.6225, -4.7403, -0.5405, -5.5845], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9067,  -5.7137,  -5.1300,  -0.4954,  -5.2599,  -3.2250, -20.9736,
         -3.7259,  -8.1851,  -1.6754,  -8.7433,  -1.9665, -12.5426,  -3.1621,
         -5.2864,  -1.6493,  -5.1230,  -4.7801,  -2.2379,  -4.0792],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2099, -4.9894, -4.1776, -5.5852, -6.9750, -5.0972, -5.9147, -4.1064,
        -7.3884, -2.5727, -3.2738, -1.7880, -4.5657, -2.5352, -3.0372, -6.2454,
        -1.5289, -2.9344, -0.7789, -5.5488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6066,  -5.0516,  -4.6107,  -3.0659,  -2.3948,  -4.0272, -11.2415,
        -10.8354, -10.5426,  -2.9729,  -6.1215,  -0.5861,  -8.0332,  -3.4582,
         -3.2635,  -1.6841,  -5.4776,  -4.0467,  -1.1094,  -4.8958],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1703,  -4.6221,  -3.7368,  -5.9936, -10.7436,  -5.5473,  -3.0688,
         -8.8484,  -3.9015,  -0.7224,  -6.1803,  -4.4354,  -2.1411,  -4.7760,
         -2.9403,  -2.4405,  -4.5802,  -4.8270,  -0.3698,  -4.4400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3243, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1387e+00, -2.1699e+00, -1.8009e+00, -3.6427e+00, -5.2756e+00,
        -5.5190e-03, -3.3075e+00, -2.6375e+00, -3.4349e+00, -8.2367e-01,
        -6.6838e+00, -3.9612e+00, -2.0441e+00, -3.2363e+00, -3.0120e+00,
        -2.6546e+00, -2.2717e+00, -5.4653e+00, -3.1318e+00, -1.2825e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9807,  -3.1637,  -1.8900,  -2.4507,  -2.2390,  -5.0048,  -0.1589,
         -2.9388,  -2.6375,  -4.2492,  -1.3516,  -4.9239,  -2.8712,  -0.1292,
         -3.8404,  -3.2403, -20.7989,  -2.9160,  -8.7694,  -1.6444],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3496, -1.8933, -3.6370, -2.8334, -3.9410, -6.5424, -4.9060, -1.1250,
        -2.5349, -5.8503, -9.9234, -7.8856, -3.3665, -6.6942, -6.8289, -8.8186,
        -8.4672, -9.0151, -4.5844, -5.5437], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.5370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5196, -8.3483, -2.4576, -3.4691, -4.3285, -4.9606, -1.9033, -7.7394,
        -4.3179, -3.5207, -5.5137, -3.3939, -1.5207, -6.5779, -3.1221, -1.7066,
        -4.8789, -4.9182, -0.3233, -5.0361], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9278, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7916, -3.1367, -0.9417, -2.2788, -2.8307, -2.3408, -7.1240, -5.2696,
        -0.7519, -2.9090, -2.4677, -2.5657, -3.9176, -5.2721, -1.3069, -2.1581,
        -3.3258, -2.6319, -5.6651, -5.4539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6530, -5.7477, -5.1702, -6.3699, -2.1828, -8.3044, -3.5191, -2.0680,
        -6.6666, -4.6964, -1.5206, -6.5652, -2.1582, -0.8512, -4.6376, -3.5457,
        -2.4175, -4.0489, -3.1556, -2.3769], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2832,  -4.7851,  -0.7058, -13.3768,  -3.9987,  -4.6680,  -3.6916,
         -6.6882,  -5.0513,  -2.9138,  -4.9631,  -4.7918,  -9.3866,  -8.4281,
         -6.9040,  -8.2401,  -2.5684,  -6.7286,  -1.7416,  -3.9483],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7452, -5.6640, -0.6836, -4.5542, -2.1372, -1.5679, -4.3280, -4.3258,
        -0.5162, -3.8016, -2.9383, -3.8262, -4.0788, -6.0657, -3.4542, -2.4728,
        -5.2251, -1.3808, -1.6278, -5.1663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.6838,  -3.9988,  -0.0301,  -3.0269,  -5.4931, -15.8407,  -6.0998,
         -3.2448,  -8.5273,  -5.0853,  -5.6681,  -1.4062,  -5.6200,  -4.8531,
         -3.1851,  -2.0877,  -6.7466,  -3.6562,  -1.7727,  -2.3234],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0462, -0.1878, -3.3101, -3.0601, -2.9840, -0.0471, -5.5492, -3.3405,
        -0.8149, -3.9029, -3.0632, -3.7492, -0.6695, -6.6324, -4.4679, -1.7713,
        -3.7810, -2.6060, -2.3813, -4.9811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7590, -3.1650, -1.6107, -5.5849, -4.8034, -1.4128, -2.6730, -2.9380,
        -4.4724, -5.8818, -5.7540, -0.6418, -2.3552, -2.2689, -3.1692, -0.0426,
        -6.3261, -3.3136, -1.7003, -3.7033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6016, -6.5294, -6.8919, -8.5513, -2.5301, -8.8854, -0.7682, -4.2158,
        -3.9020, -4.1001, -3.2548, -5.7060, -6.3398, -2.4257, -3.8518, -3.2068,
        -3.7582, -3.6769, -4.9529, -0.6252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8462,  -4.4360,  -5.8886,  -0.6856,  -2.8848,  -3.6779,  -3.7418,
         -9.0464,  -5.3244,  -0.9255,  -4.7188,  -2.2676, -14.8017,  -5.9723,
         -4.7097,  -7.0882,  -6.0769,  -6.0225,  -2.0498,  -3.7932],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4652, -1.5240, -5.3126, -3.2884, -0.1479, -1.8700, -2.6945, -1.8913,
        -1.4684, -4.9160, -2.0310, -2.3018, -6.3353, -2.4111, -3.3147, -0.6351,
        -6.0060, -2.9455, -1.2627, -4.0101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6086, -3.1784, -6.2061, -5.4076, -2.5997, -6.4095, -2.7294, -1.5279,
        -5.5917, -3.2774, -0.0239, -6.5712, -3.0079, -3.9798, -3.3576, -6.2716,
        -3.9636, -2.6629, -3.1028, -2.6392], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3679, -5.5690, -3.1474, -2.6482, -5.4517, -5.6600, -1.8991, -4.9809,
        -4.0356, -4.3737, -0.7930, -6.8453, -3.5453, -2.0275, -3.9875, -2.2345,
        -4.0711, -1.0379, -5.9445, -3.9680], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7146, -6.4484, -4.9577, -0.5792, -6.6591, -2.6631, -3.1184, -5.7581,
        -4.8124, -0.8743, -2.8817, -2.6134, -2.8158, -3.9188, -4.9184, -0.5908,
        -4.7446, -2.9853, -2.3065, -6.3022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3957,  -5.3109,  -4.5990,  -5.6110, -12.9659,  -3.0398,  -5.5599,
         -2.7033,  -3.2600,  -2.9536,  -6.7354,  -2.1676,  -4.5069,  -3.4737,
         -3.7888,  -8.5720,  -5.3409,  -2.2185,  -3.6435,  -2.3641],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9754, -3.7155, -3.3569, -5.9560, -4.0097, -1.9873, -4.6150, -2.4217,
        -2.8028, -6.3014, -4.7528, -0.9620, -4.6984, -2.7470, -1.7736, -0.8150,
        -6.5584, -3.2529, -1.4224, -3.5862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0292, -2.5547, -2.9631, -6.8500, -5.7044, -3.5026, -4.7253, -2.8594,
        -1.3163, -4.0257, -3.9080, -1.5158, -4.4186, -1.6232, -3.1094, -1.6580,
        -6.1615, -3.2888, -1.8226, -4.5280], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6739, -0.0178, -5.3677, -3.0766, -4.1808, -4.1597, -4.2261, -4.8589,
        -1.4778, -9.2232, -1.8898, -4.5538, -2.2273, -5.9901, -3.3231, -1.5800,
        -4.1089, -2.8540, -2.2059, -4.8124], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9348,   0.2853,  -7.8038,  -5.1242, -11.4289,  -7.9103,  -6.2386,
         -7.1357,  -2.6683,  -6.2725,  -2.5655,  -6.1498,  -3.6275,  -3.8302,
         -3.4754,  -6.6425,  -3.5375,  -1.0260,  -3.2052,  -3.3487],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4952,  -5.6405,  -5.5931, -16.7839,  -6.0945,  -7.3732,  -6.6967,
         -7.4446,  -5.3863,  -7.7897,  -2.7029,  -4.9055,  -3.1967,  -3.1324,
         -5.8341,  -5.6404,  -3.2316, -12.3255,  -3.4162,  -4.4768],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1009, -1.5789, -3.4112, -6.6749, -4.2027, -0.0151, -2.5284, -2.4331,
        -2.6176, -1.8042, -6.4553, -3.1471, -0.9958, -3.9397, -1.7450, -3.0887,
        -3.2905, -4.3547, -0.5307, -4.4569], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8965,  -2.8258,  -4.6247,  -0.7701,  -5.2895,  -3.1237,  -2.8972,
         -3.5827,  -6.7864,  -3.8860,  -2.7802,  -2.2807,  -1.6899,  -2.0927,
         -6.0651,  -5.1699,   0.2738,  -3.9995,  -2.7130, -19.2406],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7706,  -6.6602,  -2.8599,  -5.5847,  -2.1853,  -8.0172,  -3.8173,
         -1.5474,  -6.2825,  -5.4209,  -3.8764,  -6.5248,  -4.2013, -13.4377,
         -3.2103,  -6.9775,  -6.4469,  -8.1745,  -2.5388,  -7.4134],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6474, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0510, -4.9494, -1.2112, -6.2352, -3.8178, -1.6414, -4.5850, -2.4822,
        -4.8851, -2.4256, -5.9033, -3.5947, -1.3286, -3.3806, -2.7049, -3.5153,
        -0.9428, -6.2770, -0.1734, -3.5708], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9870, -4.3390, -4.6059, -0.4119, -2.5875, -2.6455, -2.5503, -4.4241,
        -4.6937, -0.4823, -5.2015, -2.8244, -3.0173, -5.3237, -5.0349, -1.5719,
        -2.3590, -2.8044, -3.8246, -5.0353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8750,  -5.5639,  -3.0123,  -8.5692,  -6.6298,  -7.0674,  -8.4076,
         -5.1178, -12.9162,  -1.5312,  -4.4061,  -4.5074,  -2.8230,  -1.3885,
         -6.2334,  -3.9725,  -1.5666,  -2.4810,  -2.3311,  -1.4690],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6719, -2.6774, -3.3660, -4.6828,  0.0622, -7.8035, -3.0497, -3.3824,
        -1.6882, -5.2802, -3.4084, -0.6736, -3.2922, -2.1781, -5.0497, -2.0487,
        -7.7928, -3.9263, -1.1965, -4.1222], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0917, -1.4882, -1.6301, -6.2248, -4.5616, -0.0939, -3.8098, -2.8215,
        -2.5464, -6.6198, -5.1738, -1.5932, -4.5284, -3.5593, -3.6801, -7.9735,
        -4.5568, -0.1572, -2.4866, -2.1898], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4675,  -2.9326,  -3.1195,  -2.3714,  -6.1276,  -5.5115,  -1.7303,
         -5.8139,  -5.3132, -22.6157,  -7.0165,  -9.0829,  -7.5035,  -7.2437,
         -2.2386,  -7.7625,  -0.7446, -15.0545,  -3.2207,  -4.0041],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0357, -2.1333, -3.4609, -4.7234, -0.7173, -5.5042, -5.2606, -3.1267,
        -2.9663, -5.6722, -4.7529, -2.5365, -5.2454, -2.8276, -1.4067, -3.8222,
        -3.9618, -0.3035, -2.5206, -2.6242], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2801, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2560,  -3.8074,  -1.6135,  -4.8239,  -6.6894,  -2.1985,  -4.8273,
         -2.2216,  -7.3359,  -4.9017,  -5.1303,  -6.2022,  -4.0032,  -4.9117,
         -8.2070,  -4.6687,  -1.6864,  -3.9135,  -1.5286,  -3.7183],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2361, -1.5525, -6.4401, -3.3305, -1.3696, -3.0823, -2.6966, -3.9749,
        -1.5571, -5.6009, -3.2570, -1.0646, -4.2158, -2.3692, -5.1124, -0.3267,
        -6.5242, -3.2095, -0.7350, -3.6493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6793, -4.0843, -3.9560, -8.1620, -5.4581, -3.5959, -4.3536, -5.1843,
        -2.2535, -2.0107, -5.5476, -3.3527, -0.7212, -3.0273, -2.2899, -3.1647,
        -1.6023, -4.8467, -0.3904, -3.6389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0014, -7.1574, -2.1172, -5.6445, -4.1225, -1.8116, -5.5854, -3.5679,
        -3.0719, -3.8301, -6.1673, -5.2034, -2.6412, -4.7078, -2.8814, -1.8321,
        -6.4755, -4.4514, -2.1450, -4.7202], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5971, -1.7312, -3.7076, -0.5928, -6.1829, -2.9101, -1.5985, -6.0788,
        -2.9616, -1.9973, -3.0214, -5.3951, -0.2015, -3.2003, -3.7142, -2.1694,
        -5.1601, -4.8156, -0.1986, -2.1426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7276,  -6.2799,  -2.3815, -28.9640,  -3.6390,  -8.6157,  -1.9128,
         -7.0324,   0.0967,  -7.5584,  -3.5250,  -5.5469,  -4.8749,  -6.7160,
         -4.3908,  -1.3847,  -5.0488,  -2.8466,  -2.6591,  -2.3654],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6930, -3.1739, -1.3344, -3.1322, -2.0718, -1.7568, -5.0627, -5.1008,
        -1.8102, -6.8602, -3.3283, -4.2656, -1.7176, -6.3420, -3.6170, -0.8956,
        -5.7925, -3.1652, -2.9799, -0.8991], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3908,  -7.4868,  -6.0636, -11.8303,  -1.4813,  -4.3088,  -6.7984,
         -7.9012,  -4.3897,  -3.8875,  -5.5106,  -4.1779,  -1.0919,  -2.6648,
         -2.7360,  -2.8853,  -4.7457,  -5.1275,  -0.5444,  -3.8417],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8076,  -6.1108,  -4.7354,  -1.6123,  -5.1136,  -3.8258,  -5.6822,
         -3.2317,  -5.6739,  -2.5530,  -1.6032,  -3.8385,  -2.1539,  -6.2760,
         -8.7992,  -4.7787,  -2.9096,  -5.6439, -17.7981, -15.8682],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7508, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9721, -5.4010, -4.8801, -1.1936, -3.5791, -4.5341,  0.0211, -3.0985,
        -2.8769, -2.7080, -1.9114, -5.1686, -1.8169, -2.8906, -3.3044, -4.4936,
        -4.7271, -7.1453, -2.9152, -4.4176], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3198, -5.2723, -0.0718, -4.1103, -3.0184, -2.4689, -6.0589, -3.4221,
        -0.9058, -5.5417, -3.4609, -3.4944, -3.0246, -6.1358, -5.2418, -1.7024,
        -6.5675, -2.0221, -2.1168, -5.5769], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1507, -6.6589, -2.9413, -1.3562, -3.3103, -3.0787, -1.5772, -1.3728,
        -5.7093, -0.0340, -2.5588, -2.3794, -4.8023, -5.2414, -4.9340, -4.9893,
        -2.2833, -4.7009, -2.6972, -1.7275], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7134, -2.3845, -6.0156, -2.0078, -1.3050, -2.3632, -5.0753, -1.7741,
        -5.1132, -3.8656, -2.3275, -4.0765, -4.9834, -1.2251, -3.0318, -2.1686,
        -2.0730, -3.4000, -6.2806, -3.2928], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7700, -1.4288, -2.1278, -3.4641, -3.4716, -2.5833, -5.3629, -0.2777,
        -5.1210, -2.6750, -3.0850, -2.0539, -6.0890, -1.3351, -7.7644, -2.9869,
        -2.0108, -2.7524, -6.0360, -4.9744], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8412,  -2.7554,  -5.4476,  -3.5074, -13.5601,  -6.3724,  -6.3645,
         -7.3855,  -2.4250,  -7.2559,  -0.9802,  -7.6332,  -5.2009,  -2.7054,
         -1.2961,  -5.1864,  -3.2285,  -1.8447,  -4.3186,  -2.3242],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5864, -1.3024, -2.4255, -2.3464, -4.9165, -4.3330, -0.1575, -4.7893,
        -2.3393, -1.4040, -5.9423, -4.7349, -0.0219, -5.3997, -1.5907, -1.5409,
        -0.9065, -5.5417, -0.9980, -0.4228], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2691, -3.1781, -6.2301, -5.7046,  0.0535, -4.7713, -3.7240, -3.3071,
        -1.0265, -5.4736, -3.4997, -0.7201, -5.2949, -1.8809, -2.7982, -3.8457,
        -5.1026, -0.6476, -3.5701, -3.1920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5762, -5.8836, -5.8596, -7.1866, -3.4960, -7.3268, -4.8391, -6.5430,
        -4.1235, -5.7209, -3.1633, -4.7998, -7.8267, -6.0971, -2.0332, -5.3760,
        -4.4233, -3.4675, -3.1298, -7.4786], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2106,  -2.1517,  -3.9146,  -4.4624,  -0.8008,  -3.6615,  -3.3178,
         -2.3610,  -1.7254,  -6.1450,  -0.7442,  -2.6291,  -4.0677, -13.5949,
         -5.8777, -12.0918,  -3.8463,  -6.5253,  -1.7202, -15.4265],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7496, -3.0880, -2.8288, -3.3155, -6.6955, -2.2817, -4.0915, -2.8393,
        -2.8023, -2.6501, -5.5003, -3.4643, -0.8324, -7.3685, -4.1794, -3.4670,
        -2.0126, -5.3886, -0.2725, -5.9927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3953,  -1.9126,  -7.2795,  -4.8533,  -1.4752,  -5.4268,  -3.0503,
        -14.2931,  -6.4391,  -7.1109,  -7.7477,  -8.7543,  -5.4337,  -1.0950,
         -7.5606,  -4.2862,  -2.6183,  -2.7845,  -5.6075,  -4.8856],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1295, -7.9740, -8.4498, -7.6725, -8.0603, -9.0518, -6.0324, -4.8099,
        -4.0622, -4.8567, -5.5725, -1.9367, -4.9633, -5.4060, -2.5305, -2.6052,
        -5.1930, -3.0850, -4.3126, -4.0423], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3578, -6.7431, -3.4962, -2.2852, -4.0255, -2.3609, -2.3001, -4.2976,
        -3.5032, -0.7180, -3.6705, -5.5143, -3.8120, -1.7017, -4.3661, -5.3453,
        -1.4131, -0.6512, -3.7669, -2.2183], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3336,  -6.2940,  -3.2995,  -2.2394,  -1.7109,  -5.8369,  -3.5923,
         -1.0256,  -3.3882,  -2.7987,  -2.9764,  -4.2604,  -3.6120,   0.0805,
         -4.4476,  -3.4576, -18.2885,  -7.8003,  -9.0553,  -2.3005],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7012, -5.3473, -4.6684, -9.2340, -9.0607, -6.4395, -6.6977, -4.5634,
        -8.7125, -2.6435, -6.5952, -5.9938, -5.6187, -4.2023, -6.2223, -4.2081,
        -5.1246, -6.3631, -7.9700, -3.1509], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7845, -6.6857, -2.9054, -7.1164, -1.0248, -6.2212, -3.5646, -2.4537,
        -0.5952, -6.7481, -3.6905, -0.8746, -5.0077, -2.4716, -5.1222, -5.8010,
        -6.7955, -6.3715, -6.2136, -2.0069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8040,  -4.8570,  -2.1212,  -4.1637,  -5.8698,  -5.6239, -12.9247,
         -7.0929,  -1.5261,  -7.9488,  -3.5713,  -6.8615,  -4.6432,  -2.7730,
         -9.1180,  -4.3564, -19.2090,  -7.9978, -11.3567,  -3.8826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1948,  -3.2374, -11.0081,  -5.5520,  -3.4392,  -4.1586,  -3.4218,
         -1.9455,  -2.3134,  -6.0295,  -0.3166,  -2.7158,  -3.3645,  -4.7890,
         -6.1726,  -6.5721,  -1.5831,  -3.9429,  -2.5307,  -4.0110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5388, -5.5568, -4.3211, -4.2187, -2.8508, -2.4849, -7.7464, -4.4473,
        -0.3544, -6.6821, -2.1499, -1.0311, -3.7167, -3.7982, -0.5246, -4.9936,
        -2.1403, -3.1077, -4.2541, -5.3754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0215,  -5.7720,  -9.6526,  -4.9660,  -5.0482,  -2.4231,  -1.8534,
         -6.0946,  -1.5990,  -3.5572,  -1.7099,  -5.8798,   0.3696,  -2.7275,
         -7.0581,  -5.6163, -27.5455,  -3.3307, -10.1769,  -5.6791],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0292, -2.5894, -2.2643, -3.3450, -1.9885, -5.9813, -3.4517, -1.0522,
        -3.7695, -3.5260, -3.6130, -2.0966, -5.4993, -3.9698, -0.9544, -3.9373,
        -3.1136, -5.6059, -1.3767, -6.5486], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5881, -4.6733, -2.3108, -4.6941, -5.9529, -2.0543, -7.4693, -5.1509,
        -0.5359, -3.5595, -3.8061, -4.0608, -1.8780, -5.1580, -3.0692, -0.5000,
        -3.9915, -2.4332, -1.8234, -4.7736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9009,  -7.1167,  -7.7178,  -2.5699,  -7.1680,  -1.2577,  -8.9813,
         -4.8276,  -1.6502,  -5.6048,  -5.1532,  -0.0619,  -6.2664,  -3.5299,
        -20.6577,  -7.3418,  -9.9232,  -7.8171,  -6.7526,  -2.0574],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1759, -3.7559, -4.0745, -4.1077, -2.9997, -3.0564, -6.1372, -3.3321,
        -2.2905, -2.7831, -5.6957, -0.6188, -5.6391, -3.5569, -3.3228, -1.9442,
        -6.0378, -4.8500, -1.4421, -5.6353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6775,  -4.9265,  -1.5578,  -1.7251,  -2.9096,  -4.8529,  -0.3912,
         -2.6962,  -3.0692,  -2.0365,  -4.7510,  -5.4337,  -0.1567,  -7.5353,
         -2.5535, -24.7486,  -5.2363,  -8.4505,  -7.0271,  -6.6836],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4832, -3.4052, -4.2873, -5.1092, -0.9707, -2.3648, -2.8712, -1.4674,
        -5.1864, -5.4542, -0.6380, -6.3938, -3.2845, -8.0295, -9.4819, -8.2235,
        -6.8480, -3.0116, -5.3734, -1.5803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2524, -4.2511, -4.5557, -0.8110, -4.3882, -3.2709, -2.2016, -2.0641,
        -6.6107, -2.9632, -1.0149, -3.1272, -1.6728, -1.1053, -3.6715, -4.3020,
        -1.8381, -4.0686, -3.1918, -2.6837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3173, -1.5034, -5.2402, -4.5124, -0.3916, -2.9726, -2.0334, -1.9128,
        -3.0694, -5.4427, -3.7701, -4.6717, -3.2779, -2.2300, -7.2170, -4.4141,
        -0.9738, -3.3458, -1.8582, -2.3699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0153, -4.0232, -2.6426, -1.6311, -4.3125, -4.6663, -0.8056, -3.6909,
        -2.2272, -3.1005, -2.4929, -5.6889, -4.2504, -0.7627, -6.2329, -1.1334,
        -5.9589, -1.1885, -7.7045, -3.6957], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.4601, -5.9785, -3.1313, -6.6431, -1.0390, -7.4514, -1.7652, -6.0094,
        -3.0410, -2.8331, -3.3489, -5.1699, -4.8758, -0.8751, -3.9993, -3.2165,
        -3.2382, -3.0244, -5.9010, -5.1065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2859,  -6.1642,  -3.9957,  -0.9602,  -1.9951,  -2.2056,  -4.1177,
         -0.5855,  -5.7757,  -0.6256,  -1.1220,  -5.4041, -25.3847, -13.1334,
         -7.2347,  -8.0367,  -5.9529,  -5.9520,  -3.8379,  -0.4392],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3807,  -6.8155,  -4.7014,   0.3054,  -5.3236,  -6.8194, -10.2614,
         -7.9780,  -7.0383,  -7.7054,  -2.7518,  -6.6573,  -7.9427,  -4.6906,
         -3.4117,  -3.9457,  -3.6541,  -4.8551,  -0.6549,  -7.1889],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8818, -5.3470, -4.0236, -5.3151, -0.5735, -4.6591, -4.1525, -3.7992,
        -2.2336, -4.5107, -3.9366, -1.7403, -3.4043, -5.4031, -2.9284, -2.8090,
        -4.7799, -3.9557, -0.9035, -3.7992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3518,  -4.0340, -21.1193,  -5.7017,  -7.3252,  -6.9635,  -9.0485,
         -3.2058,  -5.9338,  -0.8079,  -3.3393,  -4.0448,  -3.0942,  -7.5676,
         -4.5858,  -0.6212,  -3.1266,  -1.7723,  -2.4926,  -1.9081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5798,  -6.4526,  -6.1713,  -2.4443,  -4.2781,  -5.0048, -11.0452,
         -6.3123,  -7.4998,  -6.0939,  -4.0200,  -5.7734,  -1.1105,  -5.7557,
         -5.1874,  -2.8981,  -4.0051,  -5.9000,  -0.9158, -11.5060],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4524, -3.3556, -7.5043, -3.1848, -2.0709, -5.6250, -4.9370, -0.7474,
        -2.7191, -1.5975, -1.9483, -2.6019, -6.6164, -4.2692, -2.5269, -3.1884,
        -1.5631, -1.5709, -6.2238, -3.6157], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5118,  -3.2117,  -3.8897,  -5.9043,  -3.4857,  -0.4675,  -6.6461,
         -3.1691,  -4.6172,  -8.7079,  -4.1482,  -0.1731,  -5.0044,  -3.9018,
        -15.5015, -10.6210,  -7.2543,  -8.2241,  -6.5889,  -2.2270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.7853, -3.3045, -2.5446, -4.6695, -4.5951, -0.2319, -3.1529, -1.9726,
        -2.9656, -1.9690, -5.0270, -0.6135, -3.2852, -2.3746, -3.1262, -5.7098,
        -5.7796,  0.2097, -5.4878, -3.2033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9504,  -3.3043,  -2.6396,  -5.6852,  -4.0449,  -1.0194,  -3.7837,
         -1.7751,  -2.0143,  -2.8137,  -6.4581,  -1.2488, -12.2348,  -4.3553,
         -1.9525,  -7.0711,  -4.9009,  -3.7751,  -5.7717,  -7.2644],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7122,  -2.8306,  -6.8066,  -3.5062,  -4.2507,  -3.8804,  -4.2632,
         -8.1100,  -7.3507,  -2.3521,  -4.8796,  -3.0504,  -4.6875,  -6.0628,
         -6.1246,  -1.2180,  -4.0930,  -4.4288, -15.6672,  -7.9186],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7432, -2.0016, -4.4690, -4.3437, -1.6414, -4.6039, -4.3725, -2.9328,
        -2.1662, -6.4408, -3.6225, -1.9994, -1.9408, -2.4966, -1.6726, -6.6255,
        -4.8264, -0.4650, -3.4675, -2.8027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7671,  -1.9280,  -1.4284,  -6.7914,  -3.4420,  -2.4189,  -1.4380,
         -2.2989,  -2.7569,  -2.2771,  -5.4380,  -0.7996,  -4.8181,  -4.9640,
        -17.4376,  -4.3692,  -9.1757,  -3.7830,  -5.9590,  -0.6392],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0943e+00, -2.0904e+00, -4.6914e+00, -3.9326e+00, -5.1104e+00,
         1.6832e-01, -2.5262e+00, -4.0348e+00, -1.4860e+01, -1.4063e+01,
        -4.8937e+00, -7.6898e+00, -3.0870e+00, -4.9812e+00,  9.9961e-03,
        -7.1498e+00, -2.8565e+00, -3.5065e+00, -2.8095e+00, -4.0243e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3085,  -2.4758,  -8.4548, -18.4236,  -3.3328,  -6.6890,  -5.8770,
        -17.5524,  -7.1759,  -6.5992,  -6.8103,  -2.0948,  -6.2279,  -4.3151,
         -1.9678,  -7.4943,  -2.9694,  -1.5382,  -4.1343,  -4.8253],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1075, -9.6943, -9.9629, -3.0639, -7.5035, -7.0960, -1.9938, -9.1031,
         0.1984, -8.6185, -3.4601, -4.6572, -5.3515, -5.9734, -2.4329, -9.1468,
        -3.2030, -4.2463, -2.6341, -5.7112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1172,  -5.6445,  -4.1225,  -1.8116,  -5.5854,  -3.5679,  -3.0719,
         -3.8301,  -6.1673,  -5.2034,  -2.6412,  -4.7078,  -2.8814,  -1.8321,
         -6.4755,  -4.4514,  -2.1450,  -4.7202,  -3.0376, -10.5745],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4764, -6.1183, -5.1043, -2.7805, -4.2207, -2.2300, -2.4621, -5.1669,
        -4.7387, -1.0953, -4.0590, -3.6156, -2.5131, -5.4834, -4.8049, -0.9133,
        -3.8448, -2.7242, -1.7404, -4.1068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9046, -2.3441, -1.3073, -4.9871, -5.0291, -0.1893, -4.8343, -1.1905,
        -1.9931, -5.4158, -4.0943, -0.4972, -4.6392, -2.5203, -0.6030, -6.0829,
        -4.4763, -0.7698, -4.6122, -1.3656], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1313, -0.9536, -5.7874, -3.3978, -3.2535, -0.1960, -5.7805, -3.3725,
        -1.5165, -6.5548, -2.4104, -1.1579, -2.4621, -5.4703, -0.0688, -6.1299,
        -2.5133, -4.7907, -2.3203, -5.7544], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6936, -1.4586, -1.6946, -5.2401, -0.7587, -5.2893, -4.6854, -2.6019,
        -4.6947, -5.1636, -2.0548, -3.5111, -2.8267, -2.6435, -2.7780, -5.3278,
        -0.0468, -5.1818, -2.5101, -4.4421], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.0839,  -6.1405,  -9.3753,  -1.7049,  -5.6751,  -3.5409,  -1.9019,
         -5.3942,  -5.4976,  -5.6030,  -7.3783,  -8.1497,  -0.4621, -11.6544,
         -8.3088, -39.1701,  -7.0970,  -7.4537,  -7.0067,  -7.9988],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7150, -5.0549, -1.2985, -7.4480, -3.0653, -2.2045, -0.6350, -5.7440,
        -3.6364, -1.5637, -4.6848, -3.5684, -3.8810, -3.6395, -4.9480, -5.1891,
        -2.8608, -4.2613, -2.3457, -4.2252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4879, -6.7346, -8.1316, -7.3013, -7.1004, -2.6589, -5.5902, -3.7714,
        -1.7486, -5.0969, -3.7545, -4.3357, -1.2465, -4.5852, -3.4413, -1.0261,
        -5.1042, -2.4248, -6.4842, -4.8949], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0224, -4.9418, -2.4435, -2.1201, -2.6043, -2.4229, -3.5667, -4.9021,
        -0.9025, -6.4652, -3.1971, -3.5214, -4.7578, -5.4446, -2.0641, -2.5201,
        -4.4932, -4.3829, -3.3305, -4.3424], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4912, -2.2207, -5.8893, -3.1776, -1.5929, -5.3419, -2.4726, -3.4781,
        -5.9092, -6.2589, -4.2545, -2.2574, -4.4902, -5.3055, -3.7386, -3.9493,
        -5.2231, -3.0422, -0.7847, -4.0408], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2151, -4.9036, -5.2976, -2.3411, -2.7559, -0.2328, -5.8524, -3.2874,
        -1.0187, -3.3152, -2.7645, -3.1438, -4.8461, -3.9441, -0.3808, -6.5734,
        -2.9601, -3.0568, -1.5140, -5.7563], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6384,  -6.2060,  -4.0790,  -1.5010,  -7.6152,  -4.7386,  -2.8497,
         -8.8937,  -0.9361, -16.1934,  -5.4767,  -6.9976,  -7.9911,  -1.6394,
         -8.1535,  -1.3628, -14.7242,  -4.1902,  -2.1814,  -4.0545],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1890,  -2.1524,  -7.0134,  -5.1987,  -1.7858,  -5.2195,  -2.1530,
        -11.3116,  -8.3312,  -8.1490,  -6.9949,  -4.8581,  -4.6560,  -0.8658,
        -11.1245,  -4.4156,  -1.7053,  -4.4530,  -4.3894,   0.5863],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4930, -7.1525, -0.7115, -6.5638, -3.2986, -4.7724, -1.9454, -6.9581,
        -4.1218, -1.0790, -2.3099, -2.7441, -3.4039, -1.5844, -5.6963, -3.8723,
        -0.9957, -3.8823, -1.2173, -1.4523], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2343,  -2.9089,  -3.0986,  -2.3879,  -5.8686,  -5.3282,  -1.6339,
         -5.7960,  -5.2549, -23.0088,  -6.9948,  -9.1446,  -7.4849,  -7.1066,
         -2.1613,  -7.6057,  -0.5665, -15.3102,  -3.0802,  -3.9985],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9055,  -5.1785, -11.0897,  -2.9545,  -2.9016,  -0.6126,  -5.4212,
         -3.9026,  -1.0238,  -3.3507,  -2.2411,  -3.3298,  -5.8087,  -4.6131,
         -0.4559,  -5.2059,  -3.4811,  -1.6636,  -2.0712,  -5.2094],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6681, -1.1949, -2.6414, -3.3098, -6.1345, -2.2692, -2.5633, -7.8817,
        -4.4539, -0.8116, -5.7689, -3.3622, -2.3148, -6.2039, -4.5297, -6.1002,
        -8.9037, -2.9684, -7.8860, -2.8726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1806, -7.3895, -4.3830, -1.4725, -3.7525, -5.7224, -1.4327, -3.5635,
        -3.0935, -4.2931, -7.7728, -3.5700, -2.2346, -5.3486, -4.0762, -3.3518,
        -0.4875, -6.3362, -4.1199, -1.3322], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1351,  -1.2938,  -6.8073,  -4.4837,  -7.3543, -11.8375,  -5.6617,
        -15.2367, -31.1090,  -9.6099,  -3.2082,  -3.9033,  -4.3226,  -3.7208,
         -2.4746,  -2.7808,  -3.7983,  -5.3680,  -1.0327,  -5.7209],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1166,  -5.1049,  -2.2485,  -3.0429,  -3.0934,  -3.9484,  -4.3273,
         -4.4823,   0.6072,  -1.8468,  -4.6394, -20.1995, -17.9225,  -9.0349,
         -2.2326,  -5.1175,   0.0384, -14.2853,  -4.1368,  -3.5366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7335, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3910,  -5.4412,  -4.0748,  -2.4763,  -3.0243,  -2.1736,  -1.8007,
         -5.7474,  -4.5455,   0.3905,  -4.7813,  -6.3471, -13.1847,  -6.1223,
         -7.4947,  -1.9248,  -5.7507,  -3.9415,  -0.7655, -11.3396],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0047, -4.1442, -0.4433, -4.4890, -2.6128, -2.7652, -1.9117, -6.1341,
        -4.2198, -2.1208, -5.4832, -2.8083, -2.3044, -1.6427, -6.8148, -3.2544,
        -2.1534, -3.8097, -2.9349, -1.6749], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5565,  -4.8870,  -4.8323, -15.3498,  -6.4346,  -5.7455,  -7.0463,
         -7.6055,  -7.8598,  -1.3986,  -8.2153,  -1.7387,  -6.2378,  -4.4610,
         -2.5203,  -0.8940,  -6.2460,  -3.1994,  -0.9501,  -2.5043],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7219, -1.6429, -3.9583, -2.0488, -6.3576, -3.0441, -1.5832, -3.4714,
        -2.4682, -2.2457, -1.2376, -6.9957, -2.4988, -6.7447, -3.4266, -3.4986,
        -3.2869, -4.0137, -3.7323, -0.3428], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3976, -2.8930, -1.4072, -5.2872, -2.8869, -0.4788, -2.8574, -2.6352,
        -2.0053, -1.8543, -6.1052, -3.9176, -1.9007, -4.7959, -2.0370, -3.4238,
        -4.7233, -4.0906, -0.4186, -2.6425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5812,  -3.6563,  -8.1931,  -7.4524,  -5.9661,  -1.7893, -12.2418,
         -3.3854,  -3.1715,  -6.3906,  -5.0761,  -1.7766,  -4.7673,  -2.0223,
        -13.2374,  -4.2994,  -4.6045,  -7.5213,  -6.3305,  -8.0254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6213, -6.1160, -3.3671, -0.7058, -3.6714, -3.0347, -2.3724, -1.6857,
        -6.4304, -2.9234, -1.0738, -3.2742, -2.4747, -4.2905, -0.1537, -6.2226,
        -2.8452, -1.0025, -3.0605, -3.7135], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8653, -2.8331, -2.4057, -1.0369, -5.7060, -2.9807, -2.4932, -4.1048,
        -2.3219, -3.0910, -3.9457, -5.0440, -0.6360, -2.9461, -1.9903, -4.2625,
        -1.9207, -5.2417, -4.3812, -1.2539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4570, -2.8412, -4.0353, -4.0232, -3.0879, -2.7474, -3.3635, -1.1487,
        -5.5871, -3.5318, -0.2064, -3.6726, -1.8623, -1.8465, -2.9558, -6.6612,
        -3.6122, -2.0820, -3.5256, -2.6240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4745, -1.5938, -6.8289, -2.9312, -2.7847, -3.3861, -5.2473, -0.5443,
        -2.2871, -2.6280, -2.2994, -1.4366, -5.8198, -2.6336, -4.7106, -2.5827,
        -4.1988, -1.0850, -5.6166, -4.3168], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6220,  -4.1436,  -4.3183,  -4.9058,  -2.9534,  -6.4008,  -4.7377,
         -1.6770,  -5.1405,  -3.2940,  -1.6860,  -6.5589,  -4.6272,  -0.1990,
         -2.4790,  -4.1538, -11.5532,  -8.4918,  -8.3856,  -2.0754],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4202, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6556, -3.3401, -1.4814, -2.0656, -5.6489, -4.1501, -0.2769, -4.4714,
        -1.3996, -2.0350, -2.3598, -6.6184, -4.0000, -2.0193, -3.3041, -2.5792,
        -2.0985, -4.0473, -4.7367, -0.3381], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4246, -6.3179, -2.9876, -0.9974, -3.6783, -2.7309, -3.0285, -0.9119,
        -6.0719, -2.7662, -1.4202, -1.9728, -2.1694, -1.4390, -2.2301, -5.5800,
        -2.4471, -1.5996, -3.7289, -2.2451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.6950,  -8.0548,  -6.1914,  -8.1008,  -2.9515,  -5.8395,  -0.9817,
         -7.8239,  -3.3292,  -2.9703,  -1.7603,  -5.0506,  -4.7318,  -2.0157,
         -5.6395,  -2.6399,  -4.5273,  -1.1028,  -6.0848,  -3.4942],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6493, -3.3502, -3.1494, -4.2627,  0.0624, -5.4498, -4.0328, -2.7990,
        -4.8684, -6.3181, -2.3491, -1.4049, -3.3904, -1.3440, -1.8815, -2.8729,
        -6.1489, -0.3697, -3.8781, -3.3125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9746,  -8.4225,  -4.8004,  -7.8591,  -3.1526,  -5.6280,  -7.0685,
         -4.9848,  -4.9830,  -2.6214,  -0.4031,  -5.8234,  -3.5469,  -1.2530,
         -3.2352,  -1.8981,  -3.1562,  -2.8444,  -5.4587,  -3.2797],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6625, -6.0375, -4.7768, -3.8322, -5.3629, -6.0320, -2.2717, -8.6195,
        -2.9729, -7.7185, -7.7074, -7.8764, -6.5018, -1.8081, -7.0094, -0.7895,
        -6.3981, -4.8842, -4.5209, -5.8166], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1280, -1.5559, -4.7765, -4.6106, -0.1796, -3.7213, -2.3586, -1.6541,
        -3.0835, -4.8128, -0.4173, -4.1691, -2.6313, -2.5126, -3.4611, -4.9474,
        -0.0855, -4.8985, -2.7530, -8.1186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1584,  -2.4101,  -0.3765,  -3.9652,  -1.9741,  -2.3981,  -1.9567,
         -5.7241,  -0.9695, -19.7021,  -2.5438,  -1.9799,  -5.0061,  -5.8174,
          0.5349,  -2.7361,  -6.0145, -18.4744, -46.2591,  -9.4261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3051, -2.9022,  0.1635, -6.0685, -3.9717, -2.2814, -0.3936, -6.0244,
        -3.2621, -1.1868, -3.6807, -2.8536, -2.0188, -2.5298, -5.3787, -4.5863,
        -2.1953, -6.9035, -2.0676, -2.3131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8348,  -3.3298,  -3.9819,  -2.0128,  -5.2940,  -3.0323,  -1.5622,
         -8.1164,  -1.4828, -13.1556,  -7.0156,  -1.8876,  -5.4238,  -4.4434,
         -2.4426,  -5.9555,  -5.0059,  -7.5037,  -5.0057,  -8.3960],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9223,  -4.1853,  -2.9985,  -2.9912,  -1.0787,  -3.5212,  -6.4969,
         -4.1069,  -1.5684,  -4.3357,  -1.9611,  -2.5394,  -5.1058,  -3.6156,
          0.8617,  -5.1625,  -3.0895, -21.7601,  -6.3632,  -8.0850],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6164,  -5.6414,  -4.7381,  -0.7775,  -6.2825,  -2.9453,  -2.9394,
         -2.5800,  -5.9484,  -3.5630,  -1.7554,  -4.3040,  -1.8258,  -2.7937,
         -3.7505,  -4.6192,  -1.7484,  -5.2618,  -3.6419, -12.3741],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2086, -1.7412, -2.5853, -4.2619, -5.5200, -0.8568, -2.5316, -2.3944,
        -2.3083, -5.1123, -4.4373, -1.4973, -2.4559, -4.0533, -2.3822, -1.5512,
        -6.5240, -2.7673, -2.7041, -3.8986], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0575,  -4.4171,  -4.9749,  -1.2304,  -3.8496,  -3.1441,  -2.0886,
         -3.0842,  -4.4341,  -0.6022,  -2.3060,  -3.6289, -25.9137,  -5.3192,
         -9.3902,  -3.4351,  -3.6753,  -1.3226,  -3.6114,  -5.1835],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3632, -10.0091,  -4.0282, -11.3702,  -4.3147,  -6.6335,  -0.9139,
         -7.9955,  -7.1459,  -2.8479,  -3.6072,  -3.4072,  -4.7077,  -4.1067,
         -2.3788,  -3.7641,  -2.7588,  -2.6118,  -2.5455,  -4.4158],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5427,  -5.8901,  -6.1475,  -1.2808,  -5.3662,  -3.9563,  -5.8021,
         -5.7764,  -2.4484,  -7.9806,  -5.2039,  -3.4637, -11.0273,  -4.1322,
        -12.3723,  -7.7478,  -4.3367, -29.2710,  -7.6476,  -7.8357],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1115, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0951, -2.7590, -3.8308, -0.8975, -5.9900, -3.1730, -0.6907, -4.5453,
        -1.9563, -3.7424, -1.1146, -6.3839, -3.3642, -1.6570, -8.3376, -2.6615,
        -3.0835, -1.7490, -6.3442, -3.4695], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0007, -5.6745, -5.1611, -0.5969, -3.0383, -2.5163, -2.4329, -3.4413,
        -6.3986, -5.1023, -3.5802, -3.8895, -1.5146, -4.1806, -1.7300, -6.7044,
        -2.7194, -1.6886, -3.3540, -3.0436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4384, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1320, -1.6537, -5.6828, -3.8023, -0.3903, -3.4892, -2.3514, -2.2810,
        -0.1687, -5.8062, -3.4223, -1.6966, -8.3953, -1.3000, -4.7806, -6.4905,
        -4.3296, -2.9235, -3.9540, -3.6529], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0184, -3.7172, -3.2700, -4.4159, -5.9895, -5.5569, -3.8134, -7.0036,
        -4.5086, -2.7084, -1.3836, -6.6500, -3.3552, -1.4823, -3.3564, -2.3761,
        -1.9535, -4.9423, -4.4532, -0.3732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6788, -0.1667, -5.1482, -2.2803, -2.2389, -0.8238, -6.3682, -2.7937,
        -2.2849, -4.3157, -3.0938, -3.3076, -2.2392, -5.1513, -1.1126, -3.9443,
        -2.7939, -2.4492, -0.7496, -6.2178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9201, -2.4520, -4.8813,  0.7641, -4.8443, -3.0040, -2.7971, -1.1565,
        -6.1047, -2.7011, -0.2204, -6.7208, -1.5322, -3.2425, -6.1449, -4.9303,
        -0.6385, -6.5734, -3.3781, -2.8696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5019,  -1.7155,  -1.0631,  -2.1277,  -5.2297,  -0.1252,  -3.0473,
         -5.2349, -13.2460,  -8.0805,  -5.0873,  -6.6192,  -2.8520,  -5.2730,
         -3.4536,  -8.1641,  -4.2569,  -1.7920,  -3.5694,  -4.4240],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6858, -3.8430, -0.0856, -3.7368, -2.3136, -3.8611, -0.9087, -6.0202,
        -3.0906, -0.4926, -3.0583, -3.3094, -2.4854, -4.0765, -6.6300, -4.1607,
        -2.3739, -3.3194, -1.7172, -2.2308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7559,  -7.9116,  -6.6884,  -6.8205,  -2.7954,  -6.3877,  -2.2612,
         -6.5639,  -3.3452,  -2.9711,  -5.3868,  -4.8744,   0.3329,  -2.6867,
         -2.7576, -10.0061,  -6.8312,  -7.6235,  -7.9489,  -2.4006],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7309,  0.3155, -3.9485, -2.3434, -2.8726, -2.3661, -6.1923, -3.0581,
        -1.1670, -5.1555, -2.4435, -2.3675, -1.4691, -6.5245,  0.1917, -4.0232,
        -2.5612, -2.2943, -2.0658, -5.4002], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7863, -5.5707, -5.0393, -1.5109, -4.6473, -4.2394, -1.7581, -4.0432,
        -5.7459, -0.9501, -2.4378, -2.5050, -4.1064, -1.6506, -6.7301, -4.3431,
        -0.1895, -3.7700, -2.9222, -4.0922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2046,  -3.7917,  -1.0716,  -5.6251,  -1.3698,  -2.1919,  -2.7094,
         -6.2649,  -4.7093,  -2.1354,  -2.9417,  -3.0150,  -3.9228,  -6.3515,
         -4.8081,  -0.1294, -10.1732,  -4.6726,  -1.7480,  -4.9386],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8475, -2.0064, -2.8320, -5.5967, -3.9198,  0.4024, -5.6879, -2.6105,
        -0.7417, -5.2263, -2.7935, -0.2316, -3.0440, -1.9729, -2.2575, -1.9348,
        -3.7116,  0.0671, -3.6932, -3.3761], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8810,  -4.6481,  -3.8483,  -4.9021,  -0.6522,  -2.8518,  -4.2927,
        -15.4216,  -4.6987,  -6.2988,  -6.0225,  -7.8710,  -2.5292,  -5.5189,
         -1.2289,  -4.5181,  -2.3357,  -5.4265,  -8.1388,  -7.9939],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8549,  -4.2238,  -2.7381,  -1.6465,  -1.9713,  -4.6526,   0.2720,
         -5.1874,  -1.3610, -11.1230,  -4.5397,  -7.4299,  -7.5470,  -3.7966,
         -5.8394,  -0.6855,  -5.3272,  -2.9875,  -2.5291,  -0.9469],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4028, -5.1257, -1.9434, -1.5506, -2.2257, -5.4782, -1.3085, -7.2969,
        -4.7274, -5.2280, -5.2436, -3.2626, -4.4821, -0.7467, -3.0689, -4.4116,
        -4.0925, -6.5449, -5.7038, -2.3773], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5413,  -2.2268, -16.8185,  -4.3072, -11.0917,  -5.2055,  -5.3634,
         -4.3046,  -0.8141,  -3.6955,  -3.5568,  -4.9650,  -1.6667,  -3.2138,
         -5.3462,  -4.2696,  -2.8625,  -7.9650,  -3.2858,  -1.8046],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0635, -2.7669, -5.4805, -3.0579, -8.3322, -5.1827, -2.9029, -4.5792,
        -0.6611, -6.1689, -3.4538, -0.9837, -3.8312, -2.7007, -3.7810, -0.8834,
        -6.2680, -3.2012, -0.4938, -3.5571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7175, -4.8253, -1.7893, -3.2079, -2.1008, -6.4197, -2.7878, -0.6358,
        -3.1648, -1.5942, -3.1281, -0.1951, -5.8127, -3.2805, -6.9304, -3.9741,
        -3.2065, -3.4331, -6.0272, -0.6144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2414,  -1.0972,  -6.2886,  -3.3340,  -1.1610,  -2.0113,  -3.9935,
         -2.7115,  -6.2904,  -4.2693,  -0.3686,  -4.3232,  -2.5036, -13.8512,
         -9.2715,  -8.1276,  -3.4786,  -5.0979,  -3.1327,  -0.8659],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2710, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2563, -9.1687, -2.6811, -2.3776, -6.6180, -4.4511, -1.6671, -4.1496,
        -2.6282, -2.6306, -1.9235, -5.5353, -3.0859, -0.5881, -4.5265, -1.7877,
        -2.6689, -1.5291, -4.9515, -3.3592], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4142, -2.9462, -4.2404, -2.2900, -2.4323, -1.3996, -5.0411, -1.2815,
        -3.7981, -2.8041, -3.7486, -0.9307, -6.3003, -3.0928, -2.2913, -4.6212,
        -2.3433, -3.5883, -0.8751, -5.3411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4022, -8.0021, -5.8150, -1.9991, -3.7218, -4.5350, -3.3362, -7.5169,
        -3.8489, -6.6104, -6.4011, -8.5127, -5.4757, -8.9443, -5.4553, -8.0223,
        -6.2210, -7.3220, -3.3611, -8.2136], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4400, -1.2510, -6.6341, -3.1490, -1.0376, -2.4386, -2.8562, -2.3834,
        -4.3715, -4.5713, -0.2790, -4.2655, -2.6132, -3.8158, -5.5557, -5.5505,
        -2.6041, -3.5503, -2.6117, -4.2796], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5220, -3.3119, -8.3672, -2.1754, -5.9826, -5.6382, -4.5986, -7.0719,
        -5.1703, -3.3325, -2.5960, -3.3924, -4.4854, -2.0031, -5.6601, -2.9788,
        -2.9452, -2.0311, -5.3648, -3.0989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9340, -3.3689, -2.0447, -3.0737, -5.5479, -2.1285, -3.0333, -3.4747,
        -4.0204, -7.2578, -3.9702, -0.2598, -3.0769, -4.6728, -3.2287, -2.4061,
        -3.9624, -4.4849, -1.4324, -2.2149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0164, -5.7169, -2.1163, -2.0061, -5.6371, -3.9977,  0.4631, -3.6046,
        -2.6391, -8.4227, -6.4914, -8.2350, -6.8534, -2.5546, -5.0640, -4.3506,
        -1.8909, -2.9176, -3.2857, -4.1802], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4344, -4.0102, -2.5176, -1.4297, -1.8757, -5.4170, -0.1246, -5.9333,
        -2.1105, -3.8607, -1.5097, -6.7686, -2.7990, -1.1616, -4.0798, -3.2703,
        -2.6434, -2.1263, -6.8844, -3.4883], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9673, -2.8008, -2.2669, -5.4353, -2.7968, -1.1226, -5.6022, -1.6028,
        -1.3678, -2.4527, -4.0718,  0.1479, -4.0903, -3.9099, -4.4748, -1.9976,
        -5.5865, -3.9388, -0.5083, -3.9684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1249, -3.6065, -2.6555, -4.3444, -6.0475, -2.1040, -4.9996, -3.3062,
        -2.6442, -2.8332, -5.6933, -4.0865, -1.9418, -4.2999, -1.3036, -3.6014,
        -3.8666, -4.4781,  0.0246, -1.9393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9509,  -2.2904,  -3.3307,  -5.0227,  -5.1424,  -5.0797,  -2.6288,
         -3.5130,  -3.8600,  -4.7468,  -2.7289,  -4.4781,  -1.0436,  -4.2328,
         -4.7018, -13.4192, -19.6388,  -9.0331,  -7.1539,  -6.8564],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1393, -5.7331, -3.1133, -3.1029, -1.0006, -5.7122, -3.1779, -1.3625,
        -4.0557, -2.8096, -1.5953, -1.9532, -6.3060,  0.2399, -4.1080, -2.6417,
        -2.2539, -1.9371, -6.3075, -2.9214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1820, -10.6859,  -6.0445,  -9.0596,  -5.5544, -15.0860,  -7.5031,
        -12.1892,  -8.0959,  -5.6805,  -3.2964,  -3.9543,  -3.2624,  -5.6454,
         -7.0862,  -6.3751,  -7.2183,  -3.6011,  -6.5768,  -3.7006],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1406, -2.9811, -1.3562, -3.0207, -1.9041, -2.7188, -4.1286, -3.7896,
        -0.2329, -4.8208, -2.1747, -1.6144, -5.4438, -4.6253, -0.3449, -3.4800,
        -2.6477, -3.4375, -4.7693, -6.2210], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9228, -7.3355, -3.2397, -3.7581, -1.9879, -5.0868, -4.3771, -0.7475,
        -3.2007, -2.8597, -1.5088, -4.8051, -4.6737, -5.4079, -4.0276, -3.2407,
        -2.7099, -1.6396, -3.5885, -3.0860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7222, -3.3096, -3.0810, -2.0275, -4.6365, -4.9864,  0.4769, -3.1509,
        -2.8628, -8.6589, -4.7592, -9.2465, -2.7065, -6.2236, -1.6699, -6.8536,
        -4.0854, -1.5652, -2.0778, -6.6256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8947, -2.2650, -6.2491, -1.5520, -3.2857, -2.8347, -2.5807, -2.1886,
        -5.3473, -4.0236, -0.2738, -3.8305, -2.9989, -2.7171, -5.5383, -5.3199,
        -1.5064, -5.9331, -2.3405, -4.6319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7013, -7.7404, -4.3475, -2.4632, -4.4039, -4.8292, -0.4674, -1.8459,
        -2.8821, -3.2721, -6.6942, -4.7847, -0.8849, -6.6919, -2.0348, -0.8349,
        -4.4824, -3.1678, -0.0951, -3.6427], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2453, -4.0457, -0.9081, -5.1387, -3.2959, -2.7407, -2.8379, -4.3063,
        -3.5259, -1.0082, -4.4751, -1.7768, -2.8838, -3.2522, -4.5354, -0.2154,
        -6.1117, -2.2076, -1.9855, -3.5247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7314,  -5.5968,  -2.9831,  -8.3625,  -6.6428,  -6.9996,  -8.3462,
         -4.7698, -13.1981,  -1.2387,  -4.2590,  -4.4968,  -2.7663,  -1.3705,
         -6.2090,  -3.6938,  -1.4284,  -2.3692,  -2.3113,  -1.5448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6113, -2.6471, -3.3532, -4.4581,  0.3399, -7.8646, -2.8884, -3.1633,
        -1.6469, -5.2415, -3.1425, -0.4495, -3.1053, -2.0718, -4.9804, -1.8293,
        -7.8790, -3.6658, -1.1831, -4.0999], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3362, -7.5448, -6.4861, -4.8711, -1.4253, -5.3622, -4.0267, -2.4079,
        -5.9018, -4.8541,  0.1531, -4.8611, -3.1897, -2.4394, -2.4688, -5.6994,
        -3.4682, -1.9434, -3.6714, -2.1097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9111, -3.0470, -3.5149, -3.4483, -6.0339, -1.4305, -4.0584, -6.4925,
        -2.5658, -2.6020, -7.1189, -3.9714, -6.7114, -3.3475, -4.1239, -5.5435,
        -7.6147, -5.4919, -2.5027, -7.3495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2314, -5.6852, -2.8871, -2.4368, -6.3712, -4.5329, -1.2468, -8.3482,
        -1.9745, -2.2629, -3.4495, -4.8441,  0.1603, -6.0286, -3.9445, -6.1276,
        -4.4774, -5.7772, -5.6140, -3.3250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9781, -6.0568, -5.9607, -5.2249, -2.1214, -5.2302, -6.1821, -2.0377,
        -7.8517, -4.2077, -1.2912, -5.9553, -3.7060, -3.9660, -2.8814, -7.1077,
        -4.2970, -2.6254, -5.7834, -2.9566], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0602, -3.8083, -0.9281, -6.0257, -4.3481, -3.5537, -2.0841, -5.6828,
        -2.7932, -1.6855, -3.5711, -1.7995, -2.2541, -3.4195, -5.0003,  0.0319,
        -8.1883, -3.6182, -1.0951, -1.6279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0819, -3.7957,  0.5696, -3.9714, -4.7379, -2.8547, -3.6968, -4.9966,
        -4.1691, -1.7344, -5.4896, -2.0922, -3.3198, -0.6052, -7.2532, -4.1549,
        -1.4881, -3.4965, -3.2905, -2.9153], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0786, -6.3776, -6.9212, -4.5268, -5.3290, -5.1154,  0.2931, -7.3202,
        -4.5640, -3.0285, -6.9487, -6.2280, -1.7279, -5.6091, -3.9451, -9.5396,
        -3.7807, -7.1904, -4.0429, -4.4718], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.7475,  -8.0734,  -7.2143,  -7.1791,  -2.4487,  -8.5379,  -0.1341,
        -11.1140,  -4.3435,  -2.7090,  -4.4222,  -4.9020,  -0.6434,  -6.1070,
         -3.6286,  -2.4135,  -4.7098,  -5.5905,  -4.7307,  -6.1746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4582, -4.5461, -5.5377, -5.7420, -2.8510, -6.6801, -4.8955, -2.2309,
        -5.7091, -5.0758, -0.3429, -4.2967, -2.0446, -9.8607, -3.0226, -5.7474,
        -5.4974, -2.6314, -4.3817, -2.2655], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6630,  -5.3297,  -4.7348,  -2.0804,  -7.2397,  -4.5077,  -0.3028,
         -6.6130,  -3.8173, -17.7561,  -5.1598,  -6.5666,  -7.1035,  -7.0051,
         -7.9648,  -7.9367,  -5.5796,  -4.2614,  -6.9537,  -5.0759],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5430, -1.7142, -6.0541, -1.4696, -2.3955, -6.0600, -3.6681,  0.2351,
        -3.8953, -2.1546, -2.9721, -4.5628, -6.2258, -2.9142, -1.7765, -2.5956,
        -1.8018, -2.8360, -5.6546, -4.3670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3627,  -5.3544,  -7.8090,  -6.7830,  -5.5984,  -1.6154, -16.4831,
         -4.5929,  -2.8618,  -7.0168,  -5.1091,  -0.9626,  -6.3274,  -1.7710,
        -12.4431,  -4.4144,  -6.9835,  -7.4511,  -7.0706,  -2.2029],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1044, -2.4059, -6.8448, -5.0335, -2.9788, -7.0318, -3.4793, -1.4177,
        -5.2508, -3.9754, -0.4916, -4.2512, -1.5422, -2.4897, -5.6903, -3.9352,
         0.1551, -3.0429, -2.5914, -5.2405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2466,  -1.8098,  -3.2030,  -3.0092,  -4.1841,  -5.4127,   0.2385,
         -2.1421,  -4.0543, -12.7374,  -5.7284,  -8.7996,  -3.3781,  -4.5710,
         -0.1891,  -5.0012,  -3.4118,  -1.7632,  -7.0156,  -4.4049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7588,  -1.4183,  -5.9914,  -0.7171,  -3.9026,  -5.1791,  -8.6850,
         -6.7964,  -2.6242, -10.8072,  -2.0552,  -6.9097,  -0.9489,  -9.0255,
         -3.7972,  -1.2021,  -1.8799,  -7.5225,  -0.4074,  -6.3594],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9286, -1.3656, -4.4159, -3.8195,  0.6582, -3.7508, -2.8470, -3.0427,
        -0.6112, -6.5128, -3.7738, -0.8611, -3.0771, -1.5375, -2.9158, -4.3671,
        -4.6061, -0.1473, -5.0502, -2.8881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.0173,  -3.4392,  -5.5387,  -7.7286,  -6.0235,  -8.7073,  -1.2508,
        -10.6742,  -4.6939, -11.5388,  -4.5085,  -5.5292,  -5.0583,  -0.3238,
         -2.0232,  -4.4482,  -2.6657,  -4.9347,  -4.2643,  -1.3380],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6712, -13.6631,  -5.0919, -27.3603,  -6.0905,  -9.5093,  -5.9925,
         -7.1176,  -7.1531,  -9.2715,  -3.5836,  -6.0269,  -3.5092,  -0.7895,
         -6.4557,  -2.6646,  -3.2098,  -6.1174,  -5.3855,  -1.4345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6788, -1.9671, -3.9746, -2.9874, -2.4342, -2.7564, -6.3744, -5.3278,
        -2.7661, -4.0365, -2.1921, -3.3994, -1.9899, -6.2821, -1.1300, -6.7582,
        -2.5787, -3.9005, -1.9142, -5.4743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2961,  -3.1274,  -4.3024,  -5.5315,  -5.8939,  -1.9850,  -5.6606,
         -3.3960, -10.3123,  -5.8799,  -6.0732,  -6.2305,  -2.2047,  -5.8352,
         -0.6845,  -5.4291,  -2.5454,  -3.0387,  -5.8581,  -4.7856],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7828,  -4.0732,  -4.4763,  -0.3671, -12.4625,  -1.7383,  -1.4511,
         -5.4434,  -4.1683,   0.1597,  -5.0704,  -1.8940,  -2.4739,  -4.5928,
         -4.2964,  -1.4583,  -4.1120,  -2.3691,  -4.3486,  -4.0562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0589, -6.1986, -3.9800, -1.4949, -5.0896, -3.4675, -3.3269, -4.8534,
        -1.9727, -4.6623, -3.9361, -5.8701, -3.2740, -1.2594, -6.9436, -1.8285,
        -4.3884, -1.1114, -6.9578, -2.9806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4371, -2.8325, -1.0158, -3.5389, -2.7151, -2.2030, -2.4729, -6.0703,
        -2.8235, -0.7415, -4.0251, -2.5686, -1.9586, -2.3785, -4.4804, -0.3815,
        -4.9345, -3.4007, -3.2234, -3.4500], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3107, -2.0176, -2.6615, -5.9769, -3.0783, -2.1785, -2.8191, -1.4369,
        -1.6535, -5.3013, -3.9131, -1.9466, -5.2396, -3.0233, -1.9936, -5.5527,
        -3.3027,  0.3961, -2.9465, -3.7401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9357,  -2.3809,  -5.1315, -20.4113, -10.8357,  -7.1084,  -2.2010,
         -4.7634,   0.0639,  -5.4098,  -2.3513,  -3.1001,  -0.5425,  -5.3652,
         -3.4168,  -0.9824,  -6.1295,  -2.5951,  -3.4571,  -1.3180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4371, -1.9958, -1.3643, -1.5795, -7.3569, -2.8721, -7.3931, -4.3033,
        -2.1723, -3.1683, -4.3029, -5.7410, -1.8274, -4.1230, -3.0482, -2.1017,
        -7.2024, -4.1812, -0.3714, -4.9917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2327,  -6.6523,  -6.1030, -11.7583,  -5.7599,  -5.5643,  -7.6332,
         -6.9126,  -4.9960,  -0.3366,  -6.9376,  -2.7373,  -2.5045,  -4.1046,
         -5.1042,  -5.2807,  -2.3133,  -8.1059,  -4.4240,  -2.2887],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6987,  -1.9833,  -2.8089,  -3.3841,  -5.1234,  -0.4579,  -3.8298,
         -2.4480,  -8.3814, -10.5072,  -8.0755,  -7.5133,  -3.1675,  -5.3673,
         -2.9458,  -2.6930,  -3.5884,  -1.8909,  -2.0807,  -5.9546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6326, -2.7764, -1.5906, -5.6027, -2.5529, -4.2265, -4.5379, -2.3556,
        -4.4366, -3.8066, -3.9322, -3.4183, -1.0713, -3.6415, -1.3093, -2.0776,
        -2.5075, -4.0454, -0.7661, -2.4614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0420,  -2.8295,  -5.3553,  -5.3051,   0.2910,  -5.3874,  -2.5033,
         -3.7939,  -6.0107,  -5.9352,  -2.3533,  -6.8006,  -3.7089,  -3.8179,
         -2.7701,  -3.4394,  -1.2712,  -2.9148,  -5.3245, -10.1641],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5663,  -6.1492,  -4.3860,  -2.4196,  -5.5452,  -2.9742,  -2.9417,
         -2.7447,  -4.2879,  -0.7603, -14.0764,  -2.5720,  -2.4297,  -1.6043,
         -6.5453,  -5.4394,  -2.7967,  -5.5061,  -2.4234,  -1.9970],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9589, -2.6704, -0.9807, -5.2463, -2.8949, -1.2863, -2.5783, -2.0415,
        -2.4812, -4.1727, -3.9796, -4.5830, -3.7801, -3.1045, -9.4573, -4.6009,
        -5.6476, -8.3356, -6.6451, -5.5794], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8858, -5.6172, -5.9811, -0.3418, -4.3697, -1.9908, -2.7565, -1.8915,
        -4.4844, -0.5319, -5.7800, -5.4873, -3.7139, -1.5297, -5.0264, -3.9443,
        -1.8325, -4.9097, -1.9279, -3.0589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4869, -5.4891, -6.1057, -7.6230, -8.1031, -2.4789, -8.6517,  0.4291,
        -5.2582, -4.9387, -3.8984, -3.1006, -6.6994, -4.8088, -4.4180, -4.0249,
        -2.6548, -2.0534, -2.0026, -4.3107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9666, -0.8614, -2.2979, -1.9719, -1.9640, -6.3802, -4.2790,  0.3205,
        -4.2219, -2.2085, -1.7959, -2.9757, -4.8206, -0.5664, -2.9710, -2.3629,
        -5.0683, -1.1478, -5.4720, -3.8335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8202, -2.9569, -0.5861, -5.6480, -0.8791, -4.3697, -5.8339, -4.3749,
        -5.4336, -5.1819, -4.0509, -5.8613, -4.2158, -6.5538, -7.6585, -3.8469,
        -6.9035, -1.2939, -4.3505, -4.0284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7443, -3.6483, -5.2132, -0.2488, -5.5746, -2.5641, -4.6022, -1.0385,
        -6.8297, -3.1660, -1.3810, -3.2023, -3.7418, -3.7420, -2.2080, -5.9434,
        -2.8407, -1.0896, -3.6984, -1.9269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6241, -4.4121, -0.8593, -3.1859, -3.9499, -2.1294, -4.0184, -4.6295,
        -0.2326, -4.9883, -2.1106, -3.5572, -2.4586, -6.5406, -4.2095, -1.8476,
        -2.4778, -3.5675, -2.4789, -6.2400], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6602, -7.5654, -2.9091, -5.6338, -0.4230, -6.6698, -3.0435, -2.5950,
        -5.0015, -5.5371, -0.3551, -3.7800, -3.0792, -1.6225, -2.5182, -6.5769,
        -2.7671, -1.8932, -2.0385, -2.4191], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9576, -10.6436,  -7.1585,  -7.3792,  -1.7223,  -6.7561,  -1.2178,
         -6.3695,  -3.1035,  -2.4203,  -3.6972,  -8.5533,  -0.1923,  -3.7007,
         -2.9544,  -2.4684,  -0.6331,  -6.6520,  -3.0555,  -0.5008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6295, -1.6114, -2.4908, -2.0332, -0.6585, -3.7770, -5.0856, -0.0263,
        -2.5251, -2.0619, -1.4547, -3.0036, -4.1691, -2.1798, -3.2190, -4.3011,
        -4.0227, -1.3645, -5.7263, -3.2498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0372, -18.1366,  -4.4185,  -8.2762,  -2.0036,  -5.6458,   0.1585,
         -3.8375,  -3.6277,  -4.1482,  -3.7368,  -4.5347,  -4.3318,  -0.1730,
         -4.8213,  -3.9617,  -4.2886,  -3.0317,  -4.5111,  -5.1280],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.4830,  -5.7629,  -4.4624,  -8.7423,  -6.0380,  -7.4998,  -1.8955,
         -7.3409,  -5.7033,  -0.9979,  -5.9681,  -1.8110,  -4.0507,  -1.4413,
         -5.7087,  -1.5749,  -5.3191,  -3.5689,  -3.2400,  -1.7653],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1584,  -7.3232,  -5.2761,  -7.8357,  -6.4778,  -5.9700,  -1.9286,
         -4.6732,  -3.6275,  -3.4579,  -1.9973,  -5.3190,   0.3699,  -5.2111,
         -2.5364, -10.8137,  -7.8651,  -5.6433,  -7.4614,  -2.5281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5230, -4.8540, -5.7593, -1.3274, -4.3407, -4.8372, -9.7728, -5.1810,
        -7.7667, -3.1325, -6.1367, -3.4902, -2.2008, -3.8974, -2.7501, -4.6377,
        -4.8401, -6.4081, -1.7457, -5.2569], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7620,  -2.3326,  -5.5335,  -1.9783,  -4.3670,  -2.8742,  -2.7970,
         -5.9576,  -5.3368,  -0.3936,  -4.4990,  -2.1739,  -8.8574,  -5.4955,
         -6.1666,  -9.0842,  -7.9188, -10.0092,  -5.5039,  -2.1218],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0856,  -0.6614,  -3.9715,  -5.4269, -16.9457, -25.4053, -10.6197,
         -2.7937,  -6.4041,  -0.9610, -11.8110,  -4.6830,  -6.3993, -13.9809,
        -20.7249,  -3.9750,  -7.9594,  -2.2688,  -4.6925,  -3.1379],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4980, -15.2056, -10.7545,  -8.8059,  -2.4814,  -4.9208,  -1.3924,
         -7.5059,  -3.1549,  -2.9724,  -2.1916,  -6.3182,  -0.0367,  -5.1660,
         -2.3631, -16.2737,  -7.0251,  -6.2000,  -7.7211,  -2.7190],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7155, -1.3180, -2.4038, -5.1227,  0.4736, -3.1784, -4.4549, -9.2478,
        -7.4565, -2.6166, -7.7225, -2.3923, -5.6264, -2.4294, -4.3061, -2.6823,
        -3.4838, -4.8397, -4.6683, -0.4592], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6881, -4.8302, -2.2909, -4.1998, -4.4750, -3.2245, -3.0417, -6.7849,
        -5.2863, -4.3073, -5.3698, -1.9289, -3.5221, -0.7467, -5.2776, -1.4471,
        -6.8693, -3.1025, -3.7186, -2.5473], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5768, -0.9287, -2.3930, -3.1433, -5.5162, -4.9091, -4.6010,  0.1091,
        -3.0564, -2.7171, -2.2972, -3.3068, -4.1331,  1.0056, -4.5082, -2.0103,
        -2.8852, -1.5751, -6.3713, -2.5012], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2068, -6.0392, -3.2963, -4.0999, -2.9977, -4.6751, -0.0968, -5.8856,
        -4.0370, -4.0270, -2.8922, -4.4980, -6.3219, -5.0813, -3.7054, -3.4301,
        -3.9583, -2.4062, -2.7882, -5.9230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8950,   0.4278,  -2.3655,  -5.1767,  -3.9315,  -2.1438,  -3.9898,
         -5.6750,  -3.9136,  -4.4573,  -4.0215,  -3.0504,  -2.1164,  -1.5957,
         -5.1007,   0.4461,  -3.0158,  -3.6162, -13.8203,  -5.0009],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8191, -0.5452, -6.2053, -3.2525, -1.5275, -4.2439, -3.8831,  0.7749,
        -2.7220, -2.1278, -2.3853, -3.0808, -4.9105, -0.2999, -4.3484, -1.6862,
        -3.0784, -2.5986, -5.9475, -3.0872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8776, -1.2221, -5.2408, -3.1167, -2.2795, -1.7067, -7.9394, -0.8054,
        -3.6403, -4.7158, -5.7176, -5.2699, -7.8754, -7.2173, -8.9583, -3.4863,
        -3.8160, -8.0262, -4.4540, -0.3024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1201, -5.3423, -2.5050, -4.6912, -4.5272, -4.1087, -2.6661, -5.9380,
        -2.7229, -0.8506, -3.5955, -3.5683, -3.7169, -3.4323, -5.0370, -0.9255,
        -3.6940, -1.7842, -2.4371, -3.7803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1438,  -2.6544,  -2.2239,  -6.9599,  -1.6584,  -6.8968,  -8.4719,
         -0.2458,  -3.4319,  -4.8817,  -7.4918, -14.9996,  -6.7366,  -7.6594,
         -2.6300,  -4.6179,   0.3579,  -6.7880,  -3.0193,  -3.7825],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9698, -2.3605, -0.9899, -6.1883, -3.5533, -1.0253, -2.3907, -2.8180,
        -1.8861, -2.7727, -6.3637, -3.5634, -1.7344, -3.0097, -2.4500, -3.5463,
        -0.2131, -6.4124, -3.0115, -1.0526], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.7069,  -1.7006,  -2.3933,  -2.9059,  -5.0490,  -5.6653, -10.6633,
         -2.6713,  -3.5340,  -5.3473,  -5.1401,  -0.6415,  -6.3532,  -4.1754,
         -2.7048,  -4.1973,  -6.1132,  -4.9211,  -3.3660,  -8.0728],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5664, -2.7342, -4.4417, -2.7446, -1.6013, -5.7394, -5.3655, -1.9782,
        -5.1601, -4.9284, -1.1168, -7.4461, -4.5140, -1.2970, -5.0746, -4.6467,
        -1.8827, -4.4265, -5.4475, -4.7074], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5370, -4.6853, -2.3600, -2.9654, -2.1067, -1.1948, -2.8455, -3.9967,
        -0.3643, -3.4824, -3.2760, -2.7948, -6.9305, -4.4790, -3.2943, -4.3887,
        -1.8143, -3.8967, -4.0522, -6.5905], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8770, -17.2254,  -3.6398,  -9.3862,  -2.4218,  -7.1373,  -0.2018,
         -1.7831,  -6.6630,  -5.6508, -11.0949, -10.7887,  -9.5981,  -2.4711,
         -4.7977,  -2.0051,  -6.1421,  -3.1123,  -3.1807,  -7.4418],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3214, -3.9907, -3.0898, -6.9859, -3.6787, -2.5016, -2.1693, -2.6053,
        -5.3692, -2.9952, -4.3182,  0.0793, -7.2465, -1.3712, -2.5837, -1.7902,
        -6.3017, -2.5792, -0.9267, -3.6861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1258, -3.9862, -3.1223, -3.7827, -1.7063, -5.6015, -8.5768, -2.1359,
        -3.8488, -5.7738, -5.2742, -2.3749, -4.8134, -8.4423, -2.7982, -6.1305,
        -5.2553,  0.4740, -5.0292, -2.9956], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1412, -3.2098, -0.4292, -2.7851, -2.3951, -3.6185, -1.2230, -5.6879,
        -3.0916, -0.3673, -4.7094, -2.9067, -1.1537, -0.9602, -5.3340,  0.0825,
        -6.9302, -5.3295, -2.9632, -2.7687], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6218, -4.1058, -4.5416, -6.1218, -4.4193, -8.2892, -5.3440, -5.0286,
        -3.2389, -3.0977, -3.7920, -4.4226, -6.3443, -5.9945, -2.3848, -3.6362,
        -2.9627, -3.1591, -5.3013, -6.1554], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2857, -10.5684,  -7.6264,  -4.6714,  -8.3056,  -5.0702,  -4.5043,
         -4.4579,  -6.0236,  -4.9592,  -1.5741,  -4.7821,  -2.1456,  -2.9905,
         -4.9751,  -4.2619,  -0.3652,  -4.0705,  -2.3370,  -3.8649],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7977,  -5.6731, -11.3596,  -3.9695,  -6.8230,  -1.3771, -12.3626,
         -3.2960,  -6.6818,  -9.6215,  -4.6490,   0.1602,  -5.0451,  -5.8144,
         -3.3325,  -2.8031,  -4.2854,  -5.6832,  -1.2363,  -2.8519],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9260,  -5.6197,  -1.2194, -10.5326,  -7.9647,  -8.3432,  -6.7747,
         -2.7326,  -5.4676,  -6.9592,  -4.4649,  -4.2316,  -4.3168,  -3.0070,
         -5.1427,   0.6940,  -2.5683,  -3.0137, -10.7077,  -7.1092],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2284,  0.0740, -3.5954, -2.5440, -3.6136, -4.0393, -6.2905, -3.6762,
        -1.4894, -4.3295, -2.1356, -3.0568, -6.1915, -4.9443, -0.6828, -4.2377,
        -3.4244, -0.9218, -4.3324, -4.2661], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3231,  -6.4003,  -2.3652,  -0.8171,  -2.2925,  -3.0498,  -2.4394,
         -1.6519,  -5.0435,   1.0104,  -4.0050,  -2.2216, -11.5180,  -4.5546,
         -5.5004,  -6.1486,  -8.0703,  -6.1792,  -7.0333,  -0.7186],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4609, -1.0040, -6.9978, -4.3475, -0.1278, -6.1582, -1.6711, -3.5745,
        -2.9348, -6.9415, -4.2526, -2.4528, -3.1545, -2.4717, -3.5128, -1.6148,
        -5.8462, -2.7968, -1.3815, -2.5090], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5131, -0.2698, -3.7446, -1.7571, -3.7423, -6.3875, -5.8379, -1.7026,
        -9.9682, -4.4527, -6.3528, -2.8231, -5.9313, -5.0245, -2.3735, -4.4002,
        -4.2668, -3.5506, -1.3531, -6.7476], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2677,  -3.1469,  -0.9482,  -5.8153,  -3.4594,  -0.6140,  -4.0319,
         -4.1704,   0.2521,  -2.5796,  -3.1967, -16.3874,  -4.1942,  -7.9426,
         -2.2956,  -5.1628,   0.3670,  -4.8070,  -2.9060,  -3.3327],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6693,  -3.0639,  -1.0910,  -9.6865,  -3.9365,  -1.9554, -16.1806,
         -8.7001,  -2.1582,  -5.7647,  -3.6764,  -3.9626,  -4.6692,  -9.1742,
         -6.7114,  -9.6416,  -7.7710,  -2.1118,  -2.5104,  -6.0045],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6449, -3.1758,  0.3416, -6.5960, -1.2361, -2.1563, -3.0646, -3.3077,
         0.7604, -1.3043, -2.5931, -3.7427, -0.9855, -7.7986, -3.1433, -2.1284,
        -2.7379, -3.0653, -2.3730, -0.7851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7352, -5.7731, -4.1606, -3.7306, -5.0508, -7.3726, -1.2613, -4.3427,
        -3.6813, -2.7036, -5.0041, -5.8124, -0.8867, -4.9085, -3.2470, -3.5701,
        -4.3753, -5.8467, -1.2471, -3.7570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6421,  -3.2507,   1.1284,  -4.1410,  -3.0425,  -5.3623,  -7.0944,
         -6.0285,  -7.9180,  -4.9073,  -7.0409,  -1.0154, -10.6858,  -4.6890,
         -3.6396,  -6.0971,  -4.6712,   0.3880,  -3.4386,  -3.6685],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0026, -13.8480,  -7.4515,  -7.7708,  -6.9326,  -2.1385,  -6.4515,
         -0.0870, -14.2262,  -3.6618,  -3.0809,  -6.4370,  -4.1231,  -3.4880,
         -2.3804,  -7.6090,  -6.1428, -10.2953,  -5.2403,  -7.5436],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1357,  -6.2297,  -3.3580,  -1.1610,  -4.7659,  -4.9117,  -1.0184,
        -14.8294,  -2.2484, -13.2029,  -8.0872,  -4.7314,  -7.1937,  -3.2797,
         -5.0160,  -0.9416,  -3.7374,  -3.0790,  -3.0458,  -4.8676],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5134, -5.0406, -3.3879,  0.9658, -3.9149, -3.2224, -3.0388, -2.5415,
        -5.3150, -2.7513, -1.1976, -5.3875, -2.2854, -2.4695, -2.3139, -6.3110,
        -3.0259, -2.9728, -7.9851, -1.7767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3243, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9925,  -7.2016,  -2.0332,  -0.5678,  -4.9332,  -3.5592,  -1.3294,
         -4.5610,  -4.4846, -16.6018,  -6.9853,  -7.8976,  -7.2414,  -2.6539,
         -6.7655,  -0.8313,  -5.9904,  -4.8443,  -2.7900,  -4.9817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3642,  -5.6002,  -4.4851,  -5.0449,  -4.1411,  -4.9551, -12.0488,
         -6.1113,  -3.2787,  -3.7435, -10.1870,  -6.3526,  -6.4648,  -6.6555,
         -4.8532,  -2.5211,  -3.2733,  -3.4033,  -5.6570,  -5.7626],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6209, -1.8269, -2.7786, -4.1358, -4.6299, -0.2071, -4.4981, -1.6895,
        -3.0053, -6.4862, -4.4102,  0.7905, -3.5438, -2.1286, -2.3234, -1.3681,
        -6.3242, -2.3544, -0.1495, -3.6693], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.9664, -13.5630,  -9.8611,  -2.2908,  -6.2483,  -0.5533, -22.8059,
         -3.7959,  -2.2305,  -5.3842,  -5.7825,   0.1022,  -6.5977,  -2.1930,
         -9.9119, -10.6185,  -9.7524,  -6.3385,  -2.8212,  -5.8411],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0882,  -2.7321,  -3.6589,  -2.5440,  -4.4456,  -4.2623,  -1.6047,
         -3.1537,  -2.8489,  -4.8948,  -5.9485,  -5.3978,   0.3833,  -2.8760,
         -6.3731, -19.2405, -22.2102,  -7.2413,  -6.9678,  -5.8541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4617,  -2.7815,  -2.5723,  -4.5125,  -1.2028,  -5.8917,  -2.8848,
         -0.8494,  -4.8328,  -2.3178, -18.1852,  -6.7943,  -7.3558,  -2.2728,
         -5.6926,   0.0282, -19.3776,  -3.0097, -12.7470,  -6.7844],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8268,  -1.4996,  -2.6160,  -4.0055,  -0.5732,  -6.6939,  -3.7661,
         -0.7767,  -2.1379,  -3.1040,  -2.7686,  -7.4845,  -5.0743,  -1.6890,
         -5.4364,  -4.4178, -10.2500,  -5.6420,  -7.2352,  -6.8226],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9141, -2.3274, -3.1691, -4.3188, -1.1738, -5.0217, -2.7995,  0.1025,
        -4.6520, -2.7470, -1.9847, -5.0750, -4.5244, -2.3255, -4.3101, -2.6695,
        -2.9957, -2.2427, -5.9110, -3.1640], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0778, -3.0358, -3.7344, -3.8146, -4.1852,  0.5946, -3.8314, -2.6830,
        -2.7189, -1.8329, -6.0176, -2.7152, -2.3019, -1.3999, -2.4244, -1.7124,
        -5.7581, -4.3688,  0.3226, -6.0283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9862, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2015,  -3.5052,  -0.5359,  -5.7360,  -1.3334,  -2.1926,  -2.6841,
         -6.2981,  -4.5498,  -1.9816,  -2.9609,  -2.9567,  -3.7987,  -6.0553,
         -4.6354,   0.1318, -10.1919,  -4.5993,  -1.9380,  -4.8667],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8482, -1.9196, -2.9406, -5.3409, -3.6379,  0.5823, -5.4142, -2.5640,
        -0.8653, -5.1387, -2.5264,  0.0906, -3.1053, -1.8529, -2.1823, -1.8416,
        -3.4672,  0.4412, -3.4570, -3.1329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0705,  -1.4825,  -1.9392,  -2.9290,  -2.7044,  -4.1987,  -4.4723,
          0.1980,  -6.2325,  -2.8456, -18.1757,  -6.0788,  -7.9212,  -6.9471,
         -2.3927,  -5.0631,   0.6275, -11.2916,  -1.7252,  -3.9113],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6278, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9922,  0.0993, -5.2047, -1.9700, -3.2275, -1.4221, -6.6252, -2.7681,
        -2.0813, -2.2299, -1.5584, -1.4235, -1.5070, -5.4999,  0.6912, -3.2733,
        -2.2208, -2.0484, -0.8776, -6.3087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3951,  0.9910, -5.1674, -3.6377, -3.7584, -0.5306, -4.7356, -4.1300,
        -1.1708, -2.7855, -2.3990, -1.5122, -4.2825, -3.8568,  1.2648, -4.4608,
        -2.1579, -2.6034, -6.4394, -5.3901], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4503, -2.6290, -1.6127, -3.9228, -2.8859, -1.6595, -5.3482, -3.6413,
         0.8641, -3.6035, -2.6795, -2.4264, -1.4106, -6.6349, -2.7632, -0.7452,
        -5.5172, -2.1442, -2.6560, -2.1683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0017, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.5540,  -5.6625,  -8.1325,  -1.9021,  -6.9762,  -0.8628,  -4.6290,
         -3.2238,  -3.3872,  -4.6484,  -4.8283,  -0.3014,  -6.6246,  -3.3752,
        -11.5769,  -5.9451,  -8.4697,  -1.0378,  -5.5472,  -3.8206],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0051, -8.9400, -0.7293, -7.1154, -4.3343, -3.5218, -1.9102, -5.5254,
        -3.5688, -0.3942, -4.7519, -2.5619, -1.2092, -3.4791, -4.6054,  0.0181,
        -6.1973, -4.5632, -2.8860, -5.7954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4518,  -0.1151,  -5.0485,  -3.2570,  -2.3977,  -2.5364,  -6.3196,
         -2.8879,  -0.9095,  -4.1518,  -1.4872,  -2.8131,  -3.6131,  -4.3225,
          0.9987,  -5.4854,  -3.1902, -10.6403,  -7.1647,  -7.1445],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4244,  -2.0741,  -5.6537,  -4.3926,  -1.6021,  -3.7708,  -4.2203,
        -12.2860,  -3.1351,  -7.7098,  -5.3780,  -7.5098,  -2.3893,  -5.8964,
         -0.8104,  -5.6370,  -3.3254,  -3.1194,  -6.8985,  -4.4958],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4188,  -1.1663,  -1.6177,  -6.7868,   0.6098,  -4.3424,  -2.6233,
        -12.1580,  -4.5014,  -9.0449,  -1.0562,  -5.1981,   0.5727,  -5.3756,
         -3.5127,  -2.2794,  -2.2548,  -5.4990,  -3.8952,  -1.1354],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5911, -1.3859, -5.5071, -3.3463, -3.4162, -0.7381, -6.1062, -2.5934,
        -0.3246, -2.0465, -1.9128, -2.4555, -6.5507, -3.2235,  0.5609, -3.2362,
        -2.7990, -3.3522, -5.1159, -4.0906], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3920, -1.3054, -4.8328, -3.7970, -3.9856, -2.9245, -2.2072, -0.7571,
        -5.6040, -2.9053, -1.5034, -2.5648, -1.6758, -2.5391, -5.9424, -4.1232,
         0.1293, -3.9936, -2.0631, -1.8155], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8401, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8846,  0.3071, -4.6370, -4.8404, -4.5810, -3.3384, -2.8861, -9.0160,
        -4.1233, -2.7674, -8.6548, -2.9260, -3.7919, -3.6063, -6.3550, -4.7410,
        -1.4600, -3.0524, -3.8817, -2.5033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0496, -6.0127, -6.4337, -4.7246, -5.3464, -6.3497, -3.8781, -5.1693,
        -7.0685, -2.2484, -8.4201, -8.9111, -8.7124, -3.2064, -5.2167, -3.7321,
        -7.6322, -5.9251, -4.9855, -5.5801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.6802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2772e+00, -3.2605e+00, -2.2165e+00, -3.8306e+00, -1.0426e+01,
        -7.5212e+00, -7.4982e+00, -3.5883e+00, -1.1945e+01, -5.5741e+00,
        -6.3247e+00, -7.1656e+00, -2.0620e+00, -5.2258e+00, -5.5313e-05,
        -1.5789e+00, -4.9341e+00, -2.7061e+00, -5.7556e+00, -5.3534e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8298, -4.1186,  0.7930, -3.5083, -2.3120, -3.2041, -5.1449, -5.6612,
        -0.9419, -3.1534, -2.4388, -1.4007, -4.1663, -3.7254, -0.1028, -4.2723,
        -4.8668, -2.7667, -4.9677, -7.1788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8102,  -7.6570,  -6.6003,  -2.7028,  -5.5946,  -1.9372,  -5.8919,
         -4.3049,  -2.9914,  -7.6647,  -4.5860,  -0.0443,  -2.7999,  -5.8472,
        -15.1065,  -8.6243,  -9.7588,  -1.7492,  -6.3222,   0.4660],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1720, -6.0138, -4.4044, -6.4435, -4.7985,  0.2809, -9.4110, -2.8967,
        -3.0215, -2.2317, -4.4866, -3.8997,  0.4481, -5.0767, -1.8209, -2.5018,
        -0.7156, -5.2938, -2.2033, -2.4895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5822,  -7.2313,  -1.8033,  -6.3520,  -1.4012,  -2.0207,  -2.7111,
         -3.4155,  -3.3814,  -4.5635,  -0.0805,  -4.1729,  -1.6461, -16.8386,
         -7.1055,  -6.7282,  -7.8569,  -2.4807,  -5.6380,  -0.0199],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6484,  -3.3981,  -7.5801,  -4.5630,  -3.0166,  -4.1852,  -4.5908,
        -10.4342,  -3.9963,  -6.1133,  -4.2281,  -3.0201,  -2.4273,  -3.1990,
         -1.0057,  -2.5509,  -4.9273,   0.3701,  -5.4613,  -1.9504],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3943, -5.9771, -0.8231, -7.7029, -2.7949, -3.0851, -0.2156, -6.5790,
        -2.7671, -0.3500, -4.8130, -2.2310, -3.2913, -1.0082, -6.3296, -2.6453,
        -1.4516, -2.8634, -1.7826, -1.4894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1431, -0.9778, -3.0570, -4.8015, -4.9152, -4.2819, -0.6710, -7.1217,
        -3.7859, -2.3621, -6.1205, -3.3243, -5.9607, -1.4548, -5.3773, -3.6936,
        -0.9253, -4.1833, -3.4506, -4.1378], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2085, -5.3589, -3.2753, -2.0770, -2.6951, -1.9962, -3.3697, -1.6766,
        -5.2610,  0.8021, -6.1605, -5.4797, -1.5763, -5.0251, -6.4321, -1.0212,
        -6.5038, -3.0348, -2.5978, -6.2547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0916,  -5.1925,  -3.1520,  -1.9987,  -1.7997,  -4.0799,   0.8770,
         -4.1097,  -2.1145,  -2.2555,  -5.0807,  -3.2839,   0.8197,  -2.6573,
         -3.2150, -10.2078,  -8.9452,  -4.7584,  -7.4261,  -2.7864],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9008,   0.7962,  -3.6180,  -3.1939, -10.7217,  -5.2944,  -6.0996,
         -6.4151,  -2.6572,  -4.5942,  -0.2430,  -6.0027,  -2.4220,  -2.0542,
         -6.0224,  -4.6042,  -2.3221,  -3.3630,  -2.7335,  -2.3063],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1942, -0.3729, -4.7642, -4.4930, -5.9480, -1.2282, -5.4520, -3.6526,
         0.4489, -4.1610, -2.9977, -3.9552, -1.2590, -5.0484, -3.8355, -0.3266,
        -7.4118, -2.2235, -1.9209, -3.7265], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0502, -4.1603, -3.6448, -2.3283, -2.1676, -5.9486, -3.3624, -1.5723,
        -2.6773, -1.5038, -2.5582, -5.9552, -3.4679,  0.3882, -2.7075, -2.5400,
        -2.6091, -3.3221, -7.5603, -5.0226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7891,  -5.6412,  -2.5768,  -2.8399,  -2.3637,  -5.1792,  -0.1554,
         -6.2043,  -2.5992,  -2.7611,  -5.8323,  -4.1787,  -0.4807,  -6.4648,
         -5.1818, -10.3910, -15.5401,  -3.9352,  -7.4631,  -1.5294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4332,  0.3200, -3.9119, -2.7634, -2.7836, -1.1460, -6.9026, -3.4067,
        -1.3248, -2.1870, -1.5897, -4.0295, -2.6084, -6.5216, -2.3224, -1.3643,
        -8.4505, -2.7722, -1.6082, -2.1522], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8007,  -3.9417,  -3.0909,  -5.5681,  -4.3491,  -1.4982,  -4.3543,
         -1.8848,  -3.4642,  -3.0556,  -3.2120,  -5.6915,  -4.2256,  -4.3628,
        -14.3456,  -2.1068,  -8.7210,  -1.3584,  -5.3037,  -0.9583],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0683, -3.2911, -4.9286, -4.0550, -1.7464, -5.3497, -2.6541, -2.6717,
        -1.7588, -5.4265,  0.5267, -1.9922, -3.8948, -3.5854, -5.5298, -5.8687,
        -5.1981, -4.8009, -5.6338, -3.6372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2679, -1.8297, -5.2417, -3.2609, -1.3021, -5.1889, -1.6194, -2.1291,
        -3.3746, -4.1110,  0.6788, -3.2613, -2.7378, -2.5701, -2.3963, -5.7959,
        -3.3391, -0.9784, -2.3236, -3.9738], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1466, -2.1377, -2.3653, -1.5066, -5.3501, -3.8936, -0.9816, -5.0012,
        -1.5579, -2.2926, -2.8552, -3.8842,  0.6319, -3.6212, -4.1231, -4.3017,
        -0.7752, -5.8049, -3.0370, -1.0070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6884, -1.2580, -3.6193, -1.5918, -2.3520, -3.0230, -4.5092, -0.3772,
        -2.4732, -2.4174, -1.9030, -7.7400, -3.6697,  0.2279, -3.8389, -2.2947,
        -1.8691, -1.5460, -6.4436, -3.6850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8536, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1518, -2.4502, -1.9110, -2.4568, -2.6000, -6.4726, -2.6485, -0.1837,
        -3.7834, -2.1238, -1.6759, -2.6005, -5.3857, -4.8016, -2.4392, -2.6230,
        -1.6498, -1.3098, -6.7022, -4.4484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0024,  -1.3252, -12.3774,  -5.2264,  -7.6323,  -8.1265,  -6.2726,
         -6.1390,   0.0803,  -2.7945,  -4.7129,  -3.1212,  -6.3059,  -4.4242,
         -0.5213,  -4.1393,  -3.1974,  -2.5731,  -2.3229,  -5.1327],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8559,  0.4883, -5.8744, -4.1039, -5.5632, -6.5787, -3.4507,  0.0084,
        -4.1986, -1.7768, -3.0126, -2.9055, -6.2090, -3.4853, -1.2370, -3.9160,
        -1.6072, -2.8635, -3.2480, -5.3843], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9710, -4.4400,  1.1704, -4.2071, -2.6494, -3.0970, -0.8119, -6.5054,
        -3.6033, -1.4973, -4.0763, -2.7741, -3.8894, -3.8719, -4.8546, -3.0776,
        -1.7469, -3.0787, -2.3961, -1.9110], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0144, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9040, -2.7626, -0.6368, -6.2789, -2.6523, -1.5671, -2.9349, -2.2622,
        -3.7666, -1.9539, -5.9106, -2.7971, -1.4572, -2.9529, -3.2269, -4.5587,
        -2.0880, -6.1896, -3.1787, -1.0694], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3446, -6.3993, -3.8872, -1.0900, -4.5695, -3.9759, -3.7139, -2.7492,
        -6.0343, -4.0155, -2.3572, -4.1172, -2.1933, -3.8061, -1.5097, -6.3015,
        -2.9756, -2.4850, -7.9294, -2.2872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4274,  -5.3739,  -4.4798,   0.1055,  -5.2924,  -1.7476, -18.0613,
         -4.7462,  -8.3201,  -1.9359,  -4.7868,   0.0666,  -5.8918,  -4.2882,
         -4.0674,  -4.9068,  -6.3671,  -0.8983,  -3.8015,  -3.4161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9660, -1.8681, -7.4653, -2.2904, -1.8742, -5.2544, -3.6123,  0.3808,
        -3.9678, -2.6645, -4.4902, -3.2042, -6.2595, -3.9659, -1.6416, -7.8009,
        -2.0945, -1.1637, -1.7559, -5.7750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.2048, -16.3848,  -7.9595,  -2.0882,  -5.0290,  -0.0612, -13.7590,
         -2.7211,  -2.2731,  -2.2933,  -7.9111,   0.6415,  -5.8003,  -2.9613,
         -1.9381,  -6.1618,  -4.8113,  -0.1597,  -4.6554,  -6.7286],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7917, -2.9025, -3.7395, -3.7817, -3.9496,  1.0067, -2.4091, -1.9642,
        -1.3270, -4.1228, -3.3698, -1.6509, -3.1429, -3.0442, -4.8748, -6.1224,
        -5.1059, -1.1753, -3.2910, -1.8075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7169,  -3.2261,  -2.1326,  -2.2451,  -3.3921,  -4.3659,   0.2286,
         -4.4583,  -2.7144,  -3.3098,  -3.3924,  -6.7602,   0.7706,  -3.3671,
         -2.9783, -12.6879,  -7.7171,  -3.3391,  -7.1113,  -3.1999],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9197,  -2.9429,  -8.9274,  -7.3740,  -2.5264,  -4.9526,  -4.5710,
         -0.0379,  -2.8427,  -3.0184, -19.5396,  -6.5836,  -8.0005,  -1.0622,
         -6.4708,  -1.2007,  -9.3612,  -2.9692,  -2.9858,  -4.8537],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1059, -4.9236, -3.8526,  0.1186, -3.5045, -1.6680, -2.7905, -2.9321,
        -5.7908, -4.5674, -2.2955, -5.3023, -3.0897, -3.4853, -2.7489, -5.7599,
        -3.5134, -2.5368, -2.5904, -2.7995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2433, -2.5408, -6.1505, -4.4436,  0.1530, -3.4756, -2.5887, -3.2948,
        -3.6845, -6.1746, -0.9261, -2.7951, -3.6927, -3.8423, -2.2530, -6.0784,
        -4.0042, -0.9952, -4.3026, -2.6720], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6200,  -6.1314,  -4.1086,   0.0384,  -5.9552,  -2.8677, -14.6485,
         -6.5818,  -7.1537,  -7.2847,  -7.4358,  -1.6313,  -8.9686,   0.7041,
         -3.3825,  -3.4759,  -3.3163,  -4.3886,  -5.9781,  -2.4181],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5029,  -3.8406,  -0.6700,  -1.7216,  -3.5877,  -3.3878,  -7.0647,
         -4.3077,   0.4334,  -6.3124,  -3.1371, -18.9832,  -4.7715,  -8.0966,
         -6.6686,  -7.4461,  -1.4405,  -7.1248,   0.4576,  -9.9294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4065,  0.1620, -4.1359, -2.8642, -3.4385, -3.2380, -5.6473, -4.2472,
        -2.8869, -3.1728, -1.2953, -1.8115, -5.0305, -3.6126, -0.0432, -4.4314,
        -1.6451, -3.3473, -6.0947, -4.5158], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4853, -4.3795, -0.3429, -2.3462, -2.8859, -2.2407, -1.6875, -6.5730,
        -4.2477, -1.6297, -3.0377, -2.1341, -1.2977, -2.3887, -4.3833, -0.0593,
        -3.3871, -2.5360, -4.0638, -1.9869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9569, -1.6800, -1.9665, -2.2941, -6.4824, -3.7680, -2.7222, -3.3137,
        -2.3994, -6.2626, -5.6556, -4.0511,  0.3029, -6.2624, -1.7433, -1.8380,
        -3.8791, -3.6829, -0.9565, -2.6937], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6295, -3.3752, -4.1001, -3.3496,  1.0116, -4.0866, -4.3196, -5.8350,
        -4.1102, -8.9561, -5.6802, -7.9386, -3.3224, -5.8138, -3.1324, -3.0535,
        -3.2882, -2.3919, -3.1807, -3.5635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7492, -0.3580, -4.8383, -3.6892, -2.3102, -2.4363, -4.1585, -4.9476,
        -1.8300, -9.8075, -4.2445, -2.5166, -9.1887, -5.6768, -9.4695, -6.0814,
        -3.4815, -2.4560, -3.4627, -6.8991], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9259,  -3.1434,  -3.5289,  -0.0148,  -4.5481,  -1.4461,  -1.9654,
         -3.6098,  -5.2423,  -3.0165,  -2.5141,  -2.6508,  -1.3556,  -1.7471,
         -6.5811,  -4.1422,  -0.0265,  -3.6524,  -1.7183, -11.4891],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6346,  -4.3234,  -1.6796,  -4.4222,  -2.6845,  -3.9694,  -0.3200,
         -3.5914,  -6.0123,  -2.5511,  -2.9228,  -6.3226,  -3.1912,  -1.3011,
         -5.2988,  -2.9062,  -0.6697, -15.2670,  -3.6213,  -2.1773],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2113, -4.7637, -0.0829, -1.3980, -2.3003, -3.3225, -7.0930, -4.6616,
        -0.0906, -4.7138, -1.1650, -1.4529, -2.1727, -4.5688, -1.1298, -6.0805,
        -3.1490, -2.4245, -4.5366, -4.5020], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1979, -3.0957, -5.2137, -0.4544, -3.5933, -3.6140, -3.7052, -7.0742,
        -4.6939, -0.0947, -4.0373, -2.6158, -2.1407, -6.2517, -3.1476,  0.0742,
        -2.7172, -3.2694, -0.9900, -4.1389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0612,  -3.1673,  -0.4454,  -6.1438,  -2.8509,  -1.3870,  -5.4696,
         -3.1266,  -2.7896,  -0.9670,  -6.2623,  -2.6967,  -1.5531,  -5.9138,
         -2.2701,  -1.6481,  -1.8516,  -6.1675,   0.7685, -14.6754],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2751,  -9.1543,  -4.5334, -18.9528,  -7.3608,  -4.0307,  -6.5633,
         -9.0011,  -6.9087,  -6.0645,  -2.9405,  -5.8654,  -0.8936,  -7.7048,
         -4.9990,  -1.9950,  -1.9841,  -6.9617,  -0.2408,  -4.8546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4435, -2.1495, -4.3826, -5.7955, -5.0425, -2.3655, -2.9884, -2.6937,
        -3.1770, -1.4531, -6.2440, -3.1111, -0.9585, -5.8602, -1.5446, -2.3871,
        -6.7720, -4.1752, -0.9263, -5.6403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6055, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7018,  -5.5768,  -1.9750,  -2.7313,  -5.0647,  -4.5422,  -0.2159,
         -6.0306,  -3.5631, -20.7448,  -8.5182,  -9.8485,  -8.0383,  -7.1457,
         -5.2108,  -2.3792,  -8.5077,  -4.3923,  -2.6442,  -3.4313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-25.1914,  -7.3187,  -9.7657,  -6.0206,  -1.0304,  -5.7595,   0.2238,
         -4.4852,  -3.5616,  -3.3598,  -0.5175,  -6.0853,  -3.5822,  -0.1793,
         -4.6913,  -1.9986,  -2.6405,  -2.3181,  -4.9385,   0.3360],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2115, -2.2770, -5.5268, -3.8063, -1.1425, -4.1252, -2.1638, -2.3872,
        -1.0058, -6.7139, -2.5619, -1.4210, -6.9034, -1.9260, -1.9388, -0.9914,
        -5.5973,  0.4385, -4.0126, -3.1094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6231, -6.3479, -7.6717, -2.2260, -9.5179, -0.8721, -3.8549, -3.8697,
        -2.8954, -2.2043, -5.1035, -3.9207, -0.1987, -1.9202, -1.9671, -2.0880,
        -5.6017, -4.5499, -0.2095, -7.9094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.2539,  -4.0891,  -7.0027,  -6.6165,  -8.3699,  -1.6179,  -4.7596,
         -3.2080,  -1.2278,  -4.7269,  -3.3749,  -2.9765,  -8.1257,  -4.7103,
          0.7023,  -2.8393,  -4.0367, -11.6059,  -4.9509,  -6.9962],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.5927, -10.4036,  -4.3233,  -9.7764,  -3.1479,  -4.7362,  -2.2839,
         -6.2847,  -2.1206,  -2.7547,  -6.0774,  -4.3265,  -0.4562,  -5.4292,
         -2.6276,  -3.1571,  -2.8446,  -7.9290, -10.5729,  -7.5152],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7547, -7.7145, -6.5473, -7.6826, -4.0939, -5.5290, -3.5312, -5.6048,
        -5.0087, -2.9890, -6.5002, -5.7267, -2.1618, -5.7027, -3.7585, -9.2411,
        -5.3445, -6.7590, -8.9490, -7.7904], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6521,  -0.1303,  -4.2971,  -2.2538,  -2.6068,  -2.5025,  -4.2420,
         -5.8140,  -4.8311,  -3.1559, -15.9375, -17.7866,  -8.3629,  -3.9941,
         -3.9755,   0.2125,  -5.6177,  -7.0536, -22.9390,  -7.7269],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9683, -3.5540, -0.4659, -8.9980, -4.0550, -1.5323, -2.3310, -6.1207,
        -1.7511, -7.2282, -4.5321, -1.5667, -5.7574, -3.0941,  0.1670, -2.5114,
        -1.5886, -2.7020, -5.1850, -3.7678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3085, -3.1802, -0.6418, -5.3235, -3.0885, -3.6539, -0.9987, -4.9992,
        -4.6106, -1.5569, -5.1580, -2.1584, -2.6500, -6.1956, -4.0109, -0.2952,
        -4.6001, -5.1680, -3.0469, -2.0961], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6286, -6.4905, -3.8731, -0.7142, -4.3815, -1.9167, -2.7398, -3.7482,
        -5.1286, -0.6963, -5.5601, -1.7853, -3.9890, -2.6117, -6.2744, -5.2227,
        -2.2584, -3.5645, -4.0569, -3.0532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3739e+00, -2.7096e+00, -6.2142e+00, -3.1339e+00, -5.5134e+00,
        -3.7851e+00, -3.3206e+00, -5.7031e+00, -5.3230e+00, -5.8200e-03,
        -3.9055e+00, -7.1236e+00, -4.8521e+00, -4.5162e+00, -5.6042e+00,
        -4.3505e+00, -3.7357e+00, -5.6606e+00, -9.0647e+00, -4.2180e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2463, -2.5164, -2.2670, -3.7094, -4.8937,  0.7298, -3.7786, -2.6085,
        -4.3015, -3.5494, -5.4426, -2.6714, -1.1620, -1.6035, -2.8146, -1.4613,
        -4.4771, -4.4415,  0.0845, -4.5724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3543,  -1.5784,  -5.7906,  -4.1225,  -1.5761,  -5.8427,  -2.4907,
         -1.5483,  -5.6224,  -4.6560,   0.1531,  -5.3297,  -2.6120, -15.3524,
         -4.6065,  -7.5547,  -0.9094,  -6.7514,  -0.3074, -18.5413],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3686,  -4.0625,  -4.9310,  -2.8832, -10.1713,  -6.8197,  -4.3112,
         -7.5154,  -3.6170,  -6.3887,  -1.3630,  -4.1087,  -5.1789,  -4.8362,
         -5.6511,  -6.7756,  -1.6651,  -3.4914,  -3.3886,  -3.4466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4199, -3.8683, -2.9688, -8.6362, -6.2051, -7.4292, -6.8120, -4.8559,
        -5.4900, -3.2607, -1.9848, -4.3159, -1.8100, -2.9191, -4.9895, -5.3048,
         0.3889, -4.3717, -3.5014, -3.4509], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4279e+01, -7.3555e+00, -2.5438e+00, -7.0553e+00, -4.6883e+00,
        -5.4207e+00, -7.2352e-03, -4.4722e+00, -3.9537e+00, -3.0741e+00,
        -3.8454e+00, -4.8663e+00,  3.9143e-01, -3.0832e+00, -3.4352e+00,
        -3.3948e+00, -4.8937e+00, -6.5030e+00, -4.6960e+00, -2.1298e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0431,  -6.3312,  -4.4958,  -0.6985,  -5.5156,  -3.2106, -17.7253,
         -3.3327,  -7.1682,  -1.1546,  -5.8920,  -0.6310,  -8.5569,  -2.8825,
         -2.9648,  -2.2846,  -5.6426,  -3.9017,  -1.6223,  -3.3765],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8223,  -2.2448,  -1.9861,  -6.3372,  -4.7055,  -0.2831,  -6.9847,
         -5.7762, -23.9331,  -3.8354,  -6.6869,  -9.2571,  -6.5855,  -6.1222,
         -2.9292,  -5.8949,  -4.9267,  -6.4271,  -3.9157,  -6.4360],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8335, -6.3381, -1.1124, -7.9767, -3.2065, -3.4951, -0.1105, -6.7889,
        -3.3063, -0.7550, -4.0626, -3.1393, -4.0146, -1.8825, -5.3495, -2.9146,
        -0.5037, -3.4361, -2.0130, -2.7215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2296, -4.7413, -1.7795, -2.8816, -3.0186, -4.2018, -7.5404, -3.8491,
        -2.4542, -2.9007, -1.1937, -6.0967, -2.8403, -1.7203, -3.2877, -4.8722,
        -2.1543, -2.8205, -5.9607, -5.7065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5466, -2.4727, -3.7942, -1.0823, -6.2714, -2.7702, -0.5818, -4.1922,
        -1.9043, -2.1766, -4.8493, -4.7497,  0.6881, -2.6474, -1.7764, -1.2464,
        -3.2151, -6.0117, -4.3286, -1.7689], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4744,  -1.6290,  -5.0483,   0.8339, -18.4176,  -4.6786,  -1.8781,
        -16.0467,  -6.6622, -20.9644,  -7.4600, -15.6695,  -6.6334,  -2.6557,
         -4.5084,  -2.2550,  -2.9862,  -5.5480,  -2.1713,  -2.2693],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9469, -6.9110, -5.1715, -1.3230, -3.5246, -2.0659, -3.9361, -3.9775,
        -5.3890, -1.1144, -5.0343, -2.6654, -3.0700, -4.1292, -4.7661, -0.0812,
        -2.0118, -2.3923, -3.1316, -2.2586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1194,  -1.9775,  -2.8518,  -6.3045,  -2.8376,  -2.3085, -11.6372,
         -1.0532,  -2.5060,  -5.4946,  -4.5622,  -0.0387,  -3.7158,  -3.1163,
        -13.0285,  -6.8716,  -6.0956,  -6.4770,  -2.3513,  -5.1296],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5796, -5.0464, -1.5183, -3.0829, -4.0333, -4.7770, -0.2085, -3.0638,
        -2.5212, -3.9118, -5.6675, -4.3268, -1.3100, -4.8027, -1.5369, -3.3077,
        -3.5752, -3.7799, -3.5899, -2.4979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2635, -2.6198, -2.7530, -4.0563, -0.2854, -6.3497, -2.2698, -2.9097,
        -3.1617, -4.6598,  1.1809, -2.7692, -2.9112, -2.2695, -0.8723, -5.0450,
         0.3442, -6.7669, -2.3783, -3.2711], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8043, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3983, -6.4432, -6.9033, -6.7251, -1.4784, -9.7945,  0.4062, -7.9756,
        -3.9082, -3.6241, -1.4323, -4.8945, -4.2984,  0.2735, -3.9188, -5.3390,
        -3.4499, -1.5139, -5.9656, -2.5778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7332, -0.7753, -5.4732, -3.6487, -6.8484, -6.0003, -3.5488, -0.9287,
        -5.8589, -1.4087, -2.4323, -2.2789, -3.8310,  0.4492, -5.4158, -2.0155,
        -3.3346, -6.0323, -4.1239, -0.1258], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3899, -2.4637, -0.9238, -6.0161, -2.4671, -1.4732, -3.1777, -4.4225,
         0.7134, -5.0392, -2.4247, -3.1963, -3.6472, -5.3722, -5.8667, -3.6057,
        -5.2500, -1.2073, -2.4215, -3.3848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1748,  -4.3297,  -2.5226, -11.4427,  -3.7564,  -7.2741,  -6.4366,
         -7.2288,  -1.1078,  -4.4524,  -2.0393,  -1.7651,  -5.4297,  -2.0703,
         -4.1480,  -5.5304,  -5.5097,  -1.7739,  -3.2020,  -3.9457],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1681,  -3.8823,  -3.2622,   0.4034,  -5.4003,  -6.0005,  -2.6943,
         -3.9562,  -5.3678,  -4.6532,  -2.2470,  -6.7724,  -3.0103,  -1.0495,
        -17.1348,  -5.5414,  -8.7801,  -5.0218, -26.1560,  -3.8901],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3829, -3.6327, -3.6114, -2.8829, -4.3075, -4.5675, -1.6846, -9.6407,
        -3.3720, -4.3849, -4.1242, -5.6643, -3.0489, -0.8763, -3.6897, -2.6146,
        -3.1191, -3.5215, -4.0072, -0.3494], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2963, -2.9143, -3.7425,  1.2648, -3.2483, -3.3701, -4.0604, -5.4812,
        -4.2358, -0.0229, -4.7011, -2.1534, -1.3566, -2.6964, -4.5863,  0.6643,
        -2.8777, -2.6450, -1.6367, -1.7927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2871, -3.3504, -3.4354, -2.5498, -3.8710, -5.5319, -2.7665, -0.1722,
        -2.9708, -1.8045, -2.8476, -1.9263, -6.0253, -2.0809, -1.9305, -3.7540,
        -1.3359, -2.0408, -3.5358, -3.1875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7516e+00, -4.9070e+00,  3.5472e-01, -4.6487e+00, -3.1850e+00,
        -3.6394e+00, -1.3927e+00, -5.0239e+00, -4.6434e+00, -2.4576e+00,
        -1.8761e+01, -2.6466e+00, -6.2265e+00, -8.5368e+00, -6.3053e+00,
         1.2711e-02, -5.1182e+00, -1.8978e+01, -1.9699e+01, -4.7213e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6000,  -0.9448,  -3.2119,  -1.9356,  -2.0398,  -6.2689,  -4.1579,
         -1.2246,  -8.4705,  -4.6201, -29.9006,  -7.3781,  -8.3290,  -6.5353,
         -7.7249,  -2.3172,  -5.0134,  -0.5419,  -2.7284,  -2.1422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4043, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1887, -2.2865, -0.5431, -3.8295, -3.4660,  1.1811, -1.7524, -1.8191,
        -2.3341, -5.2047, -4.5526, -0.2443, -4.6358, -2.7698, -1.6118, -1.9078,
        -5.6791, -2.3093, -1.5280, -4.6223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8340, -3.6596, -0.8056, -4.4503, -2.4007, -2.5660, -4.8160, -3.8929,
         1.2576, -4.8020, -2.2083, -2.9387, -3.2066, -6.6596, -2.9858, -2.0091,
        -5.1209, -3.7373, -8.0546, -6.8675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6804, -2.9838, -2.8066, -3.2987, -6.3492, -2.7153, -3.4394, -3.7424,
        -1.1465, -2.3814, -3.3942, -4.2392, -1.0006, -3.9614, -2.1516, -1.3352,
        -1.7315, -5.5239,  0.9882, -8.5988], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1153e-03, -4.3272e+00, -1.9056e+00, -1.0072e+01, -2.4256e+00,
        -8.4652e+00, -4.6618e+00, -2.8021e+00, -4.0610e+00, -1.8188e+00,
        -3.2600e+00, -3.3245e+00, -3.0387e+00,  1.1421e+00, -2.5987e+00,
        -2.4459e+00, -1.7441e+00, -3.8777e+00, -4.8581e+00,  3.3837e-02],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0575,  -2.2782,  -2.1394,  -1.4535,  -5.4329,   1.1556,  -4.1178,
         -4.2618, -12.5412,  -7.5362,  -3.9763,  -6.9272,  -3.5344,  -5.0462,
         -0.3037,  -2.9520,  -2.2626,  -1.8906,  -4.5787,  -3.4004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0626, -7.2502, -6.9141, -3.7187, -4.7009, -3.9836, -3.9584, -2.6127,
        -1.7371, -5.5349, -3.8345,  0.2580, -3.5675, -1.8314, -2.7070, -3.3770,
        -3.4240, -0.9160, -3.9428, -1.9236], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6369, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2786, -6.1192, -3.6024, -2.5012, -8.6058, -6.5988, -3.5269, -5.1026,
        -6.8678, -5.3566, -6.4341, -2.7335, -3.2974, -3.3047, -2.4726,  1.3120,
        -4.3821, -2.9861, -2.2660, -2.3887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0389, -5.7697, -2.0992, -5.9273, -2.3863, -9.9107, -5.4882, -6.7471,
        -7.0845, -1.9879, -4.4937,  0.9812, -4.6964, -2.3022, -2.3611, -2.8962,
        -5.1388, -0.7055, -6.6759, -2.1190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4442, -6.4588, -1.9158, -2.6613, -3.7599, -3.5628,  0.4783, -3.0466,
        -2.2929, -1.0008, -3.1492, -4.2857,  0.4894, -4.7558, -2.4144, -2.9382,
        -1.8644, -6.1724, -3.6120, -2.2730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0388,  -5.6243,  -3.4729,  -3.0667,  -5.5408,  -4.2523,  -5.4461,
         -7.9520,  -6.8203,  -8.1402,  -5.5031,  -5.6763,  -3.8428, -10.2961,
         -9.1959,  -3.6700,  -0.9658,  -6.6068,  -4.8397,  -0.8470],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0480, -5.1239, -1.4939, -3.6799, -3.7841, -3.9306,  0.8864, -5.6292,
        -2.1761, -2.1994, -3.2191, -6.8573, -3.8948, -2.2275, -3.4305, -3.2420,
        -2.6039, -3.0952, -3.6451,  0.7394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2126, -3.9446, -4.7268, -4.2150, -2.1441, -5.6753, -3.5265, -4.0287,
        -8.9150, -6.5303, -1.8173, -4.2135, -2.3658, -3.5273, -5.3504, -4.2074,
        -0.3175, -5.7489, -3.9945, -3.1734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5528, -6.5153, -0.3789, -8.2459, -2.1655, -2.0004, -1.7246, -5.8397,
         0.8327, -5.0691, -3.0796, -7.2987, -7.5829, -5.5289, -7.2210, -2.5646,
        -5.0980, -1.4709, -4.8305, -2.3126], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0988, -2.7745, -1.9435, -2.1430, -5.4018, -3.1220, -1.1518, -3.6335,
        -3.5238, -3.1013, -1.4272, -4.7983, -0.1694, -4.3195, -5.2913, -2.1710,
        -2.3586, -4.7808, -4.5042, -1.5885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3251,  -4.8360,  -3.9958,  -0.0475,  -4.0788,  -4.6724, -17.6662,
         -7.0733,  -7.5242,  -5.4360,  -8.2144,  -3.8577,  -5.1655,  -4.4729,
         -2.2665,  -7.8782,  -3.2543,  -4.3630,  -1.4933,  -4.2015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8772, -2.4498, -2.7670, -1.7944, -3.5564, -3.0969, -1.2440, -3.9675,
        -2.7009, -1.8637, -2.6568, -5.5404, -5.2839, -1.9334, -5.5002, -3.2803,
        -2.2552, -7.4329, -4.2839,  0.4637], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2746,  -1.9520,  -1.7069,  -6.7292,  -3.1505,  -1.0677,  -2.7116,
         -2.6222,  -2.1400,  -6.8803,  -4.3991,   0.5830,  -5.9782,  -2.7191,
        -14.8526,  -6.2645,  -8.1907,  -6.3000,  -1.8456,  -5.3708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2083,  0.0880, -1.8900, -2.1152, -5.0556, -1.0351, -5.1003, -2.7225,
        -0.6509, -5.2293, -1.8077, -1.2575, -2.0834, -4.8015,  0.8175, -4.1401,
        -2.2497, -2.8660, -4.8399, -4.6703], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9773,  -9.7739,  -6.5442, -36.2354,  -7.4628,  -9.5285,  -6.5180,
         -6.2098,  -2.4026,  -7.3484,  -5.9596,  -5.0273,  -2.0665,  -3.4001,
         -3.0756,  -5.8160,  -4.3430,  -1.9325,  -1.1723,  -2.4034],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7441, -3.8898, -0.1070, -3.8957, -2.0613, -3.5629, -2.7579, -5.9478,
        -4.9564, -1.3407, -4.6593, -1.4339, -1.6954, -5.8779, -3.0113, -0.1157,
        -5.1236, -2.5335, -1.9827, -3.2050], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8741, -4.9855, -0.0589, -6.9375, -2.4935, -4.0602, -3.5923, -7.2902,
        -5.2799, -1.8490, -3.4416, -3.8937, -6.6497, -3.6435, -6.9028, -2.8037,
        -0.7543, -5.1025, -5.2354, -2.8222], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9335, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9007, -5.2101, -2.2639, -3.7094, -5.3985, -5.6400, -1.9356, -5.8524,
        -3.4586, -3.4641, -2.5973, -3.2312, -1.2277, -3.1823, -5.1863, -8.9033,
        -3.1921, -3.7649, -6.4948, -1.8910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2140, -5.6487, -2.5179, -2.2090, -1.5527, -4.3749,  0.8167, -2.9281,
        -3.4903, -2.8768, -6.2950, -2.9514,  0.4727, -4.0510, -1.4551, -1.7910,
        -1.9543, -3.7092,  0.9069, -0.4349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0441,  -3.4206,  -1.2252,  -4.7541,  -1.9676,  -4.9833,  -2.2470,
         -7.4419,  -2.9546,  -2.0987,  -2.4336,  -4.7903, -15.9743, -10.5285,
         -7.1252,  -2.4304,  -4.9018,  -2.1225,  -9.7348,  -3.4381],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9323,  -3.1850, -16.6228,  -7.9222,  -3.0695,  -3.6278,  -4.1010,
         -8.6894,  -9.3185,  -7.8786,  -2.5258,  -5.5769,  -2.8894,  -9.4978,
         -3.4105,  -4.2587,  -5.0631,  -4.6883,  -0.3570,  -5.1261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6870, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7047e+00, -4.9027e+00, -9.9114e-03, -5.6935e+00, -4.4844e+00,
        -2.1309e+01, -3.9431e+00, -7.2090e+00, -1.3272e+00, -4.7422e+00,
         2.8537e-01, -4.8682e+00, -4.3401e+00, -1.8856e+00, -3.0575e+00,
        -6.7862e+00, -4.5530e+00, -9.8956e-01, -3.8597e+00, -2.1511e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3783, -1.9077, -1.1879, -3.0447, -6.4744, -3.2708, -1.6057, -4.0451,
        -2.4251, -2.6102, -5.5438, -3.1710,  0.2014, -3.1460, -2.2289, -2.5425,
        -2.8458, -5.4207, -2.1082, -1.3895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2674, -1.8630, -2.1094, -7.2050, -4.1763, -0.1343, -5.7887, -1.2690,
        -3.3703, -1.1906, -4.0485, -1.3006, -5.3029, -2.4747, -2.6136, -2.4780,
        -5.9349, -5.7104, -1.3884, -2.6349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5646, -3.6718, -0.2782, -3.6565, -2.3805, -2.3686, -4.6278, -4.6211,
         0.1341, -2.8424, -2.1201, -2.2067, -6.6094, -2.6917,  0.0302, -5.5248,
        -2.0372, -2.3534, -2.6926, -4.3704], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4330,  -5.3532,  -1.0341,  -2.7243,  -4.8734,  -4.4515,  -3.9688,
         -5.6093,  -4.4942,  -0.8798,  -3.9484,  -2.5240,  -2.7529,  -4.2468,
         -4.2639,  -0.1150,  -4.2559,  -3.9515, -12.5104,  -6.8826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9528, -5.3153,  0.2372, -3.2563, -2.4690, -3.3760, -4.1029, -4.2166,
         0.2063, -5.0175, -1.7380, -3.0603, -0.9773, -5.6488, -2.2238, -5.5974,
        -3.2436, -1.5070, -6.0755, -4.4079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3692, -3.4521, -6.6705,  0.3329, -5.7908, -2.2440, -2.3350, -6.3261,
        -4.2191, -1.1281, -5.4475, -2.0294, -6.1175, -4.2178, -7.1043, -3.5571,
        -1.5425, -3.2408, -3.7580, -2.7201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6558, -2.2721, -4.0951,  1.1216, -1.8398, -3.1820, -8.0113, -6.7125,
        -3.8403, -7.0868, -2.6397, -4.9183, -8.4932, -9.1414, -4.1014, -2.7463,
        -6.0732, -5.3174, -1.1732, -7.9066], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3780, -5.7839, -3.2467, -2.2054, -2.6933, -5.3678, -3.3090, -0.5115,
        -1.2512, -2.5608, -1.8950, -5.0372, -4.5749, -0.0301, -3.3988, -1.8047,
        -2.1584, -2.6345, -4.6952, -0.2785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5182, -6.1796, -2.3086, -3.8496, -6.3522, -4.9809, -2.5014, -7.4206,
        -2.6863, -2.2621, -4.5619, -4.3395,  0.2394, -4.3033, -2.7543, -3.1345,
        -2.5001, -3.3874, -0.5146, -3.6774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5231e-01, -6.4271e+00,  9.3232e-01, -8.0545e+00, -1.9251e+00,
        -1.5429e+01, -7.6927e+00, -8.3917e+00, -2.1171e+00, -5.4262e+00,
         6.6280e-03, -5.5036e+00, -2.8036e+00, -1.5155e+00, -2.4750e+00,
        -6.3558e+00, -1.1852e+00, -5.0284e+00, -2.0761e+00, -2.1201e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4902, -5.9614, -2.5206, -3.0985, -5.9793, -3.7988, -1.5033, -0.7588,
        -2.7442, -2.8318, -5.8242, -3.5994, -0.1935, -7.5761, -3.0932, -5.6310,
        -3.0284, -6.3159, -4.7142, -1.4745], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4270, -2.3114, -1.2411, -2.1640, -5.8135, -2.8675,  0.2783, -4.5310,
        -1.9662, -2.8032, -1.8269, -4.6748,  0.5068, -7.2748, -2.1306, -2.3836,
        -3.4832, -5.0896,  1.1233, -3.5918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3693, -5.4681, -4.8823, -5.2561, -2.4765, -3.0321, -3.9577, -1.7620,
        -2.7096, -3.6093,  1.4004, -9.1382, -2.8674, -3.5509, -1.2655, -7.0716,
        -3.0896, -1.4107, -5.4320, -2.5224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3306, -1.1067, -1.6997, -1.3205, -4.1428,  0.6046, -3.2767, -3.8681,
        -3.0741, -2.1311, -5.4928, -4.8036, -2.5497, -6.9921, -3.4234, -9.6196,
        -3.9387, -3.3565,  0.2576, -2.5550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6557, -1.0307, -6.6908, -4.2291, -1.1937, -2.6131, -2.4488, -1.8269,
        -4.6774, -4.3094,  0.7397, -3.8182, -1.9218, -2.3414, -5.5810, -3.3724,
        -0.3934, -3.1241, -1.3902, -1.4418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7639, -0.1435, -2.6381, -3.2157, -3.8833, -1.5035, -6.7147, -3.5702,
        -1.9072, -3.3888, -2.6606, -2.1251, -2.7438, -6.1317, -2.8244, -2.7010,
        -9.4682, -2.5578, -3.1492, -6.2361], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1118, -3.2555, -1.5990, -3.7238, -1.9864, -4.6832,  0.2454, -2.7312,
        -2.3974, -5.0613, -4.2912, -6.1379, -3.3380, -7.5264, -4.5673, -1.9669,
        -5.2416, -4.7509,  0.1955, -4.6596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2056, -1.5136, -3.1437, -0.3671, -6.0942, -2.4772, -1.1993, -3.4454,
        -1.4058, -2.5852, -1.7998, -6.5129, -2.6857, -1.1799, -2.8368, -2.7868,
        -3.1715, -1.5349, -6.5496, -3.2101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2712,  -3.7532,  -4.0318,   0.6659,  -5.7086,  -1.7609, -13.9281,
         -5.9861, -10.2119,  -7.5933,  -2.4481,  -6.4486,   0.3075, -12.7886,
         -3.5183,  -3.0385,  -4.4439,  -4.8702,  -0.1913,  -7.9710],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6489, -11.1398,  -3.8668,  -4.4555,  -9.4702,  -5.6794,  -7.2150,
         -6.4161,  -6.5655,  -3.9879,  -6.9540,  -1.1424,  -5.2034,  -4.6174,
         -6.0786,  -5.7412,  -6.2916,  -6.5973,  -7.1831,  -3.6985],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9640, -2.2337, -2.0084, -3.2232, -4.4141,  0.7918, -3.9244, -2.9904,
        -3.1722, -2.7618, -6.3423, -2.4112, -1.9481, -4.2520, -2.9377, -1.9343,
        -1.0505, -6.8468, -2.3069, -0.7536], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4769, -4.3870,  0.1946, -2.8609, -1.6892, -2.0623, -1.7036, -7.4422,
        -4.4506, -2.2306, -3.5628, -2.2471, -3.4462, -4.2545, -4.2326, -0.8616,
        -3.9087, -2.0288, -2.0304, -0.7843], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3353, -3.2751, -5.8753, -2.1624, -1.3816, -2.4530, -2.6738, -4.4472,
        -2.2085, -4.5119,  1.4764, -4.2936, -1.7517, -2.9483, -3.3057, -6.0503,
        -4.2947, -1.3435, -2.4591, -3.0575], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1669,  -9.0517,  -3.7337,  -3.8527,  -6.3421,  -5.0827,  -5.3853,
         -7.3842,  -5.9642,  -4.8952,  -4.5039, -10.2972,  -4.4844,  -5.2782,
         -8.9999,  -8.7466,  -2.9579,  -4.0741,  -2.9022,  -5.3021],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9161, -2.8726, -2.0779, -2.7430, -5.0970,  0.5231, -7.4944, -3.0024,
        -4.0266, -0.7987, -6.0022, -3.3122, -1.7125, -3.9164, -2.9204, -1.7962,
        -2.5151, -5.6960, -3.2997, -3.8322], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7172, -2.2717, -4.1038, -5.0368,  0.0335, -3.1841, -4.6237, -2.7877,
        -6.0945, -3.6112,  0.9963, -3.7693, -1.2481, -3.1304, -1.7185, -6.0394,
        -2.5139, -1.3088, -4.0054, -1.5910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5916,  -5.1753,  -5.2012,  -1.1599,  -2.8661,  -2.2536,  -4.0821,
         -3.3218,  -4.5781,   0.7425,  -3.8936,  -3.6363, -10.4101,  -6.2626,
         -4.9950,  -7.3392,  -2.8130,  -5.6102,  -7.0361,  -3.3291],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2729,  -0.8519,  -3.5193,  -2.5488,  -3.1993,  -3.0516,  -6.9019,
         -3.6660,  -3.4139, -11.6610,  -4.4678,  -2.8473,  -3.4413,  -5.8408,
         -3.7064,  -6.5316,  -6.5868,  -3.2546,  -1.3241,  -3.6700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6689, -5.1540, -1.5741, -1.9567, -3.3654, -2.7674,  0.8757, -1.3030,
        -2.5642, -1.7319, -4.3324, -5.2665, -3.6089, -4.8472, -2.6780, -3.4893,
        -2.2416, -7.3176, -5.0920, -2.4826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5234, -5.8392, -5.1974,  0.3933, -4.6673, -3.5616, -2.9618, -4.1899,
        -5.0024, -1.2179, -4.2837, -2.1594, -2.4366, -2.7535, -3.3748, -0.3742,
        -4.2956, -4.6228, -5.8305, -1.1547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9026, -2.1421, -2.3902, -3.6171, -5.3558, -3.5579, -0.3615, -2.9866,
        -2.4889, -1.8040, -4.4470, -6.3843, -4.9962, -2.9559, -4.2377, -2.9462,
        -3.0729, -3.3255, -3.9638,  1.2285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5866,  -4.4058,  -2.6602,  -5.9950,  -2.2887, -17.6272,  -4.9165,
         -9.9701,  -6.0194,  -7.4049,  -3.0492,  -7.8467,  -1.1737,  -3.9761,
         -3.7891,  -3.5407,  -5.6378,  -4.4973,   0.6439,  -5.1713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4600,  -2.4132,  -2.5634,  -1.6793,  -4.9891,  -3.0488,  -1.8232,
         -2.6167,  -1.9482,  -2.2472,  -1.4452,  -4.5443,   0.1966, -10.8658,
         -2.3881,  -2.8143,  -2.6508,  -5.2002,  -3.1335,  -0.5518],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0280, -2.4083, -4.3087, -1.7443, -7.7961, -2.8119, -0.1397, -2.9277,
        -3.4934, -5.2977, -0.7717, -3.8033, -6.9114, -3.4067, -2.9989, -5.1284,
        -2.4121, -2.2533, -2.3316, -4.8322], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6571,  -0.4363,  -9.2847,  -4.3022, -18.5877,  -7.9330,  -9.0552,
         -6.4502,  -5.6945,  -1.9060,  -6.2626,   0.5394,  -4.1081,  -4.0052,
         -3.0581,  -2.9616,  -7.1841,  -5.0984,  -1.1604,  -4.7814],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5998,  -1.8323,  -2.5789,  -5.2912,  -3.5790,   0.4055,  -6.2713,
         -7.3261, -26.0053,  -3.6807,  -8.4682,  -7.6311,  -7.0520,  -5.9976,
         -2.9347,  -3.3862,  -0.0688,  -2.7202,  -2.4339,  -2.2973],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4380,   0.6822,  -4.1410,  -3.3736,  -3.2027,  -6.3724,  -4.0697,
         -0.9507,  -6.9292,  -5.1148,  -8.4849,  -9.6250, -10.1693,  -8.0780,
         -1.6442, -10.1815,   0.7501, -11.3493,  -4.7717,  -3.4832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1108,  -3.8039,  -4.3975, -13.4782,  -5.0856,  -4.5262,  -4.0819,
         -7.7495, -11.9674,  -5.0584,  -5.4661,  -6.4554,  -2.6315,  -5.0049,
         -3.6387,  -1.0321,  -4.1454,  -2.0034,  -1.7011,  -2.9220],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7807,  -5.1036,  -2.1582,  -8.7154,  -3.6471,  -2.9105,  -2.7677,
         -3.3842,   0.8026, -10.3916,  -2.1814,  -2.4695,  -2.5668,  -5.0300,
         -3.2866,  -0.6362,  -2.7827,  -3.1979,  -2.6999,  -1.1527],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0795, -1.7558, -3.9016, -4.1345, -1.1082, -2.0301, -2.1108, -4.8156,
        -1.2425, -7.7888, -2.6458, -2.1318, -6.8228, -4.8165, -4.6513, -0.6812,
        -3.5572, -3.3397,  0.2359, -8.6022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1120, -1.8423, -2.3337, -4.1012, -1.2783, -5.3164, -0.0148, -5.2739,
        -2.7221, -3.1743, -1.1075, -6.0736, -2.5182, -1.9163, -7.4959, -2.3834,
        -1.9992, -2.2173, -6.2099, -2.0318], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6524,  -6.6743, -17.5669,  -8.9900,  -8.3812,  -3.0634,  -5.7007,
         -3.0380,   0.1221,  -6.1952,  -2.0535,  -2.2158,  -3.5394,  -4.8647,
         -0.1803,  -5.0995,  -2.0916,  -3.8321,  -1.4434,  -5.7980],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3281, -2.2594, -2.7996, -5.3531, -1.4670, -2.3155, -2.0648, -4.9998,
        -0.6680, -5.1421, -2.3314, -2.5703, -3.5166, -3.7884,  0.9534, -6.6501,
        -2.2211, -3.0064, -1.9097, -6.3026], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1870, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0731, -2.0374, -4.6279, -4.1451,  1.2053, -4.7128, -2.8298, -2.1095,
        -3.8771, -3.7476, -0.8040, -3.2070, -2.6388, -2.2609, -3.4034, -6.5807,
        -4.9903, -3.8297, -6.4146, -5.9522], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9355, -4.0997, -1.6742, -0.7000, -6.0228, -2.1130, -1.6367, -8.0971,
        -5.1978, -4.2441, -4.6508, -7.1646, -4.6455, -3.3153, -6.2702, -3.8307,
        -3.4859, -2.4197, -7.0818, -2.5717], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0600, -8.3727, -6.5245, -2.5300, -7.0183,  0.5715, -4.1708, -3.6754,
        -3.2149, -2.8404, -5.3934, -2.5999, -0.1293, -2.4981, -2.4401, -2.1650,
        -4.4085, -4.6064,  0.3199, -4.2530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0062, -0.2807, -6.2459, -3.1706, -3.1696, -1.9580, -6.7708, -4.5641,
        -2.5872, -6.0286, -3.8919, -0.9016, -5.6875, -3.8659, -0.0692, -2.6855,
        -2.1366, -1.8526, -3.9535, -3.6353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5653,  -4.7875,  -3.9345,  -3.3869,  -3.1823,  -7.0241,  -2.0942,
         -1.6413, -11.5666,  -1.9781,  -2.1265,  -5.9977,  -4.0517, -26.1251,
         -2.5436,  -6.7618,  -4.4394, -17.3452,  -6.0880,  -5.2460],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0581,  -4.7917,  -4.8415,  -2.7171,   0.9392,  -5.0068,  -1.9766,
         -2.4972,  -4.3447,  -4.3182,   0.8326, -11.7072,  -3.1060,  -1.8632,
         -5.5589,  -4.8548,   0.0649,  -3.4864,  -2.9311,  -4.4526],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2014, -2.2488, -1.6686, -4.1028, -3.9656, -1.0720, -4.4850, -2.6387,
        -4.0855, -2.6389, -7.2925, -2.9701, -1.9113, -4.8457, -3.0859, -1.4122,
        -3.3871, -7.7388,  0.5138, -2.1506], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7126,  1.0485, -5.4771, -1.8680, -3.4455, -0.4026, -6.6125, -2.8399,
        -1.6581, -2.9297, -2.7690, -3.8435, -1.3565, -6.8771, -4.4467, -2.0866,
        -4.0551, -3.0319, -4.0854, -5.2771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1541, -7.4681, -6.6579, -7.3705, -6.7397, -6.9183, -6.4537, -8.3986,
        -8.6090, -7.8577, -7.1777, -6.5205, -8.1335, -7.0066, -7.9417, -7.0872,
        -7.6322, -8.8573, -8.0344, -7.9496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5722,  -9.5169,  -9.0338,  -5.5307,  -7.6136,  -2.0971,  -8.3073,
          0.8785,  -4.5759,  -3.4008,  -3.0944,  -6.1165,  -4.9049,   0.1102,
         -5.7449,  -3.0061, -15.5158,  -5.8684,  -9.0754,  -0.9412],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5220, -6.2348, -4.0502, -2.4869, -2.3441, -1.4974, -1.9822, -3.8117,
        -4.3319, -0.0197, -3.6242, -3.2931, -4.4383, -4.4657, -5.2103, -0.6087,
        -3.8953, -1.9793, -3.7046, -1.2715], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6748, -4.8126, -2.0350, -5.4511, -4.1702, -0.6809, -3.6816, -1.6119,
        -3.3103, -1.5878, -5.1843, -1.2943, -5.2056, -1.5136, -3.1437, -0.3671,
        -6.0942, -2.4772, -1.1993, -3.4454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1273, -1.1446, -1.8440, -3.2526, -2.9534, -1.5977, -5.1408, -1.7832,
        -6.0527, -2.6240, -2.2583, -2.7450, -4.9813, -2.7705, -1.8368, -6.9379,
        -1.8389, -2.6610, -1.0548, -4.6344], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5071, -5.5529, -1.9923, -5.5539, -2.6061, -4.6156, -3.1423, -2.7304,
        -3.4880, -4.8186, -0.2162, -5.4780, -2.5575, -5.3031, -1.4950, -4.9998,
        -3.4322, -0.1893, -2.3472, -3.4373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0773,  -2.6798,  -2.2108,  -2.3720,  -6.2022,  -3.7685,  -1.0095,
         -1.2342,  -3.0902,  -0.9761,  -4.1921,  -4.1073,   0.8949,  -3.1719,
         -1.9611, -13.8263,  -3.8065,  -8.1551,  -1.7797,  -5.7187],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3800,  -6.1827,  -9.4211,  -6.2918,  -4.1946,  -7.3239,  -4.2659,
         -7.4862,  -3.8709,  -7.3522,  -8.6757, -11.0335,  -2.0507,  -9.4073,
         -4.1842,  -5.6887,  -8.6407,  -2.4488,  -5.7790,  -5.7842],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1441, -11.3631, -14.6594,  -8.4001,  -4.6110,  -4.3522,  -2.1486,
         -1.3873,  -4.0832,  -2.7649,  -2.5464,  -0.2278,  -6.3338,  -2.0566,
         -0.9597,  -2.6703,  -1.9128,  -2.5446,  -2.5766,  -4.3166],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7332, -5.3172,  0.5097, -3.8967, -2.2237, -2.9439, -1.8660, -6.2408,
        -2.8096, -0.6238, -5.5087, -2.5137, -1.4493, -2.0461, -6.7899, -2.1720,
        -0.9789, -3.3904, -1.9104, -1.7771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0813,  -5.3244, -19.9236,  -8.0305,  -4.8282,  -5.8473,  -3.2485,
         -4.5525,   1.3588,  -7.3664,  -1.9114,  -2.9639,  -5.1537,  -4.5982,
          0.1944,  -3.2320,  -2.6140, -13.4719,  -5.9384,  -7.3533],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6161, -6.2391, -2.4035, -1.6614, -4.6259, -4.0196, -4.6985, -1.5914,
        -5.9455, -2.7059,  0.1380, -3.6742, -1.6124, -2.4094, -4.8281, -4.1575,
        -0.0802, -3.5438, -2.8617, -1.9893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0773,  -4.3846, -12.3040,  -9.1979,  -2.8383,  -5.7223,  -6.6584,
         -7.1691,  -7.7134,  -6.1126,  -2.0685,  -8.3805,  -2.9826,  -7.5884,
         -4.5637,  -3.8800,  -1.0898,  -4.7053,  -3.5473,   0.5625],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.0017,  -4.8348,  -1.1246,  -4.3609,  -3.9160,  -6.8189,  -5.9412,
         -3.8265,  -7.6928,  -3.6915,  -4.2572,  -0.6671,  -4.9223,  -4.9119,
         -3.4334,  -2.1374,  -5.0703,  -3.5393,  -1.4667,  -4.4237],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5519, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4746, -3.3565, -1.9500, -5.7184, -5.0658, -0.3322, -3.0556, -2.6429,
        -8.8595, -7.2905, -6.9138, -6.6685, -3.7440, -4.5548, -3.4132, -6.0474,
        -1.6608, -3.3137, -6.2785, -4.5264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2634, -0.7513, -3.8061, -2.3091, -1.1965, -3.4249, -4.8523,  0.5380,
        -4.7846, -3.2701, -4.3813, -5.6019, -6.3424, -7.7841, -2.2843, -5.6343,
        -2.3327, -5.6417, -2.3027, -3.6505], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1083, -5.4437, -3.6615, -1.0685, -3.4781, -1.2773, -3.1658, -3.7836,
        -3.0707,  0.3039, -3.7326, -3.3428, -2.1123, -2.6324, -6.0405, -0.5277,
        -2.1457, -4.5449, -1.8061, -1.9704], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0772, -2.3385, -5.2937, -3.5124, -4.3615, -1.1048, -6.0497, -3.0336,
        -0.7175, -5.5115, -1.8108, -1.6894, -2.3404, -6.6589, -4.4657, -2.3893,
        -3.9559, -1.8358, -2.2650, -2.7017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-22.4563,  -4.1447,  -4.9786,  -8.2197,  -5.3400,  -2.2690,  -7.3940,
         -2.5027, -12.8233,  -7.1063,  -7.3394,  -6.8502,  -5.7089,  -0.6284,
         -6.5115,  -2.1135,  -3.9894,  -2.3873,  -2.6613,  -2.6328],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7993, -4.3117, -3.4565, -2.7286, -3.8983, -5.8445, -0.5137, -4.6483,
        -3.3249, -5.4186, -7.5214, -5.4576, -7.8992, -5.2040, -5.3123, -3.2973,
        -6.7620, -5.5718, -2.2166, -3.2313], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0738, -0.0345, -3.4683, -3.1928, -3.6238, -0.8158, -6.4723, -2.6614,
        -0.2087, -7.0821, -2.1955, -3.1607, -3.5815, -4.5780, -0.2204, -4.8796,
        -3.0604, -5.7442, -8.4416, -7.3859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6941, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6379, -1.3171, -1.5052, -6.2541, -2.3208, -2.3999, -2.2124, -2.4701,
        -1.5280, -3.1058, -6.2170, -2.2693, -1.0899, -5.8513, -2.4979, -1.6884,
        -2.5784, -3.4925,  1.1662, -4.1003], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7994,  0.4388, -3.1360, -3.1377, -1.3910, -2.3551, -5.7634,  0.4866,
        -7.1476, -2.2081, -2.9133, -0.6319, -5.6142, -2.9602, -0.7685, -4.1449,
        -1.5282, -3.5460, -2.8503, -5.7370], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0442, -10.6663,  -3.9509,  -3.5080,  -2.0392,  -5.9172,  -2.7041,
         -0.0348,  -6.4985,  -2.1260,  -1.3220,  -3.1256,  -3.5227,  -0.4067,
         -3.6818,  -2.4629,  -1.9469,  -3.4170,  -4.3530,   0.6323],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1783,  -1.5000,  -1.5998,  -5.1861,  -1.5319,  -2.7160,  -3.4047,
         -2.2640,  -2.6440,  -1.7558,  -5.1361,   0.6384,  -6.1943,  -1.3804,
        -12.8579,  -4.3873,  -5.7823,  -7.8939,  -5.4843,  -5.5248],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1423, -4.2165, -2.6343, -3.3298, -2.9385, -5.3305, -3.8362, -2.0619,
        -3.2074, -3.1359, -1.7581, -0.7133, -5.6093, -2.7782, -0.7754, -2.3563,
        -1.4101, -1.7746, -2.9523, -3.8913], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9834, -3.7858, -1.1537, -1.8583, -6.9934, -2.3141, -2.4961, -4.1323,
        -4.1581,  0.4836, -3.9975, -1.4755, -2.0414, -2.5130, -4.7719,  0.3618,
        -3.1792, -2.4816, -2.0046, -4.0115], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0454, -4.7184, -7.0261, -2.4583, -4.8269, -0.4906, -6.8893, -2.3051,
        -2.7160, -5.2412, -4.3712, -0.1992, -9.5123, -3.5907, -3.0703, -6.5266,
        -4.3922, -0.1102, -0.9688, -2.1855], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9026, -2.1421, -2.3902, -3.6171, -5.3558, -3.5579, -0.3615, -2.9866,
        -2.4889, -1.8040, -4.4470, -6.3843, -4.9962, -2.9559, -4.2377, -2.9462,
        -3.0729, -3.3255, -3.9638,  1.2285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4307, -3.4037, -2.5123, -6.7885, -4.8959, -2.1692, -8.2473, -5.7469,
        -2.7834, -4.3616, -6.9219, -2.4930, -2.2189, -4.7505, -2.0394, -2.9468,
        -0.8945, -5.7851,  0.9838, -3.4843], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7988,  -7.6084,  -2.0870,  -3.5908,  -1.3406,  -5.1646,  -3.0219,
         -0.0847,  -4.6779,  -2.1638,  -2.0592,  -1.3629,  -6.5425,  -1.5103,
        -14.8955,  -3.1425,  -2.3086,  -6.0194,  -5.0152,  -0.6392],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3913,  -4.8560,  -4.6429,  -1.3208,  -7.4437,  -2.3315,  -2.4287,
         -4.8534,  -4.8512,   0.6928,  -3.6973,  -3.9722,  -2.4545,  -6.1078,
         -4.4292,   0.4626,  -2.6630,  -5.7435, -17.4994, -15.8335],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8076,  -6.4326,  -5.2629,  -0.6885,  -9.5182,  -4.6918,  -4.3320,
         -9.9035,  -6.2885,  -8.0970, -17.2763,  -9.5180,  -8.8897,  -3.7573,
         -9.9007,  -8.0285,  -3.6716,  -3.4490,  -1.6163,  -4.1106],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2320,  -3.9308,  -3.2086,  -5.2555,  -3.5783,   0.2060,  -5.6931,
         -3.1946,  -3.6496,  -3.3157,  -3.4157,   0.4649, -12.2421,  -3.0238,
         -2.4648,  -1.7661,  -5.9861,  -3.9839,  -0.9447,  -3.0173],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4183,  -4.7326, -11.7372,  -5.3794,  -1.9579,  -6.7949,  -4.9599,
         -2.8691,  -2.5656,  -4.8737,  -0.1473,  -7.2296,  -4.6718,  -3.0639,
         -6.7651,  -3.8293,   0.1628,  -5.0745,  -2.6924, -21.6195],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4181,  -7.0733,  -7.9064,  -7.1263,  -7.3024,  -3.0331,  -6.5930,
         -2.0887,  -6.4471,  -4.0454,  -2.1578,  -6.5338,  -4.8179,   0.4365,
         -5.3752,  -2.5969, -10.5743,  -5.3486,  -8.7330,  -1.5761],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1259, -2.3062, -3.0831, -5.5147, -4.4808,  0.5736, -2.4568, -2.6156,
        -7.2157, -8.1215, -8.9193, -6.1923, -2.6063, -5.9152,  0.2677, -7.6436,
        -3.2388, -2.6438, -3.1621, -4.7732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3419, -3.0919,  0.8827, -3.6658, -2.7802, -3.2307, -2.4627, -4.0857,
         0.4628, -5.3143, -3.1224, -2.2977, -1.2020, -4.8223, -4.0429, -0.2985,
        -8.2267, -2.2882, -1.3434, -4.1853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8729, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3474, -0.4833, -5.7158, -2.1813, -2.7687, -1.2770, -5.9942, -2.3825,
        -1.6170, -3.6249, -1.6030, -3.3237, -1.5239, -6.4793, -2.7570, -1.4224,
        -5.3161, -2.6620, -1.8075, -1.0043], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6535,  -3.5967,  -1.5700,  -2.3365,  -1.8546,  -3.2917,   0.6234,
         -2.1453,  -2.5187,  -2.2779,  -4.5979,  -4.5879,  -0.8666,  -4.4199,
         -1.7299, -21.7580,  -2.8578, -10.8088,  -1.9125,  -9.8318],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0219,  -5.7028,  -2.0358,  -1.9895,  -1.6823,  -4.4344,  -1.9282,
         -5.7280,  -3.0065,  -2.5089,  -0.5872,  -6.2467,  -0.2666, -17.6899,
         -1.9374,  -2.4682,  -5.9527,  -4.3879,  -1.3380,  -6.3425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3235,  -2.1920,  -3.9821,  -7.1198,  -3.4564,  -0.7120,  -0.0497,
         -2.4059,  -2.1272,  -6.1967,  -3.3178,  -0.8368,  -8.5518,  -2.8941,
        -18.1697,  -4.6938,  -7.6761,  -7.0244,  -6.3082,  -0.9452],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4642, -7.3593, -5.3167, -6.0204, -3.1538, -5.9387, -0.5038, -6.7955,
        -3.2805, -2.8732, -2.7086, -5.8210, -3.3729, -0.1020, -3.0577, -2.2519,
        -3.7779, -6.2411, -3.4300,  0.0276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3544,  -5.4499, -17.7920, -29.0993,  -4.6594,  -6.3377,  -7.4916,
         -6.5515,  -6.1026,  -3.4242,  -5.2710,  -0.1529,  -4.6347,  -2.9911,
         -2.2427,  -3.7688,  -4.7625,  -0.0330,  -7.1293,  -3.7476],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4998, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4760,  -4.4162,  -2.8206, -12.7154,  -2.6303,  -2.1077,  -2.5842,
         -0.6472,  -6.1349,  -2.0140,  -1.2031,  -4.5890,  -1.1443,  -2.6883,
         -3.2465,  -3.9596,   0.4847,  -4.3101,  -1.3029,  -2.4562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4303e+00, -6.1547e+00,  1.5548e+00, -1.6441e+01, -3.2344e+00,
        -2.9928e+00, -4.8402e+00, -5.5961e+00,  3.5074e-02, -7.5634e+00,
        -9.6226e+00, -4.9303e+01, -7.1369e+00, -8.6196e+00, -7.7685e+00,
        -4.7096e+00, -2.6901e+00, -4.0068e+00, -3.6813e-02, -3.7981e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7730,  -6.0539,  -9.1967, -11.4913,  -3.8499,  -6.8988,  -4.4325,
         -3.6003,  -4.7839,  -1.5394,  -2.3713,  -6.6594,  -5.6598,  -3.1453,
         -2.9844,  -1.8666,  -2.1217,  -3.8340,  -3.6769,  -0.0449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0763, -2.4153, -1.6655, -2.2051, -5.0549,  1.1466, -3.3233, -3.3510,
        -3.6219, -4.8853, -7.0735, -3.6460, -8.6549, -3.5406, -2.2963, -6.3169,
        -3.4205,  1.1643, -4.7098, -3.4028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4525, -0.9071, -5.8903, -2.2814, -1.0621, -3.0113, -2.5761, -3.0561,
        -2.6070, -5.6134, -2.6595, -0.3883, -5.1794, -2.9258, -2.3053, -1.9359,
        -3.6648,  1.2579, -2.7123, -3.0448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6508, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6821, -4.7078,  0.8119, -2.6923, -2.2653, -4.4396, -1.0093, -6.2154,
        -3.7405, -1.9950, -5.9579, -4.3789, -4.9851, -2.0070, -3.2765, -4.1922,
        -0.0114, -1.2508, -3.2757, -3.6397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5302, -5.5036, -2.6352,  0.5497, -4.2370, -2.6397, -3.2392, -0.8558,
        -7.0537, -2.1500, -0.3212, -3.8719, -3.0343, -2.2952, -2.5301, -5.9998,
        -3.7445, -0.4963, -2.2167, -2.2042], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2626, -4.8273, -3.7479, -1.0773, -3.4273, -2.6021, -3.0613, -2.0706,
        -5.2061,  0.0514, -5.4980, -2.4214, -2.6549, -3.5855, -5.0039,  0.4615,
        -6.4653, -2.2431, -2.6395, -2.0754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3584, -4.3045, -1.7573, -0.9949, -2.4478, -2.7511, -4.8392, -3.4841,
         1.0604, -4.7925, -2.2684, -1.8458, -5.5764, -3.9013,  0.7200, -5.6783,
        -1.1190, -1.3561, -1.0509, -4.9834], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0458, -2.3249, -3.5718, -5.9944,  0.4656, -3.6877, -3.2358, -3.0047,
        -1.5666, -6.4222, -4.3429, -2.6260, -5.9639, -3.1300, -2.7651, -4.6492,
        -3.7814,  1.4345, -5.1201, -1.6945], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7442,   0.3783,  -3.7948,  -5.0058,  -3.1423,  -5.0867,  -5.4610,
         -6.4846,  -3.3196,  -1.5025, -13.3161,  -2.8607,  -2.5493,  -4.3401,
         -5.0113,   0.4246,  -2.0714,  -3.6936,  -1.6250,  -4.5388],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9442, -1.0507, -3.2648, -1.4015, -0.9844, -2.1891, -4.9875,  0.4222,
        -5.0082, -2.6869, -2.7006, -0.5349, -6.7277, -2.7266, -0.3104, -1.8996,
        -2.2913, -3.9065, -6.1767, -3.9120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2522e+00, -3.5236e+00,  4.4477e-04, -3.3912e+00, -1.4493e+00,
        -2.2274e+00, -3.7968e+00, -2.9688e+00, -5.2785e-01, -1.9848e+00,
        -3.1992e+00, -3.1620e+00, -3.2872e+00, -4.5959e+00, -3.4550e+00,
        -2.1221e+00, -2.9480e+00, -1.6724e+00, -2.7287e+00, -1.6836e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6988, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0267, -1.7497, -1.8813, -5.0698, -2.3402, -2.3700, -2.2466, -4.3422,
        -3.0850, -4.7286,  1.2419, -9.9462, -2.3407, -2.4413, -5.5722, -4.2989,
        -0.2252, -5.6034, -1.4462, -5.4641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8231, -0.5891, -6.4659, -2.3052,  0.0432, -3.0748, -1.5643, -3.4891,
        -4.5740, -6.4187, -3.6852, -0.9557, -6.1812, -1.2752, -3.1351, -4.6542,
        -3.2003, -0.4815, -2.6109, -2.1761], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7346, -3.0760, -2.6503, -1.9648, -5.4098, -3.8850,  0.8476, -3.8120,
        -1.4732, -4.6313, -1.3671, -4.3496,  1.3584, -2.4973, -3.0104, -3.9961,
        -3.4455, -6.3381, -5.1755, -2.5754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1408,  -5.1691,  -3.1746,   0.1738,  -3.7905,  -2.4791,  -2.3900,
         -0.5090,  -6.3874,   1.0600,  -4.0006,  -3.4101,  -2.5561,  -2.0880,
         -5.8214,  -4.3292,  -0.4141, -10.0837,  -2.0475,  -1.6717],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6697, -0.0416, -3.7619, -5.2470, -1.4399, -3.8863, -4.9581,  0.3940,
        -5.1025, -3.8338, -2.3026, -4.4538, -4.3002, -3.7766, -3.7748, -1.9490,
        -2.3013, -1.0699, -5.4350,  0.6726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5967, -3.8262, -4.0949,  0.5048, -2.6571, -1.5066, -1.2157, -1.9160,
        -6.3295, -2.4449, -1.8049, -5.8944, -2.2220, -1.5965, -6.7539, -3.8021,
        -1.2929, -2.8101, -4.2449, -7.8589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0625, -5.3826, -3.9931,  0.2403, -6.0331, -3.6034, -1.7096, -2.5650,
        -7.1465, -4.7382, -1.5486, -4.6544, -2.1894, -1.5541, -2.5949, -5.7540,
        -2.6674, -2.4597, -2.2050, -2.2457], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8968,  -3.5171,  -4.9201,  -7.6986,  -6.6171,  -2.9472,  -4.3249,
         -2.9020, -10.2607,  -7.1203,  -6.0919,  -7.5169,  -1.7719,  -7.9720,
          0.0409,  -4.7966,  -3.9965,  -4.0822,  -4.0656,  -5.9112],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3199, -4.2027, -2.4526, -2.4176, -4.3310, -3.7381,  0.5467, -2.1977,
        -2.2155, -2.7251, -3.0655, -6.8614, -3.1227, -1.7951, -4.4655, -1.8581,
        -3.9699, -1.2395, -4.4272,  0.1326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5503, -1.1829, -6.1032, -2.7917, -6.4979, -4.8579, -1.6240, -1.8638,
        -2.2181, -2.3335, -5.0399, -4.0983, -0.4204, -2.9559, -2.7227, -6.5046,
        -4.5759, -4.4779, -1.0024, -5.7864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5304, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3582, -7.1006, -4.9764, -0.7860, -4.6051, -3.2901, -2.9884, -1.4509,
        -5.8651, -2.1182, -0.3320, -2.2228, -3.5328, -2.8044, -2.4299, -6.5707,
        -4.2586, -1.0504, -2.8074, -2.1962], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6897,  -5.8825,  -1.6117,  -2.7791,  -3.6011,  -4.9542,   0.6273,
         -4.1338,  -2.2238,  -2.1239,  -5.0210,  -4.4308,  -2.3620,  -3.9914,
         -3.7006, -12.0553,  -6.1446,  -7.0118,  -6.1977,  -1.6219],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1501, -3.6755, -1.3858, -1.7217, -2.8793, -7.6003, -4.5668, -2.2324,
        -3.3106, -2.4469, -2.2085, -3.2897, -2.8264,  0.6455, -2.2751, -2.1818,
        -2.0186, -3.5342, -3.4783, -0.4844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5753,  -3.9109,  -2.9899,  -0.7900,  -6.0426,  -3.6303,  -0.2680,
         -7.1481,  -1.4635,  -2.4365,  -5.3597,  -4.5424,  -0.0693,  -2.6359,
         -2.8538, -11.0913,  -8.5848,  -5.5196,  -7.1471,  -2.1419],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1100, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1481,  -2.5732,   1.4228,  -3.1473,  -4.0025, -15.5574,  -7.4594,
         -8.3086,  -1.5568,  -5.3850,  -0.5133,  -4.0646,  -2.7315,  -4.5340,
         -0.6134,  -5.9558,  -3.4259,   0.6782,  -5.4945,  -2.6989],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3410, -2.9713, -2.4485, -5.2382, -2.7788, -1.0536, -3.7468, -2.5385,
        -2.5187, -4.2234, -5.6363, -1.1202, -9.9865, -3.6113, -4.1552, -2.8169,
        -6.8647, -1.8171, -2.0751, -4.7918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3161,  -4.5340,  -3.5181, -15.9657,  -4.8992,  -0.5283,  -7.2139,
         -2.5660,  -2.8716,  -1.8224,  -7.4214,  -4.4640,  -2.0003,  -3.5360,
         -2.0662,  -1.2631,  -1.3869,  -4.1675,  -0.5220,  -3.3222],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2319, -2.8570, -5.0367, -3.4880,  1.2724, -3.0816, -3.2427, -1.9667,
        -1.5010, -6.6850, -2.0316, -1.9765, -3.3335, -2.5497, -1.7908, -5.0117,
        -3.9679, -1.0538, -4.1190, -0.9818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6381,  0.1325, -1.8131, -1.8761, -1.9775, -5.5073, -3.7932,  0.3469,
        -3.7999, -1.2829, -2.1855, -2.3653, -3.4031,  0.0457, -2.4514, -2.1249,
        -1.6621, -1.2516, -6.7306, -1.6038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3971, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1567, -0.3496, -6.9405, -2.2185, -0.8726, -3.9506, -2.4189, -4.8095,
        -1.2299, -4.4235, -3.4562, -0.8862, -5.9375, -3.4205, -4.6759, -3.5651,
        -2.5746, -3.7886, -0.5074, -4.9088], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3444,  -3.8333,  -3.5011, -12.7511,  -5.8768,  -5.7438, -17.8448,
         -3.6559,  -4.1810,  -7.6337,  -3.0568,  -2.2549,  -3.7068,  -5.3220,
         -2.7132,   0.1516,  -3.5779,  -1.7253,  -3.8751,  -5.1323],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1041,  -0.9755,  -4.3731,  -2.1132,  -1.9187,  -2.2778,  -5.0198,
         -0.5960,  -3.7032,  -2.5755,  -3.3595,  -1.7842,  -4.7905,   1.5775,
         -3.2348,  -4.5166, -32.3930, -17.9668, -10.1101, -10.1612],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4823,  -1.2952, -12.7119,  -5.0618,  -7.9606,  -7.9663,  -5.6956,
         -5.6413,   0.3995,  -2.5597,  -4.5966,  -3.0451,  -6.0977,  -3.9030,
          0.1207,  -3.7962,  -3.0932,  -2.6041,  -2.2661,  -4.7470],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1731,  1.0895, -5.5389, -3.8624, -5.5447, -6.5510, -2.8213,  0.7458,
        -4.0330, -1.7557, -2.8312, -2.5917, -6.4359, -3.0610, -0.7867, -3.8772,
        -1.4916, -2.7738, -3.1209, -5.0092], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1262, -1.8643, -6.1297, -2.9481, -1.1126, -1.7319, -3.5047, -2.0704,
        -4.4914, -3.6576,  1.5422, -3.5998, -2.0584, -1.7445, -2.2714, -6.6753,
        -1.8761, -1.1167, -2.5242, -1.6341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9790, -4.4168, -3.3934, -5.9428, -9.4139, -6.5947, -2.0552, -4.7787,
        -3.3308, -8.2529, -6.1561, -7.0027, -6.0678, -6.2346, -2.6671, -7.1394,
        -0.1916, -6.6200, -3.0884, -4.2682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.8697, -11.2074, -12.3496,  -6.2963,  -4.9284,  -3.5822,  -5.8426,
         -2.6307,  -3.0600,  -1.6257,  -5.5633,  -2.5353,  -0.5047,  -3.5395,
         -1.5438,  -2.7145,  -5.1502,  -2.7149,  -0.4948,  -3.4110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2707, -3.3336, -1.6430, -1.4181, -5.9060, -3.1436,  0.6904, -6.2140,
        -1.5221, -3.6138, -1.8579, -6.2569, -2.4513, -0.5252, -6.2007, -1.9888,
        -2.2162, -3.5825, -4.5668,  0.9138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6823,  -7.6582,  -6.9384,  -4.7410,  -0.1443,  -7.6491,  -4.5351,
         -2.0686,  -2.9374,  -4.3415,  -1.3584,  -2.0132,  -4.3986, -16.0407,
        -20.6286,  -8.6572,  -1.2374,  -7.9740,  -0.6664,  -2.8777],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2572,  -2.0889,  -0.3464,  -5.9313,  -2.7078,  -1.5426,  -7.7778,
         -1.4192,  -4.8886,  -6.5623,  -3.9146,  -3.8535,  -3.9135,  -3.6749,
        -12.9615,  -5.2052,  -7.1688,  -6.5742,  -5.7385,  -6.1159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1032,  -1.9116,  -2.7551,  -0.4797,  -5.8329,  -4.2505,  -1.5105,
         -4.3804,  -4.8186, -11.2781,  -7.3973,  -8.8203,  -4.2771,  -6.3318,
        -22.1890,  -5.0149,  -9.1433,  -7.7647,  -7.2063,  -4.8036],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3600, -4.7253,  0.2116, -5.8482, -2.2272, -4.4836, -1.7190, -5.4366,
        -3.0903, -1.1184, -4.3087, -2.5293, -2.3983, -1.0306, -6.3509, -1.9281,
        -0.4491, -3.9206, -2.0869, -2.5416], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1270, -2.9258, -1.9942, -0.7027, -5.1298, -2.9564, -1.9291, -3.3052,
        -2.4997, -2.9042, -1.1869, -4.7791, -0.7126, -4.3224, -2.9820, -3.1814,
        -2.5481, -4.3658,  1.0391, -6.8951], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4575,  -3.1651,  -0.2721,  -4.3809,  -3.8813, -18.5853,  -4.9115,
         -6.9476,  -1.8432,  -3.7623,   0.7069,  -4.7603,  -3.3768,  -2.5225,
         -1.0408,  -5.8830,  -1.2931,  -1.7644,  -5.1914,  -1.7752],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2691,  -5.3022,  -0.5524,  -6.4828,  -2.7885,  -2.1125,  -2.2507,
         -6.3353,  -2.8292,  -2.0030,  -4.8235,  -2.1518,  -0.7339,  -2.5744,
         -4.8364,   0.3114,  -4.7310,  -3.6661, -22.5415,  -6.8420],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1868, -3.2490, -4.2457, -1.0286, -4.7066, -2.7318, -0.1107, -4.5011,
        -1.5835, -2.9297, -0.9948, -5.3402,  1.1171, -3.1201, -2.7605, -3.4718,
        -0.9266, -6.0473,  0.2754, -3.4350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4880,  -7.8862, -10.9347,  -6.9540,  -2.8389,  -5.6802,  -0.5813,
         -4.9323,  -3.2274,  -2.5419,  -7.1469,  -3.9050,  -2.6048,  -4.8145,
         -3.2275, -14.2019,  -5.9373,  -7.9275,  -7.1393,  -6.6865],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7271,  -4.1231,  -4.9910,  -3.4050,  -2.4751,  -2.4945,  -5.7193,
         -0.6401,  -0.8745,  -3.8255,  -2.2651,  -5.5664,  -2.6387,   0.9932,
         -2.7355,  -2.7097,  -3.0926,  -5.6647,  -4.6291,  -0.1165],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8965, -2.5744, -0.8013, -3.4024, -0.6403, -3.4288, -2.8414, -4.5861,
        -0.5994, -5.4889, -2.5022, -0.7999, -6.4320, -2.6743, -2.0578, -4.2828,
        -3.2052,  0.5059, -3.7265, -1.8610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7735, -10.0817,  -8.5134, -34.1778, -10.1235,  -4.8549, -11.2993,
         -8.2453,  -4.4914,  -5.6081, -11.0379,  -4.1650,  -7.3717,  -2.4169,
         -6.4671,  -0.0723,  -9.1317,  -4.2148,  -4.1817,  -3.8278],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1514, -0.0740, -1.4503, -1.8885, -3.7381, -2.9655, -5.0942,  0.9344,
        -2.2951, -2.0458, -3.0979, -2.6154, -3.4586,  1.2578, -3.8544, -3.9388,
        -3.0239, -3.2874, -6.3992, -6.0660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3485, -0.3333, -6.3699, -6.1909, -2.3670, -3.2929, -1.7201, -5.1397,
        -2.5530, -2.8421, -9.2671, -4.9565, -3.7965, -4.6568, -5.2572,  0.0464,
        -8.3920, -1.0521, -3.3535, -0.7329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8277,  -4.3581,  -3.3992,   0.5005,  -3.5496,  -3.7634, -10.8148,
         -7.0002,  -7.0226,  -6.1918,  -2.4915,  -3.9922,   0.5347,  -5.5947,
         -2.2028,  -2.1663,  -3.3392,  -5.0386,  -0.1177,  -3.9560],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4902, -4.3918, -2.7537, -3.4224, -3.0835, -4.3874,  0.8998, -3.4967,
        -2.5837, -3.9632, -3.9111, -7.0782, -3.5337, -2.7692, -3.0265, -2.2674,
        -4.1915, -2.5193, -6.0992, -2.8172], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8101,  0.3581, -1.7147, -2.2257, -2.5759, -1.2580, -5.8451, -2.8032,
        -0.9433, -4.2829, -1.7890, -2.6067, -1.7028, -6.5680, -2.1106, -0.5041,
        -3.1981, -2.5398, -3.2103, -3.4263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7143, -3.6336, -2.7828, -3.7860, -2.7196, -6.6395, -6.5967, -3.8958,
        -8.4347, -3.9437, -1.5380, -5.1837, -3.1112, -0.6640, -3.6698, -2.1737,
        -3.5576, -2.4983, -4.1715,  0.8582], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3117,  -2.4730,  -2.2274,  -1.5337,  -4.9970,  -1.3521,  -9.0696,
         -3.6878,  -0.7442,  -6.2047,  -3.9674,  -2.9777, -12.6500,  -7.6160,
        -10.9084,  -7.5678, -10.0781,  -2.8910,  -3.3559,  -3.4158],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9636, -2.0234, -0.4659, -7.3943, -2.9310, -2.2394, -1.3195, -4.7725,
         0.4101, -8.7098, -4.4962, -2.1834, -4.7036, -4.5464, -6.2783, -1.9599,
        -7.9968, -5.2659, -3.5036, -6.1641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6261,   1.0418,  -7.5530,  -3.9820,  -3.6876,  -6.8346,  -3.7027,
         -0.5150, -15.1852,  -3.9932,  -1.3362,  -3.8172,  -4.7609,  -0.2014,
         -5.5284,  -3.1996,  -3.7841,  -2.0810,  -5.2649,  -4.2518],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5466, -0.3015, -2.9273, -1.5038, -1.9596, -3.1279, -3.5138,  0.4996,
        -3.1831, -2.6183, -3.0373, -2.6909, -3.6662,  0.2947, -2.8121, -3.4136,
        -3.0073, -5.2782, -2.9346, -0.6216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5536,  -7.9812,  -3.0585,  -2.8329,  -5.0506,  -5.8876,  -0.6251,
         -6.2915,  -4.3210,  -4.7887,  -5.2257,  -5.8432,  -5.3751,  -5.2163,
         -3.4155,  -6.8649,  -4.4591,  -3.2725, -13.3010,  -5.0535],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3380, -3.2862, -3.1001, -3.3434, -5.0052, -3.2185,  0.4698, -5.2412,
        -3.7280, -2.0422, -2.0050, -4.9108, -3.4331, -0.5253, -1.6699, -2.9149,
        -1.1047, -5.4222, -3.9337,  1.4138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8823, -2.2230, -0.6434, -3.2909, -1.9372, -3.8161, -1.5883, -4.6821,
        -2.8067, -0.1656, -2.8968, -2.0035, -3.3328, -1.2794, -5.9840, -2.3155,
        -1.6968, -4.0177, -2.4445, -1.4557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8028, -2.4706, -3.2504, -2.4226, -5.7948, -2.2523, -1.0830, -4.0541,
        -3.2070, -1.6320, -1.9957, -4.1436,  0.4276, -2.4958, -2.0350, -2.1010,
        -5.1434, -4.4474,  0.4411, -6.3448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8390,  -2.4953,  -1.2222,  -7.4067,  -4.6724,  -3.7660, -13.4457,
         -2.4199,  -7.1839,  -2.5694,  -3.7808,   1.1628,  -8.9891,  -3.0113,
         -5.5386,  -1.0082,  -5.3443,  -4.8469,  -7.9761,  -2.4233],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2906, -6.7834, -1.6646, -5.2845,  1.2951, -2.5392, -2.1532, -2.1730,
        -5.6747, -3.6794,  0.0153, -4.0865, -2.9482, -1.2675, -2.9279, -6.6469,
        -1.4624, -1.8930, -4.9134, -2.9104], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.9141, -11.4879,  -7.1055,  -2.1429,  -3.3857,   1.7069,  -7.0027,
         -2.6402,  -2.3376,  -3.0189,  -5.8950,  -4.3179,  -0.6511,  -3.0892,
         -3.3993,  -1.2845,  -4.6407,  -2.7643,   0.1327,  -2.7077],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3973, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7009,   0.3802, -10.6226,  -2.6123,  -3.2274,  -6.0547,  -5.9258,
         -2.0862,  -5.0409,  -3.7672,  -2.1984,  -2.9826,  -3.6133,  -5.0229,
         -3.5388,  -1.6214,  -1.8411,  -5.5608,  -3.6098,  -0.5594],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7603, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5949, -2.2298, -3.6164,  0.9865, -4.7871, -3.0857, -4.2428, -4.1738,
        -5.1817, -3.0356,  0.2969, -3.3431, -3.6713, -5.3894, -5.3250, -5.2271,
        -4.9181, -2.2409, -2.6736, -2.3217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2862,  -4.0286,  -7.4334,  -5.6983,  -8.7842,  -2.2194, -13.7090,
         -4.1944,  -2.5201,  -2.8921,  -6.0010,  -4.6052,  -2.1348,  -3.8124,
         -1.9874,  -3.0116,  -4.6386,  -3.9355,   0.6698,  -1.9118],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8080,  0.3790, -1.5665, -2.8901, -2.3770, -4.9093, -3.7242,  0.5960,
        -5.1268, -3.5796, -5.8282, -5.7385, -4.4620, -9.5402, -3.8751, -7.4115,
        -2.7122, -7.4860, -0.1294, -3.2443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1623, -4.3484, -1.5698, -6.1490, -2.2752,  1.2225, -4.9422, -2.2613,
        -3.7043, -0.7914, -6.6200, -2.1597, -0.7984, -4.1727, -2.0887, -3.2412,
        -1.9530, -5.2701,  1.0175, -2.1326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2897, -2.8264,  0.6455, -2.2751, -2.1818, -2.0186, -3.5342, -3.4783,
        -0.4844, -4.8800, -2.2791, -2.9494, -3.8448, -4.9001, -6.6406, -1.6408,
        -3.4389, -1.9006, -2.5071, -4.3010], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1630e+00, -5.0964e-01, -6.4367e+00, -2.3890e+00, -4.0738e+00,
        -4.8907e+00, -4.0983e+00, -2.2996e+00, -9.0018e-01, -2.7455e+00,
        -2.9041e+00, -4.0610e+00, -4.0964e+00,  8.9955e-04, -4.9539e+00,
        -1.8583e+00, -2.8231e+00, -5.8915e-01, -6.4659e+00, -2.3052e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5930,  -1.8359,  -1.4944,  -1.7759,  -4.7931,   1.5147,  -4.1717,
         -2.6052, -18.4202,  -2.8407,  -6.8139,  -1.7391,  -4.3040,  -0.0502,
         -3.9882,  -3.0779,  -2.8976,  -3.6720,  -4.6520,  -0.7056],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6744, -3.9705,  0.2995, -2.9215, -1.5444, -2.8575, -4.2949, -3.6899,
         0.0228, -6.1402, -1.0137, -2.4990, -1.1571, -4.7879, -0.4771, -6.2685,
        -2.2459, -3.3741, -4.6244, -4.4891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.3562,  -3.6386,  -3.2038,  -4.8395,  -4.9653,   0.5253,  -8.2641,
         -3.0527, -23.1033,  -6.2051,  -8.5485,  -6.0230,  -7.9700,  -2.6323,
         -6.2482,  -0.4442, -11.8985,  -3.5739,  -3.3011,  -6.2601],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7363, -1.1897, -7.3582, -2.5164, -1.2914, -3.7322, -2.7192, -2.6858,
        -0.3990, -6.7089, -2.1287, -0.9132, -4.0988, -2.9424, -2.3072, -5.8842,
        -4.0269, -6.4559, -6.1118, -2.1666], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1736, -0.7822, -4.4983, -1.9960, -2.9385, -1.5315, -4.2504, -6.2112,
        -4.4468, -3.0024, -3.1084, -0.8906, -5.4997, -1.7724, -0.5047, -3.5586,
        -2.2992, -1.9040, -4.2297, -3.9482], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0100,   0.6648,  -2.0036,  -5.4652, -10.0710, -10.3442,  -5.3285,
         -6.9851,  -3.7165,  -2.2784,  -4.4840,  -4.6742,  -3.3281,  -2.2613,
         -3.1789,  -3.4967,   0.5563,  -3.5429,  -1.9706,  -3.2121],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6687, -4.0827, -3.9481,  0.5417, -5.0147, -1.6862, -3.8057, -2.0188,
        -6.5749, -3.9087, -0.8665, -2.6607, -3.5999, -2.7505, -6.1817, -4.5391,
        -0.4021, -4.3296, -3.1608, -3.5409], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1755,  -1.4581,  -2.8887,  -1.1044,  -6.2817,  -2.7131,  -0.3308,
         -1.7897,  -4.2428,  -3.0898,  -6.0085,  -3.7464,   0.0433,  -4.4156,
         -2.3917, -13.2564,  -8.6709,  -7.8157,  -2.6427,  -5.0647],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4765e+00, -5.2152e+00, -2.4978e+00, -3.3087e+00, -4.1113e+00,
        -1.9706e+00, -1.1462e+01, -1.9122e+00, -8.6539e+00, -3.3780e+00,
        -5.0401e+00, -4.1922e-02, -2.9994e+01, -4.1468e+00, -4.0311e+00,
        -1.3716e+01, -6.6088e+00,  1.3596e-02, -7.9415e+00, -3.2054e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5993,  -2.1453,  -3.2651,  -5.2663,  -1.7374,  -3.9923,  -4.3053,
         -5.5479,  -2.2489,   0.0513,  -2.6728,  -3.5738,  -2.2192,  -2.4847,
         -4.1466,  -3.2424,  -0.6335,  -9.0006,  -3.0409, -16.4548],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9811,  -3.7228,  -2.5574,  -2.1760,  -6.7680,  -4.7079,  -2.6077,
        -10.4903,  -4.5311,  -4.1760,  -3.9075,  -4.7320,  -0.5352,  -9.9936,
         -6.0936,  -2.6915, -12.5051,  -2.4836,  -4.3196,  -5.7078],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2630,   1.2890,  -1.7468,  -3.1346, -24.6370, -15.8788,  -8.7259,
         -1.1205,  -3.8713,   0.9022,  -4.9353,  -3.3592,  -1.7283,  -1.8858,
         -5.6222,  -2.7540,   0.6155,  -6.8763,  -2.0512, -17.3539],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8472, -4.2771, -0.8849, -6.3667, -2.5545, -4.3454, -4.2857, -4.3160,
         0.5374, -3.8954, -3.5700, -3.1421, -3.7821, -5.5328, -1.5097, -2.2506,
        -3.4514, -2.1379, -6.1317, -4.3911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7631, -2.3523,  0.0988, -4.0951, -2.9530, -1.2707, -1.4490, -6.1937,
        -3.2022, -1.1436, -3.9611, -2.5642, -1.8854, -2.8791, -3.3583,  0.2110,
        -4.2786, -1.9886, -2.5343, -1.7922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1402,  0.1018, -1.2424, -3.2978, -2.9157, -0.9496, -6.2217, -2.6586,
        -0.2834, -1.9293, -2.7279, -1.7248, -4.4612, -4.1165,  0.4310, -3.7446,
        -2.2318, -2.0918, -3.3904, -3.9719], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3768, -1.2915, -2.6687, -1.6325, -5.1963, -3.7811, -0.0197, -1.6258,
        -1.7945, -2.7169, -6.3388, -3.9246, -6.3098, -6.2129, -2.3446, -1.9386,
        -5.5864, -4.7121, -0.2280, -3.5315], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3116, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9739, -1.2870, -2.0551, -5.8823, -2.7028, -3.9716, -2.2958, -5.8391,
        -2.2975, -1.8213, -3.7022, -2.3260, -1.8215, -3.2330, -3.3075,  1.7243,
        -3.3811, -1.6496, -4.5494, -1.2222], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4898, -2.7975, -3.6289,  0.1394, -6.2010, -6.2995, -4.3662, -0.8964,
        -6.5849, -2.1042, -1.1988, -4.1741, -2.4241, -1.3752, -1.8275, -5.5227,
        -2.2807, -2.8870, -2.6876, -1.6663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1138, -2.1776, -7.9294, -2.0910, -1.3619, -4.3727, -3.2398, -3.6134,
        -0.9189, -6.4982, -2.3443, -0.2533, -4.5200, -1.5590, -9.3554, -6.7282,
        -4.9973, -7.2039, -6.1710, -3.6421], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3847,  -2.9725,  -1.1865,  -4.4337,  -2.4113,  -3.9109,  -2.6316,
         -5.4806,  -0.0628,  -2.7095,  -3.0579,  -5.0297,  -6.7885,  -4.3447,
         -3.9194,  -7.0270,  -1.9941, -20.4900,  -5.3539,  -9.5211],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7317,  -2.0729,  -3.9864,  -3.2017,   0.4637,  -5.3227,  -2.1234,
         -3.4438,  -0.8098,  -6.1870,  -2.0130,  -0.0919,  -4.5795,  -1.7717,
         -1.9714,  -1.7535,  -4.9968,  -1.9655, -14.6939,  -3.1794],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.3449,  -3.4963,  -8.4665,  -1.8363,  -5.2591,  -0.7247,  -5.6826,
         -4.9809,  -3.5827,  -5.9060,  -4.8125,  -2.6210, -12.4164,  -3.6980,
        -15.7928,  -4.7656,  -9.3624,  -6.2359,  -7.7003,  -2.9424],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7717,  -6.8407,  -4.5298,  -2.6201,  -2.3583,  -9.8423,  -4.6077,
        -10.6084,  -7.8886,  -5.5241,  -7.4692,  -2.2751,  -3.6658,  -1.7725,
         -8.1744,  -4.5617,  -2.3130,  -0.9317,  -6.9587,  -3.4267],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0777,  -1.1850,  -2.4315,  -1.9257,  -3.3479,  -0.8931,  -6.9131,
         -2.2115,  -1.0172,  -2.0253,  -3.9421, -19.4177,  -3.7345,  -9.3398,
         -1.3984, -10.4254,   0.7609,  -6.4349,  -3.9353,  -5.3983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2245, -1.8213, -2.3606, -2.1225, -1.1331, -5.3764,  1.6061, -1.8231,
        -3.1612, -2.7458, -2.7709, -7.2067, -4.5615, -2.1385, -1.8662, -2.1572,
        -1.8863, -1.3096, -6.0361,  0.8381], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6537,  -5.0578, -15.2954,  -1.7732,  -4.1277,  -4.6236,  -4.1817,
         -1.4960,  -2.8945,  -5.7272,  -3.7841,  -1.0100,  -5.8600,  -2.9387,
         -0.5962,  -2.7891,  -3.4891,  -1.9025,  -1.8877,  -5.3065],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3580, -3.0926, -4.4275, -1.1553, -2.1696, -2.7596, -3.9205,  0.5323,
        -3.8285, -2.4946, -2.9641, -1.7117, -6.1449, -3.0206, -1.0378, -8.5682,
        -3.0151, -2.7101, -5.8513, -4.1996], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3198, -3.7733, -2.1489, -5.6607, -3.9191, -7.0243, -3.2133, -7.9591,
        -2.3336, -2.6955, -3.8403, -3.0155, -2.8122,  0.0354, -5.9691, -0.6294,
        -5.8843, -2.2270, -3.4558, -1.8229], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5518,  -2.7074,  -4.4722,  -3.3232,  -6.7152,  -2.3335,  -0.1165,
         -8.0359,  -1.5454,  -2.8014,  -1.0491,  -5.3916,   0.5295,  -0.9938,
         -3.1612, -22.4300,  -6.9209,  -7.0209,  -1.3913,  -4.4630],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0976, -3.6741, -1.0628, -5.7229, -2.1671, -4.9278, -1.8911, -6.5484,
        -4.5065, -1.5357, -4.4871, -2.7251, -1.8155, -3.1822, -2.6946,  0.3587,
        -2.7665, -1.3020, -2.9751, -2.3946], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8344, -1.8314, -1.9155, -2.7177, -3.3867,  0.8140, -1.0872, -1.7576,
        -4.6076, -1.7677, -5.5090,  0.6375, -4.2393, -1.8677, -2.7206, -1.7952,
        -5.7471, -4.7009, -1.2111, -9.1087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4106, -2.3944, -3.9330, -0.1258, -5.7260,  0.3459, -5.9273, -2.7011,
        -9.7219, -5.3359, -7.2935, -6.2631, -2.0984, -5.8825, -3.1020, -3.0058,
        -3.2304, -1.8423, -5.1264, -6.0027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8464, -3.5004, -2.5694, -0.5979, -5.2934, -1.8931, -2.3411, -2.5471,
        -1.4911, -2.6111, -2.7487, -3.0947,  0.0980, -3.2136, -1.3881, -2.3460,
        -1.3312, -6.4535, -4.1678, -1.2545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6406, -8.0676, -5.7903, -7.3147, -7.6443, -2.5344, -5.2466,  0.0421,
        -5.1085, -3.4136, -4.0323, -1.9314, -5.1161, -2.9944, -0.1611, -3.1330,
        -3.0123, -1.4957, -1.6025, -4.4893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7263, -15.6448,  -4.3161,  -4.1643,  -2.3627,  -5.0470,  -3.7354,
        -10.0497,  -3.9984, -17.6583,  -2.1450,  -9.2981,  -3.9820,  -5.3417,
         -0.9630, -14.4639,  -4.3659,  -2.5085,  -1.0651,  -5.8308],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5630, -2.5408, -6.6031, -2.7045, -0.2336, -1.7775, -1.6372, -1.7754,
        -6.3595, -2.8357,  0.5712, -4.4255, -1.3865, -1.8991, -4.0827, -3.1653,
        -0.1501, -3.6755, -1.3858, -1.7217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0010, -10.3755,  -8.1242,  -8.7758,  -6.3906,  -1.9868,  -5.2364,
         -0.0323,  -3.1877,  -2.6006,  -2.8089,  -6.5379,  -4.1601,   0.5419,
         -5.8984,  -3.2765,  -4.2497, -10.9774, -10.3201,  -7.7418],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2570, -2.9096, -3.1683,  0.7968, -2.3553, -2.2450, -1.8378, -3.3324,
        -5.2435,  0.7177, -4.4469, -3.1786, -3.5806, -5.9145, -4.4936, -0.3349,
        -5.8114, -3.6336, -4.6879, -3.5404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1217, -4.2792, -5.8739, -0.6536, -8.2374, -3.2828, -1.8917, -6.3543,
        -3.9137,  0.0227, -6.7088, -2.0455, -1.2769, -4.5631, -2.4505, -2.7164,
        -3.5354, -2.7917, -1.9584, -5.2403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7999, -5.7471, -3.6984,  0.3364, -6.2374, -4.5180, -1.1533, -0.5860,
        -6.8478, -4.8615, -2.8919, -2.7978, -1.6584, -1.4974, -1.3726, -4.9483,
         0.3052, -6.5262, -2.9405, -3.6967], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0156, -1.5203, -3.6609,  0.1335, -3.7056, -6.0096, -4.1440, -3.9781,
        -4.2991, -4.8922, -1.9953, -3.6835, -2.9812, -0.9366, -2.4566, -3.1160,
        -4.9717,  0.8071, -4.1982, -2.8770], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9924, -2.5955, -2.8089, -1.7047, -3.9687, -3.2192,  0.9485, -1.7128,
        -1.4224, -2.4567, -8.8698, -4.1514, -0.9799, -4.8289, -2.6922, -7.1660,
        -3.6758, -3.8247,  0.0813, -3.5961], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6242,  -2.3735,  -4.1304, -23.5097,  -5.3964,  -6.7152,  -1.6969,
         -2.8138,  -0.2061,  -4.2146,  -4.7217,  -5.2395,  -8.1466, -11.5086,
         -7.4684,  -1.6445,  -7.5419,   0.4547,  -8.4619,  -3.8094],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3969,  -0.9602,  -8.3368,  -1.7086,  -3.5734,  -5.9451,  -4.2855,
         -1.7464,  -1.5170,  -5.4392, -17.7803,  -5.3310,  -5.4259,  -7.5469,
         -7.1550,  -2.2553,  -6.2910,  -3.8322,  -6.5204,  -5.1756],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8245,  -5.2976,  -2.0726,  -0.4939,  -7.6496,  -1.6784, -12.5695,
         -6.5968,  -1.6893,  -4.5415,  -4.1801,  -2.2634,  -6.4122,  -5.0028,
         -9.1387,  -4.9607,  -8.5839,  -7.4530,  -3.2076,  -7.3810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8493, -3.0049, -3.0124, -5.6405, -4.3340, -5.2902, -5.8946, -3.9306,
        -2.6416, -5.4531, -4.2970, -0.2864, -1.8712, -2.3896, -1.8799, -2.3848,
        -4.2662,  1.5888, -4.7271, -2.2214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3514, -1.6811, -4.0962, -1.5565, -5.5866, -2.1517, -1.8144, -2.4506,
        -3.1779,  1.5206, -7.1237, -4.2507, -2.0289, -1.6203, -6.9022, -1.6361,
        -0.1000, -3.8061, -1.9323, -0.9267], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6829, -2.8467, -2.8655, -3.3321, -5.1544,  0.7482, -5.0829, -1.7586,
        -3.6640, -2.5929, -5.5527, -3.4777, -1.0537, -1.8618, -2.8376, -1.2345,
        -5.2042, -3.3249, -1.5308, -4.4378], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9874, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5856, -2.3087, -2.4260, -2.1385, -4.2168,  0.1653, -5.0959, -3.0996,
        -2.6148, -3.2139, -4.9662, -2.6742, -0.0739, -5.9376, -1.7593, -1.5021,
        -4.6050, -2.4980, -1.1449, -2.9152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7285, -3.8230, -1.8437, -6.0308, -2.1792, -1.5440, -3.4566, -1.2401,
        -3.2695, -0.5409, -4.2152, -1.4465, -3.8522, -3.8268, -3.0679, -1.7323,
        -6.8298, -2.4367, -2.7483, -1.9459], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2178, -3.1473, -1.2641, -5.8534, -2.2403, -0.9691, -3.1743, -3.0434,
        -2.8517, -5.2325, -3.0458,  0.5859, -4.7804, -3.2635, -2.7094, -2.0824,
        -4.0000,  0.3018, -3.9883, -1.9328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4836, -5.5334, -3.6435, -0.1073, -2.7883, -2.3014, -1.5556, -6.0830,
        -3.2959,  1.1310, -5.1846, -1.3236, -2.0321, -2.9708, -6.4503, -1.9768,
        -0.7133, -2.4846, -1.9408, -2.0827], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9303,  -6.8421,  -2.0565,  -0.1914,  -4.0192,  -1.3443,  -1.6152,
         -1.6849,  -3.3487,   1.3922,  -3.0923,  -1.9662,  -3.1584,  -3.3930,
         -3.7214,   1.5160,  -4.5263,  -5.3088, -19.1859,  -4.6534],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.4300,  -3.9468,  -5.0374,  -2.3673,   0.4498,  -6.4943,  -3.2792,
         -3.8419,  -2.6007,  -5.4119,  -2.9150,   0.4905,  -4.1766,  -2.9038,
         -2.9923, -16.9535,  -3.3784,   0.0702,  -5.7517,  -2.4305],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4968, -1.2926, -6.4900, -3.0772, -0.1674, -4.8002, -2.6960, -3.2245,
        -2.2656, -6.8184, -3.8970, -0.4798, -4.8966, -1.9684, -3.8254,  0.0995,
        -7.5231, -1.5042, -0.9830, -4.5995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4194, -4.3607, -3.8640, -6.3583, -1.6696, -7.8958, -3.2017, -3.8802,
        -2.3058, -7.6251, -2.1815, -0.7013, -9.4808, -2.4767, -2.4950, -3.2811,
        -4.8541, -2.3344,  0.4979, -4.3615], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2328,  -0.5977,  -5.1254,  -5.5183,  -2.7050,  -6.2086,  -8.1082,
         -5.2214,  -2.9033,  -6.2438,  -3.1924,  -2.6623,  -5.3775,  -4.4625,
         -4.9489, -11.1439,  -3.6776, -24.9124,  -9.5276, -10.7810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2853, -6.9931,  0.1256, -3.5269, -3.8751, -2.7620, -2.9023, -4.7179,
        -0.7472, -7.4205, -2.6652, -1.9014, -2.8926, -4.5254, -2.7061,  0.7041,
        -2.9977, -3.1619, -2.0352, -3.2049], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3956,  -3.8539,  -5.5947, -10.3195,  -7.8879, -11.1152,  -6.8612,
         -6.3498,  -5.0451,   0.2126,  -5.2613,  -4.2058,  -3.9180,  -4.3148,
         -4.2355,   0.1492,  -1.3948,  -3.4043, -10.9678,  -5.2241],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5173,  -6.3293,  -5.3575,  -1.5053,  -3.9523,  -2.7197, -10.9657,
         -5.7844,  -4.6942,  -5.9101,  -2.0304,  -5.5058,  -3.6729,   0.0666,
         -2.3703,  -1.7961,  -2.6552,  -3.4113,  -4.2611,  -4.7180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6994, -3.4324, -6.8577, -3.2457, -5.1076, -2.3252, -7.2060, -2.2873,
        -2.0732, -2.6581, -2.9685,  1.2816, -2.1068, -1.7447, -2.6451, -1.6824,
        -5.9745, -1.6965, -1.1227, -3.6060], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6632, -8.0450, -5.9764, -2.1578, -5.2346, -0.5686, -9.8510, -2.7625,
        -2.6968, -1.1189, -5.7796, -2.3426, -0.5103, -2.6116, -1.9529, -2.2654,
        -4.0948, -4.2017, -0.0849, -3.1975], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7977,  -2.5753,  -2.7134,  -2.7285,  -4.4297,  -3.5292,   1.0088,
         -1.6850,  -3.5036, -20.6028,  -1.6821,  -7.7471,  -2.7110,  -4.4978,
          0.2132,  -5.8772,  -3.5080,  -2.8811,  -6.6061,  -4.3482],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6918,  -2.6529,  -3.4019,  -2.0268,  -4.4561,  -2.9969,  -1.9216,
        -15.7856,  -2.2308,  -2.5659,  -3.4912,  -5.3479,  -1.5553,  -9.4327,
         -1.4767,  -3.2903,  -2.9387,  -4.9025,  -3.0535,  -0.5138],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8866, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7811, -4.2789, -1.3153, -2.9341, -3.4357, -2.9257,  0.3903, -3.9777,
        -0.8092, -3.1642, -2.7840, -6.1869, -1.3621, -2.1742, -6.5046, -2.9440,
        -2.5738, -3.6430, -6.8871, -4.7075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3029, -1.8218, -3.6568, -1.4708, -4.4481,  0.0169, -1.8366, -2.3464,
        -4.5758, -6.6413, -3.2540, -2.0169, -5.4267, -3.8782, -1.8871, -1.4657,
        -4.7034, -0.2418, -3.8957, -2.1678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3310, -6.2702, -3.2485, -5.0330, -3.7206, -4.1119, -3.5187, -2.0415,
        -4.7891, -4.2694,  0.2024, -3.6776, -2.0781, -7.1854, -2.3595, -7.7263,
        -4.7321, -1.4455, -4.4977, -3.1125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6580, -0.6192, -3.6052, -2.0316, -2.7046, -1.7247, -7.0727, -2.6598,
        -2.6375, -4.7848, -1.9067, -2.6282, -1.0249, -6.9253, -1.1945, -0.6340,
        -4.3317, -1.4420, -2.1837, -2.5372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3667,  -2.8197,  -3.2925, -15.4117,  -1.4218,  -1.6472,  -3.3307,
        -10.0779,  -6.3125,  -6.6967,  -5.1870,  -7.8722,  -5.5765,  -7.1583,
         -2.3467, -11.6929,  -3.4333,  -2.8044,  -6.5374,  -3.8633],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1721, -6.8586, -6.1273, -4.8384, -7.5781, -1.8771, -4.8945, -3.9618,
        -5.7148, -4.5653, -6.2330, -4.9639, -2.5286, -6.1801, -4.2456, -2.3743,
        -5.2873, -7.7552, -0.4350, -8.4585], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5108, -0.2552, -4.5259, -1.9508, -2.2986, -4.5109, -3.7550,  0.1111,
        -2.8503, -3.2667, -1.0030, -3.6973, -3.9370, -0.7865, -4.3251, -1.7402,
        -4.0572, -3.5749, -5.1367, -3.4612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2449, -3.7667, -2.8581, -2.1230, -3.8382, -3.4509,  1.0039, -6.0439,
        -1.4880, -3.4187, -2.4280, -2.3228,  1.1161, -3.1999, -1.6011, -3.5545,
        -0.9487, -6.4111, -2.3983, -0.9017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5903, -8.7247, -3.4870, -2.1385, -3.2378, -4.3944, -1.9853, -2.7284,
        -4.3444, -3.2335, -3.6872, -6.2194, -6.1221, -4.3608, -1.8282, -2.9165,
        -3.6394, -2.9188, -0.7420, -5.9411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7663,  -4.9105,  -2.7729,   1.4446,  -3.4975,  -1.5495,  -2.8359,
         -1.4362,  -6.8338,  -2.2312,  -0.5278,  -3.1976,  -1.1929,  -1.9736,
         -2.1105,  -3.6306,   0.5261,  -2.1177,  -2.2809, -14.1817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6525,  -3.1364,  -7.5784,  -2.0580, -31.6282,  -4.8312,  -7.3238,
         -2.5857,  -1.9725,   1.4386,  -3.6114,  -2.4851,  -1.4984,  -4.9046,
         -4.9289,  -1.3438,  -6.0721,  -3.7195,  -5.3267,  -9.2397],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3729, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4220, -1.7607, -3.2067, -3.2757, -5.4127, -4.5256, -6.1007, -5.4927,
        -2.7078, -2.4495, -2.0231, -2.0800, -1.1850, -6.9058, -1.6790, -1.2093,
        -2.1462, -2.0205, -1.9017, -2.6399], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1857, -4.1257,  0.5446, -5.6908, -2.6432, -3.8242, -2.7055, -7.1998,
        -3.4802, -1.4067, -2.2258, -2.4214, -1.7350, -4.7227, -3.2825,  1.2406,
        -4.5715, -1.7246, -2.4606, -2.4760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4539, -3.6829, -6.1074, -3.6772, -1.6751, -4.2008, -3.5600, -2.5971,
        -5.3282, -3.5163,  0.9527, -2.5342, -1.8588, -3.0625, -4.8073, -3.0817,
         1.1339, -3.7784, -1.7130, -2.2835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6187, -1.6161, -1.9828, -1.0750, -5.8077, -1.1955, -3.2526, -2.7935,
        -4.6806, -0.5151, -6.7319, -2.3922, -0.4884, -6.0623, -1.9919, -1.6816,
        -1.7834, -5.5942, -1.6828, -0.7498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8546,  -9.2403, -15.0664, -22.7601,  -3.9963, -10.3955,  -6.3974,
         -7.9806,  -3.5630,  -4.8915,  -2.9884,  -4.4548,  -4.1099,  -3.7736,
         -5.5278,  -4.3116,  -0.4345,  -3.3097,  -3.3575,  -1.3287],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6805, -2.5757, -6.4465, -2.0500, -0.0798, -5.3057, -3.3021, -4.4169,
        -3.6536, -5.3441, -1.8127, -0.4675, -3.1286, -2.1403, -5.6039, -3.9035,
        -6.5373, -3.5940, -2.4774, -2.5439], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4439,  -0.6565,  -1.9972,  -2.6287,  -1.3614,  -4.0002,  -3.7459,
          0.2097,  -2.5695,  -2.7985, -15.6169,  -4.2146,  -8.1004,  -0.7351,
         -6.1916,  -2.6406, -14.2190,  -2.8743,  -3.8639,  -9.0501],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4052, -2.9987, -2.3435, -4.6847, -4.4432,  0.4936, -3.4095, -3.0791,
        -3.5058, -1.3002, -5.2265, -3.5218,  0.4592, -2.5680, -2.1574, -2.7133,
        -1.1131, -4.2696, -7.7553, -3.7954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.0326,  -0.9498,  -2.0764,  -2.6374,  -8.6135,  -5.4102,  -6.9457,
         -7.4233,  -9.0034,  -4.4669,  -1.6291,  -5.3066,  -3.1051,  -0.8751,
         -4.0759,  -3.0280,  -3.2213,  -6.9538,  -5.9258,  -2.2403],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.7047,  -9.5598,  -2.1022,  -3.3228,  -6.6596,  -3.9001,  -0.2669,
         -8.3739,  -6.7773, -17.9465,  -7.0558,  -8.0823,  -6.9022,  -5.7778,
         -2.1851,  -5.5879,  -2.1706,  -8.5507,  -3.9425,  -2.2900],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0826, -1.1889, -6.1913, -1.3651, -1.5211, -4.3455, -1.8833, -1.9398,
        -4.0965, -3.2589, -0.1707, -3.0313, -3.0870, -2.7627, -2.2792, -4.8949,
         0.3482, -3.9564, -1.7213, -3.7679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1333, -4.8631, -3.5165,  1.0846, -1.9971, -1.7667, -3.8951, -1.0346,
        -6.6640, -1.8983, -0.6695, -4.6251, -3.7664, -2.3788, -0.9144, -5.8426,
        -2.0813, -0.5305, -2.8284, -2.4849], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1331,  -2.7184,  -0.0543,  -2.7173,  -2.8622,  -2.5457,  -5.5946,
         -3.6016,  -1.4276,  -3.9560,  -4.2403,  -7.7161,  -8.2459,  -5.5058,
         -6.3009,  -5.1009,  -5.1088,  -0.9179, -30.8274,  -4.1978],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1492e+00, -1.5379e+00, -5.3176e+00, -2.3371e+00, -1.6268e+00,
        -8.4961e-01, -5.7930e+00, -5.1440e-01, -6.2061e+00, -2.7748e+00,
        -2.0412e+01, -4.2457e+00, -7.9258e+00, -2.1373e+00, -3.4922e+00,
        -7.5899e-03, -5.3762e+00, -2.5100e+00, -2.3368e+00, -3.2933e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1798, -19.4376,  -6.2435,  -8.7820,  -6.8669,  -2.7576,  -4.3291,
          0.1487, -21.0442,  -1.3787,  -4.0557,  -3.9834,  -5.2644,  -2.4819,
        -16.6046,  -6.5664,  -8.2258,  -5.2762,  -7.7960,  -5.1270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8562, -5.6498, -5.5597, -1.6651, -3.1757, -3.2467, -3.5325, -4.0521,
        -6.6995, -2.3950, -3.8057, -2.8094, -3.4185, -3.0942, -5.3913, -0.7842,
        -2.2928, -5.0610, -5.6405, -3.1581], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1787, -3.3143, -1.4297, -3.2950, -3.2051,  1.0441, -3.1709, -4.2776,
        -3.7563, -4.0799, -7.4816, -4.9271, -2.8901, -7.1022, -4.2896, -1.9471,
        -2.1619, -6.2797, -4.4688, -1.6229], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2393, -1.2784, -7.8815, -5.5524, -0.9695, -4.3930, -2.2694, -3.9216,
        -0.7297, -4.4620,  0.6866, -3.9705, -2.7454, -1.9437, -2.3207, -5.3489,
        -2.4468, -1.0274, -3.5207, -3.4918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1435, -1.5217, -2.9251, -3.9476, -4.3743, -0.1965, -2.5832, -1.7583,
        -3.4144, -4.2828, -3.3597,  1.0742, -2.1446, -3.1495, -3.4184, -7.0464,
        -4.3269, -3.1513, -5.0447, -1.8389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9411,  -5.4644,  -2.5031,   0.2555,  -7.4636,  -2.2542,  -4.3979,
         -1.0034,  -6.3633,  -1.8783,  -1.5136,  -9.8908,  -2.1138, -11.3594,
        -10.9732,  -3.9281,  -4.0516,  -2.8970, -13.9726,  -7.4652],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0090, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0483,  -7.6213,  -2.1760, -12.4897,  -1.8325,  -3.6248,  -5.8517,
         -2.0314,  -3.4746,  -1.5364,  -5.9191,  -2.4988,  -0.6022,  -2.6855,
         -3.3621,  -2.2173,  -4.7518,  -4.6525,   0.8259,  -6.2093],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0073, -2.3936, -3.2093, -2.6803, -6.1308, -3.4933, -0.5834, -2.5910,
        -1.9489, -2.3359, -2.8470, -4.1290,  1.1532, -3.3558, -1.6455, -2.8832,
        -6.0744, -2.5868,  0.9859, -2.6091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6991, -3.4469, -4.6357, -3.3743, -2.4685, -5.3450, -2.8719, -0.3441,
        -5.3479, -1.3621, -2.8030, -5.6681, -2.4655,  0.8269, -3.5653, -2.0371,
        -2.7532, -4.1856, -6.3514, -1.9454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3275,  -6.0143,  -2.0131,  -4.5616,  -2.5223,  -4.9682,  -2.2610,
         -3.0216,  -6.3411,  -3.2719,  -1.4391,  -3.1833,  -3.8793, -16.8622,
         -7.9436,  -9.1853,  -8.6918,  -7.1842,  -2.8253,  -4.7115],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6850,  -4.9750,  -0.4083, -11.6272,  -4.5189,  -2.4171,  -5.8712,
         -4.3417,  -1.7648,  -3.0717,  -3.2719,  -7.7343,  -5.9222,  -6.7277,
         -7.3593,  -2.8392,  -5.8972,  -1.8100,  -3.1894,  -2.3783],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5422, -2.9268, -1.8638, -4.2790, -3.5708, -2.8029, -4.1690, -2.3510,
        -3.7513, -4.6016, -4.7519, -3.0090, -2.6639, -6.2640, -3.3120, -4.7511,
        -3.7490, -3.6537, -6.9405, -3.6442], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5470,  -3.6759,  -5.0818,  -2.5835,  -5.1560, -18.5331,  -4.9080,
         -4.0864,  -6.3573,  -3.5599,  -2.9884,   0.9979,  -6.4046,  -5.9415,
        -21.9823,  -6.4584,  -9.5537,  -6.8003,  -5.6979,  -4.7012],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8448,  -3.2171, -17.7128,  -4.7141,  -8.3376,  -2.8653,  -4.9381,
          0.6365, -21.1782,  -2.9820,  -2.9677, -11.2362,  -5.3581, -23.5308,
         -3.4944, -27.2958,  -7.6500,  -5.8452,  -6.1502,  -7.4954],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9174, -2.8801, -3.5876, -9.4979, -2.5504,  1.8323, -3.3563, -2.0183,
        -2.7441, -3.0522, -7.1088, -4.2812, -2.0022, -3.5404, -3.0614, -1.0521,
        -4.1680, -3.3890,  1.0501, -1.5962], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5913,  -1.9079, -19.9717,  -2.2315, -10.7813,  -4.4980,  -5.4085,
         -2.2320,  -6.2040,  -2.8172,  -3.2640,  -1.6456,  -5.2890,  -2.3685,
          0.5915,  -3.7807,  -4.3268,  -5.0109,  -1.0753,  -5.1557],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.8500,  -0.3998, -10.8792,  -6.1610,  -5.4279,  -8.9452, -16.8230,
         -2.8325,  -7.2231, -12.5850, -13.7700, -11.0559,  -5.7958,  -8.8164,
        -10.4292,  -5.5721,  -5.7337,  -7.8031,  -6.6892,  -3.7565],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8851, -3.4928,  0.3724, -3.5721, -3.0304, -3.3111, -2.9338, -6.1543,
        -3.3386, -0.4333, -1.9933, -2.0110, -3.3534, -3.2901, -2.6532,  0.4129,
        -2.1274, -3.6053, -2.5889, -5.5483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7428,  0.3848, -7.5441, -2.7478, -2.9761, -4.2900, -3.3506,  1.3386,
        -3.7220, -2.3482, -2.3108, -1.9639, -6.5787, -1.6370, -0.5343, -2.4042,
        -1.4819, -1.1815, -3.5269, -2.9549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2549,  -7.3410,  -7.1849,  -3.9065,  -6.5908,  -4.3174,  -5.0226,
          0.8179, -16.1572,  -3.2844,  -2.3281,  -4.5725,  -4.1923,  -4.5985,
         -4.0833,  -6.7367, -25.8156,  -4.4102,  -7.6323,  -7.5586],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0207,  -2.3804,  -5.5294,   0.5779,  -1.7700,  -5.5777, -18.0992,
        -20.0745,  -7.5864,  -9.3100,  -5.8922,  -8.3004,  -2.1241,  -5.4114,
        -10.5183, -10.6912,  -3.4963,  -2.6578,  -2.0055,  -5.1220],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1777,  -5.4338,  -7.2637,  -2.1812,  -9.3003,   0.9498,  -9.0884,
         -2.6340,  -2.4640,  -3.5397,  -5.6572,  -2.4602,  -1.7196,  -2.6930,
         -2.3152,  -2.5051,  -5.5760,  -3.2714,  -2.0658,  -3.1950],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7431, -2.8362, -2.4596, -3.5035, -5.0398, -4.8916, -1.8652, -4.2578,
        -3.1696, -2.0846, -6.2977, -3.6966,  0.7420, -4.7649, -1.7157, -2.3900,
        -4.4101, -2.9376,  1.5824, -2.8029], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4546, -2.6486, -4.0141, -2.6328, -0.3799, -3.4746, -1.7558, -3.3009,
        -1.1223, -6.3251, -1.8273, -1.2048, -3.8638, -0.9953, -1.7598, -2.6533,
        -2.6316,  0.7049, -3.5409, -5.0148], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6161, -2.6353, -2.2749, -2.9184,  1.6674, -2.6869, -4.7083, -2.4496,
        -4.1692, -4.9431, -4.4850, -5.7673, -5.5705, -7.4136, -3.1356, -2.1930,
        -4.8835, -2.6380, -0.1779, -6.1392], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4983, -4.1608, -0.9849, -5.1097, -2.6650, -3.2685, -4.5195, -4.6809,
        -1.5670, -5.8664, -3.6627, -1.7270, -1.4460, -7.9632, -5.2159, -1.7818,
        -6.5184, -1.7687, -1.9790, -1.8526], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5095, -16.0637,  -3.4954, -10.7043,  -1.4245,  -6.5267,  -0.9364,
         -8.8738,  -4.8528,  -8.7348, -10.7915,  -4.5766,  -7.5801,  -3.0821,
         -4.8510,  -6.3292, -10.2542,  -3.7514,  -2.0038,  -4.1212],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7494, -4.0161, -2.7248,  0.5966, -3.6816, -0.7654, -2.5878, -4.8478,
        -2.6319,  0.8906, -2.0623, -1.1843, -1.8289, -3.3772, -2.6613,  1.5509,
        -2.7395, -1.8043, -1.3621, -5.5814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3052, -3.9994, -7.1680, -1.5532, -5.2836, -0.9625, -2.2919, -3.2135,
        -3.8403, -4.5185, -4.8726, -0.5719, -3.7811, -1.9644, -3.4295, -4.8131,
        -6.0886, -0.4581, -4.1002, -3.0735], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7282, -1.6286, -5.5732, -2.2338, -1.2441, -4.8386, -1.8377, -2.6984,
        -1.7611, -3.9630,  0.5889, -2.7729, -4.6406, -9.6282, -7.0396, -4.7031,
        -6.4047, -2.3554, -5.8553, -3.8151], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0089, -3.4060,  0.0240, -6.6498, -2.2115, -0.6652, -9.2508, -1.9555,
        -3.5078, -3.3610, -7.1273, -1.3799, -3.1963, -1.9987, -2.5248, -5.4769,
        -3.7103,  0.3961, -2.5451, -2.2049], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4254, -1.4769, -2.5399, -1.8120, -2.1602, -6.9350, -1.5056, -2.2589,
        -2.5825, -3.3173, -0.8840, -2.6730, -3.4551,  1.1347, -5.2443, -3.2636,
        -3.1510, -3.1487, -3.8647,  0.2784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0363, -2.1538, -5.7158, -0.0350, -3.4731, -2.5431, -3.6732, -1.3318,
        -4.6880, -4.4738, -0.7850, -1.5063, -3.0168, -5.2530, -2.5897, -6.1140,
        -2.7895,  0.7802, -3.6144, -1.9114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0145, -5.9392, -3.3368, -1.3810, -3.7977, -6.1773, -1.7314, -2.1179,
        -4.5663, -1.0746, -4.8860, -3.3298, -5.8892, -4.6479, -0.3872, -3.3720,
        -3.2918, -2.2774, -2.2100, -3.0236], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1139,  1.0992, -8.4581, -2.9376, -2.5515, -1.8462, -5.0253, -1.5862,
         0.1414, -4.9905, -0.9891, -2.4553, -1.9894, -5.6599, -1.2230, -4.3443,
        -1.9833, -2.5317, -2.3486, -6.6183], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4590, -2.8598, -2.3729, -3.5531, -3.8109, -1.6640, -4.0805, -3.8301,
        -2.3423, -4.1273, -4.3034,  1.0407, -4.0209, -1.7970, -2.3222, -1.5559,
        -5.3227, -3.1004,  0.2316, -4.5941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9531, -3.1684, -0.2618, -2.7427, -1.9860, -1.5720, -1.3246, -3.7114,
         0.1678, -4.1018, -1.8156, -2.1344, -2.9117, -5.6463, -3.1870, -0.1230,
        -4.7625, -1.4184, -3.9182, -4.2800], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0297,   1.1724,  -5.3585,  -2.4986,  -2.0643,  -3.9197,  -4.3948,
         -3.0926,  -6.3217,  -5.1322, -38.1845,  -5.4379,  -8.0246,  -7.2899,
         -7.7860,  -2.0555,  -5.5208,  -2.2271,   0.6398,  -5.7526],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9002, -3.6805,  0.9474, -6.4411, -2.6422, -3.8443, -1.2519, -4.3421,
        -1.3141, -2.7747, -2.7264, -2.5978, -5.6362, -1.7519,  0.0533, -3.0433,
        -1.3301, -3.3045, -1.7673, -5.7389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9932,  -5.9000,  -1.5507,  -2.3797,  -1.9588,  -2.4170,   1.6717,
         -6.3925,  -1.8825,  -1.5447,  -5.9254,  -4.2784,   0.5948,  -1.8690,
         -3.8614, -13.2621,  -6.4131,  -5.8501,  -6.9314,  -5.2298],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8010,  -6.2630,  -3.6707,  -0.5789,  -2.4622,  -3.4320, -25.6589,
         -4.6125,  -8.9261,  -5.4227,  -7.6195,  -1.8572,  -5.3617,   0.0758,
         -5.4331,  -3.1779,  -1.8145,  -1.9971,  -5.5088,   0.0673],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6132, -6.8777, -3.4297, -0.5402, -3.2260, -2.1018, -2.2897, -4.9411,
        -3.8740, -0.7175, -2.0086, -1.0582, -2.3571, -5.0166, -2.6536,  1.1890,
        -2.4234, -2.4457, -1.9300, -6.6937], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5419,   1.3672,  -3.1136,  -1.6850,  -3.0765,  -3.7650,  -4.3804,
         -0.3767, -11.9544,  -1.8562,  -2.2852,  -4.3231,  -4.1584,  -1.0316,
         -1.6067,  -3.3684,  -2.4410,  -3.3998,  -3.7344,  -3.5319],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.5678e-01, -1.8342e+00, -2.9468e+00, -1.5224e+00, -4.3107e+00,
        -3.9226e+00, -2.2612e-03, -2.5167e+00, -3.4674e+00, -1.0339e+01,
        -7.1540e+00, -3.5899e+00, -7.7020e+00, -1.9942e+00, -4.8935e+00,
        -6.5027e-01, -6.2982e+00, -3.3710e+00, -2.1770e+00, -9.5759e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0631, -3.1974, -4.2232, -4.3232,  0.5354, -5.0317, -1.5783, -3.8069,
        -0.3044, -5.8565, -3.4878,  0.0545, -2.7605, -2.5940, -2.6784, -4.8697,
        -2.8720,  1.3435, -2.6926, -2.1895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9032,  0.6796, -3.5955, -3.7661, -3.7400, -2.0400, -5.2914, -3.2842,
         0.6032, -4.0078, -1.6934, -2.0823, -2.9062, -4.3583,  1.2834, -3.7904,
        -1.9918, -3.8952, -1.4038, -6.2811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6904,  -4.5361,  -4.1609,  -5.2766,  -6.2703,  -3.3909,  -2.3928,
         -4.5336,  -2.4652,  -4.8770,  -5.5989,  -6.0451,  -0.8965,  -3.7529,
         -4.3304,  -4.8685, -11.6932,  -4.0156,  -0.8643,  -3.2443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2205,  -2.1132,  -4.9181,  -2.4823,   0.1261,  -1.1697,  -2.1572,
         -2.5533,  -5.1858,  -3.4390,   1.2967,  -2.0440,  -2.3105,  -1.4872,
         -6.1202,  -3.5771,  -6.5073,  -8.4069,  -3.9880, -17.8832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9954,  -9.7611,  -3.9028,  -3.8331,  -5.5065,   0.5322,  -3.9295,
         -2.3957,  -2.3337,  -1.6253,  -4.8009,  -2.1992,  -9.5726,  -3.5993,
         -0.7142,  -6.0903,  -3.6702,  -4.7007, -13.5315,  -7.7214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7287,  -7.7673,  -1.9200,  -1.0343,  -2.0495,  -3.0075,  -3.6010,
         -1.3096,  -5.3049,  -3.4445,  -0.2415, -17.9706,  -1.7335,  -5.6825,
         -8.8698,  -4.4479, -17.7492,  -9.3810, -35.2127,  -7.8754],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8163, -2.8664,  0.1424, -2.6693, -1.7515, -3.3020, -1.4507, -6.7049,
        -2.0737, -0.1172, -3.8059, -2.3315, -2.2384, -3.2366, -3.1298,  1.4320,
        -4.2626, -2.3945, -3.0933, -1.0401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3752,  -2.8381,  -2.6978, -12.2012,  -5.8393,  -4.7898,  -6.3664,
         -2.7969,  -4.3409,   0.5135, -13.4746,  -2.7905,  -0.8933,  -3.6870,
         -4.2099,   1.0188,  -6.2140,  -2.5115, -25.3268,  -4.0877],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5476, -6.2024,  0.3942, -2.1731, -1.6598, -2.4128, -4.5810, -2.9270,
         0.1235, -2.1372, -2.5966, -2.3048, -3.6906, -3.3356,  0.9646, -3.8012,
        -2.0974, -3.8059, -0.3877, -7.0323], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1815, -2.1421, -5.6836, -2.6542, -2.7766, -1.6187, -3.0616, -2.2345,
        -1.9953, -3.7883,  0.8996, -1.7340, -1.6522, -2.1193, -2.8462, -3.8376,
         1.7234, -6.1675, -2.0196, -2.0281], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9809, -2.6011, -6.2519,  0.9800, -7.0916, -2.9761, -3.1354, -1.0270,
        -5.3392, -2.7100,  1.0315, -3.8259, -2.3917, -3.8843, -4.7306, -2.2791,
        -0.3121, -3.5965, -2.4868, -3.0221], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0815, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1279,  -4.1042,  -5.6880,   0.0639,  -3.2802,  -3.1428,  -2.5770,
         -1.8463,  -5.2294,   0.3377,  -2.8963,  -2.4924, -17.0057,  -6.1663,
         -6.6245,  -5.8677,  -1.2467,  -6.5602,  -1.9221,  -6.5023],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0814, -3.7058, -3.6199, -6.3723, -4.5112, -1.8839, -3.4892, -2.8564,
        -2.5629, -4.3579, -3.4306, -0.8088, -3.2232, -2.4266, -2.5943, -5.0559,
        -4.6266,  0.2840, -2.5479, -3.4271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1960,  -2.1875,  -1.3194,  -2.9797,  -3.5284, -15.1450, -16.6601,
         -7.6044,  -2.6664,  -2.5174,   1.3001,  -3.8901,  -2.0810,  -1.5981,
         -4.5018,  -4.1770,  -3.0132,  -1.0198,  -2.9347,  -2.9988],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3564,  -4.1752,  -3.7744, -26.2711,  -6.0755, -18.2272, -11.8944,
         -3.8568,  -7.1578,  -2.8585,  -0.0401,  -8.8627,  -2.3761,  -3.5697,
         -4.2392,  -3.8189,   0.3425,  -4.2748,  -2.6326,  -3.2373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1334,  -4.7158,  -1.5361,  -2.4204,  -0.5336,  -4.6260,  -0.0999,
         -2.6979,  -4.7632,  -3.4179,  -4.9324,  -4.4852,   0.0966,  -9.1651,
         -4.1114,  -3.2489,  -6.2733,  -5.1166, -24.4818, -24.9908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9912, -8.0708, -3.2916, -7.0950, -5.7057, -3.8721, -2.9438, -3.3390,
        -3.2792, -4.6172, -4.8328, -4.4763, -0.7725, -5.3118, -2.8389, -7.5427,
        -8.3668, -5.4598, -7.6308, -5.4141], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3033, -0.3749, -4.0682, -1.9045, -2.2574, -1.3906, -5.9758, -3.2084,
        -1.8733, -4.2315, -1.8839, -1.6043, -3.1604, -3.1758, -0.1276, -3.0235,
        -1.6616, -2.9523, -0.3857, -6.0371], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5800, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1593,  -2.9732,  -2.3265,  -5.8046,  -3.0662,  -1.5826,  -1.9872,
         -2.6048,  -3.3439,  -4.9100,  -3.7936,   0.2355,  -3.1977,  -1.8910,
        -15.2221,  -5.5827,  -8.0872,  -7.4218,  -6.7117,  -2.4018],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1813,  -6.0729,  -9.6082,  -4.9908,  -6.5792,  -0.8175,  -7.7996,
         -0.1624, -17.1177,  -3.1685,  -2.3830, -12.2483,  -4.9777,  -3.3371,
         -5.8157,  -0.8399,  -3.6657,  -5.2286,  -3.8210,  -0.8960],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7854, -3.0996, -4.3215, -2.5276,  0.1151, -2.9544, -0.9814, -2.2926,
        -1.3142, -3.2008,  1.2541, -3.2961, -3.3004, -3.9509, -0.6486, -5.7699,
        -2.4784,  0.7377, -4.9169, -2.9536], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1146,  -2.2337,  -3.4136,  -3.2571,  -5.4379,  -2.5372,   0.6009,
        -12.3027,  -2.9991,  -2.8115,  -6.1191,  -3.7560,  -0.3175,  -5.2685,
         -3.9767,  -9.4586,  -7.3802,  -2.2343,  -5.8722,  -2.3867],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3647,  -3.6669,  -1.4431,  -2.2140,  -3.3533,  -6.3287,  -1.7733,
         -0.0469,  -2.7436,  -1.9521,  -3.0158,  -5.6506,  -4.0342,  -3.0898,
         -3.0808,  -3.8337, -11.2459,  -5.1464,  -7.5401,  -5.6274],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4705,  0.1592, -3.6745, -3.6778, -4.1080, -5.1510, -6.5807, -1.2307,
        -2.5584, -2.1177, -3.0230, -3.9591, -4.5886, -0.1922, -3.3589, -1.3488,
        -2.9801, -0.4567, -5.9690, -2.3873], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9837, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4653,  -2.1185,  -2.1878,  -0.4980,  -5.6868,  -1.6413,  -0.4207,
         -6.5812,  -3.0589,  -2.5879,  -3.3797,  -4.3332,  -2.1230,   0.4025,
         -8.2246,  -3.9480,  -3.3233,  -4.5590,  -4.1453,   0.0153],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9656,   0.6361, -22.8644,  -2.7486,  -3.7079,  -2.4669,  -5.8463,
         -4.5017,  -0.9982,  -0.5464,  -3.4628,  -0.7386,  -5.8933,  -3.5199,
         -4.3186,  -5.5861,  -4.3016, -18.1955,  -5.9623,  -7.3485],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7659,  -4.3427,  -3.7322,  -2.7880,  -6.1475,  -4.1660,  -0.0516,
         -2.5601,  -2.2265,  -9.6138,  -5.6202,  -6.3801,  -8.0276,  -8.2368,
         -5.5920, -10.4283,  -0.9714,  -0.4568,  -3.3438,  -0.9992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7103, -1.8822, -2.9601, -2.4214, -4.3909, -3.4429,  0.8376, -2.0333,
        -2.0695, -1.3299, -2.3057, -6.6039, -2.2174, -0.7767, -2.6458, -1.8245,
        -2.4381, -5.7611, -2.6348,  1.1064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2774e+00, -2.1757e+00, -1.7224e+00, -4.4430e+00, -5.0142e+00,
        -3.3851e-03, -2.9434e+00, -4.0400e+00, -3.3106e+00, -1.8501e+00,
        -6.1692e+00, -3.5987e+00, -9.5109e-01, -3.2916e+00, -2.8050e+00,
        -2.5919e+00, -3.2742e+00, -4.0075e+00,  6.3877e-01, -6.6950e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5997, -0.1546, -3.8879, -2.2312, -3.9861, -2.7283, -5.6261, -0.8809,
        -4.6646, -3.2178, -3.1473, -1.2641, -5.8534, -2.2403, -0.9691, -3.1743,
        -3.0434, -2.8517, -5.2325, -3.0458], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3793, -2.8344, -1.4258, -3.6006, -1.9595, -4.0346, -1.1959, -2.2913,
        -2.2748, -4.8004, -4.2164, -5.8525, -3.1307, -7.1598, -4.4152, -1.6828,
        -4.9134, -4.1378,  0.6281, -4.6630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8164, -1.5426, -1.8390, -2.1670, -3.1555, -2.5711, -0.3247, -4.3691,
        -1.6809, -3.2041, -0.9198, -6.9790, -2.3509, -0.3397, -4.5653, -1.7015,
        -2.4864, -1.8787, -4.9069,  0.8247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8279, -2.4218, -1.3916, -2.1041, -2.4774, -3.2225,  0.8783, -4.5450,
        -1.5642, -2.8448, -1.7271, -5.4195, -2.0874, -1.1298, -4.2025, -3.2489,
        -2.6003, -5.4264, -4.0485, -0.7347], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-39.4948,  -4.7387,  -4.6947,  -4.7313,  -5.2360,  -3.9155,  -9.8594,
         -3.5084, -13.0525,  -1.2805,  -2.9479,  -7.5193,  -2.3878,  -3.2448,
         -5.3224,  -3.6760,   1.0809, -11.0144,  -3.1677,  -1.0994],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9315,  -1.3920,  -1.3457,  -4.4369,   1.0855,  -4.4730,  -2.1967,
         -2.8330,  -6.4795,  -4.8920,   0.4072, -12.2546,  -4.7657,  -1.7535,
         -2.5446,  -3.1622,  -0.3671,  -3.8421,  -3.3572, -12.2458],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5850, -2.7817, -2.5439, -3.7581, -4.0602,  0.9054, -2.6952, -2.9484,
        -2.3676, -2.9111, -6.9127, -5.2337, -2.8359, -8.3504, -3.1577, -1.1491,
        -4.4547, -2.3903,  0.8153, -5.0775], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4810,  -5.7287, -13.5281, -16.3542,  -3.5185,  -6.0545,  -6.9360,
         -7.7334,  -2.8131, -10.4878,  -0.1885,  -2.6440,  -5.0576,  -2.2320,
         -3.9515,  -9.9553,  -3.6755, -19.1609,  -4.3769,  -3.4130],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8454, -2.4675, -3.9431, -6.5280, -3.5992, -0.1253, -4.5125, -1.9026,
        -2.0401, -0.8277, -3.7021,  0.1181, -4.0412, -4.7353, -2.9255, -1.2410,
        -5.4900, -1.8387, -0.9043, -2.8834], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5704, -2.9602, -7.0867, -2.0137, -2.2719, -3.5472, -4.0386,  1.9546,
        -3.9278, -1.5909, -3.4379, -1.2573, -4.6881, -2.0185, -0.8772, -2.8454,
        -1.9017, -3.0966, -4.7079, -3.1238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7440, -2.6842, -1.8309, -5.6989, -4.4826, -0.9898, -9.1605, -2.8080,
        -3.0468, -1.3146, -4.5532,  0.2712, -1.8739, -2.1591, -2.5479, -3.2683,
        -6.1726, -4.8250, -0.4905, -5.9017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8258, -7.5164, -5.6584, -7.3074, -5.0230, -5.6251, -3.4951, -4.5233,
        -3.3580, -5.0152, -8.9605, -4.8753, -2.6419, -3.7505, -2.3182, -3.5075,
        -4.8275, -4.3908, -0.5452, -2.8461], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7476,   0.6940,  -8.0044,  -2.5160,  -2.7119,  -3.7995,  -5.1423,
         -4.3428,  -0.4854,  -2.9025,  -1.9817,  -2.5889,  -2.1325,  -6.0564,
         -1.7495, -21.8270,  -2.8118,  -3.9792,  -4.9034,  -3.2830],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6057,  -7.1801,  -6.6119,  -4.4758,  -6.8122,  -4.5367,  -4.8499,
         -5.4616,  -8.6325,  -2.4476,  -4.6732,  -4.3601,  -4.8466, -14.1630,
         -4.6522,  -5.5099,  -5.8058,  -5.5399,  -6.5901,  -3.9883],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4997, -5.5966, -5.5315, -0.6792, -7.2083, -2.9989, -0.7582, -3.2127,
        -2.2266, -2.5014, -2.5306, -2.8486,  1.0762, -6.8898, -2.1250, -3.8407,
        -5.0899, -2.5830,  1.7302, -3.7179], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7056, -2.3682,  0.5642, -2.5718, -2.6411, -1.5014, -2.0979, -6.7478,
        -2.0633, -0.6982, -4.4744, -1.8532, -2.7566, -3.5559, -2.5689, -2.0767,
        -5.9431, -2.1542, -2.5887, -1.2699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9355, -6.8824, -2.3720, -9.6595, -2.5459, -6.5881, -8.6325, -6.3188,
        -6.9247, -2.1237, -5.3581, -0.0985, -3.9454, -4.4274, -3.7428, -8.0918,
        -4.0851, -3.4342, -3.0286, -1.2255], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3900, -0.7262, -7.5967, -3.1581, -3.3268, -4.8423, -3.6168,  0.8986,
        -3.8214, -2.4136, -1.2731, -2.3625, -4.4249, -1.6076, -1.8604, -3.1519,
        -2.8649, -3.6308, -4.6306,  1.1235], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1091e+00, -3.7497e+00, -6.3545e+00, -3.8896e+00,  2.1632e-02,
        -3.6784e+00, -4.1554e+00, -2.3343e+01, -6.1703e+00, -8.8772e+00,
        -7.1347e+00, -1.4298e+00, -5.5343e+00, -4.8945e+00,  8.8230e-02,
        -1.6069e+00, -2.3280e+00, -1.2998e+00, -2.7117e+00, -3.3498e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9764,  -2.1349,  -3.6568,  -3.3188,  -3.6868,  -4.7410,  -5.2808,
         -3.9484,  -2.6437,  -4.3107,  -3.0282,  -5.2834,  -2.8852,  -0.2739,
        -15.3874,  -7.4138, -29.6511,  -9.2045,  -7.5476,  -9.5033],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5938, -2.2258, -2.0540, -4.8647, -5.4850, -3.5607, -3.0444, -2.8106,
        -1.7223, -4.1040, -0.3282, -3.0541, -3.7229, -2.7487, -2.9052, -4.5556,
        -4.7659, -0.4999, -1.3587, -1.9339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3597, -2.0557, -1.7864, -3.0339, -3.5203,  1.0885, -5.4479, -2.7204,
        -1.7595, -2.4048, -3.4846,  0.8857, -5.0772, -2.4731, -3.0401, -1.4477,
        -5.5492, -1.7483, -0.7980, -2.6124], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1297,  0.3035, -3.4732, -3.6251, -4.1007, -5.1473, -6.4307, -1.1167,
        -2.3491, -2.0311, -2.9152, -3.9013, -4.3519, -0.0909, -3.2062, -1.2436,
        -2.9983, -0.5209, -5.9327, -2.0952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4451,  -2.0537,  -2.1020,  -0.4679,  -5.6711,  -1.3565,  -0.2414,
         -6.7233,  -2.9968,  -2.5922,  -3.4504,  -4.2966,  -1.8388,   0.4463,
         -7.6591,  -3.8854,  -3.3167,  -4.5162,  -4.0760,   0.1322],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6464, -1.7767, -2.0987, -2.4716, -0.9552, -7.6575, -1.4658, -0.2739,
        -2.9511, -2.6325, -2.3658, -1.1334, -6.2010, -2.5076, -0.0119, -2.3487,
        -2.2158, -4.2731, -0.0915, -7.1929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4109, -4.8116, -3.6268,  0.3069, -2.7002, -3.7662, -2.4529, -5.2436,
        -4.1488,  1.1268, -6.6300, -1.9038, -3.8454, -4.1179, -2.9052, -1.1858,
        -3.6461, -3.4411, -3.2328, -3.3089], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8450, -2.7644, -5.2059, -3.7932,  0.4741, -5.8070, -4.3202, -2.1999,
        -1.2558, -7.9149, -4.5977, -1.8627, -0.7757, -3.3147, -1.3892, -2.1889,
        -3.1150,  1.5712, -4.5054, -2.4524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3666, -2.2886, -4.5370, -0.3831, -6.0506, -2.4130, -0.6500, -1.5111,
        -1.9781, -1.7678, -0.5425, -6.2021, -2.0365, -1.2241, -2.6974, -1.1677,
        -3.3403, -3.1114, -3.3126,  0.6024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6287, -5.6042, -1.4869,  0.4397, -1.4200, -2.3961, -0.6274, -2.2660,
        -3.1826,  0.6490, -2.6953, -1.4570, -2.9512, -0.5032, -5.5734, -1.8320,
        -0.4447, -3.9993, -1.6194, -2.9703], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3782,  -4.0856,  -3.8139,  -6.0892,  -5.0902,  -0.2605,  -3.4384,
         -2.8382,  -2.7167,  -2.8592,  -3.8366,   0.2471, -12.3835,  -1.9021,
         -2.1305,  -5.1659,  -4.6516,  -0.3812,  -7.6710,  -2.6708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7426,  -4.9336,  -2.2521,  -3.2922,  -3.6711,  -3.7688,   0.7061,
         -5.8673,  -3.4628, -13.6798,  -5.0868, -14.2494,  -3.3277,  -6.2117,
          0.6103,  -1.5077,  -6.2451, -24.5484,  -9.4666,  -1.0175],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8657, -1.7081,  0.9469, -3.2549, -1.2746, -1.6778, -1.5742, -6.8897,
        -4.4936, -0.7795, -3.5328, -1.9731, -2.9656, -4.9509, -2.3469, -2.4984,
        -4.3541, -1.7557, -3.8709, -1.8375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8857,  -2.4309,  -3.3174,  -0.5975,  -5.9137,  -2.3385,   1.5531,
         -3.3224,  -2.8186,  -2.4044,  -6.2440,  -2.8265,   1.0460,  -1.1621,
         -1.5925,  -1.8735,  -6.0625,  -3.2981,  -0.8040,  -3.3649],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2073,  -4.5596,  -3.9156,  -4.1892,  -1.4596,  -1.6412,  -5.2645,
         -2.0636,  -2.5930,  -3.4285,  -3.5490,   0.6980,  -3.5387,  -1.3324,
         -3.2120,  -1.0030,  -4.9812,  -2.0211,   0.9299,  -2.5314],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6156,  -2.4262,  -3.3892,  -1.3420,  -4.1808,  -7.7535,  -0.6578,
         -4.3695,  -6.3587, -10.3625,  -4.8789,  -6.9574,  -1.9361,  -2.7232,
         -5.6538,  -3.7347,  -2.2987,  -1.8472,  -0.8825,  -5.1049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2868,  -3.7348, -25.1918,  -5.1399,  -6.8358,  -2.0712,  -3.1831,
          1.3834,  -1.9670,  -2.9637,  -3.1163,  -1.9230,  -6.4590,  -1.3128,
         -2.6425,  -7.2672,  -1.3819,  -3.6779,  -3.5069,  -4.0614],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3614, -3.8517, -4.0718, -4.0547, -6.0691, -4.1561, -0.8269, -3.8694,
        -4.1393, -3.7312, -4.1523, -4.2677, -0.4267, -3.7728, -2.6012, -4.2410,
        -6.0079, -3.6593, -6.9865, -5.8229], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8415, -11.2775,  -3.3092,  -5.9265,   0.5648,  -4.5692,  -4.2467,
         -6.7011,  -3.7231,  -5.2574,  -4.2751,  -1.5046,  -1.2574,  -2.3991,
         -1.5770,  -3.0201,  -2.4492,   1.0588,  -1.7909,  -3.1297],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1207, -5.8194, -2.3170,  0.5019, -2.4778, -2.3496, -3.1683, -1.5786,
        -4.4494, -2.1547,  0.5216, -3.0264, -1.6670, -2.7899, -3.5216, -3.9117,
         1.0898, -2.8440, -2.4018, -1.4891], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0234,  0.7386, -4.5645, -2.5495, -3.0809, -4.6943, -3.5543,  0.0059,
        -3.2896, -1.7260, -1.3166, -3.4074, -1.9489,  0.9542, -2.2401, -1.9070,
        -1.6697, -5.3013, -4.5046,  0.5675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7993, -3.7509, -0.9434, -6.9542, -2.7559, -2.9162, -4.6633, -4.1045,
        -5.0491, -0.8911, -8.5967, -2.6932, -2.6234, -4.7033, -2.9693,  1.5484,
        -3.9171, -1.8339, -2.2636, -1.3472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0231e+00, -1.6218e+00,  1.5493e+00, -2.2666e+00, -3.1744e+00,
        -1.8549e+01, -4.6283e+00, -9.4220e+00, -2.1521e+00, -6.8330e+00,
        -1.3118e-02, -8.7507e+00, -3.5355e+00, -3.8305e+00, -4.0801e+00,
        -5.2875e+00, -3.3451e+00,  7.2472e-01, -1.7385e+00, -2.7852e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4291,  -7.8164, -33.2778, -13.1866, -11.0041,  -6.9444,  -5.6193,
         -4.9065,  -4.3110,  -6.7308,  -6.5112,  -3.3979,  -2.0149,  -1.4257,
         -6.6434,  -4.1963,  -0.4561,  -4.3736,  -1.8904,  -4.0869],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3992, -0.2278, -3.2916, -2.6043, -3.0579, -2.1389, -6.1421, -2.7571,
        -0.7459, -2.0623, -2.6905, -1.1556, -2.0602, -4.3256, -2.5203, -2.1225,
        -4.1800, -1.1754, -5.4293, -2.2285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8484,  -2.0883,  -4.8903,   0.3815, -12.5940,  -2.5401,  -3.3196,
         -1.8771,  -4.6815,  -3.9452,  -0.0441,  -0.5982,  -3.3707,  -1.9936,
         -7.9147,  -3.3402,  -0.3479,  -2.6355,  -4.0474,  -6.7753],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4137, -11.2925,  -9.4173,  -8.8952,  -0.9153,  -4.2231,   1.2544,
        -28.5169,  -2.8627,  -7.3626,  -6.9926,  -1.6046,  -4.7597,  -4.4951,
        -16.9526,  -6.1247,  -6.7452,  -8.1901,  -7.3547,  -5.8938],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5799, -3.5988, -3.9169, -7.4923, -5.8229, -3.6264, -3.4970, -0.8464,
        -5.2067, -1.9760, -3.6482, -3.7497, -2.1527, -6.3183, -6.2417, -7.4185,
        -6.1345, -2.8871, -4.2181, -2.4068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2662, -1.3140, -1.9439, -2.5598, -0.6440, -8.5536, -1.8637, -2.4804,
        -4.8943, -2.7890, -4.4505, -5.0181, -5.2588, -0.1837, -3.7850, -1.7983,
        -2.7474, -2.3826, -7.5245, -2.5478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4239,  -7.0314,  -3.1168,  -1.0979,  -5.1223,  -2.9922,  -1.6007,
          0.0775,  -5.8049,   0.1217,  -1.5558,  -4.7025, -10.4975, -16.8803,
        -12.4395,  -7.6773,  -1.4326,  -5.8236,  -2.4295,  -2.0717],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5733, -3.6471, -0.9686, -5.9069, -1.3044, -1.7678, -4.0813, -2.8888,
        -1.9260, -1.7545, -5.0836, -1.0299, -2.7458, -8.2956, -1.7977, -1.9257,
        -2.5603, -2.8091,  1.3480, -2.8285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9816,  -6.6075,  -8.0699,  -4.1918,  -4.8205, -10.3499,  -0.9194,
         -5.6199,  -2.3440,  -3.2753,  -2.2044,  -3.2279,   1.0377,  -1.0445,
         -5.6473,  -6.0475, -14.9775,  -9.2719,  -6.2509,  -3.6004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1897,  -6.4705,  -6.9575,  -6.0807,  -5.4533,  -7.4453,  -7.1150,
         -3.3262,  -4.5069, -11.0930, -22.0247, -19.2285,  -7.0928,  -3.4045,
         -2.6600,  -5.3480,  -4.6967,  -4.5551,  -3.8947,  -5.7473],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6020,  -8.9859,  -6.9727,  -5.6681,  -8.9982,  -3.0877,  -5.1389,
         -1.7920,  -5.3529,  -4.3659,  -2.5561,  -5.5534,  -3.3425,   0.3947,
         -4.6004,  -2.2143, -18.1198, -10.9057,  -7.9071,  -1.9641],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7876,  -5.6405,  -2.6079, -39.1821,  -4.8644,  -8.4938,  -3.4059,
         -6.9351,  -2.9967,  -0.0706,  -2.4473,  -3.1616,  -1.9196,  -3.3467,
         -4.1100,   0.1053,  -4.0327,  -3.1182,  -2.4421,  -1.6450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4147, -2.2708, -0.8402, -8.7806, -6.6834, -9.8609, -6.4472, -3.5311,
        -3.2241, -9.8260, -3.7150, -2.8701, -3.4259, -3.3911, -6.0236,  0.5420,
        -3.5994, -3.5586, -2.5867, -6.3504], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9702, -17.3954,  -4.9085, -10.7668,  -1.3084,  -5.0423,  -2.2889,
         -1.2317, -10.0794,  -3.2028,  -3.9220,  -5.4906,  -4.6283,  -0.2654,
         -5.5021,  -2.5108,  -2.6727,  -3.3436,  -5.8156,  -0.8365],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9088,  -3.8992,  -2.5865,  -1.6682,  -4.0108,  -3.5717,   0.5686,
         -4.4559,  -2.7438, -22.5677,  -9.2163, -11.3206,  -6.8436,  -3.6996,
         -5.6545,   0.1029,  -2.1398,  -1.8795,  -1.9212,  -2.8110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4705, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8077, -3.5130, -3.6111, -2.9329, -3.8046, -3.5189,  0.7926, -2.2122,
        -1.4220, -1.4089, -3.7834, -2.2279,  0.3340, -5.1451, -4.2754, -2.4461,
        -3.6614, -3.8690, -3.5183,  0.4401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7737, -5.4023, -2.6979,  0.3776, -3.0765, -1.5245, -4.0967, -5.6293,
        -4.0644,  0.5338, -2.8114, -1.8432, -2.6521, -1.8044, -7.7999, -4.4792,
        -1.6264, -3.1761, -1.2325, -2.8225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8801, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5494, -2.9693, -0.7218, -6.4766, -1.8318, -1.1295, -3.1624, -1.6011,
        -2.9606, -1.1084, -3.4508, -0.9681, -3.1164, -1.8349, -3.4476, -3.5265,
        -7.2462, -5.0091, -1.6635, -3.0570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1896, -22.0990,  -1.3414,  -4.0613,  -3.8819,  -5.1022,  -2.6473,
        -16.9318,  -6.6227,  -8.3146,  -5.3316,  -7.7915,  -4.9947,  -7.8793,
         -2.6012, -10.5386,  -3.5031,  -6.8405,  -5.5514,  -3.5581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0409, -3.2137, -2.2924, -4.0648, -1.3177, -6.3223, -2.9521, -0.3590,
        -2.1541, -1.6308, -2.7490, -5.8367, -2.9021,  0.1064, -2.9736, -3.0686,
        -3.2962, -2.1818, -4.0290, -3.5624], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1569,  -4.1430,  -5.4254,  -4.8742,  -4.0181,  -7.0087,  -4.5007,
         -4.7957,  -4.8995,  -3.2325,  -3.7652,  -5.0863,  -8.3369,  -5.5118,
         -6.6437,  -4.4380,  -6.3839,  -2.3481, -14.3322,  -3.8767],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3129, -2.4559, -2.1674, -1.4531, -2.0178, -3.5249, -5.6175, -4.4831,
        -2.1268, -2.0805, -1.4181, -5.4404,  0.3534, -2.6913, -2.3847, -4.6038,
        -5.0713, -5.0871, -2.7614,  0.5030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5891,  -6.5352,  -1.5863,  -3.8044,  -5.5711,  -3.3680,  -0.0308,
        -10.4401,  -5.6381, -13.3922,  -2.9454,  -6.6143,  -4.1944,  -2.7051,
         -5.2964,  -3.6259,  -3.4086,  -8.0804,  -3.7560,  -2.4191],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6736,  -3.8088,  -1.8847,  -3.7740,  -0.7713,  -6.4443,  -2.4877,
         -0.3518,  -3.5820,  -1.3604,  -1.3773,  -1.0720,  -5.1450,   1.0458,
         -1.8895,  -1.7585,  -6.7391,  -6.6703, -13.0226,  -3.2071],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8495, -2.5418, -1.3200, -4.5387, -2.5465, -0.6800, -2.8677, -0.8974,
        -2.9915, -4.8368, -4.5651, -0.0800, -3.2690, -2.2387, -2.0648, -3.1402,
        -2.8327,  0.8533, -4.0376, -2.4218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5199, -3.4625, -1.4250, -2.6603, -4.8432, -2.0209,  0.8752, -3.8592,
        -1.2415, -1.8639, -2.0080, -6.3006, -1.5009, -0.4482, -2.1212, -1.5093,
        -1.6850, -2.9653, -2.6653,  1.2485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0988, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2605, -3.9884, -1.0322, -2.3555, -2.4255, -2.5956, -3.8022, -6.4213,
        -2.7047, -1.6991, -2.2639, -2.7684, -3.2371, -6.9221, -3.4637, -0.3965,
        -6.8662, -2.3968, -2.5183, -1.7379], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9203, -1.5652, -5.6915, -3.0532,  1.2121, -4.4170, -2.3047, -2.7275,
        -0.8527, -3.8725,  0.6838, -9.5237, -2.7418, -1.6799, -3.7455, -4.8663,
        -6.4094, -2.5251, -3.0117, -3.4649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9777, -5.6874, -3.2768, -1.2755, -6.4899, -1.3634, -2.1785, -2.5755,
        -6.0779, -1.5470, -1.0988, -3.3819, -1.8709, -3.0034, -3.0932, -2.7807,
        -0.0836, -2.7566, -1.0166, -2.9963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7469,  -5.0609,  -4.3396,  -4.1700,  -3.6619,  -5.8005,  -3.6001,
         -1.8053,  -4.1151,  -3.9202,  -6.6486, -10.5233,  -5.0146,  -7.1766,
         -3.7551,  -5.1196,  -2.5261,  -4.1657,  -4.1570,  -4.5806],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3544, -6.4871, -6.7654, -4.3233, -3.2373, -2.9458, -4.1835, -7.3695,
        -7.3738, -4.4147, -6.0078, -4.4167, -5.2020, -3.5028, -9.3799, -4.3416,
        -4.4328, -7.8309, -5.3158, -1.8982], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0823, -2.2639, -3.7501, -3.6078,  0.9882, -2.4163, -3.7611, -0.8932,
        -2.2691, -2.0846,  1.9684, -3.6765, -2.6093, -2.8361, -4.4436, -6.8089,
        -3.7895, -2.0529, -3.8948, -1.7893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6536, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2197,   0.8823,  -1.0085,  -2.3631,  -5.0688,  -5.5927,  -3.2540,
         -5.4562,  -4.2397,  -3.3583, -15.2381,  -4.7639,  -6.3204,  -6.8479,
         -2.0754,  -2.9652,  -5.3457,  -3.2090,  -1.4592,  -3.7847],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4147, -6.0078, -4.4167, -5.2020, -3.5028, -9.3799, -4.3416, -4.4328,
        -7.8309, -5.3158, -1.8982, -3.4775, -2.0982, -4.5924, -2.3357, -4.1250,
        -1.7815, -3.8167, -2.7326, -4.2887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7152, -4.7466, -1.5683, -3.3497, -0.6780, -5.0652,  0.6305, -3.9468,
        -2.8631, -2.8872, -0.7445, -5.4734, -3.6228,  0.2978, -4.1274, -2.6874,
        -2.8681, -2.4499, -3.5555,  0.6420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1766,  -4.4586,  -2.9783,  -2.8823,   1.0144,  -5.1989,  -1.4732,
         -3.6149,  -1.0868,  -4.5989,  -2.0451, -27.4347,  -3.7060,  -2.7213,
         -3.2045,  -6.4416,   0.4953,  -4.5508,  -3.4968,  -2.9033],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.8566,  -5.1733, -10.0195,  -0.6160,  -6.2082,   0.4555, -28.4432,
         -4.6499, -12.7327, -16.9803,  -6.1462, -12.1559,  -8.8656, -34.2593,
         -6.1993,  -8.3371,  -5.5043,  -3.7620,  -6.8353,  -7.1220],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.8206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9651,  -4.5322,  -2.3354, -37.7531,  -4.3090,  -8.4648,  -0.7156,
         -7.3028,  -0.3651,  -9.0691,  -3.5445,  -6.3918,  -4.1735,  -5.3025,
         -2.5967,   0.1509,  -2.5945,  -2.2652,  -3.8910,  -0.8648],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4955, -2.5931, -0.9373, -4.9155, -1.8045, -1.2533, -3.9137, -2.8460,
         1.9465, -3.9786, -2.2092, -4.6328, -6.1862, -3.2962,  0.1022, -4.7788,
        -2.5540, -1.3825, -2.9130, -3.1503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3660,  -2.6503,  -3.5695,  -2.6909,  -5.4910,  -4.4301,  -0.7101,
         -6.1665,  -1.3752,  -8.1395,  -3.4355,  -5.6680,  -6.3766,  -7.7208,
         -7.5002,  -3.6679,  -3.6490,  -2.2433,   1.3023, -32.7009],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1696, -3.0412, -5.3665, -4.3744, -2.6378, -5.8356, -6.0681, -1.9347,
        -8.1567, -2.8442, -1.8957, -3.8414, -2.4103,  0.8929, -8.6334, -1.7730,
        -2.0883, -2.0945, -6.1563, -3.9306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6950,   0.3270,  -3.0429,  -2.3432,  -1.5135,  -1.3592,  -7.0387,
         -4.2914,  -1.0632,  -6.2101,  -3.1098,  -3.0066,  -0.2638,  -4.5427,
          0.9751,  -3.0334,  -4.2451, -17.3554, -20.6727,  -5.2151],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3477, -5.4134, -2.8737, -0.2555, -2.3623, -2.4449, -2.9111, -4.6025,
        -1.9293,  1.1833, -2.2590, -1.0114, -2.3147, -3.6391, -2.3063,  1.3707,
        -4.1392, -2.6915, -1.6088, -3.9068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3196, -2.1134, -2.6926, -1.0496, -5.8793, -2.2761,  0.5283, -7.4924,
        -3.6631, -3.2187, -1.1145, -6.0412, -1.3920, -0.6368, -2.6294, -2.2451,
        -2.4425, -6.0674, -3.3111, -2.5801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6566, -2.2041, -3.2040, -1.1873, -4.0296, -0.8274, -1.9521, -5.9064,
        -4.3138, -5.7466, -4.0713, -5.4457, -6.4341, -5.6250, -4.4938, -2.4617,
        -5.6300, -4.4348, -3.1256, -4.1401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6355, -3.7258,  0.1441, -3.7642, -3.5093, -1.6182, -2.2238, -7.0181,
        -3.4229, -1.4650, -1.1433, -1.1895, -2.6884, -1.7402, -4.7618, -2.7135,
        -9.2041, -2.8345, -2.6199, -0.8892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0512, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0340, -2.4721, -6.9055, -4.6341, -1.4294, -6.2371, -3.1240, -1.6428,
        -4.2391, -2.9246,  0.2559, -3.4449, -1.1571, -2.2043, -4.5225, -2.3080,
         0.6495, -3.0140, -2.0205, -4.9447], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7239,  -1.4413,  -3.1649,  -2.8939,  -4.4465,  -4.2866,   0.7505,
         -1.4316,  -3.5282, -12.3972,  -5.0951,  -7.6458,  -2.5584,  -3.1523,
          0.6334,  -3.6664,  -3.1748,  -2.0674,  -6.2918,  -3.0526],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5540, -4.2157, -4.7215, -2.2935,  1.0033, -3.2439, -1.9506, -3.0774,
        -2.3974, -7.3735, -1.4873, -1.0615, -2.8126, -1.9351, -4.1162, -3.0183,
        -3.9682,  1.5376, -3.1568, -3.1771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8581, -1.7125, -3.1027, -1.2943, -3.2721, -0.3291, -3.7752, -1.7736,
        -3.0758, -0.7008, -6.2057, -1.6391, -0.9979, -4.3158, -2.8721, -3.0624,
        -4.3719, -4.0508,  0.0212, -2.7477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1342,  -1.9973,  -0.4182,  -7.9577,  -1.9309,  -1.7180,  -5.0329,
         -3.1068,  -0.3211,  -4.6613,  -3.1032, -10.1444,  -4.6602,  -6.2221,
         -5.3696,  -6.9737,  -2.6343,  -5.6712,  -0.1285, -15.1355],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2652, -2.0655, -0.2983, -4.8386, -1.7290, -4.1202, -5.1585, -3.2948,
        -0.0678, -5.7621, -3.8888, -2.6946, -1.9035, -6.9226, -4.5175, -0.6180,
        -3.4082, -2.7645, -2.2981, -1.1006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8879, -2.0632, -3.3289, -5.8856, -3.5972, -1.5443, -4.1081, -1.2651,
        -1.6260, -3.9092, -2.7589,  0.4116, -3.9667, -3.0497, -2.6783, -2.3483,
        -4.4022,  1.0644, -3.0562, -4.4721], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2781, -4.3137, -3.3567, -0.0533, -3.5030, -2.4370, -2.5951, -5.0434,
        -4.7172,  0.6500, -3.0289, -1.6477, -5.8394, -5.9813, -4.0714, -0.7578,
        -5.3165, -2.3373, -2.0339, -3.0107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3308,  -4.0996,  -4.8602,  -2.2843,   1.4094,  -3.5206,  -2.9666,
         -3.7916,  -5.8648,  -3.7810,  -2.5643,  -3.5208,  -2.0733, -14.1222,
         -6.1702,  -6.4622,  -6.5281,  -2.1313,  -5.5232,  -0.5484],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3674,  -7.9629,  -4.0452, -12.8496,  -1.2414,  -6.2655,  -6.0016,
         -0.9483,  -4.8898,  -5.0529,   0.0948,  -4.7355,  -3.0988,  -2.4835,
         -2.2615,  -8.8913,   0.4450,  -4.7148,  -2.5167,  -4.0965],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7606,  -4.2697,  -3.4640,  -2.0072,  -4.3121, -13.7102,  -3.8368,
        -10.6094,  -1.1451,  -7.5275,   0.4949,  -3.8650,  -6.3122,  -7.7805,
         -4.1976,  -4.5333,  -7.7934,  -1.4367,  -1.7026,  -3.1247],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3059, -6.8106, -2.2118, -4.8497, -0.3853, -2.7476, -3.2500, -2.3149,
        -3.5258, -4.0354,  0.5571, -3.5046, -1.6074, -2.9501, -2.4867, -4.9913,
        -4.1611,  0.6113, -1.7165, -2.0274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4341,   1.3883,  -9.5019,  -2.5159,  -2.7884,  -3.4883,  -4.7432,
          0.3589,  -2.6849,  -2.6339,  -2.1352,  -1.1600,  -4.9861,  -0.3448,
         -2.6711,  -1.2031, -14.2797,  -6.2011,  -9.6086,  -1.6638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4946,  -0.8453,  -4.4541,  -2.7278,  -3.2953,  -2.4540,  -5.6330,
         -1.8083,   0.4902,  -1.5430,  -4.0025,  -1.0103,  -2.4973,  -2.6258,
          1.5462, -12.8492,  -1.7614,  -3.9943,  -5.2812,  -5.7242],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0934,  -4.5977,  -5.1280,  -2.0955,  -3.3659, -14.9562,  -7.0662,
         -5.9462,  -6.1720,  -1.2357,  -8.0028,  -0.7344,  -6.1428,  -3.0066,
         -2.9581,  -4.6274,  -4.0133,  -0.9426, -12.3582,  -2.2734],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6740,  0.0707, -3.8723, -1.7312, -2.7102, -2.3122, -4.0884,  0.0186,
        -4.4454, -2.1537, -2.9450, -2.3280, -7.1096,  0.4112, -4.9946, -1.7451,
        -3.0709, -1.8223, -4.5123, -2.3984], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2233, -4.2782, -5.3798, -3.1268, -3.4066, -6.1559, -4.8759, -1.0084,
        -2.6579, -1.6753, -2.8977, -2.5221, -5.1886, -2.9142, -0.3831, -3.3808,
        -1.3519, -2.5835, -5.0238, -2.3310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0455,  -4.6133,  -3.1760,  -3.0164,  -3.0610,  -5.3104,  -4.5003,
         -5.5671,  -2.0329,  -6.6531,  -2.4224,  -9.1051,  -7.3728,  -6.5031,
         -6.1044,  -7.4643,  -5.1854,  -5.3264,  -3.3371,  -3.7678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5591,  -3.3391,  -6.2587,  -2.2706,  -4.5194,   0.6908, -11.4601,
         -2.8026,  -4.3215,  -1.4003,  -4.8318,  -1.5743,  -2.9922,  -4.6596,
         -1.7882, -11.3523,  -5.0842,  -8.5569,  -6.8285,  -2.7537],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5996,  -2.4016,  -3.4126,  -2.3790,  -4.2327,   1.1358,  -2.7696,
         -2.3632,  -3.3990,  -0.9556,  -3.5163,   0.2330,  -0.8289,  -5.2811,
        -23.1924,  -3.4408,  -3.9107,  -5.8981,  -4.0337,  -0.6572],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3053, -3.0580, -3.2534, -4.7171, -1.0700, -3.1219, -5.4005, -7.7844,
        -5.1930, -4.3178, -7.4005, -2.0198, -4.9165, -2.5808, -5.1198, -3.3006,
        -4.2989, -6.1413, -4.6275, -1.3333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7709,   0.5483,  -2.2769,  -3.0543,  -3.7756,  -3.1456,  -3.5492,
          0.5024,  -1.5929,  -2.3554,  -9.7942,  -6.0721,  -3.0312,  -7.6968,
         -1.7974, -11.8970,   1.3387,  -5.7793,  -3.4640,  -3.3444],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1856, -4.5314, -3.1532, -2.2546, -3.3991, -2.3561,  0.7233, -2.4501,
        -4.0279, -3.3259, -4.1363, -2.6766, -3.2393, -3.0064, -1.5039, -2.9830,
        -0.0499, -6.5529, -1.0390, -0.1433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4353,  -1.8555,  -3.6003,  -2.7441, -11.3809,  -6.6447,  -5.7525,
         -6.2648,  -2.5644,  -5.5464,   0.6380, -11.5737,  -2.9016,  -3.1355,
         -5.4643,  -4.0729,  -0.1463,  -5.1115,  -1.9466,  -4.2013],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4019, -4.9275, -4.9038, -6.7623, -2.7791, -3.2049, -3.2976, -4.9712,
        -9.1424, -2.9980, -7.3383, -6.5723, -4.4754, -2.9573, -6.9201, -4.1648,
        -4.0273, -6.6362, -5.1778, -0.0980], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3443,  -3.9939,   1.2723,  -3.0141,  -2.4817,  -1.8776,  -5.8163,
         -3.9506,   1.0956,  -6.0405,  -4.4539,  -1.8010,  -2.2681,  -3.2983,
          1.0547,  -4.2770,  -5.8019, -19.6870, -10.2579,  -7.5472],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5361,  -5.9536,  -7.6573,  -9.6140,  -4.6797,  -1.9291,  -6.0334,
         -3.3255,   0.9136,  -8.8327,  -1.6429,  -2.3208,  -2.8312,  -3.9752,
        -13.0154,  -0.9271,  -3.1950,  -1.6942,  -5.0748,  -2.7638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0327,  -4.6059,  -4.9408, -12.0069,  -4.1623,  -7.5208,  -1.6454,
         -5.1072,   0.5699,  -4.4701,  -4.8837,  -3.2787,  -4.1288,  -4.6086,
         -3.4108, -11.8088,  -3.8602, -12.5719,  -5.6241,  -6.9130],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7904, -2.4369, -6.8061, -2.5778,  1.6137, -6.1738, -2.7069, -2.3453,
        -4.6642, -2.1128,  1.2789, -5.9957, -0.9117, -1.8051, -2.5115, -4.1694,
        -7.0258, -5.9742, -2.0999, -4.6072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6381,  0.3333, -2.9113, -2.2845, -3.7456, -3.5354, -7.3121, -5.4254,
        -2.1094, -3.2064, -3.1209, -1.6149, -2.1734, -4.3188,  0.2364, -9.2269,
        -3.4339, -2.2172, -2.3288, -5.3201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6504,  -9.5445,  -2.1295,  -3.2389,  -2.3343,  -5.0141,  -0.0178,
        -10.1347,  -1.8618,  -2.2112,  -2.5961,  -4.9257,  -3.0147,  -0.9070,
         -3.2495,  -1.4520,  -1.2333,  -2.5395,  -2.2197,   1.2478],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2393,  0.4964, -4.6629, -2.8866, -3.1207, -2.6665, -4.6472, -1.9171,
         0.1136, -2.6895, -3.2696, -1.1816, -3.0837, -3.6335, -6.4300, -5.7242,
        -2.6727, -3.3566, -2.2383, -5.8776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7260,  -2.1903,  -4.4475,  -5.6357,  -3.7689,  -4.8171,  -4.5420,
         -3.2617, -22.4690,  -8.2831,  -6.9957,  -1.9359,  -2.6581,   1.6776,
         -7.1375,  -2.5098,  -2.2898,  -2.9166,  -5.8239,  -4.1746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9079, -3.8484, -2.7855, -5.7766, -0.2173, -0.5003, -3.3710, -2.1169,
        -3.9265, -3.6368, -0.3078, -5.0537, -2.0011, -1.9146, -1.8729, -7.3245,
         0.5278, -5.0671, -2.0773, -1.2758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0459, -1.6448, -4.7669, -1.8271,  1.0383, -4.0161, -1.6504, -1.9385,
        -2.7303, -7.4354, -4.8724, -1.9096, -5.0107, -3.5759, -3.4500, -3.4665,
        -2.8538,  1.4912, -3.5108, -1.2045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7375, -2.5945, -3.4040, -0.7602, -5.2972, -2.4008,  0.2215, -3.5852,
        -1.9345, -3.8719, -1.8643, -5.8945, -1.9397, -0.3763, -7.8748, -2.8323,
        -1.9577, -1.9747, -4.7335, -0.3206], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1090,  -2.0861,  -1.2091, -10.0229,  -2.0563,  -2.7119,  -4.6050,
         -3.8381,   0.8363,  -1.3451,  -1.7256,  -4.1720,  -0.9830,  -5.6320,
         -2.2656,   0.5132,  -2.9373,  -2.2837,  -1.6306,  -2.6466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.7140,  -8.8270,  -1.8490,  -2.9357,  -1.1698,  -3.5450,  -3.4945,
         -4.0188,  -2.4592,  -7.0727,  -1.7986,  -0.9743,  -2.8735,  -1.9317,
         -3.0112,  -1.0909,  -6.8016,  -1.3635,  -2.5764,  -3.8559],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1219, -0.6883, -5.5421, -2.1881,  0.6646, -2.4724, -2.7875, -4.0646,
        -1.4335, -4.8740, -2.5465,  1.1905, -6.3263, -1.7974, -3.1391, -3.4313,
        -3.7294,  1.1049, -2.9677, -1.3793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3271, -4.3243, -2.7506, -3.2071, -4.5027, -5.0970, -0.2552, -5.5267,
        -3.2536, -2.0401, -4.8619, -3.0165, -0.2680, -3.4365, -2.2422, -4.5421,
        -1.1493, -6.2573, -2.2193, -2.0711], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4848, -3.1346, -1.5390, -1.5124, -7.8566, -2.6869, -1.6643, -3.0951,
        -1.9621, -2.3490, -2.8778, -3.5993, -2.7862, -5.1634, -2.3535, -1.8473,
        -2.4067, -5.5783, -1.8095,  0.1389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5980,  -2.5912,  -3.4417,  -5.6894,  -3.8584,  -1.1109,  -9.7653,
         -3.0464,  -4.3953, -10.3272,  -7.0244, -24.1538,  -4.3396, -45.9797,
         -4.3009,  -4.3560,  -8.1654,  -8.3690,  -5.8074,  -1.2030],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6159,  -1.8264,  -4.2948,  -1.4164, -15.3282,  -1.7850,  -2.5954,
         -6.3861,  -4.0526,  -2.6376,  -5.2051,  -1.1561, -18.0844,  -5.7481,
        -10.7965,  -5.7380,  -5.3683,  -3.6166,  -2.6199,  -1.0834],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7244, -3.1598, -2.5388, -6.5485, -4.2109, -2.6721, -2.8701, -0.5859,
        -1.2066, -1.9732, -3.7809,  0.5256, -3.8637, -2.6315, -1.4352, -2.9199,
        -2.7149,  1.8473, -2.6691, -2.3599], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1244, -1.8426,  0.0635, -1.7620, -1.9258, -2.8878, -3.2703, -3.7895,
         1.0379, -4.7002, -1.6681, -4.8487, -0.6027, -6.8177, -2.3115, -1.2997,
        -2.5255, -3.3524, -3.2483, -2.1026], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0401, -4.0321, -1.9623, -1.2490, -3.5718, -3.2835,  0.7634, -1.7379,
        -1.9592, -1.7099, -1.2256, -6.4242, -1.9974, -1.6730, -2.4297, -1.0180,
        -1.7472, -3.6959, -2.9259,  1.4985], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3338, -3.0949, -4.8168, -3.1394, -2.0964, -2.8846, -3.4045, -0.9878,
        -6.4118, -1.9829, -1.5979, -3.7600, -1.7063, -1.9365, -1.8199, -4.0996,
         0.8319, -2.1664, -2.0292, -1.8844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0482,  -1.7624,   0.1185,  -2.3052,  -1.5872,  -4.4391,  -0.6557,
         -8.7586,  -2.0835,  -1.3978,  -3.7462,  -4.0851, -11.1161,  -6.5641,
         -3.6433,  -6.0135,  -5.6766,  -5.3265,   0.9544,  -7.3219],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7844,  -3.3788,  -1.3389,  -7.1644,  -2.0965,  -1.0372, -11.0123,
         -2.9773,  -2.8137,  -4.1567,  -3.6420,   0.1300,  -4.3162,  -4.1350,
         -3.2796,  -1.7943,  -6.7621,  -4.5962,  -1.0154,  -3.6882],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0376, -2.2904, -1.7522, -2.1897, -4.5095, -2.4918,  0.7769, -3.6900,
        -2.1593, -3.2368, -0.8695, -5.1125, -2.2173, -0.7446, -7.0497, -1.0998,
        -3.8839, -4.4951, -3.8938,  1.1333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5746,  -2.3927,  -3.3633,  -7.2546,  -2.7689,   2.0339,  -3.5811,
         -3.6039,  -1.8963,  -1.9887,  -4.9599,   1.1405, -13.9089,  -2.6844,
         -2.6491,  -3.2193,  -4.8663,   0.8982,  -5.0048,  -2.7207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1369, -4.9451,  0.4134, -3.6580, -2.2159, -2.3406, -1.4714, -4.8803,
         0.7984, -1.1417, -1.5245, -2.0566, -5.2243, -3.3108,  1.0764, -2.8362,
        -2.4316, -2.2227, -2.6551, -7.1723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8713, -1.4022, -2.5274, -3.7566, -1.9477,  1.2373, -3.6117, -1.2539,
        -4.5493, -7.9011, -6.4906, -6.7528, -1.8975, -4.9373, -7.6542, -5.8230,
        -1.5726, -3.0719, -0.6808, -5.6973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8435,  -1.6658,  -4.2497,   1.2795, -12.8174,  -2.9027,  -2.5934,
         -1.3283,  -4.8967,  -0.2841,  -0.2961,  -4.3458,  -1.6884,  -2.5319,
         -5.4653,  -3.2831,   0.8472,  -3.4276,  -1.8002,  -3.6540],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1412,  -3.8813,  -1.9110,  -3.1984,  -2.3518,  -2.4840,  -0.5290,
         -5.6545,  -1.4957,  -1.8103,  -2.7852, -10.9100,  -6.2512,  -4.3771,
         -5.7364,  -2.0042,  -4.3744,   0.2232,  -5.9374,  -3.0908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1657,  -4.7658,  -2.2322,  -1.7796,  -2.4210,  -1.3385,  -5.8096,
         -2.4835,   1.2945,  -4.7811,  -1.9203,  -2.2381,  -5.2016,  -2.3566,
          0.9188,  -4.2836,  -2.9225, -32.1136,  -5.9794, -12.2413],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0670,  -1.5983,  -2.4862,  -3.4121,  -4.7887,   1.0740, -11.0463,
         -2.1710,  -2.8261,  -1.1292,  -5.2139,  -1.1562,   0.4302,  -3.9135,
         -1.3417,  -2.5033,  -0.9528,  -3.5199,  -4.0827,  -3.1327],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4186, -0.8902, -2.7904, -0.8065, -6.5078, -1.4181, -1.2054, -2.5510,
        -1.6611, -3.0862, -4.0755, -2.8439,  0.7260, -1.5986, -1.9898, -3.6275,
        -2.1915, -7.6758, -1.8225, -2.0789], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5757, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4317, -5.0837, -6.1652, -1.8168, -5.4399,  1.1272, -1.3766, -1.8403,
        -3.7147, -0.3515, -6.5793, -1.6472, -0.9811, -3.4884, -1.3572, -3.4828,
        -0.9319, -7.2362, -0.7680, -2.1765], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5898,  0.9952, -4.7536, -2.3388, -3.3090, -3.9546, -4.6983,  0.0437,
        -0.9806, -2.3453, -3.8634, -2.9959, -7.8303, -4.0464, -1.1061, -7.2275,
        -2.1191, -1.7787, -2.5361, -2.5555], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2682, -2.7990, -5.6965, -2.1025, -0.3946, -3.1423, -1.8554, -1.6418,
        -2.4602, -2.7559,  1.1717, -2.5231, -2.0082, -1.9792, -5.2683, -3.2226,
        -0.2132, -4.1474, -1.3210, -2.1477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4753,  -5.3378,  -2.5521,   0.5035,  -2.4161,  -2.6549,  -3.4852,
         -4.3736,  -3.0277,   1.7407,  -5.1504,  -3.7805, -11.5636,  -4.8387,
         -3.3777,  -6.2987,  -4.7607,  -4.3796,   1.0284, -21.1468],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7732, -1.4106, -4.0128, -2.6832, -2.6084, -1.9984, -3.6912, -1.5990,
        -1.4554, -6.7621, -1.3005, -4.0005, -3.3419, -1.5308, -1.5344, -4.0864,
        -1.9416,  0.6573, -3.9849, -1.6772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1206,  -1.3511,  -2.1551,  -4.7348,  -3.0003,  -2.0093,  -5.5991,
         -3.0744,   1.0260,  -2.9962,  -4.0169, -11.8490,  -5.3517,  -7.1993,
         -6.6938,  -1.1865, -12.0896,   1.1673,  -5.4481,  -3.5509],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6329,  -2.9383,  -4.9081,  -1.7000,  -3.4872,  -3.6316,  -3.5586,
         -7.3874,  -3.7260,  -3.8827,  -5.5829,  -3.0131, -15.6649,  -6.4536,
         -8.5011,  -6.8318,  -1.6810,  -4.0936,  -2.3999,  -3.3900],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3961, -2.2694, -3.6184,  0.4753, -3.8631, -2.6449, -2.5027, -2.2971,
        -3.7624, -0.3389, -6.9751, -1.9553, -2.8562, -2.4988, -5.9701, -3.5343,
         0.3274, -4.2077, -3.4054, -2.7264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9280,   2.2313,  -5.1960,  -1.6158,  -3.3332,  -5.5759,  -3.7375,
          1.2458,  -2.9088,  -4.3589, -18.0020, -10.0382,  -8.3146,  -1.5695,
         -4.5395,  -1.3595,  -3.0710,  -7.3546,  -2.0502,  -3.4374],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-42.0253,  -6.2956,  -9.6124,  -3.8483,  -7.1557,  -6.6626,  -7.8998,
         -6.2756,  -2.8546,  -4.2452,  -1.3100,  -5.9773,  -3.1313,  -3.5493,
         -5.6858,  -3.6293,   0.0937,  -4.5876,  -2.2703,  -3.5027],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2020,  -1.4756,  -1.4145,  -5.5875,  -3.3648,  -2.1722,  -4.2612,
         -2.5074,  -3.1931,  -1.9303,  -6.4487,  -7.5620, -10.3890,  -8.8961,
         -3.4659,  -2.1045,  -4.2197,  -0.3747,  -4.8874,  -2.0541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6881,  -6.8455,   1.7099,  -6.5349,  -4.1068,  -1.8803,  -0.9449,
         -4.6287,  -2.4618,  -2.2145,  -2.7588,  -2.6588, -13.1559,  -7.2699,
         -7.4895,  -6.2548,  -3.0850,  -4.5769,  -1.7979,  -3.9941],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5219,  -6.3398,  -9.4010,  -3.8499,  -3.1597,  -4.6605,  -2.7823,
         -4.1330,  -5.5858,  -4.2730,  -0.9999,  -5.3342,  -5.2099, -14.5738,
         -8.3881,  -6.8574,  -6.9415,  -2.0908,  -6.0963,  -0.3383],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1742, -1.9342, -3.3336, -2.2394, -0.8480, -5.6507, -2.1069,  0.3765,
        -3.1375, -1.7935, -2.2677, -1.9153, -4.8768, -0.1336, -7.9505, -2.0275,
        -5.7614, -2.0627, -5.6451, -1.8860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1235, -14.8633,  -5.5613,  -6.3500,  -6.8714,  -1.2520,  -5.7430,
          0.4041,  -3.2198,  -2.8423,  -2.0794,  -2.8760,  -5.1107,  -1.2273,
         -5.1901,  -2.4227,  -2.9385,  -2.0750,  -5.9328,  -4.0360],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3305,  -3.3285,  -2.5058,  -3.8897,  -0.4180,  -2.9775,  -3.7513,
         -1.6044,  -2.3503,  -2.4818,  -0.7261,  -2.7706,  -4.1584,  -4.6028,
         -2.8468,  -4.4275,  -6.5612,  -2.2436,  -2.6067, -15.7527],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6135, -2.9094, -2.7848, -1.4953, -1.8842, -4.1374, -2.2917, -1.1395,
        -3.1071, -1.5413, -2.7865, -4.4057, -3.2623,  0.5505, -6.6954, -2.1730,
        -2.4425, -2.0980, -4.0383,  0.9643], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3639, -2.6294, -2.7702, -3.1236, -4.6513, -4.7226, -4.0243, -0.6704,
        -4.8076, -2.7007, -7.5975, -8.8134, -5.3218, -7.6186, -5.2249, -3.6776,
        -1.3893, -1.2901, -3.2539, -4.8399], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7410, -1.1406, -1.7057, -5.5612, -1.4256,  1.1333, -1.2842, -2.0117,
        -1.3605, -3.0369, -2.6777, -0.3968, -3.7520, -1.4763, -4.1502, -1.0776,
        -5.4290, -1.5484, -2.2245, -7.8662], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7961, -3.3291, -2.9287,  0.2219, -3.4191, -2.0142, -4.3691, -1.2545,
        -6.2011, -1.8909, -0.4787, -2.3722, -1.7994, -3.7940, -1.9439, -5.7949,
        -2.3804,  0.4064, -2.8944, -1.4884], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1173,  -4.6104,   1.0182,  -1.4671,  -1.4597,  -1.7475,  -1.2061,
         -7.2456,  -1.5286,  -2.4051,  -5.4095,  -1.5115,  -3.5214,  -1.2084,
         -5.5935,  -1.1676,  -0.8853,  -4.0466,  -2.5511, -25.8852],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2962,  -4.5636,   0.3078,  -4.8818,  -2.2228,  -3.0693,  -1.9736,
         -5.8394, -16.0001,  -2.8755,  -1.8873,  -3.7467,  -1.8565,  -5.2702,
         -3.2416,   0.6202,  -4.7134,  -3.3289,  -3.1548,  -2.5668],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0460, -3.1418, -1.8468, -2.8853, -0.7487, -2.4282,  1.7049, -2.3432,
        -2.0466, -1.3524, -1.4914, -6.8351, -2.6522, -0.6634, -1.1311, -3.2942,
        -0.1690, -4.8931, -3.0029, -9.2821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9891,  -5.7779,  -3.0758,  -0.9546,  -5.6833,  -4.3665, -13.5613,
         -6.4472,  -4.6875,  -5.0238,  -2.3406,  -2.6073,   1.9802,  -6.4786,
         -3.7124,  -4.1898,  -2.7516,  -5.7440,   0.6131,  -1.9552],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5621, -3.1875, -4.2903, -4.7585,  0.5628, -3.8985, -2.8485, -2.2979,
        -2.1142, -6.1872, -2.6906,  0.3418, -9.5036, -3.7792, -7.3723, -3.0101,
        -4.5291,  0.5247, -5.3751, -2.3798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6486, -0.5665, -4.0153, -2.8388, -2.3417, -3.9544, -6.2972, -1.9921,
        -0.5494, -3.0727, -1.9261, -2.4004, -3.2143, -1.7910,  1.0017, -1.6974,
        -1.5589, -2.2031, -4.9032, -3.1850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3957,  -5.4463,   1.1277, -29.9661,  -2.8705,  -3.2212, -10.2378,
         -6.2879, -33.1597, -10.9207, -27.9282, -11.2967,  -5.2619, -10.5369,
         -4.9607,  -6.6371,  -7.3836,  -1.1128,  -9.4669,  -6.4015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.6682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6861,  0.8639, -4.5235, -1.0617, -3.3540, -2.9849, -6.8304, -2.3579,
        -2.0336, -1.9417, -2.3129, -0.9365, -4.2200, -1.9256,  0.7977, -1.9599,
        -3.5419, -3.6146, -3.4526, -6.8668], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5099,  -3.6109,  -3.0533, -38.6701,  -5.0393,  -9.6229, -10.8316,
         -2.2595,  -3.7260,  -2.8108, -15.2132,  -5.1596,  -7.0361,  -6.3269,
         -2.0926,  -3.6495,  -3.2155,   1.3280,  -3.5376,  -2.1500],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4371,  -6.5843,  -5.5726,  -1.3118,  -5.5286,  -8.6958, -50.5979,
         -4.7356,  -7.3639,  -9.3654,  -9.9548,  -4.3775,  -3.6425,  -5.7375,
         -2.2097,   0.3984, -13.7249,  -1.9667,  -3.1390,  -5.5430],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6301, -4.5072, -3.5482, -1.3080, -0.9097, -6.3957, -1.8659, -2.1575,
        -6.6663, -2.1654, -2.9811, -0.7825, -5.5988, -1.3509, -1.4695, -5.1657,
        -1.4772, -2.4854, -5.8608, -3.5853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1076, -10.6295,  -8.4247,  -6.5039,  -6.8604,  -7.5651,  -7.1871,
         -7.8704,  -7.5780,  -7.0582,  -7.7662,  -6.7533,  -7.2289,  -7.6642,
         -7.5341,  -7.2495,  -7.8093,  -7.0493,  -6.9578,  -7.0986],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5619, -1.9038, -5.5571, -3.8038, -0.1363, -3.2418, -1.6549, -3.5887,
        -3.4196, -7.2575, -2.3055, -1.6174, -4.5309, -1.7160, -1.8413, -5.6317,
        -3.2207,  0.6529, -7.1561, -4.3914], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8079, -1.7022, -1.4485, -2.9099, -1.5550, -1.4225, -3.3904, -2.4262,
         1.3095, -3.2915, -2.1102, -1.6278, -3.5976, -5.5838, -2.4439, -1.5418,
        -2.1992, -3.4000, -1.1396, -6.3243], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5001, -0.4969, -2.3733, -1.4297, -2.5807, -0.9903, -5.8980, -1.5810,
        -1.9787, -3.0433, -1.2108, -2.6595, -4.5842, -2.5568,  1.1619, -2.0817,
        -3.0621, -1.9669, -4.0124, -3.0252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9031,  -5.1808,  -0.7788,  -2.1043,  -3.3368,  -2.8293,   1.0774,
         -1.3287,  -4.8868, -14.8153,  -4.8351, -12.1517,  -3.1116,  -5.3105,
          0.5036, -35.0869,  -9.6414,  -6.9255,  -5.5335, -15.6842],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9201, -5.0210, -2.5701, -0.8706, -2.3665, -1.2837, -2.8555, -2.5287,
        -3.4230,  1.2137, -3.9699, -2.7251, -2.1401, -1.0937, -7.1710, -2.2668,
        -2.2807, -1.7511, -3.1981, -4.2654], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3983, -5.7555, -3.8926, -3.5302, -5.9594, -4.1355, -4.1612, -2.5520,
        -4.6646, -2.9081,  0.8354, -1.7181, -1.4196, -2.0562, -4.5099, -2.6458,
        -0.2599, -4.3590, -4.2663, -1.9256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5964,  -3.1903,   0.0503,  -4.7616,  -2.8656,  -1.9595,  -5.5219,
         -3.0692,  -1.5489,  -6.5883,  -5.0136, -11.1259,  -6.1339,  -5.8884,
         -6.1258,  -2.1515,  -4.7024,  -6.3826,  -3.3576,  -2.5215],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2752, -9.2396, -6.4640, -3.3366, -2.3672,  0.7628, -3.4003, -2.2064,
        -3.0816, -2.1761, -4.4884, -0.3702, -3.2521, -3.3738, -1.8234, -4.4926,
        -2.8236,  0.8205, -1.7690, -1.7677], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6109,  -1.8783,  -3.2553,   0.8013, -10.0911,  -2.7979,  -2.5368,
         -2.5219,  -5.8542,  -0.2661,  -4.6139,  -1.4849,  -1.5611,  -1.0147,
         -5.3567,  -0.2141,  -3.1940,  -2.5847,  -3.7192,  -1.4997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8928, -4.0529, -2.3852,  0.9369, -4.2080, -2.6021, -1.1933, -1.7326,
        -4.4741, -0.3217, -4.0720, -3.5303, -2.3325, -2.3230, -6.9360, -1.6355,
        -1.6021, -4.4467, -2.3553, -3.3871], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7225, -2.3177, -1.7132, -2.7073, -3.3680,  1.3114, -5.5368, -3.4296,
        -2.4800, -2.4108, -5.1815, -3.9482,  0.0401, -2.8128, -1.4244, -2.1319,
        -3.2453, -1.8024,  1.0844, -8.2549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8541, -3.6579, -0.6893, -6.6137, -1.5544, -1.1331, -2.4853, -2.2101,
        -1.7678, -2.2700, -4.4839,  0.7772, -3.3207, -2.9626, -3.5130, -0.9590,
        -5.8327, -1.8915, -0.0499, -3.4259], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9284,  -2.3684,  -3.6185,  -3.5199,  -4.9049,  -4.5879,  -0.7474,
         -0.1513,  -2.0171,  -5.6763,  -3.0719,  -3.3844,   1.1263,  -2.2455,
         -2.5435, -19.4291,  -2.3590,  -7.5042,  -5.1133,  -3.2809],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8104,  -4.5783,  -5.2762,  -1.5202,  -4.6493,  -2.9033,  -8.1797,
         -1.7903,  -3.1632, -20.6922,  -4.7298,  -7.8836,  -0.9658,  -3.9620,
          0.1913, -16.0021,  -3.0122,  -3.0287,  -5.8822,  -4.0255],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8370,   1.0938,  -3.8332,  -3.6372,  -3.6958,  -4.7050,  -6.7215,
         -3.3962,  -2.9937,  -3.0862,  -2.7892,  -4.1896,  -4.3711,   0.3507,
         -2.2842,  -2.8241,  -3.0267,  -4.6182,  -3.1130,   1.1495],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0437,  -2.0847, -12.6644,  -1.9694,  -7.1872,  -3.9683,  -3.3911,
         -1.6611,  -5.7320,  -3.0924,  -4.9228,  -4.4922,  -7.1106,  -1.7191,
         -5.2736,  -2.4056,  -4.8831,  -9.3091,  -4.9420,  -0.3810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2742,  -2.6312,  -2.1761,  -3.6516,  -4.0904,  -2.8925,   0.5122,
         -9.8974,  -3.8429, -15.1585,  -2.0447,  -7.0853,  -1.8597,  -1.2237,
         -1.5512,  -3.1343,  -1.2764,  -2.1096,  -2.8203,   0.9617],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0710, -3.0640, -2.1229, -5.2693, -3.2419,  1.5613, -4.7446, -2.5852,
        -2.5863, -1.0834, -6.2256, -1.6119, -1.0788, -2.8193, -1.9167, -3.2301,
        -4.2836, -4.2206,  0.6902, -1.5449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8068,   0.4906,  -3.3952,  -2.7995,  -8.7245, -10.0027, -10.1562,
         -6.4918,  -3.7679,  -4.0503,  -6.5539,  -7.4527,  -2.7440,  -4.2017,
         -5.2322,  -6.1898,  -2.0241,  -1.1711,  -6.2801,  -3.5050],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6941, -3.4339, -4.1468,  1.0551, -3.1005, -1.1590, -3.9596, -3.7465,
        -4.9939, -1.3415, -2.7072, -1.9207, -2.5924, -4.8009, -2.7156,  1.5531,
        -4.4276, -3.0667, -2.1579, -1.6114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5185,  -5.6779,  -3.0635,  -0.0615,  -3.3941,  -1.9485,  -0.7054,
         -5.6830,  -1.5035,   1.6839,  -3.1121,  -1.0060,  -1.5591,  -4.9082,
         -2.4469, -10.3468,  -3.1527,  -3.0003,  -3.1360,  -2.2114],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0045, -0.1516, -3.8979, -1.9243, -1.5393, -4.6039, -2.5420,  0.7079,
        -4.9072, -1.6181, -1.5278, -4.0694, -2.4251, -0.6095, -4.1631, -1.6683,
        -2.5711, -0.5793, -6.3658, -2.2680], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8937,  -4.1889,  -8.9710,  -4.7011,  -3.3322,  -4.0266,  -6.4253,
         -8.0857,  -7.0860,  -7.5618,  -3.3166,  -2.2856,  -2.5396,  -3.5429,
         -4.4247,  -5.3444,  -3.1679,  -5.1078,  -3.2864,  -3.6098],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1449, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1504,  -3.5486, -30.4226,  -8.6517,  -9.1562,  -6.2127,  -2.0582,
         -5.4993,   1.9545,  -4.6800,  -3.9911,  -2.2763,  -3.9697,  -4.9398,
         -4.4962,  -0.4639,  -7.1152,  -2.7216,  -0.8854,  -2.8478],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2143, -1.2654, -6.0265, -0.7874, -2.1658, -2.2751, -5.7713, -1.1646,
        -3.1553, -3.6383, -5.4577, -2.4086, -2.7144, -6.0002, -1.1825, -1.2331,
        -2.6094, -2.7504, -1.2926, -5.6142], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.4663,  -2.7509,  -5.8580,  -3.5444, -20.4046,  -6.1472,  -7.3001,
         -7.2835,  -1.6449,  -8.6284,   1.0486,  -4.3271,  -3.5467,  -2.4964,
         -6.5060,  -4.6063,   0.6339,  -6.9561,  -4.6210,  -7.0575],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8344, -3.3799, -0.9398, -1.3201, -2.8395, -2.9169, -3.9656, -2.6560,
         0.9389, -3.9397, -1.0599, -3.7034, -5.9045, -2.8961, -6.0288, -3.8896,
        -6.8280, -1.7906, -3.6291, -6.0970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8938,  -5.9356,  -4.0888,  -1.5376,  -2.6509,  -3.3011,  -3.0273,
         -3.2898,  -4.5222,   0.7198,  -3.0965,  -3.1952, -15.0407,  -7.4685,
         -3.0672,  -6.4332,  -3.7267,  -4.0826,  -6.9772,  -5.7126],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1714, -3.6071,  0.2306, -3.3949, -2.5735, -5.3232, -2.7246, -5.5500,
        -3.9115,  0.6792, -3.6568, -2.5943, -3.9523, -3.9762, -3.5571, -1.9132,
        -2.3065, -1.4741, -2.6367, -5.7683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7347, -2.9241,  1.7432, -2.7215, -2.2890, -2.6991, -1.8897, -4.6658,
        -3.6795,  0.7526, -3.0306, -2.4988, -3.9678, -1.5898, -6.3621, -0.7910,
        -1.6820, -4.8540, -2.7934, -2.8451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0828, -2.5622,  1.1179, -4.2242, -4.3525, -3.3274, -5.1759, -2.6164,
        -0.1445, -1.7224, -1.9295, -1.6732, -1.8215, -2.6798,  1.4095, -4.6318,
        -4.0199, -6.1146, -9.0627, -8.8936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4254, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2243, -1.1463, -2.2839, -1.9819, -7.0515, -2.1000, -0.8285, -4.3837,
        -2.2732, -2.8502, -2.5327, -5.6879, -4.1670,  0.4664, -4.9376, -1.4357,
        -2.7556, -3.7067, -3.3922,  1.9564], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7496,  -0.7833, -10.0584,  -0.4119,  -5.5557,  -3.5578,  -3.8667,
         -5.7616,  -5.5908,  -2.1810,   0.3348,  -3.6634,  -2.3438,  -2.2439,
         -2.0893,  -5.0593,  -2.2179,  -0.6848,  -4.4626,  -2.7027],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8881,   0.7569,  -3.2957,  -4.2245, -13.1446,  -6.7907,  -2.3674,
         -6.8931,  -2.0504,  -4.0339,  -0.6000,  -1.9587,  -2.3194,  -2.4160,
         -7.3339,  -3.0791,  -0.0252,  -2.2750,  -1.5535,  -2.1750],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3415, -5.0831, -1.4722, -1.3475, -3.7427, -1.8320, -3.3922, -1.4043,
        -5.7873, -1.7295, -0.7523, -2.1821, -2.5191, -3.2390, -3.2173, -2.5285,
        -2.1333, -3.1480, -3.3539, -1.7087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5953,  -3.7638,   0.4665,  -6.2933,  -4.9291,  -1.1655,  -8.1606,
         -4.9314,  -1.7911,  -4.6586,  -5.1956, -17.8834,  -5.1622,  -7.9168,
         -5.7712,  -7.6785,  -4.9982,  -1.4835,  -3.8604,   1.4251],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8173, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.8132, -6.2138, -6.0522, -1.1557, -6.8702, -5.0561, -1.1204, -3.2700,
        -1.3773, -3.1937, -1.5127, -6.4264, -1.4471, -3.0013, -2.2192, -1.1744,
        -1.9673, -1.5678, -6.6659, -0.9754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8463,  -6.1904,  -6.5974,  -2.3908,  -4.6859,   0.7635, -10.2462,
         -3.9076,  -3.1993,  -8.7454,  -4.5236,   0.0566,  -7.1010,  -2.9580,
         -8.9468,  -6.3127,  -6.0371,  -6.2499,  -0.9877,  -7.6416],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3805, -2.5942, -2.6464, -2.6776, -3.4090,  1.1740, -2.3466, -4.0231,
        -2.1309, -3.6750, -4.2798,  0.8288, -1.9626, -2.2953, -2.5652, -1.7946,
        -3.6058, -1.2961, -3.9820, -4.7550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0713e+00, -2.8886e+00, -1.1039e+00, -6.0763e+00, -1.4048e+00,
        -2.7911e+00, -2.9019e+00, -2.7460e+00, -2.3179e+00, -1.8514e+00,
        -4.4751e+00, -1.2101e-01, -8.2737e-04, -3.9899e+00, -9.6729e+00,
        -7.5735e+00, -5.0105e+00, -1.7210e+01, -3.1417e+00, -7.0075e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5078, -1.2522, -2.4041, -1.4852, -4.9304, -2.4274, -6.0335, -3.8547,
        -0.4659, -3.9058, -1.5294, -2.4622, -1.0232, -5.7555, -0.4049, -2.5602,
        -2.9539, -3.1412, -6.4784, -3.6663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9654, -6.8386, -1.1056, -0.5423, -3.0847, -2.2048, -2.2693, -3.2181,
        -2.3906, -2.8896, -1.8505, -2.0508, -4.3559, -4.3303, -3.3589,  0.0311,
        -3.8322, -1.4500, -2.8625, -0.9244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5747, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8520, -1.1538, -4.8036, -1.5519,  0.0245, -4.8660, -2.1488, -4.3760,
        -1.3223, -6.7381, -1.5472, -1.8058, -6.2076, -2.8316, -2.5930, -5.1224,
        -3.6825,  0.1086, -3.0316, -2.8676], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0971, -3.1440, -3.9058,  0.4729, -2.4011, -3.8261, -4.3413, -0.7244,
        -8.6027, -1.7848, -2.2911, -2.0909, -1.5908, -4.0969, -0.6298, -6.3592,
        -1.1244, -1.3498, -2.7178, -1.0238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6375,  1.6752, -2.8866, -1.8301, -2.4120, -2.3660, -2.3674,  0.8870,
        -3.7349, -1.9596, -3.2134, -0.4729, -5.7052, -4.9459, -3.6863, -5.2093,
        -2.4206, -3.9303, -0.5625, -5.4230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6328, -0.1017, -2.3863, -1.1330, -2.0022, -1.5371, -6.4335, -1.7112,
        -0.9506, -5.7097, -2.8017, -1.5182, -1.4047, -6.8489, -1.7382, -4.0309,
        -2.6160, -2.5221, -2.1055, -6.4824], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6560, -6.9662, -2.2671, -9.4951, -2.1333, -7.5283, -5.1836, -5.7917,
        -2.8147, -5.8198, -5.1378, -4.7663, -0.5424, -4.1476, -2.3835, -5.6903,
        -4.0568, -2.6788, -0.2142, -2.3091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3843,  -4.7662,  -1.9675,  -0.8849,  -1.0382,  -2.7378,  -1.6838,
         -4.5684,  -3.4032,  -3.9725,  -4.7560,  -2.7352, -17.6294,  -6.6431,
        -10.7108,  -5.8732,  -1.3999,  -4.7112,   0.8089,  -1.5630],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2281,  -3.5501,  -4.0128,  -4.0044,  -4.8233,  -4.1375,  -3.9741,
         -6.6018,  -3.6682,   0.7249,  -3.0183,  -3.7756, -14.7407,  -6.4431,
         -3.1714, -11.1664,  -6.3410,  -6.5440,  -7.8450,  -6.8123],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0907,  -4.9807,  -2.5966,  -6.3883,  -4.6943,  -6.4028,  -7.5204,
         -5.1690,  -6.6403,  -5.9748, -15.5937,  -1.4199,  -6.6130, -11.5940,
         -6.6611, -14.0242,  -0.9353,  -7.4272,  -2.7940,  -3.3083],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8180, -3.4894, -3.0034, -5.1479, -1.7808, -4.3693, -3.3602, -2.5910,
         0.0819, -2.5995, -2.7567, -2.4510, -4.8524, -3.7281,  0.8133, -6.1314,
        -1.4814, -2.0432, -2.9440, -3.2058], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6055,  -7.3649,  -5.7221,  -6.0070,  -3.0570,  -1.7393, -10.6907,
         -2.3058, -15.8856, -11.3059,  -4.8813, -10.8809,  -8.9523, -39.4408,
         -8.3406, -12.4530,  -6.4890,  -7.6040,  -1.8999,  -9.3100],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.8468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6579, -2.6368, -4.4425, -9.6954, -9.7073, -1.1499, -6.7468, -1.8072,
        -3.3798, -0.9073, -3.6556, -2.2335, -2.4547, -0.5525, -5.6996, -1.3709,
         0.1024, -2.9804, -1.9520, -2.2262], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0112,  -4.3209,  -2.7412,  -5.6511,  -6.9105,  -3.5337,  -0.3155,
         -2.6801,  -2.8638,  -1.4518,  -2.4926,  -2.2333,   2.1450,  -2.9275,
         -2.4764,  -3.6665, -10.5747,  -7.5922,  -4.5891,  -2.1192],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6071, -0.9390, -5.5655, -1.6165,  1.0356, -4.2049, -2.2616, -2.4187,
        -3.7883, -4.9990, -3.0486, -1.3012, -2.1729, -1.6001, -1.9382, -5.6211,
        -3.1851, -2.8079, -4.5824, -1.7307], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5627, -2.3466, -3.3729, -3.1704,  0.0631, -2.0126, -2.1127, -1.5595,
        -3.7242, -5.2302,  0.4214, -0.3748, -2.4716, -2.9641, -3.1617, -2.4371,
         0.5951, -5.2027, -1.0796, -5.0957], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4900, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5771, -2.8879, -3.3820, -1.1578, -1.4675, -6.4190, -1.1277, -2.7643,
        -1.7130, -2.2197, -1.2882, -2.5747, -6.1089, -1.5053, -0.5512, -4.5421,
        -2.2263, -1.4078, -2.1199, -2.2514], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5467, -3.0960,  1.9741, -1.3674, -2.1515, -2.4200, -6.4911, -3.2351,
        -2.3883, -3.5252, -3.4088, -1.9831, -2.9701, -3.6917,  0.4702, -6.8882,
        -1.3641, -1.2580, -1.5038, -2.7786], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5276,  -4.9203, -10.4110, -21.3553,  -8.5220,  -7.7183,  -1.2649,
         -5.4909,  -0.3171, -30.4071,  -2.8059,  -5.7419, -10.3000,  -2.1858,
         -0.9898,  -5.3248,  -9.8387, -13.8686,  -4.8624,  -9.3053],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7532, -4.9779, -5.2074, -1.9304, -3.7816,  1.1106, -3.2100, -2.0698,
        -3.0885, -2.4824, -3.3997,  1.8716, -2.0234, -2.8808, -2.5106, -2.1344,
        -3.2823, -1.4878, -4.1078, -1.0991], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5585, -15.7133,  -4.4154,  -5.8775,  -1.4294,  -2.9514,   0.4791,
         -7.8102,  -3.4952,  -3.4172,  -1.7412,  -6.3664,  -3.8145,  -0.9016,
         -3.2700,  -1.7921,  -1.2381,  -5.5538,  -1.4759,   0.7911],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1465,  -5.5284,  -1.7271,  -7.0802,  -1.2668,  -1.1458,  -4.6087,
         -2.5077,  -1.2814,  -1.5954,  -7.3298,  -4.1345, -11.8282,  -2.5233,
         -2.2897,  -5.4189,  -3.8146,  -1.1023,  -5.2172,  -3.1794],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3414, -4.1961, -0.1990, -6.4911, -2.7217, -2.1777, -4.6338, -3.6127,
         1.3370, -2.5665, -3.2914, -2.8427, -2.5250, -4.4929, -2.0686,  0.2027,
        -3.3900, -2.0913, -3.4372, -1.9130], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1374, -7.6230, -1.4412, -2.6959, -5.1266, -3.1928, -3.4138, -1.9492,
        -5.2799, -1.5971,  0.6005, -2.9718, -0.8535, -2.9303, -1.1713, -3.1957,
         1.4852, -2.4624, -2.2810, -2.9772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4720,  -3.9103,  -3.6916, -14.7383,  -6.0324,  -7.4226,  -6.6705,
         -1.4537,  -7.1557,  -0.2574,  -6.4945,  -4.3144,  -3.4048,  -5.1704,
         -3.6440,   0.4137,  -8.3531,  -6.8505,  -3.5170,  -1.8388],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1467,  -4.8229,  -1.7007,  -1.6350,  -5.0198,  -3.0857,  -1.3167,
         -8.1796,  -3.1455,  -0.7014,  -2.3726,  -7.0633, -23.4997,  -8.2126,
         -3.4566,  -6.3897,  -1.7932,  -4.6061,   1.5407,  -5.2661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1472, -2.4896,  1.6927, -5.2983, -2.1276, -3.8176, -3.7973, -7.6819,
        -4.3450, -2.8665, -2.6725, -2.2469, -3.9047, -1.5960, -7.1894, -1.0921,
        -4.0856, -7.5222, -1.5865, -1.9475], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1464, -2.4274, -0.1827, -3.8960, -2.6606, -4.7072, -2.8110, -8.9276,
        -2.0721, -3.1896, -5.0366, -2.7870, -3.0872, -3.0658, -5.2800, -3.6617,
         0.0571, -8.1311, -4.1510, -1.3987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7562,  -3.1162,  -2.0134,  -4.9730,   0.3367,  -5.9814,  -1.6299,
         -2.9083,  -4.5795,  -4.7089,  -0.0616,  -0.6257,  -3.3213, -18.6887,
         -9.0172,  -0.3223,  -0.9486,  -2.8112,  -9.5762,  -4.7714],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9301, -1.5017, -3.1366, -0.7874, -7.0222, -0.9581, -2.2830, -4.3421,
        -2.3474, -3.0514, -0.7125, -3.4010,  1.3700, -1.7737, -3.1913, -3.7819,
        -2.8976, -2.8568, -8.4082, -3.1739], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7062,  -5.8036,  -3.7533,  -1.8019,  -6.2211,  -3.9858, -30.2769,
         -3.3267,  -8.1738,  -1.2929,  -4.4288,   1.4272,  -2.9303,  -2.7157,
         -3.0527,  -5.1816,  -5.8662,  -0.0872,  -3.4726,  -2.0175],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1738, -1.7493, -2.2427, -3.3211, -0.3702, -3.9471, -2.6162,  0.7782,
        -3.2409, -1.3150, -3.3863, -2.3969, -5.7086, -3.7475, -0.1982, -2.5099,
        -1.6384, -2.0589, -1.8039, -4.6609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1646,  -3.0957,   0.7622,  -1.2945,  -3.4750,  -2.8803,  -4.0871,
         -4.2877,  -0.4408,  -2.7237,  -2.1293,  -3.3472,  -3.2449,  -2.9189,
         -4.9615,  -4.3010,  -4.4961, -19.3758,  -3.0942,  -6.6830],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7345, -2.3522, -5.4995, -4.4586, -4.9271, -5.6193, -2.8314, -1.1944,
        -1.6313, -1.4401, -1.7007, -2.8468, -2.1882,  1.3548, -2.8025, -2.3371,
        -1.8166, -1.5603, -5.7258, -2.7547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5893e-01, -8.3263e+00, -3.4031e+00, -3.4244e+00, -3.1573e+00,
        -4.0221e+00, -7.7903e-05, -7.5068e+00, -1.8869e+00, -1.4146e+01,
        -1.2673e+00, -6.9055e+00, -1.9268e+00, -4.5169e+00,  9.7947e-01,
        -1.3897e+01, -2.7865e+00, -2.7873e+00, -5.3357e+00, -3.7530e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5948,  -5.8732,  -1.5697,   1.9829,  -3.7760,  -2.9802, -10.0441,
         -5.7851,  -5.8957,  -5.9211,  -5.2707,  -2.5593,   1.4180,  -5.2632,
         -2.2644,  -1.7839,  -1.5790,  -6.0617,  -3.6065,   0.4127],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1045, -2.9623, -4.9261, -4.5251,  0.8837, -4.3102, -1.2873, -1.6028,
        -2.1874, -4.7690, -0.2939, -0.9587, -3.1754, -3.7889, -6.4080, -3.9754,
        -1.7866, -1.7617, -1.7017, -4.5523], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7832, -1.7932, -2.9534, -6.3830, -2.2755, -1.6434, -3.5863, -0.8695,
        -3.1320, -1.7374, -4.4532,  0.8344, -3.9371, -2.2918, -3.0966, -4.1858,
        -3.8918, -7.8003, -3.6482, -2.2302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8131, -3.8227, -4.4684, -3.2761, -2.2414, -2.7268, -6.2891, -2.6972,
         0.7336, -4.5764, -2.3519, -0.7854, -3.4193, -1.6167,  1.1612, -4.2348,
        -1.5295, -3.5603, -3.6837, -4.1604], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5252, -1.7720, -0.7495, -5.8399, -1.4266, -5.0820, -4.2106, -0.5061,
        -3.7304, -3.6279, -3.6682, -1.7691, -0.7181, -2.6869, -1.8100, -3.1150,
        -2.7210,  1.6231, -3.3199, -2.6332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5144, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3759, -1.9528, -1.1004, -3.9338, -1.4607, -3.1711, -0.8188, -6.5366,
        -1.9392, -0.9793, -4.8300, -1.4363, -5.1591, -1.9527, -6.5412, -1.1694,
        -2.1891, -1.6858, -1.4880, -1.8026], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0766, -3.7027, -1.3614, -3.5345,  1.5323, -4.0060, -1.7564, -2.2151,
        -1.2769, -5.1323, -0.2191, -2.4453, -4.7535, -5.3115, -3.5170, -2.2010,
        -5.1799, -2.4410, -3.3167, -3.6539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5611,  -1.5910, -11.3608,  -1.9132, -12.1113, -11.1550,  -4.6837,
         -8.3725,  -2.7745, -24.2550,  -4.5277,  -6.8897,  -9.0255,  -5.5157,
         -2.6139,  -3.6833,   1.2899,  -2.5069,  -2.5677,  -0.8909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1478,  -1.5451,  -1.9041,  -1.9464,  -3.1391,  -2.1675,  -1.2925,
         -4.1996,  -1.6916, -16.9917,  -2.1147,  -7.6607,  -6.3904,  -3.2125,
          1.5319, -14.0673,  -3.4973,  -2.6254,  -2.2618,  -5.6251],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1470, -3.9804, -0.5165, -4.3225, -2.0680, -2.6002, -0.6897, -5.9307,
         0.5559, -4.6078, -1.3026, -2.7995, -0.1583, -6.9150, -2.1211, -0.6998,
        -3.2325, -2.2004, -2.2726, -3.8641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2971,  -3.7604,  -4.5415,  -3.6508,  -3.9684,  -2.6610,  -2.9293,
         -1.3711,  -5.7397,  -3.2156,  -0.8029,  -4.1187,  -2.5155,  -3.5003,
         -6.2992,  -4.5857,  -2.1145,  -2.8448,  -2.9500, -15.5070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.7463, -4.9312, -3.5198, -4.4870, -3.1814, -4.3548,  0.6349, -1.7576,
        -1.5270, -3.2734, -0.8875, -5.5823, -1.5112, -0.9174, -1.8317, -1.2414,
        -2.1981, -3.1876, -1.7072,  1.5503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7441,  -4.0838,  -1.7384,  -5.2117,  -4.0897,   1.1923,  -7.0829,
         -2.3974,  -3.8999,  -2.5095,  -6.6223,  -5.8733, -10.9914,  -8.0636,
         -2.0680, -12.9724,  -7.8355,  -5.9639,  -5.3748,  -1.6967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.6206,  -3.6149,  -2.8789,  -7.5660,  -4.7184,  -0.9173,  -4.3680,
         -2.5474,  -2.6984,  -1.9461,  -7.2874,  -5.3037,  -1.4931,  -8.1094,
         -2.8910,  -1.5279,  -2.1728,  -2.1792,   1.3673,  -2.0554],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1705,  -2.2800,  -3.6807,  -1.6971,  -1.6089,  -4.0111,  -1.6365,
         -2.6038,  -4.5023,  -4.2809,  -0.2433,  -0.9539,  -1.7907,  -1.1432,
         -3.7324,  -2.9721,  -1.2845,  -3.5846,  -3.3160, -10.9778],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1853, -3.1777, -3.0630, -2.5160, -3.8673, -3.8653, -0.0839, -4.4998,
        -3.5957, -5.6130, -1.4129, -5.7308, -2.9668,  0.2289, -8.9930, -3.6775,
        -2.2966, -1.2828, -5.7273, -1.6066], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8744,  -2.0886,  -3.0158,  -4.4792, -17.7022, -15.1128,  -7.6012,
         -1.6471,  -3.1941,  -6.3710,  -5.2875,  -1.8717,  -2.8364,  -1.8380,
         -7.4412,   1.3208,  -7.4620,  -1.8642,  -3.2765,  -0.6788],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3305,  -2.2978, -15.8853,  -6.8487,  -4.2059,  -5.1550,  -3.1460,
        -16.7022,  -4.8692,  -6.6816,  -7.2371,  -6.5350,  -2.4324,  -5.8633,
          0.8047, -11.6292,  -3.2303,  -3.4926,  -4.7900,  -3.4713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5005,   0.6853,  -4.0953,  -3.6546, -35.8690,  -4.7223,  -9.5192,
         -6.0433,  -6.7463,  -2.0814,  -4.9525,   0.4092,  -3.8834,  -3.7830,
         -1.8619,  -1.3561,  -4.7940,   0.8999,  -5.1115,  -3.1596],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6059, -5.5024, -1.6611, -1.2393, -4.0326, -1.1537, -3.1695, -1.8427,
        -6.8697, -1.1466, -2.4287, -4.0508, -1.4410, -3.8309, -1.9437, -3.1661,
         1.4168, -2.2186, -1.8208, -1.6024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8719,  -3.4360,  -1.9420,  -3.8066,  -3.9521, -28.0958,  -3.3646,
         -7.6094,  -2.7439,  -6.1766,  -2.5443,  -0.9466,  -3.4262,  -2.2559,
         -3.4877,  -1.5272,  -4.8125,  -2.0163,  -0.2334,  -5.4701],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6150, -6.2402, -3.7932, -0.0707, -6.8665, -1.3841, -7.9483, -2.2864,
        -2.7925,  2.0900, -0.9680, -6.1415, -2.9441, -5.1475, -6.6941, -4.2615,
        -2.7241, -8.7706, -4.7143, -2.9060], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3167, -1.6483, -3.3476, -1.4567, -7.3991, -1.7118, -1.4829, -3.4157,
        -2.4420, -1.8443, -0.9129, -5.5696, -1.3445, -0.6089, -3.8244, -1.2377,
        -2.3788, -4.3829, -5.1293, -3.7870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8121, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8409,  -3.7538,  -3.8128, -23.9480,  -7.4697,  -7.0767,  -1.9314,
         -2.6379,   2.1739,  -6.0420,  -2.9276,  -4.1770,  -1.7993, -11.8944,
         -3.0881,  -1.7528,  -4.4950,  -2.4707,  -2.1357,  -4.2707],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0718,  -0.9170,  -5.7822,  -3.1031,   0.9321,  -6.4525,  -2.5609,
         -5.8425,  -6.1646, -10.4966,  -4.8485,  -2.8462,  -5.1686,  -5.5366,
         -1.6225,  -7.6615,  -2.5137,   1.5879,  -2.9947,  -0.7865],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8459, -3.5034, -2.5181, -2.4773, -2.2462, -6.6637, -3.0694,  0.3273,
        -3.1462, -1.3842, -2.4854, -1.7396, -3.0321,  1.3860, -4.8541, -2.5849,
        -1.7528, -1.2992, -5.1979, -1.1031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2226,  -5.1731,  -2.6854,  -0.4072,  -6.3710,  -4.7952, -19.5223,
         -4.1627,  -6.0630,  -8.6564,  -4.1270,  -2.7302,  -4.5704,  -3.0149,
         -4.0415,  -4.9662,  -4.0106,   0.4523,  -9.1135,  -3.2539],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8951, -4.5447, -1.3448, -2.7999, -4.4070, -2.5547,  1.0962, -4.5011,
        -3.6527, -3.2972, -2.9522, -5.5920, -1.4859, -1.3638, -3.3484, -2.1840,
        -2.1972, -1.6769, -6.1556, -1.2005], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3527,  -1.2894,  -2.3654,  -2.1863,  -5.4174,  -1.7962,  -1.9677,
         -3.6387,  -1.7200,  -2.8808,  -6.2588,  -2.6652,   1.0183,  -5.0624,
         -2.0264, -15.7123,  -4.0301,  -7.0520,  -1.2176,  -3.4806],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3768,  -2.5290,  -0.9389,  -2.6178,  -1.6517,  -2.7681,   1.5321,
         -1.6780,  -1.7429,  -3.8509,  -1.5128,  -4.5629,  -0.8260,   0.8368,
         -4.1889,  -2.9409, -21.5633,  -1.3824,  -9.2529,  -1.0884],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7473, -12.0049,  -4.9752,  -9.8733,  -2.7521,  -8.0306,   0.8158,
         -3.8798,  -3.7501,  -1.9011,  -2.9668,  -4.8021,   0.1688,  -2.2093,
         -2.1160,  -2.4821,  -4.6015,  -3.0503,   1.3968,  -6.3427],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6688, -4.7269, -0.6981, -2.4432, -2.2134, -4.0075, -1.4328, -4.2840,
        -1.4936, -0.6840, -6.6465, -1.9649, -2.0847, -5.7525, -2.3751,  1.3739,
        -1.7681, -1.2220, -1.2956, -3.1420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6265, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8539e+00, -6.1891e+00, -4.3004e+00,  8.6473e-03, -4.7241e+00,
        -2.8928e+00, -1.7474e+01, -6.8518e+00, -6.0388e+00, -5.9122e+00,
        -6.0410e-01, -8.6475e+00,  3.9162e-01, -4.5227e+00, -4.0253e+00,
        -2.7698e+00, -5.3496e+00, -4.1653e+00, -3.7574e-01, -6.2619e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1307, -6.0235, -6.6288, -1.4689, -2.8073, -1.8823, -2.6003, -2.5331,
        -6.0301, -2.9637,  0.0092, -2.9839, -1.7856, -2.5653, -4.8571, -2.5921,
         0.7325, -6.2590, -1.7358, -3.5372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1258,  -2.5753,  -3.0521,  -4.3138,  -3.5437,  -8.3922,  -7.2280,
         -3.8334, -20.3104,  -7.5354,  -8.3335,  -7.3901,  -6.1731,  -2.5016,
         -5.0198,   0.6273, -20.6105,  -2.5405,  -2.2075,  -8.2700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4794,  -5.0547,  -3.1133,  -0.2178,  -5.5734,  -2.3360,  -3.1294,
         -2.1482,  -7.9648,  -1.8592,  -2.6841,  -3.7776,  -1.7962,  -1.9777,
         -3.2860,  -3.0727,  -2.9015,  -4.9379,  -3.2043, -12.1574],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7701, -0.9827, -2.3026, -2.7309, -4.0248, -0.2788, -2.0033, -1.6724,
        -1.3402, -3.9085, -2.4448, -2.1109, -2.0665, -3.6225, -1.5405, -1.4396,
        -6.7301, -1.2626, -4.8653, -3.3332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5980,  -5.0054,   0.2826,  -1.8604,  -1.8131,  -1.6270,  -1.7624,
         -2.8982,   2.1357,  -6.2940,  -2.1555,  -3.9527,  -6.1588,  -4.0805,
         -3.7106, -11.7767,  -3.4105,  -2.4067,  -4.6684,  -2.4303],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2203,  -6.2791,  -3.2736,  -1.4640,  -5.4783,  -3.5695, -12.3998,
         -8.4544,  -5.8276,  -7.4301,  -2.6853,  -3.6799,   0.6272,  -6.4550,
         -3.0537,  -2.5667,  -2.0308,  -4.8077,  -2.7704,  -0.8968],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6480, -2.1233, -2.5063, -3.4657, -2.2397,  0.5412, -1.7951, -1.0995,
        -2.1270, -1.1557, -6.1624, -1.3908, -1.0097, -3.0772, -3.3031, -1.0093,
        -2.9659, -3.2535,  1.5560, -3.7674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1731,  -2.6854,  -0.4072,  -6.3710,  -4.7952, -19.5223,  -4.1627,
         -6.0630,  -8.6564,  -4.1270,  -2.7302,  -4.5704,  -3.0149,  -4.0415,
         -4.9662,  -4.0106,   0.4523,  -9.1135,  -3.2539,  -1.6595],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8370,  -1.3591,  -0.7637,  -3.3417,  -2.1935,  -2.8417,  -3.4222,
         -3.5582,   1.4343,  -4.3694,  -1.5743, -11.1230,  -5.8997,  -4.1089,
         -6.2488,  -1.5410,  -6.9198,   0.3672,  -2.8239,  -4.3143],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4713, -3.5562, -3.9168, -2.0847, -1.0292, -3.1189, -1.6111, -3.0215,
        -1.5717, -4.9430, -4.0965, -0.8804, -2.1152, -1.1548, -1.9926, -1.8744,
        -3.6236, -4.0463, -2.3853, -2.2562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9160, -1.5104, -1.4650, -2.2485, -1.9178, -1.1058, -0.9840, -2.6910,
         1.5710, -9.7148, -1.9938, -3.8260, -5.1348, -4.6917, -0.7867, -8.4206,
        -3.0687, -2.0661, -4.3270, -2.3619], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0543, -2.9073, -2.2955, -1.4973, -1.9915, -3.9557, -1.5752, -0.9298,
        -3.4777, -7.1380, -9.8962, -4.7930, -8.2498, -3.8793, -5.0647,  0.1455,
        -4.4859, -2.4986, -1.2178, -1.2678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3429,  -2.2507,  -2.3018,  -4.9491,  -1.8043,  -3.1105,   1.7680,
         -7.2665,  -3.6623, -23.8441,  -7.0312,  -5.6119,  -5.1344,  -0.8893,
         -6.5321,  -2.5681,  -0.4162,  -3.9664,  -1.7147,  -1.7828],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0146, -6.4139, -6.2573, -5.3626, -2.8333, -7.4554, -2.9515, -2.0587,
        -5.1915, -2.9959,  1.7284, -4.8071, -2.4544, -2.5680, -1.1075, -6.2070,
        -1.5121, -1.9338, -2.9039, -1.8147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3557, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9370, -5.0277, -0.1185, -1.2707, -2.0324, -1.7655, -6.5349, -2.9093,
        -2.3562, -2.8151, -3.4990, -5.9402, -1.6810, -6.1995, -3.8460, -1.2289,
        -4.2464, -1.4455, -2.7342, -1.7482], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6116,  -3.1036,  -1.8617,   1.2404,  -4.0582,  -2.4235,  -2.9778,
         -4.4178,  -4.5456,  -0.6146, -12.0953,  -4.3758,  -3.2327,  -0.8669,
         -7.7699,  -2.5713,  -1.9601,  -5.0965,  -2.6682,  -0.8226],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9781, -2.9103, -2.4801, -2.9686, -5.5721, -1.5380,  0.3333, -6.3468,
        -1.4266, -3.6359, -1.4451, -5.7460, -1.4393, -2.3441, -3.4398, -1.5479,
        -2.5570, -2.8343, -2.2234,  0.4095], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3398,  -2.6573,  -5.7637,  -1.2302,  -0.4016,  -4.8118,  -2.2341,
         -2.8041,  -0.7346,  -6.6146,  -1.8371,  -0.5221, -12.4584,  -2.1471,
         -1.6211,  -2.3097,  -3.9886,   1.3859,  -3.2185,  -4.4102],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3149,  -4.3169,  -3.1026,   1.8927,  -5.6675,  -3.1548,  -2.3421,
         -3.7809,  -3.6312,   0.4835,  -2.9919,  -5.8809, -13.7036, -11.6156,
         -7.1898,  -1.3552,  -3.0395,   0.9443,  -7.5006,  -2.4917],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3254, -1.4350, -0.9036, -2.2048, -1.1221, -3.0301, -2.4830, -6.4939,
        -1.0432, -1.5621, -2.2638, -1.1812, -2.5995, -1.8215, -5.2623, -3.2020,
        -0.7800, -4.7168, -0.8124, -2.6241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7825, -4.1480, -2.1992, -9.1865, -1.1066, -1.6236, -2.7163, -2.4870,
        -4.3769, -3.4011, -0.4205, -3.5615, -1.3837, -2.4036, -3.4591, -2.9290,
         1.8767, -3.1440, -2.9453, -7.8517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7901, -1.4779, -6.6418, -1.7324, -0.0102, -3.8072, -1.8369, -2.0629,
        -1.3921, -3.6823,  1.6161, -4.6123, -2.7193, -3.9307, -1.1547, -4.3728,
        -1.6848, -0.9514, -6.3405, -4.6055], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5803, -3.1184, -2.8310, -5.4874, -1.3059, -2.1048, -2.9551, -2.3928,
        -1.5171, -3.4813, -2.8074,  1.4245, -3.2577, -5.0953, -2.5484, -2.2692,
        -7.0180, -3.6031, -1.5585, -7.1678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0837, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5552,  -0.1656,  -0.7998,  -2.3428,  -3.1213,  -4.5559,  -4.3005,
          0.8486, -10.5959,  -2.4266,  -2.7591,  -6.4479,  -2.7846,  -1.3037,
         -5.7005,  -5.9838, -14.1796,  -5.8378,  -6.0398,  -5.8929],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4426,  -3.3255,  -2.6569, -12.0486,  -5.9200,  -7.7965,  -9.1661,
         -6.7560,  -1.6610,  -9.8592,  -5.3203,  -4.7325,  -3.2514,  -2.5966,
         -4.9438,  -3.7932,  -1.2740,  -3.6690,  -2.4250,  -2.3077],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3479, -2.9580, -2.0085, -5.1441, -2.4641, -9.3792, -5.3378, -6.1044,
        -4.5764, -2.7328,  2.7499, -3.0816, -1.9436, -1.9458, -1.8245, -5.4657,
        -1.2388, -1.9197, -2.7345, -1.1035], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5097, -4.8879, -2.5313, -5.5902, -2.4828, -5.3091, -4.2085, -6.7392,
        -2.3507, -3.2091, -7.0183, -4.9121, -1.8047, -7.2221, -3.4396, -2.3892,
        -6.5256, -2.8750,  0.7508, -2.8711], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7418, -3.2914, -3.6082, -4.3817, -3.1550,  0.9471, -3.8502, -1.7386,
        -3.9779, -5.0633, -3.3864,  0.4805, -6.5242, -0.7730, -5.5939, -2.8002,
        -5.8977, -3.0195,  0.1280, -3.9299], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1667,   0.7661,  -1.7065,  -3.2468,  -0.9395,  -2.4390,  -2.4813,
          0.3555,  -3.4161,  -2.5967, -13.1921,  -5.3453, -16.5731,  -1.9478,
         -9.0785,   0.8464,  -2.4979,  -5.3723,  -8.0204, -13.7829],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2812, -1.6775, -1.7230, -4.8169, -0.7480, -5.3214, -3.5577, -1.1026,
        -4.9707, -2.3636, -3.1312, -2.7538, -4.5963, -1.4480, -1.4459, -1.9584,
        -0.9164, -2.5780, -3.5241, -2.4328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1264, -3.4958, -2.9116, -2.3830, -2.2949, -2.7800,  0.4977, -1.9274,
        -3.4508, -3.0229, -1.9431, -6.0436, -3.0860, -0.2764, -2.6727, -3.2566,
        -1.4911, -2.3882, -3.1654,  0.8287], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7263,  0.6766, -2.7881, -2.2197, -3.6000, -5.4867, -4.6357, -2.5702,
        -4.9463, -2.2085, -2.3761, -3.9403, -2.5400,  1.6907, -1.2497, -1.7830,
        -0.9185, -2.0378, -2.2764,  0.5810], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9039, -4.5668, -2.9890, -3.4492, -3.2537, -0.8268, -2.0098, -2.0406,
        -0.9657, -2.8242, -2.0825,  1.8907, -5.3269, -5.0091, -3.9297, -3.9486,
        -3.1141, -3.1729, -0.3245, -1.3156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5554, -4.7931, -1.2333, -3.1836, -1.8212, -2.1147, -4.0333, -2.9587,
         0.2105, -3.6194, -2.1367, -3.5251, -2.8756, -6.4136, -2.2006, -2.7422,
        -2.1065, -3.0638, -0.9330, -6.4505], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6764, -1.9687, -3.3613, -5.8555, -2.4160,  0.0468, -1.7727, -0.8663,
        -1.4575, -1.7658, -7.6215, -1.1317, -2.8891, -5.0860, -2.6547, -1.4504,
        -3.7201, -4.1820, -1.4297, -0.5021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1366, -4.2672, -2.4152, -0.7234, -3.0955, -1.1470, -2.3910, -1.6882,
        -5.7489, -3.0381, -1.8347, -2.3541, -1.3147, -1.3244, -1.6666, -1.6910,
         1.2629, -4.2251, -2.0994, -3.0766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7232, -2.7364, -5.0878,  1.2952, -9.2023, -3.5880, -2.0615, -5.1732,
        -4.3647,  0.1812, -2.2664, -0.7433, -8.0667, -7.9728, -9.4249, -7.5368,
        -4.1113, -8.2653,  0.6251, -2.0542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9978,  -2.1347,   0.7189,  -2.4300,  -2.2949,  -1.2640,  -2.1591,
         -5.4397,   1.3913,  -5.4078,  -1.9702,  -2.2018,  -4.2725,  -3.3166,
          0.3211,  -2.2499,  -3.3345, -17.9126,  -5.2613,  -7.6847],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3343, -3.1856, -1.7572, -3.8514, -4.7392, -3.7270,  1.2381, -3.5833,
        -1.4936, -3.4777, -2.8017, -3.3708,  0.3053, -3.3735, -2.8045, -4.3642,
        -3.6798, -4.9122,  0.1420, -2.4494], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2858, -1.6031, -1.7597, -5.6115, -0.8908, -2.4439, -3.7958, -1.5956,
        -1.7985, -1.1521, -3.6568,  1.1137, -2.2216, -2.9322, -3.0317, -0.6453,
        -6.3480, -2.1584, -0.0578, -3.4531], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6580,  -1.6400,  -4.5062,  -3.6722,  -8.7224,  -6.0737,  -7.1242,
         -5.4435,  -2.6114,  -2.6249,   0.6570,  -4.8202,  -3.1991,  -1.3214,
         -6.3635,  -4.9345,  -0.6961,  -5.1527,  -2.5997, -17.7654],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7780, -2.5806, -6.1936, -1.3725, -2.2891, -4.3164, -2.0639, -2.4766,
        -0.7122, -4.1548, -0.5122, -7.2910, -4.4485, -1.7160, -3.9340, -4.1319,
        -2.5989,  2.0091, -3.6701, -1.3969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.3696,  -5.4898,  -6.2265,  -3.6574,  -4.2753,  -7.7307,  -6.3015,
         -5.9892,  -6.6404,  -6.1789,  -3.2810,  -6.2053,  -1.3006,  -3.3003,
         -4.5483,  -4.4586,  -9.9147,  -6.5707,  -2.7351,  -3.8100],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4698, -1.1381, -2.9840, -1.2014, -0.9945, -1.9992, -1.9554,  0.8003,
        -3.7609, -2.7815, -1.2389, -5.0355, -2.3663,  0.9002, -4.2334, -2.0306,
        -1.1343, -1.9959, -2.7618,  1.4679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9536,  -5.8847,  -8.3982,  -3.8980, -18.7109,  -7.8212,  -9.3032,
         -7.1390,  -1.5241,  -6.1112,   0.3594, -25.6328,  -3.5682,  -3.2926,
         -1.6812,  -4.6892,  -1.4774,   0.4917,  -8.1182,  -7.4309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1303e+01, -9.2201e-01, -5.7692e+00, -1.2328e-02, -2.3017e+01,
        -4.4905e+00, -2.7863e+00, -2.0009e+00, -3.7368e+00, -2.0103e+00,
         1.9967e+00, -3.6118e+00, -1.4212e+00, -2.4649e+00, -1.4620e+00,
        -6.4344e+00, -4.1725e+00, -1.1688e+00, -3.2941e+00, -4.1245e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3157, -4.2309, -0.1311, -3.9548, -3.1086, -3.9741, -2.0071, -3.3967,
        -3.9230,  0.1437, -7.8083, -3.1870, -5.3835, -5.3126, -3.5733,  1.3659,
        -7.5504, -2.4332, -4.7178, -2.0645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3657,  -5.2084, -14.2607,  -7.6094,  -6.2381,  -6.3040,  -7.4540,
         -1.6725,  -4.7313,   0.0185,  -4.6540,  -3.8137,  -5.0675,  -5.4333,
         -5.1779,  -0.2857,  -4.6113,  -2.7556, -12.3351,  -6.5976],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5860, -2.5025, -3.4795, -2.0495, -3.2310, -4.0130, -2.7299, -3.6889,
        -5.6152, -5.9004, -2.3131, -3.3572, -2.6490, -1.7006, -3.1788, -3.4192,
         1.6701, -3.9097, -2.9898, -2.5015], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1666, -4.7207, -1.5255, -3.4685, -3.9271, -1.7130, -2.8567, -1.6765,
        -6.2487, -2.9494, -0.3413, -2.7999, -0.5555, -2.5675, -2.4318, -1.9462,
         0.3310, -4.6574, -4.4890, -1.6964], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4022, -3.2683, -0.2795, -3.2878, -3.3848, -1.5675, -2.0844, -6.9791,
        -3.3043, -1.9608, -1.1411, -0.9346, -2.5361, -1.7208, -4.5825, -1.8959,
        -7.8722, -2.6294, -2.5520, -0.9262], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8884, -6.4975, -2.9573, -3.3417, -4.0934, -5.5097, -3.7342, -2.3465,
        -3.0961, -1.5468, -1.6695, -3.1505, -2.3342,  0.1718, -4.5129, -1.4930,
        -2.1643, -4.8449, -1.8114,  0.9702], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2765, -3.3089, -2.2664, -2.7598, -2.0007, -3.0463,  1.4594, -3.0608,
        -3.5030, -2.5735, -2.8967, -2.8367,  1.0342, -2.9190, -2.7759, -1.4850,
        -6.2494, -2.9823,  0.9681, -4.6617], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1957,  -4.3993,  -3.1200,  -0.6821,  -1.9544,  -3.1607, -24.2664,
        -10.8076, -10.6183,  -2.2580,  -8.9230,  -1.7170,  -9.6740,  -7.3384,
         -9.0169,  -5.0279,  -4.0064,  -6.4952,  -3.8905,  -1.5360],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9438, -1.5353, -5.8583,  0.9608, -9.9475, -3.9642, -3.3231, -2.4474,
        -6.2821, -4.4147, -0.5259, -2.9348, -4.9540, -2.1358, -1.9426, -5.7289,
        -2.2257, -0.5226, -3.4505, -3.3391], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1444, -1.7886, -1.1328, -2.8253, -2.2853,  2.1738, -5.4557, -1.0180,
        -3.0936, -1.1935, -6.1737, -1.6601, -0.9039, -2.3649, -1.9982, -3.5536,
        -5.0450, -2.7784, -0.0894, -2.7594], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4917, -1.9859, -2.3440, -5.4874, -2.1129, -3.5961, -3.0156, -1.3693,
        -2.7731, -3.1423, -6.6427, -2.4313, -1.5731, -5.0357, -2.4204, -2.3143,
        -2.8727, -4.2787,  0.8626, -2.3708], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3077,  -7.4893,  -5.5520,  -4.6332,  -6.5978,  -6.3325,  -5.2279,
         -1.9602,  -4.2729,  -5.2122,  -7.4695, -13.0319,  -4.7922,  -4.2370,
         -7.6226,  -5.6433,  -5.8038,  -6.4473,  -4.3583,  -5.1799],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7705, -6.2899, -1.5398, -2.2570, -1.0289, -3.1880, -2.1201, -3.7396,
        -4.1344, -2.5418,  1.0843, -0.7506, -2.9668, -4.2350, -2.9633, -4.2249,
        -4.8284, -0.1255, -3.1819, -2.0550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7660, -1.9638, -2.2677, -1.5531, -6.3041, -1.1078, -2.8076, -2.8809,
        -1.9139, -0.7795, -2.1062, -2.2619,  0.0599, -2.5656, -2.9041, -3.4963,
        -4.4579, -5.2166, -0.3494, -2.6216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7944, -8.8155, -5.2305, -6.3078, -2.8896, -3.5306, -6.3736, -3.6154,
        -2.7983, -3.9234, -4.2168, -2.4140,  1.1235, -2.9406, -2.2658, -1.8068,
        -0.8333, -5.2390,  1.8520, -2.8109], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8982, -6.9205, -4.5212, -2.2405, -9.9993, -3.2275, -1.4015, -3.5412,
        -2.3864,  1.2521, -1.6746, -2.5331, -2.2504, -6.3157, -2.1703, -0.3764,
        -2.5579, -3.2918, -2.3545, -0.9995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2851, -1.0992, -5.3198, -1.5972, -2.0188, -2.8994, -1.8542, -1.2166,
        -3.2988, -5.8819, -1.8548, -4.0210, -3.5620, -2.4724, -1.4030, -5.4413,
        -3.4617, -0.3862, -3.2784, -1.8239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9575, -0.8087, -5.6310, -5.2931, -1.3009, -3.2460, -4.5810, -2.0814,
         0.6642, -2.3666, -2.5790, -3.6069, -1.2016, -6.4264, -2.9344, -2.8257,
        -4.5673, -2.5022, -3.0626, -3.4467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9685,  -3.7937,  -1.3007,  -4.8734,  -1.8126, -11.5259,  -6.2293,
         -5.0710,  -8.9777,  -6.7280,  -6.1436,  -7.6589,  -7.3339,   1.1889,
         -5.4047,  -3.6353,  -2.5245,  -5.8277,  -4.3079,  -2.8132],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5534, -17.8137,  -2.7066,  -6.3336,  -0.9831,  -4.1044,   1.5790,
        -22.9239,  -2.6676,  -3.1294,  -4.7980,  -4.2357,  -0.3485,  -3.3263,
         -4.8518, -47.1800,  -6.7665, -11.6794,  -5.6138,  -4.5261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9595, -3.8036, -6.2637, -4.5600, -2.3923, -1.9432, -2.1149, -3.7367,
        -3.5552, -2.2613,  1.6284, -3.3712, -1.2307, -2.5786, -1.9460, -4.8470,
        -2.8808,  0.2950, -3.5244, -1.9373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4616,  -4.7163,  -4.5771,  -0.0238,  -3.8062,  -3.2617,  -1.0253,
         -6.9962,  -3.0329,   1.0667,  -3.8388,  -2.7353, -22.1495,  -4.9381,
         -4.9364,  -5.0842,  -4.4191,  -7.4075,  -6.7269,  -1.4845],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9090,  -5.3674,  -1.1912,  -3.8061,  -3.6468, -12.9624,  -5.3284,
         -5.0331,  -7.3492,  -2.3348,  -3.8528,   0.6262,  -4.6589,  -2.6471,
         -3.4824,  -2.8014,  -5.7755,   1.3581,  -6.3308,  -1.4125],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0261,  -3.3186,  -4.2305,   1.0225,  -5.6799,  -2.9773,  -4.0034,
         -7.6201,  -7.1075, -10.5254,  -5.4559,  -1.4817, -18.1087,  -4.0580,
         -7.4129,  -0.5570,  -6.9796,   1.5717,  -3.1245,  -3.2343],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3866, -2.5572, -2.1370,  1.2217, -2.6246, -2.5731, -1.1778, -4.5851,
        -3.5924,  0.6109, -2.3566, -2.6175, -3.1331, -5.2636, -4.2631,  0.6751,
        -4.2543, -3.0100, -2.3188, -6.4146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1912, -5.6929, -1.8680, -0.3473, -2.2438, -2.1122, -3.1075, -1.5505,
        -4.2515, -1.7241,  0.2789, -2.8583, -1.4683, -2.7303, -3.3844, -3.4350,
         1.7561, -3.0180, -2.2414, -1.3527], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2713, -2.9621, -2.1276,  0.5232, -3.9280, -1.3512, -3.0707, -2.9225,
        -6.9710, -4.9947, -1.2176, -6.2490, -2.6220, -2.8776, -6.3562, -3.3135,
         0.8430, -4.0454, -2.8656, -1.7272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0254, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2348, -1.4444, -1.9512, -6.7692, -0.8365, -1.2195, -3.8296, -1.3063,
        -1.4103, -5.3849, -2.1109,  1.0427, -1.9360, -2.0191, -1.7134, -1.1685,
        -6.5576, -1.1547, -1.0456, -5.1330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9950, -4.7767, -6.4285, -7.3796, -4.2334, -6.1944, -2.7433, -3.6958,
        -8.0306, -5.8196, -5.1491, -6.8000, -6.0918, -5.3902, -5.1931, -1.0628,
        -3.4182, -3.4261, -5.4425, -6.8606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8475,  2.3008, -2.9674, -3.5329, -3.5744, -4.1159, -4.3003, -0.4370,
        -7.8204, -1.3857, -3.5707, -0.6155, -6.9168, -2.1932, -3.2503, -6.2703,
        -1.7113, -2.6783, -4.2725, -3.0699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5730, -0.2921, -2.1036, -0.8124, -4.8562, -2.1546,  1.3501, -5.0132,
        -0.5622, -3.6953, -1.1005, -7.3493, -2.7583, -1.1288, -3.8696, -2.9601,
        -2.2726, -0.2338, -6.8914, -1.4342], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4207,  -4.8808,   0.8154,  -7.0130,  -4.7608,  -2.6198,  -5.5916,
         -4.7283,  -1.9685,  -4.3575,  -3.1249, -22.8613,  -5.8252, -10.2685,
         -6.9763,  -2.7301,  -6.7124,  -2.6735,  -3.0881,  -3.1495],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0165,  0.0665, -2.0912, -1.4048, -0.8883, -3.9693, -2.2271, -0.5457,
        -2.3182, -2.2891, -2.5654, -5.0730, -1.6392,  1.0393, -3.7527, -2.2893,
        -2.4332, -1.5055, -5.2165, -2.5196], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.5855,  -6.3889,  -5.5759,  -2.0772,  -5.4498,  -2.0009,  -3.7020,
         -5.0506,  -1.3919,  -2.0091,  -3.7903,  -4.2958,   0.4404,  -4.0320,
         -3.0067,  -2.0903,  -0.6253,  -6.6730,  -1.4309,  -2.2814],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8666,  -4.6264,  -2.3361,  -6.6387,  -3.9913,  -1.7070,  -4.6798,
         -2.5042, -14.0287,  -5.0653,  -5.4673,  -8.5507,  -6.5589,  -8.1794,
         -1.8015,  -7.1685,  -2.4045, -10.3944,  -3.8501,  -2.6916],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-40.0561,  -7.9559, -13.5544,  -6.7742,  -2.1736,  -4.8532,   0.5654,
         -2.8525,  -2.6427,  -1.5851,  -0.9206,  -6.7745,  -2.3830,  -3.6507,
         -4.3372,  -1.2260,  -1.3618,  -1.7518,  -2.8726,  -2.9329],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9318, -2.3236, -1.3173, -4.4844, -1.6435, -2.9831, -2.5545, -5.9064,
        -1.1487, -1.3735, -2.4854, -1.8927, -1.9796, -2.3310, -1.9082,  0.6110,
        -4.8971, -1.0114, -2.8455, -1.2112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4598, -3.4025, -2.3541, -2.0507, -6.6462, -4.1289, -1.9603, -2.0436,
        -1.3592, -1.5284, -4.3763, -1.6956,  1.7348, -2.3575, -2.1424, -2.7685,
        -1.3812, -7.4014, -4.3720, -1.8606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8690, -2.2537, -4.9524, -2.6252, -3.2242, -5.0492, -3.6241,  0.4348,
        -4.4112, -2.5337, -4.3569, -2.1715, -4.5987, -2.6438,  0.4593, -4.1086,
        -1.7317, -3.3903, -3.1418, -6.7826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6462,   0.9383,  -1.7123,  -1.4295,  -0.8118,  -1.4432,  -6.9305,
         -0.9040, -23.0391,  -1.6923,  -2.5631,  -2.6725,  -3.9750,   0.7911,
         -3.0025,  -9.3819,  -5.1302, -11.8963,  -2.7996,  -6.5105],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9065, -3.3373, -1.5565, -6.3351, -2.3355, -1.0012, -3.8852, -2.9395,
        -2.6691, -1.4605, -5.2666, -3.1203,  0.5048, -1.1354, -2.1038, -1.8005,
        -4.4315, -3.2259,  0.1762, -3.1129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2366, -4.1174, -1.4353, -1.5815, -4.1049, -1.3601, -0.2192, -2.8615,
        -1.8118, -1.5958, -1.3076, -6.3105, -2.3197, -3.9837, -3.1694, -1.3239,
        -1.5438, -1.9039, -3.5176,  1.4141], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7774, -3.4349, -1.9556, -1.7040, -2.2192, -6.2409, -1.1972, -2.2019,
        -3.8676, -1.7012, -2.3548, -3.7844, -1.8787, -0.3181, -2.3352, -3.3883,
        -3.6639, -2.0615, -7.4734, -4.2584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9072,  -3.6001,  -3.6376,  -5.1940,  -1.6114, -11.3198,  -4.0017,
         -8.1087,  -7.0592,  -5.7613,  -6.5249,   0.7397,  -4.2677,  -5.4485,
         -1.7941,  -5.5052,  -5.6987,  -1.7341, -13.6442,  -3.2131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5833, -4.4890, -3.4833, -3.1384, -1.9825, -1.9937, -0.2815, -4.2029,
        -2.5388, -4.7236, -2.8701, -4.9121,  1.1049, -2.4183, -3.2483, -4.7196,
        -2.3214, -5.6315, -2.6373,  0.5621], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8762, -11.0320,  -2.8771,  -3.1749,  -5.8513,  -3.7595,  -2.7267,
        -11.6568,  -5.8571,  -4.7416,  -1.7908,  -4.3372,  -1.5063,   0.2574,
         -3.7206,  -2.1588,  -3.6947,  -2.5336,  -5.1171,  -2.1823],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1658, -1.6028, -0.9482, -1.6182, -2.3359, -2.5570,  1.3628, -3.9146,
        -3.9953, -3.7888, -1.6589, -5.1212, -4.3424, -0.5047, -7.6322, -1.3557,
        -1.2835, -1.7513, -7.1681, -1.6321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7393,  0.0974, -4.1423, -2.0409, -2.0503, -3.3547, -2.3206, -0.3602,
        -5.7792, -1.5795, -2.4857, -1.1684, -6.4469, -2.1696, -1.3671, -1.2411,
        -3.3573, -1.3097, -4.4859, -3.4064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4023,  -3.5396,  -4.2818,  -0.9924,  -4.8382,  -2.2071, -10.6446,
         -3.4661,  -6.6818,  -1.2911,  -3.5343,   1.5711,  -3.7641,  -3.1059,
         -5.8385,  -1.3790,  -3.7981,  -1.9596,   0.2336,  -5.1198],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8648, -1.9261, -1.7410, -6.5547, -1.3065,  0.1494, -3.1195, -1.9664,
        -5.7938, -5.0390, -1.9891,  1.8246, -3.9106, -0.8919, -1.8910, -5.1033,
        -3.1985, -1.3271, -4.9625, -2.5780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4879,  -2.8836,  -2.2751,  -0.9293,  -2.5138,  -3.0381,  -1.5999,
          1.0849,  -5.2378,  -1.6266,  -2.2639,  -5.1792,  -2.7559,  -0.2532,
         -2.9499,  -1.8086,  -2.8407,  -2.5524,  -3.7619, -17.4171],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4437, -6.8468, -1.3927, -3.1960, -1.7813, -5.5281, -1.3780, -1.9972,
        -3.6357, -2.6408, -2.5476, -5.6960, -2.8563,  0.9752, -3.3542, -3.6785,
        -2.9397, -2.9581, -6.4845, -4.3027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0398, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7280, -1.7389, -2.9294, -6.2766, -2.3140, -2.0042, -3.5947, -0.7781,
        -3.1089, -1.7537, -4.2859,  1.2238, -3.4964, -2.1864, -3.0504, -4.1208,
        -3.8537, -7.9164, -3.3837, -2.3658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0450, -2.1372, -0.0754, -1.6511, -2.5307, -2.5760, -0.0827, -6.4599,
        -0.7229, -3.6933, -6.0006, -1.0968, -2.1202, -1.9167, -5.5970, -1.8342,
        -1.0261, -5.9801, -2.5471, -4.2280], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8625, -2.3821, -4.1880, -2.5597, -2.8940, -4.4981, -3.4232,  0.8389,
        -6.2881, -2.2345, -3.8991, -1.2384, -6.0033, -1.5177, -1.8368, -1.2192,
        -1.4008, -2.0167, -1.3317, -4.5075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7357, -3.8368, -1.5500, -2.3205, -5.8923, -2.3233, -2.1434, -0.5245,
        -6.3958, -1.9200, -4.2038, -3.6364, -1.6838, -3.5705, -1.2252, -5.9963,
        -1.9475, -2.1392, -3.5430, -2.0521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0965,  -1.5271,  -0.7981,  -4.6149,  -3.0547,   0.2861,  -2.2225,
         -1.1422,  -8.3151,  -5.1842, -12.1591,  -3.3486,  -9.6218,   0.6411,
         -1.6485,  -4.1842, -14.7873,  -4.0495,  -3.9700,  -5.7796],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0155,  -0.2048,  -5.9089,  -2.9404, -12.0085,  -4.3262,  -6.1323,
         -6.2288,  -2.3781,  -3.8886,   1.1483,  -3.6012,  -3.6939,  -1.9119,
         -4.9931,  -3.4096,   0.2166,  -4.7967,  -5.5827, -20.1411],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2242,  -3.0319,   1.9918,  -4.0132,  -2.5711,  -2.8344,  -4.6648,
         -6.7511,  -0.9725,  -7.5612,  -2.6478,  -2.7033,  -5.1006,  -2.3773,
         -0.1393,  -3.6678,  -2.2541, -16.6176,  -8.2445,  -9.4451],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4459, -12.5861,  -6.8375,  -5.2065,  -3.5162,  -0.5734,  -7.4593,
         -5.0082,  -3.9069,  -3.8265,  -1.7009,   2.0496,  -3.8140,  -1.5255,
         -4.2085,  -1.4999,  -6.8146,  -1.6537,  -1.2315,  -4.0436],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3140, -0.2988, -2.7515, -4.2893, -2.9394, -3.4970, -4.5976, -3.0504,
        -7.5975, -2.2854, -3.4735, -1.2127, -5.5129, -2.7369, -2.0493, -5.4519,
        -3.2021, -1.9231, -4.4551, -2.5053], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4660, -0.8680, -2.9430, -5.5368, -4.3981,  0.6967, -2.8790, -1.1573,
        -3.1858, -3.2719, -1.9249,  1.4218, -3.2895, -2.1540, -1.0587, -4.3146,
        -1.7065,  1.1405, -3.5985, -1.3938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5599,  -1.6475,  -7.5812,  -2.8951,  -2.5323,  -3.9483,  -2.4428,
        -12.2296,  -4.9422,  -8.6301,  -5.4040,  -7.4882,  -2.2666, -13.7967,
          0.8380,  -6.6174,  -3.7772,  -1.9442,  -1.5665,  -4.2671],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8730, -5.3870,  0.8614, -2.9596, -3.0217, -3.2951, -3.5080, -3.8449,
        -0.5387, -5.5391, -1.0798, -4.0483, -0.8848, -5.7379, -3.3905,  0.3816,
        -4.7308, -2.0850, -3.6076, -5.4446], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1387,  -2.5091,  -7.1213,  -4.2249,  -3.1528,  -1.8691,  -3.3089,
         -3.6593,  -3.5686,  -5.6727,  -4.7154,  -0.9321,  -4.4945,  -1.6578,
         -3.3819,  -4.6696,  -4.1639,  -1.5930,  -5.1847,  -1.8844],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8259,  -3.6872,  -2.7059, -13.5304,  -5.1114,  -3.7978,  -5.8842,
         -7.4097,  -4.5511,  -3.4177,  -2.5432, -12.1340,  -8.3698,  -2.7828,
         -5.1128,  -2.0498,  -2.3211,   1.4517,  -3.8739,  -2.7236],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1866,  -4.9811,  -0.4869,  -2.2662,  -2.2475,  -3.0629,  -1.1988,
         -4.5722,   0.7443,  -4.4027,  -4.6259, -15.2371, -24.3637,  -4.8271,
         -7.2033,  -2.1789,  -5.5485,  -1.5530,  -0.0642,  -2.8396],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8174,  -2.7431,  -2.9695,  -6.0310,  -3.1050,   1.3137,  -2.5156,
         -1.8890,  -2.0369,  -0.8060,  -3.2680,   1.6634, -29.0840,  -3.2700,
        -31.1288, -11.4636,  -8.9057,  -0.6398,  -3.0509,  -5.5563],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6290, -5.9648, -7.9068, -4.2868, -5.9224, -3.5048, -3.3837, -7.1768,
        -6.7863, -3.2112, -6.6526, -8.6178, -3.6408, -3.0883, -5.5702, -3.5878,
        -3.8962, -9.1469, -4.4193, -6.8805], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1127, -2.3182, -6.4324, -0.7189, -3.7787, -2.8679, -0.4971, -1.6531,
        -1.4206, -3.4218,  0.6287, -2.5175, -3.1257, -2.5016, -5.3617, -2.7971,
        -0.1117, -4.3796, -6.8082, -8.7442], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0734,  -1.5910,   2.1324,  -2.7614,  -2.4567,  -5.5715,  -7.5377,
         -6.2334,  -7.0065,  -4.3740,  -5.4158,   0.9672, -10.6490,  -4.3451,
         -3.5261,  -5.8728,  -2.7791,   0.7674,  -2.6877,  -2.9867],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7519,  -3.4933,  -2.8492,   1.4589,  -2.4883,  -2.2839, -11.2184,
         -3.6578, -15.8235,  -7.5812,  -5.9595,   0.4027, -20.4861,  -9.2644,
         -5.9234,  -4.7207,  -3.6344,  -3.3565,  -1.1308,  -3.4696],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0831,  -3.5164,  -0.0185,  -2.8287,  -1.3057,  -2.0161,  -2.9664,
         -1.3178,   1.1924,  -7.5534,  -2.3268,  -7.6780,  -5.7676,  -1.1906,
         -5.2090,  -1.9720, -16.8359,  -5.5816,  -6.0804,  -7.0632],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3593,  -1.6595,  -0.4000,  -5.5937,  -1.3360,  -0.1524,  -3.9690,
         -2.2464, -15.1600,  -6.2996,  -8.1410,  -1.6347,  -2.2181,   0.9699,
        -12.0359,  -2.0761,  -1.1883,  -4.1730,  -3.1476,   1.2751],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5773, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5168, -1.4577,  0.6287, -4.3986, -1.3433, -6.9641, -6.7554, -6.8718,
        -5.4575, -2.2528, -4.4142,  0.5145, -3.6097, -4.0992, -2.6828, -5.5775,
        -2.5776,  2.1436, -3.6978, -1.3302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6953,  -3.8425, -13.6604,  -4.8396,  -5.8498,  -8.1192,  -6.3610,
         -6.8500,  -0.6560, -16.1013,   1.3726,  -6.2319,  -4.3604,  -5.8331,
         -6.8171, -11.3435,  -0.3736,  -3.2931,  -2.4288,  -2.2067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2105, -0.8635, -1.8623, -3.5706, -1.6264, -0.5650, -2.0856, -1.3878,
        -2.2652, -0.6774, -6.4705, -3.4388,  0.6961, -1.6830, -0.3250, -1.8674,
        -2.8987, -2.1342,  1.6876, -5.5595], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4381,  -2.0967,  -9.0381,  -1.8914,  -1.0896,  -5.9527,  -2.0450,
          2.0975,  -2.5704,  -1.6589,  -6.0099, -11.6816,  -8.1086,  -6.8892,
         -1.5286,  -7.4073,   2.1992,  -3.5536,  -3.2986,  -1.4203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4471,  1.4348, -2.2249, -1.5307, -2.9363, -0.8367, -5.8088, -1.5833,
        -0.0497, -2.5919, -2.3333, -1.8821, -2.8997, -1.9658, -0.0206, -2.5942,
        -2.1041, -8.8080, -4.7055, -9.5146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1853, -5.8754, -0.5469, -3.1342, -3.9781, -1.3187, -2.9032, -3.2000,
        -3.6676,  1.0812, -0.9042, -2.0944, -1.3114, -4.1247, -2.9431,  0.3923,
        -4.6096, -1.2456, -3.6188, -1.9911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1155, -4.8974, -1.4401, -1.6858, -4.9371, -1.1421,  2.0615, -3.4115,
        -2.7135, -4.6722, -2.7663, -1.7756,  1.8273, -2.3875, -3.0115, -2.7364,
        -2.5970, -6.5879, -4.2354, -1.3047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8811, -1.0784, -4.1582, -0.5417, -5.8469, -0.8043, -1.2390, -3.9329,
        -1.8619, -4.1101, -2.8124, -6.2814, -2.5359, -0.3034, -2.6114, -3.5026,
        -2.7435, -1.1785, -7.6075, -1.2037], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7728,  -5.3509, -10.8949,  -5.2091,  -2.9301,  -6.7498,  -6.2440,
         -1.1626,  -1.7650,  -2.4968, -17.1094,  -3.1832,  -7.7469,  -1.9703,
         -4.3069,   0.9152, -15.4434,  -3.3933,  -3.2340,  -1.0553],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9026, -8.5994, -3.5824, -2.2540, -1.9116, -2.7382,  2.5932, -5.5669,
        -2.3455, -3.0278, -0.6336, -5.6517, -3.2484,  0.2186, -3.4295, -0.8787,
        -3.8504, -1.2362, -5.9689, -2.4229], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2918,  -4.9250,  -1.0745,  -4.3566,  -2.8609,  -3.3285,  -5.8100,
         -3.1138,  -2.1319,  -2.7994,  -3.3786, -17.1862,  -6.2036,  -6.9860,
         -0.7324,  -8.1483,  -2.4539,  -3.8343,  -4.4733,  -3.0408],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.8479,  -3.3790,  -7.5291,  -5.6286,  -7.2754,  -4.8773,  -5.7522,
         -5.7827,  -5.6326,  -2.2851,  -2.9353,  -5.6360,  -3.9356,  -5.8881,
         -4.9842,  -8.2520,  -0.7185,  -3.2570,  -2.8612,  -3.5363],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3320, -2.8151, -5.0949, -2.2712, -0.5964, -5.8584, -4.1044, -2.6283,
        -2.2785, -4.3777,  1.0462, -2.9852, -1.6398, -4.6569, -3.3909, -5.3971,
        -3.6013, -1.2810, -7.1192, -1.8614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7584,  -7.8594,  -3.6984,  -6.5063,  -4.7730,  -3.8701,  -2.9791,
         -3.3512,  -4.2106,  -4.9938,  -3.7281,  -5.4940,  -3.0836,  -4.2455,
         -5.7757, -10.5497,  -6.9816,  -5.8095,  -5.2391,  -2.5004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9813,  -1.2632,  -4.3404,  -1.2119,  -3.0657,  -1.6835, -11.6754,
         -3.5416, -14.6970,  -1.5159,  -5.2282,   1.7195,  -3.8276,  -4.9144,
         -9.9870,  -4.3428,  -3.1377,  -5.1534,  -2.9003,  -2.5277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3317, -2.9085, -4.0922, -2.9105,  0.8348, -3.8165, -1.4692, -2.1645,
        -5.7047, -2.5123,  1.5213, -3.9638, -4.2034, -1.8045, -1.9426, -1.1083,
         1.8566, -3.0089, -1.1459, -2.3361], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4375,  -2.9971,  -1.9923,  -2.4001,  -3.3003,  -6.2396,  -3.2143,
         -3.1179,  -3.8165,  -2.9271, -14.6346,  -5.5773,  -6.9927,  -6.7107,
         -6.8213,  -1.1621, -14.9157,  -0.2109, -11.0799,  -3.2921],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2419,  -6.7771,  -1.9138,  -3.6343, -12.2226,  -1.7565,  -2.5719,
         -0.8980,  -4.6561,  -1.4957,   0.2652,  -3.8813,  -2.6495,  -2.2328,
         -0.4613,  -6.8779,  -1.1510,  -3.9285,  -4.7307,  -2.2992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9672, -1.4897, -2.7080, -3.1280, -4.5810, -3.3498,  0.0931, -3.7935,
        -1.7262, -2.4253, -3.8542, -2.7537, -4.8333, -5.2702, -3.4008, -1.2302,
        -2.2850, -7.6076, -4.2148, -3.1017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1585, -2.1033, -5.4897, -2.8042, -2.1239, -2.7524, -1.1702, -3.5735,
        -5.5395, -4.1405, -2.4553, -7.4416, -3.1683, -1.9141, -5.0341, -1.5923,
         1.3552, -4.5768, -2.4423, -2.3610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0743, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9873,  -3.0686, -14.9394,  -4.0613,  -7.6280,  -0.6199,  -5.6570,
          0.1392,  -4.2157,  -5.4135,  -3.9622,  -3.2557,  -5.2002,  -4.0178,
         -0.6134,  -3.6443,  -4.4166,  -3.1016,  -3.3569,  -3.2431],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0401,  -2.7590,   1.9060,  -3.7647,  -1.1976,  -2.9669,  -1.4598,
         -6.3765,  -1.9333,  -1.4649,  -3.8131,  -1.3673,  -1.4476,  -5.0475,
         -2.9391,  -0.6491,  -2.6239,  -1.7287, -10.3575,  -4.1290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9514, -0.6155, -2.3603, -6.5274, -2.5137, -1.7800, -4.8922, -1.4729,
        -1.3364, -1.7286, -2.3723,  1.3776, -3.6497, -4.3089, -2.6425, -5.2125,
        -5.7961, -4.9391, -1.8717, -8.1112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4469, -1.5784, -6.0452, -8.4779, -2.2456, -2.9835, -2.0029, -1.6568,
        -0.2460, -6.3184, -0.5220, -2.0400, -2.4085, -1.8478, -1.9089, -0.6199,
        -5.8774, -1.3626, -1.7334, -4.1061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4874,  -6.0657,  -2.1812,   0.1220,  -2.7955,  -1.2346,  -4.9259,
         -2.3394,  -6.3659,  -3.9918,  -1.3872,  -4.6239,  -4.4094,  -1.0224,
         -2.7299,  -1.9493,   2.3755,  -5.0549,  -3.3736, -10.6330],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2037, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2865, -1.9483, -4.5155, -1.4237,  1.4359, -5.3252, -1.7145, -2.9599,
        -5.2806, -2.5230,  0.8010, -2.8891, -2.2404, -2.0120, -2.4621, -2.6294,
         1.4241, -1.3539, -1.3806, -1.4611], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6262, -2.9967, -2.3232, -1.6927, -2.2239, -3.3960, -4.6429, -2.7987,
        -1.0579, -4.3596, -3.3850, -1.9295, -3.7473, -3.7916,  0.9812, -8.0485,
        -1.2559, -2.7077, -3.3698, -4.1937], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5003,  -1.7187,  -3.1412,   1.4133, -10.9079,  -1.4954,  -4.3598,
         -6.8905,  -3.4398,   0.9544, -10.7480,  -1.9109, -30.8661,  -3.0748,
         -7.4767,  -1.7206,  -5.3950,  -1.6527,  -3.7849,  -3.9271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.2193,  -6.2846,  -4.4834,  -7.2285,  -3.4332,  -2.7036,  -0.5012,
         -1.8236,  -3.6490,  -1.7365,  -5.1308,  -2.8832,  -1.7559,  -0.3403,
         -2.2526,  -1.8165,  -5.7477,  -2.6059,   0.0555,  -3.0460],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7147, -1.2528, -4.9644, -2.7954,  1.8921, -4.2882, -2.6709, -2.4746,
        -3.3867, -7.7239, -4.7265, -3.7742, -4.5144, -2.0486, -0.5733, -2.9352,
        -3.2562,  1.2853, -7.4486, -1.6553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1807, -4.8037, -2.3560, -4.8891, -5.4556, -1.9384, -4.2942, -5.7230,
        -1.9347, -1.8268, -2.9897, -5.1311,  1.2105, -5.2788, -2.8330, -3.6650,
        -5.0638, -3.2692,  0.2096, -6.3841], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0196,  -4.7136,  -5.7581, -10.4727,  -6.7165,  -5.7688,  -5.0657,
          0.8740,  -6.8059,  -5.2689,  -3.0397,  -2.6674,  -5.2866,  -1.4437,
          0.7089,  -2.4902,  -3.6172,  -2.0091,  -3.5670,  -6.2681],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8186,  -3.7807,   1.1825,  -4.3456,  -3.0057,  -3.5720,  -0.8551,
         -5.7181,  -1.7505,  -1.0432,  -3.9363,  -1.9140,  -3.2918,  -3.9538,
         -1.4732,   1.3505,  -2.0355,  -0.9006, -12.1916,  -2.1493],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7037, -4.1457, -7.1849, -1.9945, -3.1166, -0.7112, -5.1484, -2.5913,
         0.2415, -6.1073, -1.1625, -2.3259, -3.7627, -1.8903,  1.3169, -2.0748,
        -1.5372, -2.8000, -0.5509, -6.2707], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6407,  -4.8379,  -3.0329,   0.6410,  -3.7770,  -1.5758,  -4.4408,
         -0.6205,  -5.0496,  -1.2867,  -0.7873,  -3.0913,  -2.5309,  -1.5367,
         -0.8070,  -6.5440,   1.2897,  -3.4519,  -2.1126, -17.4260],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5260, -4.8565, -2.6884, -0.6916, -2.4398, -1.7953, -2.6968, -0.5997,
        -3.1804,  1.2981, -3.6835, -1.8965, -2.8615, -4.2771, -5.7311, -3.0512,
        -0.2774, -3.7329, -2.2016, -3.0367], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9902, -1.9966, -2.5194, -5.8973,  0.8191, -3.6667, -1.3904, -3.2065,
        -3.3527, -7.1334, -2.6619, -1.8252, -0.9585, -2.9476, -1.0448, -8.0211,
        -2.2450,  0.4997, -3.2531, -4.1038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-28.4611,  -7.4483,  -3.5686,  -2.4443,  -6.2252,  -3.7536,  -4.1462,
         -5.6471,  -5.1604,  -0.8958,  -3.1966,  -1.9425,  -2.1705,  -2.3295,
         -2.6156,  -0.4763,  -5.5948,  -1.7112,  -2.8262,  -2.6366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6918,  -0.6506,  -4.7331,   0.0174,  -3.4713,  -4.9017,  -3.6939,
        -13.6979,  -5.5187,  -2.5677,  -3.7025,  -1.9933,  -2.0466,  -2.0482,
         -7.0186,  -4.0248,  -1.7908,  -4.1317,  -3.4702,  -3.1423],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7768,  -4.8958,  -2.9401,  -6.3830,  -5.2217, -16.7395,  -8.3085,
         -7.0714,  -5.9910,  -7.6666,  -0.7184, -13.9184,   1.6136,  -2.6054,
         -3.4460,  -6.0044,  -4.9168,  -4.8767,  -4.7034,  -9.8397],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1205, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4666, -1.2427, -1.9286, -5.6372, -2.4687, -3.6029, -3.7705, -6.9665,
        -3.1703, -1.3754, -1.7721, -1.8213, -1.9595, -4.3336, -1.1578,  1.4903,
        -2.3499, -1.2143, -3.1137, -5.7900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1098, -2.5688, -2.7096, -2.3437, -6.5881, -1.9445, -0.1533, -2.5615,
        -2.3699, -1.1018, -2.4897, -1.7530,  0.8738, -2.0040, -3.2792, -1.1534,
        -2.6404, -2.8314, -1.0261, -5.5722], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5772, -1.6323, -2.4515, -5.3614, -1.7427,  0.6933, -2.5852, -2.4173,
        -2.7270, -2.4934, -3.0284,  1.8135, -2.1472, -3.8543, -1.9977, -3.4452,
        -3.9039,  1.1853, -2.2300, -2.1047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7320,  0.4858, -5.4024, -3.3886, -2.1672, -3.2261, -3.2137,  0.7557,
        -4.8767, -1.2208, -3.4103, -1.5778, -5.5715, -0.9426, -3.4283, -2.1786,
        -1.3293, -2.0123, -5.6872, -3.0346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.4178,  -4.2955,  -3.6173,  -6.2158, -10.2862,  -6.3845,  -2.7189,
         -5.4161,  -1.4644,  -4.6558,   1.8499,  -3.4644,  -4.1165,  -2.8359,
         -1.8739,  -6.1467,   1.0185,  -4.1408,  -2.2003,  -3.8173],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5174,  -0.8783,  -4.1508,  -5.2482, -13.5191,  -4.3929,  -2.8477,
         -6.0931,  -4.3680,  -2.4710,  -4.8633,  -2.8473,  -3.4648,  -0.9353,
         -6.3081,  -1.7581,  -2.4813,  -2.0962,  -2.7243,  -3.8087],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8848, -1.9763, -3.7027, -3.5222, -1.1347, -8.8194, -1.5912,  0.4506,
        -7.2486, -2.8414, -2.0208, -3.4184, -2.9511,  0.9207, -2.0314, -3.5943,
        -0.7229, -2.1773, -1.6865,  2.3146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3838, -2.2464, -2.2072, -1.8882, -2.0992, -3.8207, -1.7017,  0.1877,
        -2.9377, -1.0718, -2.0672, -0.1362, -6.7449, -4.2236, -1.6128, -3.3984,
        -3.0087, -1.0741, -3.1799, -2.6791], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4149,  -1.4722,  -3.4520,  -3.5838,  -3.2606, -16.1277,  -8.3039,
         -7.5098,  -1.9401,  -1.8360,   2.0660,  -3.0702,  -1.8761,  -1.4913,
         -4.1899,  -3.3677,  -2.5022,  -1.9478,  -2.5999,  -2.8746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.7106,  -3.1929,  -3.2317, -27.5606,  -5.9360, -20.1679, -12.3065,
         -5.0810,  -7.6179,  -1.9691,  -0.5520,  -9.1783,  -2.0121,  -3.4566,
         -3.9540,  -2.7222,   0.7775,  -2.9141,  -2.3366,  -3.1539],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1069,  -3.5495,  -0.1502,  -9.4447,  -2.8847,  -3.4434,  -7.3744,
         -2.8728,   0.4918,  -6.6093,  -3.9431, -12.4506,  -7.8183,  -2.8029,
         -7.3913,  -1.6116,  -3.0613,  -2.6208,  -2.4806,  -2.1450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3437, -1.8516, -1.2344, -3.6529, -2.1482, -0.3230, -4.2992, -2.1603,
        -2.0054, -1.4569, -3.3704,  1.4579, -2.3943, -1.7017, -2.6541, -5.1802,
        -3.1938,  1.3508, -2.4937, -2.0650], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4950, -1.4034, -1.7827, -2.2588, -2.0279,  1.5574, -3.0358, -1.0706,
        -4.2495, -1.9081, -4.5460,  1.0962, -4.3303, -1.3127, -2.4232, -1.7082,
        -5.4060, -3.7134, -0.8069, -7.8500], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6879, -1.7416, -3.8478, -0.2409, -4.8183,  0.6969, -5.0345, -2.1592,
        -9.9774, -5.2375, -7.1905, -5.6396, -2.1946, -4.4758, -2.5618, -1.6168,
        -2.7832, -1.3104, -5.0203, -5.1848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8613, -2.7525, -1.7406, -5.8084, -1.6275, -0.4838, -3.4337, -1.5556,
        -2.4732, -1.1780, -5.3447, -1.3331, -0.7626, -3.2532, -3.0481, -2.7779,
        -5.3659, -2.5028,  1.7109, -4.3712], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3560, -1.5449,  0.2608, -4.8134, -3.4742, -2.5735, -2.1336, -6.4951,
        -1.6488, -3.1082, -2.7287, -1.1908, -2.3490, -1.1694, -2.7593,  2.0268,
        -4.1170, -1.9057, -2.8929, -0.8152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3173, -2.4485, -4.1566, -4.5043,  0.0616, -5.6301, -1.5144, -1.5557,
        -3.9950, -1.4145,  1.2884, -6.2583, -1.4359, -1.5855, -1.7930, -7.9594,
        -3.9164, -1.7233, -1.7159, -1.4132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1986, -2.4778, -1.8863, -0.7519, -5.8154, -2.2389, -0.5143, -3.1855,
        -3.2806, -1.7051, -2.9325, -2.7416, -0.3555, -4.4470, -4.0496, -8.0613,
        -7.8297, -3.4257, -6.9547, -2.1947], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7847,  -1.3539,  -2.9337,  -3.7303,  -2.3422,  -1.3664,  -4.3438,
          0.0779,  -3.3929,  -2.3030,  -3.5702,  -4.9420,  -1.9230,   1.0163,
         -4.3263,  -2.9277, -20.9003,  -4.2862,  -7.5613,  -3.0825],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8488, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1592, -0.6079, -5.4999, -1.0146, -0.5137, -3.6766, -0.8255, -2.8488,
        -4.5676, -1.9258,  0.0470, -5.0730, -1.2484, -2.5242, -1.9901, -5.9667,
        -1.3207, -1.4885, -2.4000, -1.3083], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7050, -5.0458, -2.8629, -1.3608, -2.2582, -1.8568, -1.4295, -3.3348,
        -2.7768,  1.0986, -3.6293, -1.3812, -3.3848, -4.9521, -2.7337,  0.1854,
        -1.8436, -2.1475, -3.0259, -1.0331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5514,  -3.8004,  -4.3181,  -3.0738,  -1.1886,  -2.1579,  -1.1104,
         -5.5904,   1.8903, -23.4968,  -1.0180,  -2.9959,  -5.2097,  -4.0192,
          0.8696,  -1.4587,  -1.9950,  -3.6088,  -2.0923,  -7.1984],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3017, -5.1596, -2.5623, -3.1777, -2.4057, -4.9999,  0.3291, -5.3975,
        -1.3678, -3.1571, -3.1009, -7.0606, -3.6101, -2.3791, -7.1664, -2.0833,
        -1.4492, -2.7889, -1.4767,  1.1433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6975, -1.7022, -3.2341, -3.2814, -0.3825, -6.7923, -6.9327, -1.8864,
        -4.8784, -2.8077,  1.2286, -2.3070, -3.3757, -2.9059, -2.1518, -6.1539,
        -3.2006,  0.0682, -2.1580, -1.0864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3063, -2.5308, -6.4923, -4.4637, -1.4374, -2.4346, -1.8425, -1.7367,
        -4.7848, -1.3788,  0.8543, -2.5012, -0.7308, -2.8037, -3.5683, -1.5306,
         0.7552, -2.8994, -2.2335, -2.0997], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3583, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2301,  -2.3046,  -3.5742,  -1.4844,  -2.2444,  -4.3896,  -2.6472,
          1.2806,  -3.3148,  -2.2645,  -7.4171,  -6.2629, -18.2277,  -1.6243,
        -12.0048,   0.5149,  -5.2564,  -6.5306,  -9.6074, -11.3750],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0369, -3.2984, -0.5050, -5.1785, -2.1199, -4.5983, -3.7321, -3.0246,
         1.2602, -3.4039, -3.3622, -3.1992, -3.4476, -4.5585, -1.5344, -1.1306,
        -3.2072, -1.9776, -5.7733, -3.0556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5328, -3.1407, -2.9372, -4.5616, -2.5502, -0.8886, -1.3763, -6.1287,
        -2.8320, -0.4813, -2.7307, -2.1071, -1.9260, -2.1951, -2.2071,  1.0657,
        -3.0306, -1.3913, -2.6537, -1.5243], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4565, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1313,  -8.4707, -13.4173,  -4.3347,  -5.2061, -10.3047,  -3.6555,
         -6.9985,  -6.5746,  -9.5588,  -7.6965,  -4.4665,  -2.6405,  -3.1630,
         -3.9908, -10.7068,  -5.5562,  -6.7337,  -3.2474,  -4.9402],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6912, -10.8676,  -4.6582,  -1.7702,  -2.2453,  -2.1115,  -0.1345,
         -4.4183,  -3.0202, -13.3146,  -3.7972,  -7.2760,  -0.5390,  -5.4948,
         -1.7086,  -1.3824, -11.4352,  -1.6075,  -3.8072,  -5.2780],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3340,  -1.8298,  -6.9644,  -1.1202,  -0.2450,  -2.7954,  -1.6609,
         -2.5731,  -2.5826,  -6.3820,  -2.6881,  -1.2906,  -5.0029,  -1.8521,
         -2.9220,  -1.7254,  -3.6283,   0.6005, -14.5996,  -3.7984],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2543, -2.1044,  1.1892, -6.5153, -2.4563, -3.1722, -3.5801, -2.9163,
         1.1971, -1.3734, -2.3455, -1.2161, -2.6417, -1.3662,  1.5731, -2.4070,
        -1.8067, -3.3605, -2.5137, -4.3763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8808, -4.9488, -2.0439, -0.4082, -2.2437, -1.0766, -2.7156, -2.3079,
        -3.0077,  1.6259, -3.7454, -2.6466, -2.0693, -1.0298, -7.2085, -1.8580,
        -2.6400, -1.7054, -3.0281, -4.6533], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4693, -5.7903, -3.6534, -3.0625, -6.2161, -3.9143, -4.1499, -2.7333,
        -4.5330, -2.6376,  1.1066, -1.7295, -1.2160, -2.0642, -4.3005, -2.2276,
         0.7035, -4.4764, -4.0859, -1.7208], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3641, -5.9953, -2.2548, -3.1738, -1.9836, -5.2279, -1.3101, -2.7657,
        -3.4418, -0.4955, -2.3112, -4.5532, -2.1643,  0.8720, -7.1300, -1.1573,
        -1.9910, -0.8404, -4.0299, -0.2039], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4397, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0185, -4.5734, -1.6479, -2.2774, -5.1923, -2.4849, -2.9817, -3.0061,
        -4.0633, -3.9386,  0.2812, -3.8800, -1.4103, -3.7092, -5.3331, -2.2162,
         0.6069, -5.3332, -2.4386, -2.5284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7240, -1.9322, -2.0231, -4.6518, -1.4144,  1.1085, -1.3655, -2.5215,
        -2.9668, -2.8259, -4.4052, -0.6837, -2.1264, -5.6980, -2.7896, -2.4012,
        -2.7195, -5.8409,  0.0559, -1.5098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1331, -4.5995, -3.6118, -0.6440, -3.7051, -1.4166, -0.2888, -2.6756,
        -1.8124,  1.4775, -2.4741, -3.4983, -3.2702, -2.4955, -5.1470, -2.4321,
        -0.6751, -4.6603, -3.6199, -3.2356], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8537, -2.3749, -3.4740, -2.6956, -0.3132, -1.7749, -1.4697, -3.1527,
        -0.7929, -6.1314, -0.4054, -1.4021, -4.0644, -0.5774, -1.9113, -3.1758,
        -5.6790, -1.6512, -1.6959, -1.7775], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9969, -2.8511,  0.9562, -2.7454, -2.2886, -2.0948, -5.0305, -2.7348,
        -4.6850, -5.4443, -3.5690, -4.3294, -3.6019, -4.6286, -5.7398, -2.7997,
        -3.6198, -0.3264, -1.9170, -2.3239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7383, -4.6384, -5.6798, -9.8797, -6.4934, -1.2846, -3.9729, -3.1252,
        -4.1429, -7.7988, -6.6788, -2.4417, -4.3828, -2.2828, -7.9935, -7.6786,
        -4.8575, -6.8114, -6.2367, -3.0209], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8768,  -3.1073,  -2.6634,  -1.7708,  -1.5996,  -2.1088,   2.3551,
         -1.8749,  -1.9750, -10.9669,  -5.4610,  -3.7916,  -5.5972,  -2.6821,
         -4.7314,  -2.8807,  -3.5693,  -1.8659,  -1.6983,  -6.8240],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3206,  -2.2388,  -1.2080,  -2.7413,  -1.7227,  -6.2468,  -3.5993,
         -0.8108,  -3.1822,  -3.6490,  -2.0444,  -6.5287,  -2.6892,   0.3840,
         -2.4767,  -2.5305, -15.9244,  -4.6326,  -5.3744,  -1.6983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4148,   0.8901,  -3.3731,  -2.4211,  -3.4117,  -5.0650,  -3.9729,
         -2.7968, -10.9636,  -4.7585,  -0.9495,  -1.1384,  -6.8979,  -3.2952,
         -1.7564,  -3.5931,  -0.7509,  -2.5063,  -1.4908,  -5.0451],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7725e-01, -3.1578e+00, -1.9548e+00, -3.4699e+00, -3.7572e+00,
        -2.4845e+00, -7.6838e-03, -3.5917e+00, -4.5554e+00, -1.8305e+01,
        -7.3977e+00, -7.5882e+00, -5.2572e+00, -8.1603e+00, -3.8937e+00,
        -4.7308e+00, -2.9268e+00, -2.6569e+00, -7.2352e+00, -2.6024e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-29.7683,  -3.4662,  -3.4447,  -8.4295,  -3.3458,  -3.4336,  -9.0017,
         -4.5540, -15.0586,  -4.0614,  -8.2063,  -1.3531,  -8.4584,   1.7298,
        -10.1110,  -1.5861,  -3.1737,  -2.8178,  -5.0370,  -3.5338],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2119, -2.3088, -1.4564,  2.2750, -3.4747, -1.5214, -1.9261, -2.6736,
        -5.1085, -1.2118, -4.0534, -3.9386, -0.7583, -0.9940, -2.1117, -3.3250,
         1.6125, -2.1093, -1.4275, -2.7758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2692,  -5.5271,  -6.0467,  -6.3285,  -0.9198,  -3.5132,  -2.8489,
         -3.4821,  -5.2899,  -4.7452,  -0.9210,  -2.8109,  -3.1697, -10.5092,
        -10.0437,  -3.1200,  -7.3469,  -2.9837,  -3.2289,  -4.5360],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4444, -3.9431,  0.8382, -6.0954, -1.2874, -3.4349, -1.7921, -4.0612,
        -1.3439, -0.2366, -2.7519, -1.5816, -4.0816, -0.2384, -7.1388, -1.6168,
        -1.2660, -4.2271, -2.4767, -1.4861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9797,  -6.4175,  -8.0789,  -5.7402,  -2.7856, -16.2555,  -1.6609,
         -4.4527,  -4.2477,  -0.6907,  -3.0947,  -6.5897,  -1.9991,   1.0109,
         -7.3054,  -4.8275,  -3.0788,  -3.3588,  -3.3701,   0.5070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2443,  -2.2341,  -5.1502,  -3.8967,  -2.4357,  -3.1173,  -1.9773,
          1.8834,  -3.1605,  -2.8935, -17.7244,  -4.8867,  -7.4014,  -5.3734,
         -5.3247,  -5.1058,  -5.2514,   1.0676,  -6.1367,  -3.5256],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3850, -2.7606,  0.7948, -3.5902, -1.0189, -3.3345, -3.4023, -5.5888,
        -3.4500, -1.4094, -1.7880, -1.7508, -1.3728, -3.5553, -2.1864,  1.4724,
        -1.6482, -2.7678, -1.5667, -4.6887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4350, -2.2840, -3.4258, -2.2645,  1.9042, -3.7591, -2.3681, -3.3326,
        -2.4554, -4.2565, -0.8310, -4.5991, -3.5735, -1.6203, -2.4522, -4.7445,
        -3.5779,  1.2581, -3.4481, -1.6352], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8801, -4.0130, -4.7412,  0.4295, -3.2575, -2.8864, -9.8188, -5.7857,
        -3.7706, -6.9996, -3.6858, -4.0143, -1.6218, -4.2062, -3.1691, -3.3038,
        -4.5152, -4.4471, -1.6423, -3.1950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9018, -2.9263,  0.3712, -1.9830, -3.5500, -2.7476, -4.5973, -2.4516,
         0.6372, -1.8565, -0.9850, -5.2930, -3.2216, -2.1244, -0.1062, -3.0827,
        -2.5734, -3.8288, -2.2240, -5.0661], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1921, -2.3909, -5.4190, -2.1872,  1.2443, -2.4897, -0.9628, -1.2827,
        -1.6778, -1.6531, -1.1698, -4.5813, -3.1016, -2.4804, -3.9613, -4.2331,
         0.1044, -6.8198, -1.7720, -2.4631], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4225, -2.7598, -4.4218, -3.2699, -2.3106, -5.1213, -2.7881, -0.1554,
        -3.7097, -1.1502, -2.7438, -5.2473, -1.5064,  0.5517, -3.2037, -1.6688,
        -2.6850, -4.1262, -6.1313, -1.6985], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6862, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2034, -3.9409, -3.2043, -3.0786, -4.1540,  1.2897, -1.3854, -1.2484,
        -1.6432, -1.0899, -7.3571, -1.4232, -3.1117, -5.7194, -1.3414, -3.5772,
        -1.3493, -5.4283, -1.2439, -1.8217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8792,  -6.3459,  -0.8239,  -1.6633,  -4.4556,  -2.4009,  -0.6499,
         -1.8729,  -2.9270,   0.6777,  -2.1398,  -3.9425,  -4.0333,  -3.1525,
         -6.3836, -13.6187,  -6.6949,  -2.2977,  -5.4649,  -3.6658],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1371, -12.8899, -14.7870,  -6.9131,  -2.7246,  -3.1049,  -9.8425,
         -4.4311,  -2.9635,  -4.7938,  -5.4599,  -3.5377,  -3.2326,  -4.4404,
         -1.1030, -14.2004,  -4.1328,  -6.0576,  -9.5341,  -5.4160],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4274, -3.1593, -3.1799, -1.8386,  0.9987, -3.7124, -3.9292, -4.0868,
        -0.5776, -5.2609, -1.1749, -1.2030, -2.9904, -2.0733, -3.0477, -4.1334,
        -3.5110, -0.3286, -2.6575, -2.0240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8356,  -8.9717, -22.2107,  -6.1917,  -5.4220,  -6.6314,  -8.4113,
         -8.5208,  -6.8412,  -4.3275,  -6.2535, -24.6101,  -4.5855,  -0.4172,
         -7.3857,  -4.3069, -11.0349,  -6.4434,  -5.4919,  -6.2804],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6712, -3.4657,  0.0689, -5.7342, -3.9057, -1.8580, -3.3206, -1.9144,
         0.9783, -2.6980, -1.3435, -2.6483, -1.9471, -2.5906,  1.0434, -2.8158,
        -1.7516, -3.1411, -2.7919, -3.2755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6392, -3.6669, -1.5939, -1.0121, -1.8796, -0.9124, -6.5446, -2.6523,
        -0.3056, -2.9529, -1.6488, -2.5283, -5.1096, -1.9290, -0.1399, -3.1213,
        -1.6159, -2.2387, -3.0642, -4.6995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0616, -2.6658, -2.4793, -2.3598, -5.7508, -2.5523, -0.5626, -3.4636,
        -0.6413, -3.6172, -2.9790, -2.2818,  1.4999, -2.1371, -1.4605, -2.5077,
        -2.2327, -7.0092, -1.2655, -0.5418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2417, -4.9127, -5.1656, -6.6836, -2.3254, -3.4818, -3.1102, -4.8852,
        -9.3642, -2.8918, -7.2508, -6.7745, -3.8194, -2.8165, -7.1524, -4.0053,
        -3.9160, -6.5697, -4.7331,  0.0302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5957, -2.7135,  2.5223, -5.4071, -2.2588, -2.6508, -1.5856, -5.5227,
        -2.0587, -0.7308, -4.5333, -2.6547, -2.2960, -1.6369, -6.4547, -0.9440,
        -1.2976, -4.2601, -1.9189, -3.5426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2408,   1.0413,  -3.1312,  -2.6386,  -2.7660,  -3.6542,  -5.1847,
         -3.3038,   0.8569,  -3.5176,  -1.6273,  -3.7699,  -5.6708,  -3.4160,
          0.9547,  -4.0307,  -4.6065, -16.9156,  -3.8666,  -6.3776],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4908,  -5.1602,  -2.6933,  -4.5386,  -4.7788,  -1.6987,  -4.0126,
         -1.0144,  -5.1771,  -3.0732,  -0.8815,  -5.9720,  -3.8108, -12.7138,
         -3.3852,  -7.1408,  -4.9491,  -7.6597,  -1.4786,  -4.5561],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1841,  -3.6848,  -9.7180,  -2.7048,  -2.7030,  -1.1438,  -3.8864,
         -2.5016,  -1.8429,  -6.6463,  -3.9328,   0.2704,  -5.2418,  -1.4786,
        -15.7515,  -6.4675,  -8.8887,  -7.5048,  -0.9349,  -8.8524],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6820, -2.4945, -3.3565, -2.5496, -2.7947, -5.8100, -1.7920, -0.1847,
        -5.6722, -2.0248, -1.7392, -3.3027, -4.4446,  0.1299, -4.7154, -2.5529,
        -5.6773, -2.4984, -6.0969, -3.3907], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5630, -3.0382, -0.9667, -7.5395, -1.4325, -1.0545, -2.9594, -1.7372,
        -2.6228, -1.2655, -3.3998,  0.0691, -2.0535, -6.0036, -2.7712, -1.5519,
        -4.3667, -2.1392, -1.0753, -3.3911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4868, -4.2703, -1.9361, -3.6516, -5.4209, -1.3818, -3.2457, -3.2682,
        -2.0359,  0.6864, -3.0813, -2.0508, -2.9694, -2.0788, -6.8663, -2.6472,
        -1.4930, -2.3083, -1.4050, -1.6541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4150, -2.8300, -1.7589, -5.2191, -1.6805,  0.4104, -2.5071, -0.5955,
        -1.3883, -2.0263, -2.9563,  1.2498, -2.6123, -2.4488, -3.5752, -5.4314,
        -2.9378, -0.9266, -3.8882, -2.2339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3407, -1.1773, -0.9913, -3.0762, -1.7101, -3.2649, -2.7921, -5.9763,
        -1.3080, -0.7310, -2.5018, -2.1258, -0.7207, -4.4387, -2.9822,  1.6784,
        -3.7640, -1.3753, -2.5695, -0.8326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1757, -3.3313, -2.9090, -2.9773, -2.3582, -5.0924, -4.9788,  0.9186,
        -3.7567, -1.7954, -2.5575, -4.9787, -5.8687,  0.5546, -5.5938, -2.3387,
        -2.5819, -2.5495, -6.0282, -3.4773], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7016, -2.4192, -5.3649, -1.5507, -0.2946, -1.4416, -2.1126, -2.8828,
        -1.6637, -7.0864, -3.6816, -1.9458, -4.3188, -3.9778, -2.5359, -5.3819,
        -2.2983,  0.8157, -4.0862, -4.0879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9508, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8055, -3.3884, -2.8499, -4.2246, -6.2706, -3.6307, -0.4304, -3.4873,
        -2.3199, -0.4188, -2.2918, -2.1723,  0.8957, -4.6543, -2.3361, -2.7996,
        -1.3760, -9.1871,  1.6435, -2.5323], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5301,  -4.5328,  -5.8379,  -1.1706,  -4.5682,  -3.3339, -12.8718,
         -3.4718,  -8.2098,  -1.3684,  -4.4150,   0.3796,  -4.0101,  -3.1833,
         -3.4407,  -2.6541,  -4.3622,   0.4160,  -2.9712,  -1.2241],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1894, -4.0264, -7.1503, -7.7011, -3.3564, -2.4241, -1.2979, -2.9519,
        -1.8825, -3.9437, -1.3171, -7.8618, -7.9516, -8.3077, -6.5240, -2.3558,
        -4.4406, -1.3734, -2.6381, -1.7520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0759,  -4.0599,  -4.4719,  -3.3324,  -2.6118,  -6.3168,  -1.9725,
        -25.5943,  -4.6129,  -6.4720,  -7.5716,  -4.2985,  -5.3719,   1.3450,
        -18.9124,  -2.9867,  -4.4870,  -0.5727,  -4.1933,  -1.1281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0356, -13.9500,  -3.4493,  -7.2261,  -1.1928,  -3.9823,   1.3192,
        -21.8753,  -2.5743,  -3.2431,  -6.3101,  -6.5170, -22.8360,  -3.1782,
         -5.1882,  -6.2563,  -3.7780,  -8.2277,  -3.9269,  -1.3847],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9654,   0.7523, -13.9356,  -2.4974,  -1.6235,  -3.9317,  -1.0341,
          1.8436,  -5.6019,  -1.0140,  -2.1065,  -5.0281,  -2.7045,   1.0917,
         -6.3679,  -0.8603,  -2.3000,  -2.5498,  -5.6304,  -0.5081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2249, -1.2031, -3.9459,  0.0423, -1.9789, -5.6540, -6.4830, -4.5828,
        -4.6527, -4.5979, -0.2092, -3.2083, -2.6729, -1.2556, -2.3868, -4.5836,
         1.5445, -2.5724, -1.8350, -5.6160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0392e+00, -7.4582e+00, -1.9127e+00, -3.4683e+00, -6.1999e+00,
        -2.0458e+00, -1.2407e+00, -2.6132e+00, -1.5522e+00,  3.2365e-03,
        -2.3682e+00, -6.5239e-01, -1.4500e+00, -4.0195e+00, -2.5340e+00,
         9.4880e-01, -2.1144e+00, -1.1302e+00, -2.5597e+00, -1.5159e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3216, -1.3765, -1.4370, -3.9021, -1.6125,  0.6286, -2.4476, -2.0835,
        -1.6741, -2.3875, -1.6268, -0.0623, -2.3919, -2.2569, -2.7949, -0.3350,
        -6.4780, -0.5356, -2.0277, -3.6276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3387, -1.7870, -2.3150, -4.3248, -1.8921,  1.2605, -2.5492, -0.7210,
        -2.1111, -3.1355, -6.8435, -1.2612, -4.2225, -4.2713, -1.6824, -4.7004,
        -0.6208, -9.2540, -1.4269, -2.6333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7415, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2279,  -3.4581,  -1.0369,  -2.2560,  -0.3604,  -1.1265,  -0.9431,
         -2.7032,   1.7316,  -3.1032,  -3.2624,  -3.0592,  -1.8241,  -5.1698,
         -4.4786,  -1.4298,  -5.5742,  -3.2466, -14.1578,  -3.4593],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3073, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5572, -1.4199, -2.1082,  0.6523, -5.7042, -1.8116, -2.5164, -6.2848,
        -3.4808, -2.4149, -4.2446, -3.2841, -3.0769, -3.6812, -4.2332,  0.2116,
        -6.1143, -2.8991, -4.0127, -6.8438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1214,  -2.8057,  -0.9572,  -2.9308,  -3.1930,  -2.0151,  -0.5223,
         -3.0873,   1.2738,  -1.8583,  -3.0426, -12.1644,  -6.2261,  -3.5373,
         -7.2835,  -0.7915,  -8.8400,   0.0908,  -2.5148,  -2.0001],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0188, -6.7312, -4.2120, -1.5334, -1.8456, -3.1440, -2.8253, -1.6866,
        -4.0271,  0.5550, -3.0202, -1.9443, -4.3028, -0.8664, -5.7179, -1.6215,
         1.2380, -4.8404, -0.8236, -1.8608], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6115, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3064, -3.0980, -1.8577, -2.3069, -1.4178, -5.1526, -1.9188, -2.0603,
        -2.7061, -1.9840, -1.9804, -5.5885, -2.1411,  1.5439, -3.6071, -2.4682,
        -2.2021, -2.0285, -6.4888, -3.1533], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6217,  -1.9859,  -0.4962,  -6.4124,  -0.6377,  -1.5677,  -3.0775,
         -2.0787,  -2.0909,  -4.8932,  -3.8538,   0.5701, -18.5175,  -4.9128,
         -2.3184,  -3.9687,  -3.9459,   0.8864,  -3.1280,  -1.3048],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8884, -2.5456, -0.6954, -2.6802, -1.5345, -3.4099,  0.6187, -1.5202,
        -2.1205, -3.0352, -3.0843, -3.8475,  0.9418, -2.6257, -1.8156, -3.7173,
        -4.0065, -5.3853, -0.5632, -2.0967], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3558, -11.8349,  -5.5066,  -9.4106,  -5.7222,  -6.2296,  -1.7470,
         -3.6810,  -4.9672,  -4.5206, -14.3483,  -3.9379,  -2.2616,  -3.5592,
         -2.4094, -14.9137,  -5.9787,  -6.8575,  -6.4596,  -1.3882],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2609,  -2.7031,  -6.7889,  -5.6612,  -3.5340,   0.3489,  -2.9975,
         -3.4169, -27.9099,  -4.7301,  -8.5493,  -0.5996,  -5.4371,  -1.5866,
         -1.1023,  -4.7089,  -2.2869,  -2.8607,  -1.6813,  -4.1117],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4363, -4.3185, -1.6130, -2.3443, -1.9664, -1.8147,  1.0223, -5.1836,
        -1.7471, -2.7429, -1.5207, -5.9706, -2.8337,  0.4130, -3.0597, -0.8498,
        -3.0338, -1.4445, -3.8954,  0.8239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2735, -5.4299, -1.0324, -1.5712, -2.2322, -1.3729, -2.7915, -3.3880,
        -2.0985, -1.1234, -4.0486, -0.5850, -2.7566, -4.0299, -6.0263, -1.6951,
         0.6417, -3.4077, -2.9495, -4.4365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6303, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8398,   0.7865,  -5.6388,  -1.4269, -21.0695,  -4.4793,  -8.8053,
         -0.6260,  -6.8678,   1.0869,  -7.5577,  -3.5799,  -2.0741,  -7.0272,
         -3.6603,  -0.2791,  -7.1799,  -5.9173, -37.4783,  -5.9437],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9251,  1.3650, -5.6469, -2.3269, -3.0747, -6.2281, -2.7565,  1.8798,
        -4.7418, -1.2171, -8.6596, -6.8958, -3.9415, -6.9944, -1.0393, -6.0282,
         1.4746, -5.9598, -4.4514, -2.2339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8576, -0.6437, -1.9204, -1.7366, -1.3617, -3.7360, -1.3057, -3.3809,
        -8.4402, -2.0471, -2.6744, -3.0324, -4.6243, -3.1190, -1.3651, -2.1297,
        -1.2828, -1.4230, -3.2183, -1.8091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8888,  -3.3504, -10.2746, -24.0927,  -4.8676, -12.4951,  -7.4679,
         -4.8980,  -3.3337,  -1.6583,   0.7216, -17.8069,  -3.5846,  -4.5862,
         -4.1346,  -4.6102,   0.5328,  -3.4478,  -2.1044,  -4.2622],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5127, -4.9644, -2.0837, -1.6731, -1.9185, -1.2285, -1.6975, -3.4637,
        -0.9479,  1.0940, -3.2816, -1.9439, -1.8640, -2.9224, -7.1105, -4.6324,
        -0.7829, -8.4693, -4.1310, -2.1622], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4702, -1.4272, -0.5745, -2.2877, -1.0363, -2.0485, -2.1005, -5.8679,
        -0.7704, -0.7552, -2.1600, -0.6074, -2.5852, -0.5249, -3.8406, -0.3421,
        -6.6596, -2.2967, -2.4918, -0.6186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4288, -2.6004, -5.0288, -2.9200, -0.4564, -3.1969, -1.9649, -0.6278,
        -1.6817, -2.6148,  1.2319, -3.4574, -1.6852, -2.4503, -3.5584, -4.1667,
        -1.9116, -8.6538, -1.7931, -1.0556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2718, -2.6456, -2.3998, -4.2544, -1.4827, -4.0045, -2.1056, -3.9322,
         0.5505, -5.3731, -2.5076, -1.6893, -3.1313, -2.6247,  2.0412, -7.3563,
        -1.6698, -3.4718, -5.7014, -1.8390], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8359,  -1.0262,  -1.5740,  -1.3390,  -1.6422,  -2.2182,  -1.8106,
          2.3238,  -3.1416,  -0.7499, -16.9318,  -4.7849,  -8.6173,  -0.8648,
         -4.1473,   1.6798, -21.6155,  -3.1225,  -3.4870,  -6.4302],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6832, -1.8239, -1.8382,  0.8786, -7.8309, -3.0240, -2.6120, -0.0550,
        -5.5452, -0.5654, -0.0510, -3.9838, -1.8602, -2.3506, -0.7713, -5.1028,
        -3.8027, -0.1336, -3.9317, -1.6532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0011,  0.6227, -3.8623, -1.7750, -1.9211, -1.9726, -4.7976, -0.7638,
        -0.4351, -2.6893, -1.5983, -1.9620, -2.6521, -2.1818,  1.4327, -2.2898,
        -4.3117, -2.0794, -2.4081, -5.2198], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0361,  -7.1230, -13.3123,  -1.2262,  -8.1657,  -9.0242,  -5.3939,
         -4.5969,  -3.2762,  -3.5791, -10.4131,  -7.7648,  -2.9109,  -7.5991,
         -1.9130,  -4.4203,   0.8841,  -3.3904,  -2.8333,  -3.4294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3024,  1.1057, -4.4588, -3.4026, -3.3564, -1.1790, -5.3130, -1.2283,
         1.7487, -3.3461, -2.4038, -2.0347, -4.1525, -1.7938,  0.3261, -5.1587,
        -1.0018, -3.4592, -4.2156, -1.7770], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5202, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7628,  -2.5308,   0.8942,  -5.1523,  -1.1514, -20.0234,  -1.7358,
         -8.9562,  -2.1581,  -4.3087,   0.1735, -11.9505,  -2.2513,  -2.7726,
         -1.5786,  -5.2302,  -1.4196,  -1.1955,  -2.2835,  -1.4098],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3452, -4.0620, -2.0099, -0.9976, -2.9170, -3.8941,  0.3822, -3.4726,
        -2.5595, -1.2962, -3.3319, -2.6555, -0.7965, -2.4210, -2.2314, -2.4503,
        -2.5364, -4.8885,  0.2337, -1.0998], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8125,  -1.9794,  -1.1196,  -3.1735,  -3.0024,   1.1210,  -3.5391,
         -1.5692, -12.7744,  -2.7205,  -7.6111,  -0.5235,  -4.3073,   2.1577,
         -5.6312,  -2.4174,  -3.8647,  -2.4354,  -4.3933,  -3.0161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2974,  -1.7378,  -3.8444,  -1.8090,  -3.7186,  -3.8193,  -2.7191,
         -2.0508,  -3.0313,  -1.8135,  -2.9453,  -3.4856,  -2.0610,  -2.9831,
         -3.1241,  -2.3681, -10.4346,  -4.2841,  -9.5651,  -1.1532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0963, -3.2137, -0.3157, -1.6439, -2.1550, -2.0764, -3.9241, -1.5845,
         0.8907, -3.1837, -2.3586, -1.9119, -4.6594, -3.6575,  1.0763, -2.3971,
        -1.8572, -1.5263, -4.1438, -1.1212], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0452, -3.5492, -2.8004, -4.9230, -2.9913, -1.6770, -3.5219, -2.6304,
         1.2522, -2.1701, -2.7667, -3.1730, -1.1130, -4.6037, -1.3719, -0.5639,
        -1.8151, -1.5905, -3.0088, -2.5474], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8545, -8.2119, -2.7435, -6.5618, -2.4101, -8.7171, -3.6272, -1.4446,
        -2.5990, -2.2332, -7.6158, -4.1454, -0.7053, -2.1309, -2.6305, -1.6172,
        -2.6269, -1.3042,  0.9913, -4.3679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2000, -1.1255, -6.1190, -3.1622,  0.5704, -2.5045, -1.3290, -2.8244,
        -3.0393, -8.1428, -1.5372, -2.5834, -2.8135, -0.7551, -1.2751, -4.6250,
        -1.3816,  0.3913, -3.2722, -2.4936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2577, -2.0179, -1.3249, -2.1778, -1.3634, -4.9371, -1.0492,  0.4709,
        -6.0870, -0.6310, -3.0827, -1.7530, -4.4457, -3.0512, -8.8274, -7.6894,
        -4.6276, -6.9734, -4.5996, -5.7174], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7367, -5.7145, -2.1525, -1.9145, -5.8989, -1.0549, -1.2437, -2.1970,
        -6.5348, -3.0758, -1.3453, -2.3740, -0.8644, -1.9269, -2.4231, -5.5858,
        -1.6999,  0.0159, -4.3639, -2.7894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1452, -2.1912, -2.5957, -3.0828, -5.6393, -4.8669, -6.7725, -4.0887,
        -6.1283, -3.7697, -5.2540,  0.6364, -5.7210, -3.7780, -5.2717, -8.2779,
        -6.9565,  0.0916, -5.2086, -2.4541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5936, -2.2403, -2.4822, -3.9778, -1.8044,  2.4125, -5.9626, -2.3761,
        -2.3823, -1.7433, -6.8157, -4.1886, -1.8851, -3.8625, -2.6742, -1.4216,
        -2.9616, -1.4301,  1.4495, -1.3848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4955, -1.7438, -3.0184, -2.0227, -3.0699, -1.9975, -3.9138, -1.4199,
        -2.0673, -3.0657, -1.4097, -2.6620, -5.3085, -2.2004,  0.7242, -4.1212,
        -3.9071, -1.7436, -2.5837, -3.8208], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2928,  -1.6001,  -0.7738,  -3.1277,  -1.4347,   1.2709,  -3.2245,
         -1.8757,  -0.6114,  -0.8615,  -4.9127,   2.2346,  -9.1383,  -3.3367,
        -40.6595,  -8.2289, -10.6816,  -1.8860,  -3.9126,   2.4542],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9454, -2.1420,  2.2967, -2.4004, -1.0104, -2.3737, -1.1588, -3.7423,
        -0.3587, -4.7797, -6.4312, -2.8953, -3.7211, -1.8033, -5.4377, -2.8546,
        -1.5712, -2.9286, -3.7172, -4.8510], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9035, -1.7334, -1.7192, -1.6841, -4.4456,  2.0028, -3.8097, -1.9592,
        -3.2722, -4.3774, -3.0125,  0.7846, -2.8529, -1.2478, -3.1279, -1.1023,
        -3.6177, -2.6177, -3.8583, -3.3111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0352, -1.4380, -6.1532, -1.2028, -4.3052, -3.2870, -1.4176, -1.0341,
        -1.7722, -3.9198,  0.1525, -2.2006, -1.0380, -2.0978, -2.3239, -7.1796,
        -3.7876, -1.7018, -1.7354, -1.6366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5338,  -2.8491,  -2.4992,  -7.9443,  -4.2879,  -3.4273,  -4.9465,
         -4.0368, -19.0591,  -7.4654,  -6.9181,  -6.8748,  -0.7807,  -5.0378,
          0.5261, -11.1637,  -1.6895,  -3.2870,  -1.2293,  -5.1710],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7432, -2.4454, -9.6206, -2.7043, -1.5206, -2.7575, -0.7888, -5.5826,
        -2.8034, -1.3148, -2.3547, -4.3683, -1.8054, -3.4796, -6.2966, -4.4226,
        -0.9607, -7.7129, -2.4210, -2.4882], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1222,   0.9430,  -1.4004,  -3.0117, -12.8619,  -5.2568,  -5.1467,
         -6.8207,  -3.0545,  -3.0966,   1.0185,  -3.9440,  -2.2929,  -2.3351,
         -5.1906,  -2.4345,   0.8034,  -6.3099,  -2.7772,  -3.0088],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2115,  -1.0266,  -2.5353,  -1.1270,  -5.3693,  -1.2116,   1.1064,
         -4.3966,  -2.0158,  -2.2742,  -3.4967,  -4.8371,  -2.1918,  -1.2973,
         -2.0909,  -1.1212,  -1.5752,  -5.4674,  -2.6934,  -2.2262],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-29.1486,  -7.4165,  -3.5483,  -2.1769,  -6.2573,  -3.6926,  -4.1115,
         -5.6650,  -4.8930,  -0.8418,  -3.1473,  -1.8391,  -2.1902,  -2.3180,
         -2.3824,  -0.3709,  -5.7274,  -1.5550,  -2.7094,  -2.5601],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8637, -2.7673, -3.8687, -2.2003, -4.9674, -6.8004,  0.0463, -5.4176,
        -2.1086, -1.7188, -1.8358, -4.5654,  1.8707, -2.1854, -3.8625, -2.9255,
        -3.0601, -3.8425,  0.7996, -0.6967], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8525,  -4.3303,  -2.0410,  -2.1324,  -6.8354,  -2.5179,   1.4304,
         -4.0694,  -1.0472, -15.7644,  -7.9647,  -6.8568,  -5.7849,  -3.5916,
         -4.5008,  -2.6502,  -0.3449,  -2.7542,  -1.7609,  -2.8163],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5241,  -3.9715,  -1.4349,  -2.2501,  -1.0765,  -6.8129,  -1.1674,
         -2.1804,  -3.7796,  -1.0960,  -1.3131,  -6.6562,  -2.3998,  -2.8927,
         -3.4839,  -1.0874,  -2.5439,  -1.7007, -10.4324,  -1.3510],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2784,  -3.5312,  -3.5094,   0.3811,  -4.8562,  -4.5751, -13.7851,
        -12.8261,  -3.1734,  -6.1020,  -2.8382,  -1.6328,   1.2029,  -3.6737,
         -1.3470,  -1.9164,  -1.7858,  -2.3495,   1.7077,  -3.7838],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4782, -6.7649, -1.5906, -4.5331, -3.2701, -6.0057, -2.3900, -2.7116,
        -2.7431, -2.6982, -2.5481,  1.0015, -2.0084, -1.5288, -1.0606, -3.8387,
        -2.2524,  0.6948, -2.3028, -1.1031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7021, -6.4368, -0.3777, -3.4202, -2.6149, -7.2451, -5.2800, -7.6254,
        -3.0099, -3.4800,  0.4259, -5.2016, -3.1169, -5.0430, -4.7470, -5.0068,
        -0.4110, -3.6296, -3.2624, -8.9790], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0327,  -2.3285,  -6.5298,  -2.7110,   0.7788,  -5.6637,  -3.5656,
        -24.6109,  -9.6532,  -9.2562,  -5.6666,  -1.1844,  -4.7680,   1.8888,
        -25.9371,  -2.5389,  -2.9450,  -2.1368,  -4.8426,  -1.3317],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9532,  -3.4441,  -3.1531,   1.3228,  -3.2586,  -3.4346,  -0.7035,
         -8.7540,  -2.7416,  -2.5857,  -7.0634,  -2.4573, -26.1239,  -4.4352,
         -7.8574,  -4.0222,  -8.0821,  -1.4956,   0.4869,  -3.2221],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5833,  -1.1849,  -3.6535,  -8.9642,  -7.2715, -11.5556,  -7.4655,
        -10.3912,  -5.8706,  -1.3124,  -4.0175,  -1.5848,  -1.9403,  -1.9617,
         -3.4358,  -1.8883,   1.4097,  -2.5291,  -1.7827,  -1.7201],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6407, -1.9907, -0.0407, -3.4210, -2.6609, -4.6185, -4.5920, -2.4817,
         0.5555, -0.7076, -1.4049, -2.0707, -2.8107, -2.1314,  2.2116, -4.2527,
        -0.8602, -3.9188, -2.8729, -3.0335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8056, -5.3843, -1.4155,  1.4155, -3.4498, -2.9358, -1.7681, -2.1705,
        -2.1982, -1.5911, -4.9103, -0.5554, -3.7961, -4.4876, -4.3682, -0.3230,
        -7.1380, -3.3845, -1.4319, -3.2417], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7983,  -2.2431,  -2.2328, -12.8777,  -8.0510,  -4.2067,  -6.5809,
         -2.3936,  -3.5323,  -0.7076,  -3.8718,  -2.5803,  -2.1459,  -5.0457,
         -3.5844,   0.4025,  -7.9897,  -3.6997, -20.3805,  -6.4958],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0690, -3.6774, -5.2487, -3.3134, -1.0005, -5.2815, -4.4317, -4.0778,
        -2.7220, -6.7386, -2.5294, -0.7625, -2.9443, -2.6586, -2.1310, -4.4918,
        -2.5951, -2.2272, -3.3253, -2.9757], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1774,  -1.5055,  -5.3864,  -2.1815,  -0.4684,  -1.6626,  -1.0297,
        -17.5084,  -6.5807,  -6.9995,  -5.6865,  -2.8421,  -2.5769,   1.3243,
         -3.7464,  -3.7757,  -8.1352,  -6.9606,  -9.0945,  -9.2925],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7303,  -9.6142, -10.1057,  -2.6296,  -5.5524,  -2.1799,   1.9467,
         -4.7007,  -1.3279,  -2.7465,  -3.1535,  -5.3180,  -1.2406,  -2.1122,
         -2.5978,  -1.7507,  -3.9710,  -2.2530,  -4.1900,   1.2610],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3483, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9612, -3.5005, -2.7432, -2.4454, -9.6206, -2.7043, -1.5206, -2.7575,
        -0.7888, -5.5826, -2.8034, -1.3148, -2.3547, -4.3683, -1.8054, -3.4796,
        -6.2966, -4.4226, -0.9607, -7.7129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4230,   1.5069,  -3.3632,  -1.8544,  -1.9396,  -0.8869,  -3.3900,
          0.0811,  -3.8390,  -2.6673, -22.5392,  -6.2640,  -8.2525,  -0.7688,
         -5.1523,  -1.0005,  -1.4234,  -5.4697,  -1.7185,  -3.3325],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8894,  -3.3123,  -1.9496,  -4.1855,  -4.0684, -25.9208,  -5.9121,
         -7.1535,  -5.3721,  -6.1177,  -4.1392, -11.0653,   0.9993, -10.8269,
         -5.0353,  -6.2181,  -6.4210,  -9.5715,  -3.6312,  -3.5208],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3156, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7448, -5.0340, -3.8621, -3.5137,  0.3708, -7.4813, -3.1028, -2.2970,
        -2.2106, -6.1089, -3.0896, -0.5642, -3.1570, -1.6395, -2.9618, -0.6072,
        -6.3371, -1.1941, -1.4749, -4.3935], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4078,  1.2425, -3.5381, -3.2127, -2.2454, -5.5448, -2.5291,  1.0515,
        -2.0698, -1.8500, -0.8666, -2.5162, -2.0261,  1.5011, -3.1759, -1.8328,
        -3.8486, -1.9397, -6.9630, -2.0146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5957, -1.6083, -1.2456, -2.0949, -1.1606, -3.1714,  2.0392, -3.4121,
        -2.2914, -3.3754, -1.0519, -6.2483, -2.9085, -0.6141, -1.7479, -0.8543,
        -2.8441, -1.8465, -6.6611, -1.0561], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7071,  -4.3394,  -0.2168,  -4.2310,  -4.8504,  -1.4080, -14.0454,
         -4.3605,  -8.3258,  -3.5779,  -3.1136,  -1.9109,  -5.0806,  -2.2883,
         -1.1195,  -2.9932,  -3.4569,  -4.4513,  -2.7411,  -5.1558],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5304,  -3.8452,  -6.1435,  -5.9701,  -5.1871,  -6.0003,   1.4029,
        -24.0597,  -3.0387,  -4.7204,  -1.0906,  -3.3863,  -5.9597,  -0.9670,
        -10.7884,  -2.4554, -19.5965,  -5.1563,  -2.4871,  -6.9642],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1674,  -2.1083,   1.6572,  -2.4600,  -2.6514,  -1.4004,  -1.8585,
         -1.6722,   2.1660,  -3.3635,  -1.7368,  -2.1345,  -2.0376,  -4.5806,
         -0.3740,  -2.6153,  -5.7542, -20.2343,  -7.9276,  -6.3186],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1133, -1.9102,  0.5652, -6.3002, -0.7050, -3.5079, -1.0865, -6.6862,
        -0.3922, -1.0219, -5.9911, -1.8208, -4.3714, -0.7784, -5.2116, -1.5333,
        -1.2329, -3.5713, -2.3853, -2.0972], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6094, -4.1016, -0.3228, -5.3642, -1.0146, -2.3197, -2.6503, -5.5034,
        -2.6533, -1.2600, -2.7333, -2.1120, -2.5965, -5.5234, -3.5245, -0.1740,
        -2.9168, -1.0920, -4.1119, -2.6780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8911,   1.4392,  -4.7381,  -2.5900,  -5.5231,  -0.7981,  -5.7186,
         -0.8646,  -2.5663,  -3.6509,  -2.0272,  -3.4011,  -2.1013,  -5.5738,
         -1.6631,   0.5372,  -3.3286,  -2.4167, -17.2254,  -7.8724],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0562, -3.8162, -5.6549, -2.8821, -2.3848, -4.5565, -3.8891,  1.0083,
        -4.9338, -0.9732, -2.2690, -6.1854, -2.1796,  1.7214, -6.5454, -4.2512,
        -4.5481, -4.4724, -6.5582, -4.0521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2083,  -6.8028,  -3.8512,  -3.8380,  -3.9248,  -3.5486,  -2.8079,
         -1.0272,  -2.7719,   1.5792,  -4.4672,  -4.5726, -19.5700,  -5.7112,
         -4.4718,  -6.5285,  -2.1683,  -3.6531,  -0.1832,  -5.3856],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6089, -1.5128, -3.3841, -3.9077, -4.1263, -9.9778, -5.7212, -1.6182,
        -4.0717, -1.5784, -3.3186, -4.7426, -1.9169,  0.7388, -3.1393, -1.6489,
        -4.2116, -2.2083, -3.1305, -0.5079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3038, -3.2218,  0.8206, -3.6380, -2.7094, -5.7801, -9.4349, -5.3840,
        -6.6472, -3.6001, -3.2209, -2.3024, -2.3333, -2.9218, -2.9371, -5.2686,
        -2.2961, -1.5619, -7.2252, -3.2341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2540, -0.7620, -1.3488, -2.5496, -1.9003,  1.1320, -1.9851, -2.8579,
        -1.5852, -4.0548, -3.1248,  0.6825, -1.8799, -1.1588, -1.1972, -4.9790,
        -1.0076,  1.2195, -4.4015, -0.3683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9330, -3.2925, -0.5837, -1.5127, -2.7422, -6.0165,  0.8109, -2.8698,
        -1.6188, -1.0449, -1.8401, -5.3384, -0.0548, -4.1253, -2.8184, -1.3711,
        -0.7785, -6.9193,  1.6717, -3.5631], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2335,  -2.4135,   0.8868,  -3.4826,  -1.3868,  -2.2301,  -6.0956,
         -2.1591,   0.9246,  -3.9340,  -4.2579, -18.2870,  -2.6381,  -7.1498,
         -1.3342,  -4.9665,   1.4562,  -2.7995,  -2.6620, -11.9169],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4172, -0.7251, -2.2079, -4.3055, -2.2773, -0.1507, -2.8255, -2.3897,
        -3.8050, -3.9793, -2.7280, -0.4948, -6.8352, -3.2619, -2.0157, -1.9862,
        -2.7596,  1.2570, -8.8628, -1.3074], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3877, -3.3013, -1.8735, -5.5294, -2.7482,  0.1419, -3.0028, -2.3054,
        -3.0165, -6.3872, -3.6433,  0.1359, -2.4197, -2.1137, -3.3000, -2.3582,
        -1.1295,  1.8962, -2.8504, -1.5908], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9517, -8.9161, -4.8758, -9.3725, -2.2974, -3.1258,  0.5083, -7.3632,
        -1.5321, -3.4815, -2.4420, -5.9972, -0.8244, -0.5624, -5.5682, -1.9782,
        -3.1042, -1.3641, -4.4297,  1.6299], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1906, -1.9973, -3.0236, -3.0789, -2.5185,  1.8897, -2.6054, -8.1673,
        -5.9033, -6.6581, -5.5824, -5.3542, -3.2630, -7.1053, -8.1351, -2.9940,
        -0.9756, -7.9422, -1.1362, -3.8216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7532,  -3.5431,  -4.7214, -26.1830,  -6.8290,  -7.3328,  -1.5314,
         -2.6277,   1.6526, -15.0946,  -2.3248,  -2.7577,  -8.5622,  -7.8497,
         -3.8108,  -6.1401,  -3.3833, -30.5430,  -6.4212,  -7.0736],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6416, -1.7806, -2.1987, -1.5088, -5.4354, -2.3545, -1.0490, -2.5796,
        -6.3820, -2.0825, -3.2975, -6.5911, -1.4315, -3.3572, -3.8513, -1.3453,
        -4.4155, -2.7073, -1.8542,  2.0824], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3558,  -4.1291,  -1.6298,  -6.6555,  -1.8246,  -1.2171,  -3.9598,
         -8.2747,  -5.2884, -20.1529,  -3.6078,  -3.5489,  -2.4768,  -5.3315,
         -2.6207,   0.8992,  -3.2920,  -1.0679,  -3.1744,  -1.4888],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2191, -6.8795, -7.2863, -1.0144, -5.9780, -2.1007, -5.5258, -3.0483,
        -2.8227, -1.0787, -5.5304, -0.9772,  1.1226, -3.9654, -4.0208, -2.7788,
        -1.0980, -4.0580, -3.8402, -5.4990], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2751, -5.4160, -3.0725, -2.8210,  2.1070, -5.6397, -1.8126, -1.3567,
        -3.1456, -1.5125,  0.5413, -1.4030, -2.0856, -2.6587, -4.0115, -1.3345,
         0.9330, -3.5337, -1.8372, -4.5425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0873, -2.5416, -0.9989, -6.5476, -1.2014, -2.3406, -5.1796, -2.3536,
        -3.5612, -0.7564, -5.4410, -2.4025,  0.5877, -2.5808, -1.1362, -2.8736,
        -1.1488, -3.3232, -1.9685, -0.6535], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3826, -2.5886, -2.9088, -5.6027, -0.8183, -1.6681, -2.4178, -0.9976,
        -2.4358, -1.5585, -1.5093, -0.1458, -3.4215, -1.8462, -2.0738, -2.7558,
        -2.9754,  1.3795, -4.6911, -1.2250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0942e+00,  1.0805e-02, -3.2325e+00, -2.5381e+00, -1.0978e+01,
        -6.7204e+00, -4.7289e+00, -6.7291e+00, -5.3571e+00, -3.8899e+00,
         1.4383e+00, -5.7709e+00, -3.7027e+00, -4.0614e+00, -6.5120e+00,
        -3.7322e+00, -6.2955e+00, -6.5529e+00, -2.4399e+00, -1.8798e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6905, -2.2904, -2.5117, -1.4194, -3.9236,  0.3571, -8.7783, -1.6040,
        -2.3493, -2.5440, -1.7406,  1.4688, -4.5625, -1.2787, -1.5082, -1.5568,
        -5.8226,  0.9360, -3.4749, -3.0392], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4711, -5.8033, -8.5956, -4.3753, -6.4363, -3.2433, -4.6967, -2.7068,
        -5.8541, -2.9345, -4.0966, -4.1617, -4.7805, -3.7121, -2.0145, -2.8637,
        -2.1556, -4.3587, -1.9532, -5.3601], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6807, -3.0532, -2.3914, -2.2374, -1.0554, -6.1239, -1.8102, -0.1771,
        -2.8622, -1.7323, -4.1847, -0.0307, -7.5009, -1.8862, -0.5229, -8.6482,
        -2.3487, -3.0849, -2.4836, -3.8652], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2870,   0.8938,  -3.3450,  -2.6611, -14.5909,  -5.2586,  -9.8908,
         -7.6221,  -2.8078,  -7.6723,   1.1764,  -1.5773,  -1.4504,  -2.0354,
         -2.7485,  -2.6091,   1.6170,  -1.8287,  -0.8957,  -1.9027],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9231,  0.6652, -6.1257, -3.1132, -3.3887, -3.8780, -2.0681,  2.1035,
        -1.8810, -1.3190, -2.9183, -1.1160, -4.8905, -3.6232,  0.4872, -3.1017,
        -2.0436, -1.5599, -3.5656, -4.3973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0776, -6.4261, -2.5185,  0.1584, -6.1740, -4.8625, -2.5601, -3.8661,
        -8.3744, -3.3715, -0.3368, -7.2311, -2.6304, -1.2863, -3.0969, -6.0320,
        -0.7657,  0.0791, -2.1921, -1.7352], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8390, -5.4645, -4.1277, -1.4741, -1.4309, -1.9257, -2.6265, -1.4403,
        -3.2561,  1.3865, -3.9190, -2.4611, -1.8677, -1.9964, -5.6172, -2.3868,
        -0.2086, -2.9082, -2.2303, -2.3181], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0844, -5.4242, -4.8832, -2.5004, -3.2685, -3.9697, -5.7220, -6.1492,
        -4.4636, -5.4971, -3.0697, -4.9167, -1.0102, -4.2727, -4.4250, -3.9474,
        -6.8240, -3.8606, -0.6970, -3.2972], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9008,  -7.9019,  -1.9635, -12.2340,  -7.9407,  -5.1921,  -3.4672,
         -2.1889, -12.9268,  -7.9226,  -3.8679,  -6.4943,  -1.1558,  -5.0988,
         -1.7617,  -0.8703,  -1.9439,  -1.4391,  -2.9962,  -3.0366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0790,   1.3482,  -5.5625,  -3.3964, -15.4635,  -5.6893,  -7.0036,
         -6.6952,  -4.7922,  -3.1339,   1.0430, -11.3739,  -3.4032,  -3.2154,
         -0.7310,  -5.5584,  -1.8558,  -0.5511,  -2.5773,  -2.5291],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1609,   2.1721,  -1.2428,  -4.6759,  -3.8776,  -1.5384,  -4.1420,
         -5.7192,  -2.6136,  -3.7622,  -4.4679,  -2.5546,  -2.0509,  -1.3894,
         -3.5617,   1.7776,  -1.6627,  -3.2485, -15.9440,  -4.5231],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7583, -3.1969, -2.7728, -3.0379,  2.0409, -3.5300, -1.9486, -3.4251,
        -3.8334, -4.0159, -3.7589, -0.8678, -5.1510, -3.7369, -2.2857, -4.8460,
        -1.4895,  1.2553, -3.4121, -1.4128], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.4675,  -2.2155, -12.5807, -63.7448,  -1.7070, -11.0229,  -3.2948,
         -1.9536,  -1.2604,  -4.5016,  -1.7240,   0.6623,  -4.1286,  -0.5617,
         -2.1261,  -3.2683,  -2.1005,   0.8559,  -2.7624,  -1.3779],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0140, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4957, -0.2524, -2.4609, -1.0437, -1.5539, -3.2230, -1.4836,  0.5858,
        -4.0285, -2.4817, -2.6521, -5.4189, -2.6182, -0.2362, -8.0680, -1.6864,
        -2.6596, -1.0470, -4.6649, -0.7658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6402,  -2.2594,  -5.5698,  -3.3610,   0.1639,  -3.2979,  -1.9219,
        -21.4264,  -5.3038,  -5.6978, -10.5853,  -6.7873,  -7.7644,  -1.7251,
         -3.1829,   0.2896, -11.9404,  -2.6596,  -3.1632,  -3.0393],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.0939,  -1.6768,  -2.3484,  -4.8236,  -2.5411,  -1.8734,  -4.4271,
         -5.6737, -29.5042,  -4.5026,  -6.4728,  -1.0824,  -3.1828,  -0.3891,
         -4.0319,  -2.4132,  -3.8359,  -1.4121,  -5.0614,  -1.8870],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6221, -2.7438, -4.3636, -1.0257, -0.5171, -1.9155, -1.7499, -1.6392,
        -5.6683, -1.8369, -0.6411, -4.5134, -3.0105, -1.4623, -2.0412, -0.9268,
         1.6138, -1.9466, -1.3033, -2.2559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3797,  -1.5593,  -1.6526,   1.0759,  -4.3138,  -2.4136,  -1.4771,
         -3.1044,  -1.5503,   2.5541,  -4.1781,  -2.0892,  -4.9557,  -2.2779,
        -12.8099,  -1.1847,  -6.8970,  -6.8210,  -0.5028,  -2.4504],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8872,   0.7731,  -2.2785,  -1.0948,  -2.4250,  -1.8828,  -2.6743,
          0.1224,  -6.3581,  -1.6921, -24.3384,  -3.4501,  -7.0681,  -0.8921,
         -1.8716,   0.9637,  -5.2367,  -2.7893,  -1.0804,  -4.8534],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7550, -0.6872, -1.5927, -6.8379, -2.0531, -3.4226, -3.5012, -0.5518,
        -3.3091, -0.8755, -2.2672,  0.2610, -8.9721, -3.9074, -2.7609, -2.3324,
        -5.2296, -5.7621, -0.5732, -2.7666], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1433,  -2.2898,  -4.6594,  -1.5613,   1.4980,  -2.4927,  -1.4700,
         -3.9661,  -5.7546,  -2.3950,  -1.5950,  -3.9543,  -2.6035, -34.1435,
         -4.7846,  -7.9034,  -1.8610,  -4.5379,  -1.7961,  -3.0433],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9685,  -2.1450,  -1.9424,  -3.5728,  -4.2887,  -6.1405,  -5.0941,
         -1.4462,  -2.6389,  -2.8884,  -2.9165,  -4.8249,  -5.3681,  -1.2484,
         -2.7780,  -4.8550,  -3.6199, -15.5082,  -4.5839,  -1.9617],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2020,  -3.5204,  -2.0118,  -1.9939,  -4.2566,  -2.5163,  -1.4624,
         -1.6443,  -2.9663,  -1.8020,  -1.4797,  -6.2056,  -1.3546,  -0.0243,
         -2.8042,  -3.9423, -14.1982,  -9.3848,  -3.5466,  -6.9211],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4273, -2.5103,  0.7167, -3.3759, -1.5236, -3.9968, -3.8150, -2.7097,
         0.9750, -2.0830, -1.5454, -3.8112, -0.5688, -6.4058, -0.8927, -1.8865,
        -2.9662, -1.7425, -2.0053, -3.4569], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6833, -4.6969, -1.0738, -1.5324, -4.7533, -0.1828, -2.2241, -1.1249,
        -6.4476, -0.8953, -2.5875, -4.6413, -1.1384, -1.7544, -4.2507, -2.4568,
         1.0491, -3.8147, -4.3192, -2.6688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1927,  -3.4822,  -5.8766,  -8.9490,  -7.7819,  -2.3006,  -4.4032,
          0.8830,  -6.5090,  -4.5193,  -2.4328,  -5.5014,  -4.3933,  -1.5040,
         -4.3919,  -3.0836, -23.5932,  -5.8269, -10.7472,  -7.0156],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2076,  -1.7626,  -1.7588,  -2.4676,  -1.5528,   1.1822,  -2.0208,
         -2.8966,  -2.0300,  -2.2360,  -2.6272,   1.2150,  -3.5150,  -5.7405,
        -13.3938,  -5.5810,  -7.8681,  -1.2771,  -4.9385,   1.7430],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7061, -4.7832,  0.6332, -2.0000, -3.0163, -3.4230, -2.1132, -5.5441,
        -1.7511, -0.0624, -3.5581, -2.3032, -2.4266, -5.6213, -1.4366, -0.3969,
        -3.4888, -1.5226, -2.3430, -3.2878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4119,  -2.6370,  -2.1811,  -9.0765,  -3.2629,  -1.5595,  -2.2107,
         -1.8809,  -2.5070,  -3.1596,  -1.5068,   2.3522,  -2.1613,  -3.4868,
        -12.4436,  -7.2379,  -5.3801,  -6.3235,  -2.3259,  -2.5661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9520, -1.0232, -2.5334, -1.4618, -1.6160,  1.5672, -1.0440, -2.7810,
        -3.4035, -0.8193, -6.3900, -1.0854, -2.9903, -2.5862, -1.5326, -1.2484,
        -0.4901, -3.5826,  1.3895, -2.7881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4679,  -5.4355,  -2.3356,  -3.2358, -13.1269,  -7.5377,  -3.9484,
        -10.7069,  -4.5899,  -5.2983,   0.1507,  -5.1208,  -5.8628,  -3.9527,
         -4.2625,  -8.1977,  -2.5890,  -2.9213,  -4.7637,  -5.8525],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2440,  -5.0835,  -4.9231,  -3.6371,  -9.9158,  -5.7509,  -9.2802,
         -2.3598,  -4.7319,  -1.3636,  -2.4973,  -2.7647,  -4.2618,  -2.9525,
         -5.4495,  -3.2153,  -7.4192,  -2.6698,  -4.5970,  -3.6512],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8384, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4354, -11.5672,  -5.4547,  -6.3486,  -5.8400,  -3.0988,   0.9668,
         -3.0825,  -1.6936,  -2.7842,  -4.4625,  -1.0547,   2.0997,  -4.7705,
         -1.8238,  -3.8036, -10.6210,  -7.0221,  -6.6680,  -2.5749],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3020, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8104, -2.9728, -2.2498,  0.6507, -7.1669, -2.6862, -3.5379, -0.3196,
        -6.6200, -1.1648, -2.0383, -3.7595, -1.5984, -2.8229, -0.3016, -7.0340,
        -1.8173, -0.3418, -6.2010, -2.7434], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3705, -25.1378,  -4.3998,  -9.1419,  -1.0358,  -5.4032,  -0.4958,
         -8.6217,  -2.1150,  -4.0027,  -0.2818,  -5.1628,  -1.3332,  -0.2299,
         -2.1513,  -1.4115,  -5.4801,  -0.7045,  -7.0991,  -1.4323],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2992, -8.0992, -1.6439, -1.8406, -2.1983, -6.1552, -0.1187, -2.4892,
        -2.9621, -0.3234, -2.4905, -4.0063, -1.5282, -1.2729, -3.5135, -2.8471,
        -2.4968, -3.3378, -5.2489, -1.4656], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7789,  1.3477, -2.3313, -4.0704, -3.1670, -3.2734, -5.3111, -1.2964,
        -0.5028, -1.7074, -1.4622, -3.7475, -5.7406, -2.1015,  0.2897, -4.1247,
        -1.8451, -1.9329, -0.7124, -7.0849], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0122, -1.9651, -1.4124, -1.5669, -5.3069, -1.8237,  1.5486, -1.4641,
        -1.2743, -1.2064, -1.6853, -3.8810, -0.6704, -2.2208, -2.1593, -1.6096,
        -5.2656, -3.0652, -0.9529, -3.3001], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3137,  -5.4895,  -1.9518, -14.5842,  -6.3492,  -3.8393,  -6.2506,
         -7.2366,  -2.8362,   1.0526,  -7.7141,  -1.6516,  -2.4739,  -1.4361,
         -3.2948,   2.1601,  -5.4078,  -4.7291, -19.0652,  -6.3466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8970,  -6.2435,  -2.7982,  -2.7415,   1.7706,  -2.4866,  -1.3729,
         -0.9995,  -1.1065,  -6.1897,  -0.5573,  -2.3871,  -3.5240,  -1.2613,
         -3.7930,  -0.3004,  -2.5860,   2.4097, -11.7170,  -3.6725],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7992,  -1.0507, -21.0235,  -2.6472,  -7.3087,  -0.5071,  -4.5805,
         -1.8561,   0.3889,  -3.2945,  -1.4844,  -3.5300,  -2.5950,  -6.3303,
         -1.7666,  -0.4552,  -4.7283,  -1.1561,  -2.6807,  -3.6817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8209, -1.3147, -3.4949,  2.4083, -6.0011, -1.5824, -2.7651, -2.6698,
        -6.5683, -1.1758, -0.7897, -2.3402, -0.7626, -2.0312, -3.0542, -1.6298,
         1.3137, -2.8505, -1.1062, -2.4398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6281, -3.6308, -2.9934,  2.1183, -4.3093, -1.9413, -3.1662, -0.9829,
        -5.6279, -2.2126, -0.9620, -2.2072, -2.4284, -1.6415, -1.9568, -1.3839,
         1.7481, -4.1217, -2.6457, -3.1689], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6307, -1.9807, -2.8155, -2.4956, -4.9795, -2.4082,  1.1690, -3.4819,
        -1.8188, -1.6315, -3.2936, -3.6133,  0.5396, -2.0435, -2.4360, -3.4557,
        -1.2210, -5.7430, -1.8655, -0.5089], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2449, -1.9909, -4.6075, -1.3920, -0.9882, -3.3620, -2.1638,  1.6155,
        -3.8923, -2.0837, -1.6543, -4.7751, -3.7855,  0.9787, -0.8413, -2.2628,
        -1.5600, -4.2083, -3.0404,  0.7684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8393, -2.7967, -0.1863, -1.9805, -5.1977, -2.9098, -4.5459, -1.9478,
        -2.0297,  0.2757, -3.5958, -1.7484, -5.8685, -4.4706, -6.1683, -5.9341,
        -5.2559, -1.3903, -5.8907, -3.5764], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1050,  -1.9492,  -5.1616,  -2.1086,  -0.3753,  -3.2714,  -2.3084,
         -2.8015,  -1.1603,  -5.1067,  -0.9633,  -6.8118,  -1.6487,  -4.0874,
         -4.5268,  -3.6152,   1.9515,  -4.8405,  -4.1020, -32.2152],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5038,  -3.0038,  -7.0089,  -2.0885,   0.4003,  -5.1018,  -3.5239,
        -30.7537,  -3.1115,  -7.5519,  -0.7584,  -3.7587,   1.7938,  -5.0958,
         -2.2264,  -4.2164,  -0.7632,  -4.7357,  -1.7459,   0.9895],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2535, -8.9629, -9.8358, -7.8588, -5.4234, -6.4155, -1.1766, -0.2731,
        -4.9353, -4.1184, -4.9284, -2.1998, -2.8531, -2.1748,  1.6657, -9.9032,
        -1.1903, -0.9498, -2.2968, -3.2120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6635,  -1.0970,  -5.1123,   0.0998, -15.6530,  -2.2450,  -2.5523,
         -5.2536,  -2.7354,  -0.6710,  -8.9720,  -8.4082, -39.1401,  -6.3570,
         -9.3939,  -6.9060,  -9.1654,  -7.2419,  -5.7712,  -1.7450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0771, -4.7847, -1.5728, -6.2131, -1.8274, -1.0359, -8.6296, -1.5898,
        -4.0396, -2.3484, -2.6826,  2.0718, -5.1763, -0.6261, -2.8325, -1.4219,
        -6.0338, -2.0139, -1.2271, -4.2677], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6261, -20.4355,  -5.7529,  -9.5604,  -6.9437,  -7.6494,  -0.4911,
         -4.7683,  -1.5093,  -1.9368,  -3.8358,  -2.4182,  -3.6270,  -5.4985,
         -2.6190,   0.5704,  -4.7706,  -4.0078,  -1.8946,  -1.4969],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6472,   1.0833,  -3.4175,  -2.0907,  -2.9844,  -2.7716,  -2.6301,
          0.8650,  -3.0004,  -1.5810, -10.6782,  -4.5758, -11.6657,  -3.4453,
         -9.9988,  -1.2540,  -2.8203,  -4.5201,  -4.8708,  -7.7755],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8310,  -9.6788,  -2.8292,  -2.5932,  -1.4601,  -4.8530,  -1.8011,
         -2.2267,  -5.5966,  -4.3361,  -3.5095,  -2.5742,  -3.7937,  -0.1529,
         -5.1628,  -8.6529, -15.6792, -12.8470,  -4.7331,  -7.7654],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5821,  -6.3403,  -6.7397,  -7.4266,  -3.1138,  -4.5549,  -2.3643,
         -5.0717,  -2.0394,  -3.1096,  -2.0642,  -2.9444,   1.5372,  -4.4810,
         -2.0103, -17.4524,  -2.4868,  -6.8017,  -1.2114,  -8.4797],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7618, -3.7411, -7.2210, -6.4762, -5.6743, -7.5154, -3.6377, -5.4009,
        -1.7842, -7.2417, -2.6181, -5.1013, -6.4572, -3.3044, -1.5281, -3.9999,
        -2.9332, -3.8533, -5.1083, -4.5090], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3687,  -1.4050,  -2.4886,  -5.5170,  -2.0351,   1.3580,  -3.2854,
         -1.6045,  -1.5758,  -5.0223,  -2.2446,  -0.4273,  -2.7519,  -2.2766,
        -12.6536,  -5.1017,  -8.0749,  -6.4576,  -1.1570,  -3.9873],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2591,   2.1497,  -4.9293,  -2.7553, -13.9054,  -3.6007, -17.3531,
         -0.7051,  -4.9430,   1.0569, -15.5332,  -3.0717,  -2.0929,  -2.2293,
         -5.2194,  -1.4080,   0.0638,  -3.4047,  -3.7336,  -1.9119],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0776,  0.5765, -8.5047, -2.0058, -1.2387, -0.5602, -5.1198,  1.4531,
        -4.6843, -1.7503, -2.7621, -2.0156, -4.6423, -2.3857, -0.6131, -3.3518,
        -1.2846, -0.3119, -4.1186, -2.7998], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2925, -3.7432, -0.1021, -5.3501, -1.1190, -9.4562, -6.5544, -5.3695,
        -5.2855, -2.6862, -3.4185, -0.6666, -2.2649, -2.3588, -4.1554, -0.9373,
        -4.6540, -2.1464, -0.2782, -4.4316], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4348, -1.6734, -2.1317, -1.4634,  1.5188, -4.9780, -2.5336, -2.2036,
        -2.8213, -5.9502, -2.4492, -0.9628, -6.0524, -0.7802, -2.0655, -2.3695,
        -1.6712,  0.8574, -2.3177, -2.4775], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0429,  -1.7808,   0.3275,  -6.3785,  -2.8119,  -2.2323,  -4.2613,
         -3.0322,  -1.5302,  -2.8457,  -2.5864, -22.4002,  -6.2238,  -8.0284,
         -6.9028,  -6.4303,  -2.4779,   0.8666,  -7.2612,  -3.6558],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4438, -5.7895, -1.4564,  0.3863, -3.7399, -1.9807, -2.6967, -3.7500,
        -2.2450, -5.4595, -3.7682, -1.3640, -1.9960, -1.8482, -4.3786, -2.3955,
         0.0825, -3.6257, -0.8785, -2.9529], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6290, -15.8437,  -6.9119,  -6.2043,  -7.1407,  -5.9654,  -2.8939,
          0.4040, -10.7640,  -2.5491,  -1.4113,  -0.9578,  -4.2581,   1.6748,
         -4.4426,  -1.7000,  -1.2400,  -0.2713,  -5.6759,  -0.9103],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9505,  -6.0549,  -3.4605,  -5.1552,  -1.8951,   1.1573,  -1.5734,
         -3.4368,  -2.2473,  -5.3800,  -2.4867,  -0.0186,  -3.3211,  -1.6915,
         -6.5433, -17.1484,  -6.9098,  -7.4089,  -2.1901,  -2.7250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5823, -3.2828, -1.4764, -2.5387, -2.5379, -4.4558,  0.9920, -4.4133,
        -1.5174, -2.7997, -2.8468, -2.4368,  1.9961, -5.4170, -1.9182, -4.2228,
        -1.1021, -7.0833, -2.0162, -1.8618], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2614,  -6.5131,  -1.2034,  -1.5374, -11.9628,  -2.6009,  -2.0202,
         -3.9248,  -2.8540,   2.1979,  -6.2350,  -1.3918,  -3.0879,  -2.7239,
         -6.2948,  -3.3209,  -0.9622,  -3.3688,  -3.7202,  -2.1694],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6545, -0.2413, -3.8856, -2.6393, -1.0970, -1.6705, -6.6772, -1.0458,
        -0.9649, -3.4495, -1.6279, -0.9536, -4.6613, -0.8958,  0.9009, -4.0425,
        -0.8552, -3.1446, -3.0818, -8.7557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8288, -4.8647, -2.0916, -2.9607, -1.3137, -1.4667, -1.5807, -4.9016,
         0.5279, -6.0537, -2.1553, -1.5771, -2.0136, -6.0443, -1.3119, -3.2104,
        -3.6045, -1.6907, -2.4835, -3.9381], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9650,   0.0925,  -8.5527,  -1.5894,  -0.6755,  -1.0602,  -5.0308,
          1.9400, -17.2142,  -9.3832,  -9.7869, -10.6232, -12.4577,  -2.1260,
         -3.1891,  -1.0936,  -0.7636,  -3.1754,  -3.4870,  -0.7052],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9716, -6.7921, -3.9196, -1.4527, -1.7977, -3.1075, -2.9961, -1.6848,
        -3.8748,  0.6097, -3.2662, -1.8805, -4.3632, -0.8256, -5.6603, -1.4503,
         1.4357, -4.8618, -0.6901, -1.8202], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9357, -0.9944, -4.0577, -1.9062,  0.6964, -3.4433, -0.7992, -2.1127,
        -1.2251, -5.6111, -1.3759,  0.9748, -4.2573, -1.0759, -2.0367, -3.0528,
        -1.7959,  1.4473, -1.7294, -2.5321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1282,  -1.5486,  -3.5983,   1.6723,  -5.5292,  -1.7604,  -3.8338,
         -3.5644,  -3.0320,  -0.5426,  -4.9806,  -2.1006,  -1.4453,  -1.4440,
         -3.1501,  -2.5766,  -3.0853,  -4.5758, -18.5571,  -6.7136],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5747, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8530, -1.4716, -2.0015, -4.0067, -2.1966, -4.2485,  0.8412, -1.1141,
        -1.5888, -1.8948, -5.9808, -2.2867, -2.0629, -4.0817, -1.7497, -3.6678,
        -3.1492, -4.9301,  0.1946, -6.9555], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6554,  -3.5386,  -3.8470,  -2.7932,  -4.1755,  -1.3107, -11.7527,
         -1.1275,  -0.6574,  -4.0603,  -3.9950,  -6.1100,  -6.6476,  -2.3479,
          0.6076,  -9.8623, -11.5964, -53.4106,  -8.3299,  -5.3878],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2703, -1.0199, -3.2764, -1.4160,  1.8619, -2.4665, -1.3632, -1.1338,
        -0.9770, -6.5840, -1.3741, -3.4330, -5.1377, -0.3322, -2.3614, -1.7915,
        -5.9556, -0.7093, -0.9338, -3.1657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0659, -4.6300, -3.7139, -4.2093, -1.8633, -1.9824, -1.5429, -2.6331,
        -2.3710, -9.0717, -4.5038, -6.8539, -6.7085, -5.6588, -1.9579, -4.6212,
        -0.5942, -6.8378, -3.7427, -3.4639], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7123,  -1.9038,  -6.7374,  -4.9665,  -3.6880,  -4.1779,  -2.4594,
         -4.3401,   0.6362,  -6.0657,  -3.0162,  -3.9629,  -7.1155,  -2.7032,
         -9.7198,  -5.9248,  -3.3849,  -8.9969,  -3.9773, -11.8782],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1365,  -4.4322,  -0.3287,  -5.2473,  -4.5049, -30.2121,  -8.4208,
        -10.8118,  -3.6009,  -4.8418,  -2.5310,  -0.9410, -14.3117,  -1.2237,
         -4.7449,  -5.7408,  -3.0116,  -1.9082,  -3.7536,  -3.6205],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3070, -2.9262,  1.2356, -3.3948, -0.7117, -3.3123, -3.3523, -5.5206,
        -2.9063, -1.3172, -1.8137, -1.5747, -1.1835, -3.3392, -2.0038,  1.8823,
        -1.6876, -2.4279, -1.3689, -4.6256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3184,  -3.6599, -13.5430,  -4.7399,  -4.0430,  -4.7363,  -4.3812,
         -1.2126,  -2.6617,  -4.2875, -11.8560,  -4.8332,  -9.1224,  -0.9349,
         -6.7137,   1.3696,  -8.3804,  -2.6739,  -5.2606,  -5.0428],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1008,  -3.2710,  -3.6839, -11.8647,  -8.6816,  -6.8249,  -6.5774,
         -3.2139,  -4.6701,  -0.6549,  -3.2813,  -4.3561,  -2.9918,  -6.2777,
         -2.9252,   0.1849,  -8.6207,  -4.1040, -21.0071,  -8.4418],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.8231,  -4.5105,  -5.0126,   0.7190,  -2.9023,  -3.8869,  -2.9320,
        -18.9592,  -2.9267,  -6.8456,  -0.0621,  -4.0756,   1.5287,  -6.9478,
         -1.6405,  -4.5401,  -0.4282,  -5.2965,  -1.6813,  -1.8682],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8807,  -0.9342,  -6.3217,  -1.6169,   0.8948,  -3.6631,  -1.0062,
         -1.8511,  -1.1702,  -3.9785,  -0.4335,  -3.3749,  -1.5789, -11.7117,
         -3.2011, -15.4018,  -1.3915,  -4.8006,   1.8281,  -3.9594],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3777, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8738, -5.8687, -3.6057, -1.0644, -4.3450, -3.5398, -7.7093, -7.0997,
        -6.6095, -6.2908, -6.2158, -2.5841, -6.1977, -0.3280, -9.0687, -3.6962,
        -3.7706, -7.5975, -3.6467, -0.3942], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2202,  -2.2621,  -3.1522,  -2.4252,  -3.4327,  -1.8600,  -1.3596,
         -3.7342,  -8.2520,  -8.4468,  -5.2263,  -7.0128,  -3.7842,  -4.0060,
         -7.1131,  -3.5606,  -3.1535,  -7.1628,  -2.2614, -12.3476],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3187, -6.4488, -1.0345, -0.9336, -2.2615, -0.4827, -2.9639, -1.8136,
        -2.8668,  1.4505, -2.1691, -1.0347, -1.7938, -4.6055, -1.6436,  2.0586,
        -4.1131, -4.4319, -2.0046, -1.6337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7912,  -7.5026,  -2.5442,  -3.4884,   0.4960,  -4.5314,  -3.2532,
         -2.2833,  -1.2615,  -8.5221,  -1.1410,  -1.0060,  -2.5047,  -1.0241,
         -2.4223,  -1.1375,  -5.4880,  -1.0118,   0.6881,  -2.8503],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3417, -0.4799, -3.5012, -6.5036, -2.4964,  0.6487, -6.5266, -4.1087,
        -1.3804, -4.8148, -4.0519,  0.5418, -7.0373, -0.4624, -2.5066, -6.2698,
        -2.2495,  0.7348, -2.0178, -0.8179], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2899,  -3.8063,  -2.3344,  -4.9631, -25.1508, -22.4132,  -4.6647,
        -10.9307, -10.4701,  -5.1375,  -6.8198,  -1.9976,  -4.6104,  -0.5676,
        -12.5763,  -3.5265,  -2.9077,  -4.5944,  -4.2081,   1.7006],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0918,   0.3494, -10.5663,  -3.0899, -18.1954,  -5.1873,  -4.9206,
         -6.8887,  -2.4348,  -3.4295,  -9.0604,  -4.1040,  -2.3607,  -0.7713,
         -1.3309,  -4.1119,   1.6704,  -3.2829,  -4.8259,  -3.5572],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5349,  -5.9725,  -1.9756,  -4.2149,  -0.0209,  -6.7002,  -2.3314,
         -1.6776,  -2.9558,  -2.7153,   1.3164,  -3.0630,  -2.2126,  -8.4594,
         -6.3951,  -4.4273, -17.0803,  -1.3884,  -6.5755,   1.0106],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5185,   2.2183,  -2.9650,  -1.3976,  -2.7991,  -0.8591,  -2.2828,
          2.0015,  -4.4697,  -1.0040,  -2.3369,  -4.1285,  -2.9581,   0.7433,
         -4.2102,  -1.6379,  -2.0052,  -5.4786,  -8.1621, -21.7583],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9248,   0.4041,  -6.9290,  -1.0947, -12.2609,  -5.5763, -11.5392,
         -7.0030,  -4.1866,  -7.8324,   1.2710, -11.1400,  -2.9411,  -1.4079,
         -2.1781,  -3.7685,   1.0972,  -5.5407,  -2.6342,  -9.5899],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2106, -4.9025, -0.9040, -1.7707, -2.8102, -1.7219,  2.0285, -2.0203,
        -1.8956, -2.4715, -5.0061, -4.0841, -0.8928, -2.4147, -1.6526, -3.0107,
        -0.5999, -3.2602,  1.9954, -4.3613], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3700,  -5.4560,  -4.6917,  -3.1835,  -6.4854,  -4.7063,  -9.6785,
         -7.7127,  -5.9740,  -6.7629,  -1.8360,  -4.3674,   0.4223,  -4.7503,
         -3.4374,  -3.2047, -12.2256,  -3.8536,  -1.8658,  -3.5626],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6565, -8.1637, -2.8273, -3.4466,  1.8333, -1.8709, -1.1860, -2.1061,
        -1.9997, -6.7427, -2.3031,  0.1930, -3.9424, -1.7739, -2.3547, -4.3643,
        -2.2738,  1.3790, -2.2767, -2.4985], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7408, -1.8229, -3.2959, -3.5607,  0.8183, -0.7322, -2.1199, -3.3491,
        -4.1933, -2.5374,  0.6458, -3.1862, -1.3591, -3.9651, -1.9233, -8.7963,
        -4.1387, -2.6250, -3.8532, -1.7825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5212, -8.6287, -0.1874, -4.8081, -2.7982, -2.1507, -1.1524, -6.6416,
        -1.2460, -3.0220, -5.1837, -0.5767, -3.0958, -2.1331, -4.0620,  2.2652,
        -5.1433, -1.4912, -3.3424, -4.1274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7973, -2.4422, -1.4658, -4.2271, -0.1919, -9.8149, -1.1571, -0.5938,
        -3.6289, -2.1551, -3.3000, -0.9955, -4.7503, -1.1670,  0.9835, -2.3114,
        -1.8250, -3.8876, -0.4886, -5.7037], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1929, -2.2284,  1.0801, -4.2346, -0.9502, -1.6654, -0.6450, -3.4573,
         0.7227, -0.6864, -5.9592, -6.4850, -3.9181, -6.1739, -3.4838, -0.3360,
        -8.7161, -4.3807, -6.4432, -1.9448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9231, -2.9587, -4.3770, -1.5660, -0.3253, -3.1923, -1.9076, -2.4333,
        -1.6041, -3.6552, -0.5981, -3.1311, -5.4796, -2.7297, -3.4110, -4.2107,
        -5.6963, -1.3857, -1.4419, -2.2997], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2049, -1.6296, -0.7161, -2.6412, -1.2478, -5.6270, -3.1785,  0.3191,
        -3.4946, -1.5642, -1.6750, -2.9087, -2.0751,  1.1576, -1.8562, -1.2582,
        -5.0775, -1.3781, -4.5077, -2.3663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1561, -9.4958, -5.6698, -4.0632, -4.0098, -2.6464,  1.0493, -1.9441,
        -2.4226, -3.6469,  0.0650, -7.6722, -1.0442, -0.4302, -7.0595, -2.5377,
        -4.1794, -0.4005, -3.3285, -2.4909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5299, -0.0909, -4.4192, -2.6098, -0.9375, -5.0532, -1.0496,  1.1116,
        -3.2226, -2.7256, -1.3871, -2.7497, -9.4855, -4.7383, -3.5000, -3.7877,
        -4.1604, -4.2344, -2.0172, -7.1921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1518, -2.3382, -3.9309, -3.3583, -7.9634, -6.7646, -5.0898, -6.7288,
        -4.8821, -3.7361, -1.8779, -2.4891, -4.8454, -3.0052, -4.7290, -5.0598,
        -0.2442, -2.9097, -3.3533, -5.2132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0303, -1.8332,  2.9193, -3.7932, -1.3157, -3.0855, -1.0267, -5.6550,
        -1.3333, -3.2775, -2.1145, -1.5651, -1.4420, -5.0247, -1.2480,  1.5769,
        -3.7923, -1.7817, -2.6640, -3.9698], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2482,  -3.4787,  -4.0865,  -4.1312,  -2.9047,   1.0445,  -1.3706,
         -2.9308, -13.0289,  -5.2422,  -5.1940,  -6.8398,  -2.7953,  -2.8707,
          0.6753,  -4.1557,  -2.1857,  -2.2478,  -5.2126,  -2.2221],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1384,  -1.8759,  -1.3199,  -3.7475,   0.4476, -16.9955,  -3.0741,
         -4.5835,  -1.4025,  -5.2948,  -1.5212,  -0.4068,  -2.1380,  -0.7252,
         -3.2723,  -1.0540,  -5.2548,  -1.1353,  -0.6151,  -3.6227],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6810, -5.6476, -1.2998, -0.8057, -1.9437, -2.6176,  2.3514, -3.2792,
        -2.3530, -3.3070, -2.4866, -7.2872, -3.5177, -1.1924, -2.5312, -2.9434,
        -3.4716, -1.8080, -7.5130, -1.1068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8467e+00, -2.1371e+00, -5.2137e+00, -1.8014e+00,  2.2435e-03,
        -1.8403e+00, -1.0270e+00, -1.2282e+00, -3.9454e+00, -1.0652e+00,
         2.0115e+00, -2.7221e+00, -1.8767e+00, -1.7363e+00, -6.2846e+00,
        -1.7695e+00, -9.2408e-01, -2.3445e+00, -3.7542e+00, -1.4179e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6684,  -2.1336,  -5.7388,  -3.3870,  -0.3220,  -3.8726,  -4.0663,
        -23.6748,  -5.4766,  -6.8903,  -7.6970,  -1.1213,  -2.7030,   0.5977,
        -12.2485,  -1.2003,  -2.9052,  -3.1200,  -2.8606,   1.8215],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0739, -1.5995, -0.9039, -4.6729, -0.6784, -1.9469, -1.8933, -1.4511,
        -0.5005, -4.6902, -1.8350,  2.2918, -2.8690, -3.1859, -2.8026, -4.7575,
        -7.3785, -3.4502, -2.2058, -5.2413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2259, -2.4105, -2.0959, -4.2452, -1.4384, -3.9917, -2.0406, -3.6989,
         0.7715, -5.3709, -2.4015, -1.6937, -3.0670, -2.3828,  2.0029, -7.6843,
        -1.6268, -3.4236, -5.6731, -1.5731], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4237, -3.6328, -3.8172, -4.5031, -3.0932, -2.8160, -1.3770, -1.4607,
         1.8215, -4.0435, -2.5708, -1.7741, -1.3803, -2.7485,  1.8977, -1.9439,
        -2.4748, -3.0875, -0.1933, -5.8742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7895, -2.4854, -2.5177, -1.6838,  0.5670, -1.4786, -3.1087, -3.2272,
        -3.4464, -5.0021, -4.3793, -1.4356, -7.4219, -5.3087, -2.2245, -3.8181,
        -2.5148,  1.4604, -1.6782, -1.9755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3710,  -4.1084,  -7.9658,  -6.1391,  -2.0952,  -1.4580,   0.5541,
        -19.1784,  -2.4850,  -2.6235,  -0.8813,  -4.2161,  -1.7266,  -7.8369,
        -11.9119, -12.9371,  -6.5116, -10.2337,  -4.7510,  -2.1681],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0586, -4.1131, -4.4319, -2.0046, -1.6337, -5.8757, -2.4307, -0.4202,
        -4.1410, -3.0520, -3.4467, -3.2001, -2.3839,  0.8370, -3.5915, -2.8073,
        -2.9100, -2.1073, -6.1367, -2.9608], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9753, -0.9119, -1.9249, -1.1746, -1.7667,  2.0778, -2.8040, -1.5455,
        -3.2840, -1.0294, -5.0671, -0.9755,  0.0440, -1.8709, -0.7040, -2.0149,
        -1.9330, -2.0163,  0.9821, -2.3464], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3267, -9.1157, -5.9926, -0.8263, -5.0650,  1.1163, -2.0393, -1.2738,
        -3.7297, -2.0026, -4.8874, -2.6787, -0.6281, -1.9912, -2.1160, -1.0294,
        -4.8585, -2.1692, -0.4176, -2.3468], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0779,  -2.9095,  -4.4010,  -0.8452, -10.5881,  -2.0313,  -2.4823,
        -15.2767,  -4.2036, -13.0334,  -5.3450,  -2.6026, -50.9869,  -2.3951,
        -15.8283,  -5.7570,  -6.3165,  -5.9642,  -9.9162,  -1.2841],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8301, -6.1729, -1.0582, -2.3418, -5.3546, -3.2846, -4.7252, -1.3226,
        -5.6753, -1.3043,  1.2922, -2.3019, -1.0313, -1.8611, -4.1168, -1.9130,
         0.8293, -2.9772, -2.1422, -1.5787], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7465,  -3.8187, -13.0438, -11.9255,  -2.5520,  -9.5127,  -6.6346,
         -7.1213,  -7.4448,  -6.0533,  -1.6318,  -8.7136,  -5.3922,  -6.1879,
         -4.4728,  -3.4615,  -1.1357,  -4.4558,  -1.4742,   1.6828],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0338, -2.8887, -0.5977, -7.1836, -0.7016, -1.1896, -3.1947, -1.7080,
        -2.4916, -2.6730, -1.4279,  2.0889, -3.0121, -1.6953, -4.3559, -4.5763,
        -2.2693, -0.6963, -1.6568, -2.3647], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7954, -1.8372, -0.7647, -2.5376, -5.6429, -3.0936,  0.9145, -0.8180,
        -2.1256, -1.5191, -4.7216, -1.6540,  0.7949, -3.5348, -1.3400, -4.4677,
        -4.8369, -2.3992,  0.0927, -2.6191], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9380,  -3.8629,   0.8426, -22.4732,  -3.9618, -13.8556,  -9.6336,
         -9.8258,  -4.3197,  -3.7215,  -8.6131,  -7.6291,  -3.9685,  -7.1455,
         -2.5933,  -3.1968,   1.3634, -14.2596,  -4.1285,  -2.0523],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6004, -14.6625,  -2.3758,  -3.1485,  -6.0230,  -3.7784,  -5.6556,
         -2.6119,  -4.5192, -27.0487,  -7.7512,  -8.0957,  -6.4666,  -2.3702,
         -5.7830,   1.8399,  -5.2736,  -2.7275,  -2.9164,  -5.0455],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5635, -4.5397, -2.2299,  1.4314, -1.9542, -1.0309, -2.2403, -2.2245,
        -6.5552, -1.3208, -2.3215, -2.6240, -1.5124, -2.9464, -4.4551, -1.8670,
         2.1228, -1.9215, -1.4005, -2.1833], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8978,  -1.6894,  -3.6644, -12.2698,  -8.9270,  -2.3462,  -7.0125,
         -5.3036,  -2.5422,   2.2838, -11.9856,  -2.4276,  -2.6440,  -8.5861,
         -3.8552,  -5.3315,  -5.9753,  -2.8655, -17.8940,  -6.0263],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6943, -8.5949, -1.5270, -1.0172, -1.7626, -1.3069, -0.9880, -5.6635,
        -0.5482,  1.5581, -2.8321, -2.3921, -2.3090, -2.5204, -5.0129,  1.4942,
        -1.2171, -1.4017, -1.1708, -4.6835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9481,  -5.5446,  -3.6107,  -6.3621,  -2.2110,  -6.1071,  -2.3886,
          1.7927,  -6.6369,  -3.5877, -19.1177,  -5.9135, -12.3142,  -7.1395,
         -3.0126,  -4.5921,  -1.4215,   2.5674,  -2.5942,  -1.3881],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3566,  -5.3114,  -2.0292,   0.6380,  -1.7462,  -3.4258,  -5.4680,
        -14.1773,  -5.1225,  -5.3451,  -7.0218,  -2.4220,  -2.9896,  -6.9181,
         -2.7559,  -2.7295,  -1.2728,  -4.5669,  -2.5595,   1.1773],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4793,  -2.1705,  -1.2963,   1.5349, -13.2894,  -1.5874,  -2.0445,
         -2.1278,  -4.8523,  -2.2074,   0.2804,  -2.3808,  -2.6264,  -2.2614,
         -1.2260,  -4.9744,  -3.3666,  -0.9461,  -5.7594,  -2.6445],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5024, -1.8005, -2.7558, -0.8534, -5.2724, -0.2397, -3.0986, -7.8177,
        -1.3385, -3.0505, -2.5264, -2.0395,  1.0709, -1.5594, -1.8481, -2.7212,
        -2.1319, -5.2340, -4.7781, -2.1970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6950, -3.7972, -3.2679, -5.0765, -2.6017, -6.6862, -1.5582, -2.5163,
        -5.3650, -4.4867, -6.9469, -5.1055, -7.8664, -3.6186, -6.4363,  0.5140,
        -3.4412, -3.7168, -6.2189, -3.1499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0902, -2.7250, -3.8663, -0.6834, -9.5528, -5.0333, -4.6272, -6.2604,
        -2.6410, -2.7124, -5.4635, -6.7778, -1.1490, -2.8078, -2.9441, -2.2380,
         1.9469, -5.4621, -2.1569, -2.0220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0956, -3.7789, -1.8636, -4.6007, -1.7870,  0.5952, -1.9939, -2.2614,
        -1.0092, -1.0638, -2.9335, -0.0623, -4.6246, -2.1079, -5.3979, -1.4435,
        -3.4439,  1.9954, -3.2045, -2.3198], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9356,  -1.2807,  -1.1022,  -6.5391,  -2.1698,  -2.4170,  -1.7815,
         -9.1562,  -1.0243,  -2.5643,  -2.8401,  -2.0666,  -3.7242,  -1.0835,
         -5.8123,  -1.4129,  -1.2658, -10.2487,  -0.9809,  -1.6357],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.1334,  -7.3053,  -8.5143,  -6.2579,  -1.3947,  -4.3282,   0.5737,
         -4.2719,  -3.4433,  -1.2403,  -2.4822,  -2.0983,   2.2088,  -1.4484,
         -2.9818, -15.2151,  -4.4742,  -4.2120,  -7.5118,  -2.2457],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4625, -1.4725, -1.0334, -6.6647, -1.3571,  1.7484, -3.9171, -0.6043,
        -1.5169, -1.2322, -6.9035, -2.9524, -1.5684, -2.6782, -1.4722, -1.4213,
        -2.8679, -3.7805, -0.5148, -3.4909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2493,  0.8856, -1.9810, -0.9174, -1.8305, -3.4484, -2.0905,  1.7118,
        -2.4635, -0.6370, -2.2891, -3.1113, -6.5922, -0.9583, -1.4468, -2.3567,
        -0.4571, -2.4230, -3.1563, -3.0451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4084, -1.1490, -3.7791, -1.2286, -5.0743, -1.9318, -0.3563, -1.8413,
        -0.9544, -2.3050, -2.0392, -1.7270,  1.9174, -4.1699, -1.1744, -2.3033,
        -4.4008, -3.0270,  1.0281, -6.1835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7828,  -2.8203,  -1.3365,  -1.8015,  -4.4088,  -3.1744,   1.4054,
        -10.1524,  -4.0124,  -0.8742,  -4.7134,  -0.7550,   1.3697,  -5.0037,
         -1.4536,  -1.9657,  -1.5346,  -8.6080,  -3.2291,  -2.2643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5799, -3.2453, -2.2454, -0.5509, -3.8758, -0.9433,  1.5846, -2.1387,
        -1.7138, -2.1988, -2.0285, -3.0318,  1.3051, -0.8963, -1.8602, -1.5836,
        -5.2427, -1.7271,  2.6698, -3.2293], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7047,  -1.8310,  -2.1286,  -1.7015,  -3.2365,  -2.1314,   2.1433,
         -2.4131,  -6.0174, -21.3432,  -5.0608,  -5.4977,  -6.1808,  -4.3267,
         -4.9632,  -5.3604,  -0.6780,  -4.7909,  -3.8876,  -3.5245],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9888,   0.0749,  -2.5055,  -1.9865,  -1.4153,  -5.6558,  -2.6465,
         -3.4665,  -6.8459,  -6.7384,  -6.1275,  -2.9384, -10.2151,  -5.1133,
         -6.4215,  -2.9165,  -4.5338,  -1.6363,  -1.3763,  -3.8317],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0617, -1.5100, -1.5720,  1.6211, -3.8216, -2.5600, -2.9353, -3.3496,
        -3.8808, -4.1339, -5.8910, -5.9669, -1.6962, -6.0602, -2.0535,  1.6079,
        -2.0271, -0.1525, -1.8451, -0.7667], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.8139,  -5.7632, -16.0967,  -4.5233,  -5.8777,  -3.0398,  -5.2558,
         -9.7455,  -5.8226,  -5.8174,  -7.4224,  -3.0350,  -6.1474,  -4.4001,
         -4.5571,  -1.8967,  -4.0679,  -2.5574,  -4.8631,  -7.3432],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1550, -1.3279, -8.7122, -1.5592, -2.9771, -0.9317, -4.1966, -0.5264,
        -0.7466, -4.8729, -2.6378, -8.9843, -5.3747, -5.7388, -7.6101, -2.0509,
        -3.0100,  0.1037, -6.0832, -2.5746], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5483, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7808,  -7.6310,  -3.0736,  -1.8582,  -5.7577,  -1.5752,  -2.4879,
         -1.2614,  -2.3433,   2.2137,  -3.0154,  -2.2200, -13.3504, -12.8987,
         -8.7495,  -6.4789, -10.4298,  -2.5290,  -3.6744,  -0.2289],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4565, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0120, -2.5782, -2.8868, -3.0722, -4.1708, -1.3740,  1.4979, -3.8974,
        -2.1355, -2.1094, -2.7477, -2.5480,  2.2738, -2.6165, -0.7987, -3.1452,
        -1.2507, -5.0188, -1.6815, -0.0562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0200,  -6.2046,  -5.5975, -17.2006,  -6.2496,  -6.5297,  -4.4168,
         -4.0722,  -4.8485,  -5.2087,   1.5171,  -4.0896,  -2.8124,  -2.4449,
         -4.3288,  -3.0583,   0.3039,  -2.7390,  -3.0446, -15.6138],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4172, -2.3620, -2.8828, -1.3747, -5.1159, -1.0701,  0.8277, -3.0140,
        -0.8433, -8.8705, -2.0712, -6.9429, -2.2666, -3.5229, -3.3246, -1.8350,
        -1.9872, -1.7695, -1.5829,  1.4182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7902, -7.3767,  0.7939, -5.6260, -2.3674, -0.8984, -2.9907, -2.0614,
         2.2667, -3.6207, -1.5670, -1.7487, -2.4216, -4.5769, -1.3234, -2.0478,
        -3.0165, -1.4422, -3.1096, -1.7859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0899,  -7.0910,  -2.0773,  -4.1135,  -0.5995,  -4.0631,  -2.7750,
         -1.3613,  -3.6255,  -2.1432,   1.2525,  -3.9465,  -3.9371, -11.0613,
         -5.0458,  -8.5175,  -5.5610,  -1.9217,  -3.1535,   1.5226],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5742,  -3.0687,  -2.8299,  -2.0944,  -1.2662,  -3.6572,  -7.8003,
         -7.1820,  -6.9787,  -5.6043,  -2.8934, -15.8028,  -7.0678,  -6.0674,
         -5.7231,  -6.7095,  -1.7258, -13.2655,  -0.3498,  -6.1838],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0478, -1.8232, -7.1096, -2.1106, -0.0371, -2.3609, -0.7557, -1.7858,
        -2.6257, -1.6245,  1.6286, -2.1212, -0.1347, -1.5170, -3.6536, -1.3734,
        -1.4151, -3.8139, -1.0394, -2.0980], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9587,  0.7105, -3.3196, -1.2992, -3.7324, -1.8797, -7.6111, -2.6961,
        -1.1896, -0.3807, -1.3493, -1.2280, -3.8025, -1.5615,  1.8599, -6.1798,
        -4.8683, -8.6805, -7.5599, -7.8402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6038, -3.0517, -1.2891, -5.6722, -0.4551, -1.7186, -7.6969, -2.5019,
        -3.1480, -3.3862, -5.4874, -0.4316, -0.4158, -2.7275, -1.8817, -1.0313,
        -1.5874, -1.6915,  2.2907, -6.7076], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.0674,  -4.7799,  -7.1981,  -5.3156,  -5.2967,  -4.9930,  -4.7267,
          1.3910,  -7.6900,  -3.3143,  -3.0018,  -1.2450,  -6.0412,  -1.0606,
         -0.3831,  -2.7382,  -3.5410,  -1.4759,  -1.8410,  -5.3277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8969, -1.0206, -5.8138, -0.4118, -1.4779, -3.3857, -0.9583, -3.4220,
        -3.8183, -1.1219,  2.6419, -3.2325, -1.9154, -5.6352, -2.8498, -5.9135,
        -2.5180, -0.3270, -7.9644, -2.4862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5887,  0.6657, -3.6359, -1.8698, -2.0736, -0.3545, -5.4957, -0.8579,
        -0.2691, -3.2027, -1.1648, -2.2419, -3.6316, -2.4016,  0.4273, -3.8296,
        -1.3545, -2.2468, -2.2046, -1.5367], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3680, -0.9171, -6.7728, -2.4932, -1.7181, -1.9396, -3.0698,  2.3509,
        -1.3049, -3.1649, -3.8523, -4.6448, -4.2081, -2.8664, -7.1838, -3.3604,
        -4.7627, -1.6151, -6.5468, -0.4326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0127, -0.6534, -6.1196, -0.6006, -2.1206, -3.6111, -2.2195, -1.4529,
        -5.5718, -1.3200,  1.7579, -6.4806, -1.8064, -2.3278, -2.6201, -1.8798,
         2.6393, -3.7821, -0.8822, -2.3673], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5986,  -5.6322,  -1.0168,   0.5881,  -2.6257,  -0.9623,  -1.1209,
         -2.9545,  -5.9762,  -6.7923,  -0.6527,  -2.5623,  -2.5203,  -1.4899,
         -1.3609,  -1.1492,   2.2091,  -3.5457,  -1.2526, -13.6267],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2289,  0.7286, -2.0068, -1.3576, -0.9656, -4.0993, -0.7757,  1.4580,
        -1.9677, -0.8948, -1.9283, -2.7690, -6.8494, -0.1596, -0.5533, -3.9296,
        -1.3837, -3.2097, -1.0812, -4.9708], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9795,   0.3547,  -4.1553,  -3.4000,  -1.8259,  -4.4296,  -3.0597,
         -4.2344,  -3.0105,  -4.2931, -21.2498,  -5.1930,  -8.5681,  -6.2673,
         -1.3079,  -7.7734,   2.6231,  -5.3504,  -6.3914,  -2.4455],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7532, -8.2143, -2.2162, -0.7485,  0.9048, -7.8594, -1.3675, -2.8087,
        -2.4942, -6.9687, -0.8206, -0.9320, -3.2510, -1.3901, -2.3371, -5.2818,
        -1.5679,  1.3355, -2.6741, -1.7252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3998, -4.0731, -2.2309, -3.8138, -3.5387, -5.6067,  1.2018, -2.2545,
        -2.2243, -2.2445, -3.1111, -1.8556,  2.5067, -4.7388, -1.9452, -2.9919,
        -1.5566, -4.7238, -1.1337, -2.3986], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0089,  -1.7015,  -2.6529,  -5.9749,  -2.8091,  -0.4689,  -4.8179,
         -3.5476, -15.9358,  -5.5525,  -9.8214,  -7.0643,  -4.2518,  -4.1853,
         -1.4296,   1.1498,  -2.9260,  -2.1454,  -3.1641,  -1.8075],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5554,  -3.1065,  -0.8814,  -1.2624,  -3.1895, -10.3723,  -6.7238,
         -3.7228,  -6.7508,  -3.5108,  -1.3682,   0.8889,  -5.7812,  -3.8088,
         -7.3848,  -9.0259,  -3.9282,  -2.7485,  -1.0730,  -6.2110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5267,  -3.1634,  -2.8389,   1.4980, -16.3522,  -2.3293,  -1.0321,
         -8.1578,  -2.5537,  -8.6199,  -2.7971,  -8.9719, -21.6637,  -8.3878,
         -9.3828, -15.7951,  -9.7739,  -5.6188,  -6.9842,  -3.9969],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5099,  -1.2017,  -1.2813,  -2.9640,  -0.6453,  -3.4044,  -0.0668,
         -6.3658,  -2.9584,  -1.5767,  -6.0146,  -2.9375,  -1.8629,  -1.8580,
         -4.6834,  -2.0686,  -0.9073, -20.3447,  -1.8569,  -4.4188],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8207, -19.2355, -10.2143,  -9.4151,  -7.2169,  -1.0483,  -7.3421,
         -1.8886,  -1.3399,  -5.9471,  -3.0031,  -1.7679,  -5.0690,  -2.1560,
          1.3308,  -2.9880,  -0.5758, -15.0395,  -7.7987,  -6.7341],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6628, -3.3062, -1.0834, -5.9820, -1.0749, -2.0100, -3.2905, -1.0592,
        -2.3565, -1.2738, -6.0131, -0.6856, -1.0448, -4.9630, -0.5132, -2.8725,
        -1.9532, -1.2748,  0.9729, -6.0919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3250,  -4.7515,  -1.1524,  -0.9739,  -5.6633,  -1.0767,  -2.8701,
         -4.5117,  -2.0718,   1.2603,  -4.1600,  -3.0276, -40.0594,  -1.6394,
         -8.5489,  -1.9787,  -2.8510,   2.4486,  -2.0123,  -3.1700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0595, -2.3697, -1.1846, -1.7107, -1.0670, -0.4450, -2.0184, -0.9484,
         1.8465, -3.8530, -1.5557, -2.3649, -2.6133, -2.9593,  0.9672, -2.7105,
        -0.7545, -1.1387, -4.0751, -1.5767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5120, -4.8640, -2.0789, -0.8004, -2.1955, -3.5572,  0.7469, -5.1271,
        -3.4471, -1.1748, -2.1014, -2.6177,  0.4115, -2.7380, -1.2297, -2.9670,
         0.3617, -8.4852, -1.4911, -2.9268], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7764, -2.0993, -6.0189, -1.4711, -1.7054, -0.7134, -4.2107,  1.0504,
        -2.0476, -2.2288, -3.0338, -1.2347, -5.9328, -0.9256, -1.0609, -2.3157,
        -1.5603, -1.0884, -4.0221, -3.9698], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1364,  -4.4738,   0.8382,  -3.4583,  -1.8618,  -3.8857,  -2.5357,
         -7.9845,  -3.8904,  -2.0338,  -2.4823,  -5.4620, -12.0793,  -3.2563,
         -6.0772,  -5.3571,  -1.4765,  -3.3203,  -2.2937,  -0.4735],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4991,  -5.5647,  -2.1003,  -0.8933,  -6.9249,  -0.9367, -12.5368,
         -5.7284,  -6.1036,  -6.8844,  -5.5430,  -2.9353,   1.1048, -13.0322,
         -4.0945,  -4.0926,  -5.2762,  -3.6919, -11.8277, -16.9126],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5482,  -2.4356,  -1.3313,  -1.7018,  -1.1615,  -2.1394,  -2.4487,
         -2.6695,   1.2362,  -1.5724,  -2.6235, -11.1145,  -6.3950,  -8.3488,
         -0.6091,  -4.1645,   1.4392,  -3.1488,  -3.1152,  -6.0872],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5193, -5.6147, -2.4910, -5.5556, -3.8507, -0.9046, -1.3084, -1.2344,
        -0.7165, -4.0671, -0.9676,  1.5094, -2.7261, -0.2758, -2.2479, -1.5585,
        -7.5425, -4.7631, -1.3185, -3.0514], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.3731,  -5.2493,  -8.5451,  -0.8364,  -3.4849,   0.0571,  -3.7877,
         -3.7482,  -3.0656,  -3.5271,  -1.5420,  -6.7086,  -1.6752,  -3.3997,
         -1.5043,  -1.2701,  -4.6835,  -0.3059,  -5.4770,  -0.6255],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3830,  -2.2193,   2.0208,  -3.7969,  -3.2690,  -2.7021,  -2.6597,
         -6.1278,  -5.4273,  -3.3088,  -2.8637,  -5.0515,  -2.0409,  -3.8484,
         -6.0930,  -2.3161,  -0.7707,  -4.6727,  -3.4290, -10.6593],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7691, -2.2300, -2.9640,  1.0407, -3.0721, -1.3814, -1.8659, -4.0143,
        -3.7133, -0.7030, -4.3911, -1.5653, -3.8625, -3.8120, -2.6559,  1.3074,
        -2.1468, -2.6565, -2.6717, -2.4646], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7777, -3.8237,  1.4999, -7.3444, -2.9561, -2.0584, -2.2319, -5.7162,
        -1.1470,  0.2169, -2.4946, -1.6206, -0.8809, -3.1892, -1.3532,  1.9885,
        -2.9432, -2.3638, -1.2816, -2.3509], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8143,  -1.1771,   1.4197,  -5.5082,  -1.6192,  -6.3390,  -0.8181,
         -9.9555,  -1.5525,  -3.0460,  -7.1510,  -1.5366,  -2.6975,  -2.0127,
         -7.1151,   0.8141, -11.9446,  -1.7572,  -1.8709,  -3.7387],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5295,  -2.8925, -25.3375,  -6.8806,  -2.5238,  -2.4639,  -3.2200,
        -21.2925,  -4.9575,  -7.3181,  -3.2034,  -4.8547,  -3.3070,   0.1009,
         -8.3042,  -1.6429,  -4.7247,  -0.9002,  -5.1479,  -2.6818],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2600, -0.2025, -5.7538, -3.0616, -0.3457, -3.9822, -0.3368, -1.6642,
        -1.3199, -2.9490,  1.8782, -3.5199, -3.7459, -2.2812, -0.7017, -6.5585,
        -0.4701, -1.2815, -3.4751, -1.8009], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4528,  1.9817, -5.6530, -0.9039, -1.0611, -4.4253, -1.6603,  0.9230,
        -2.4752, -2.0318, -0.7232, -3.2361, -1.0633,  2.1357, -1.4735, -2.7890,
        -1.2189, -0.6866, -3.6712,  0.7818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5569,  -3.3861,  -2.2381,  -3.1995,   0.6081,  -4.5864,  -3.4038,
        -29.6903,  -4.9311, -10.2871,  -2.0680,  -3.4652,  -9.8256,  -4.6801,
         -2.9645, -11.3014,  -3.8993,  -4.4918,  -1.6944,  -0.1889],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9774, -2.2932, -2.4224, -0.4271, -4.3570, -2.5239, -1.9798, -5.5983,
        -2.0186, -0.5546, -7.2862, -1.3161, -2.2644, -3.3012, -2.9671, -6.6335,
        -3.7516, -3.2368, -3.7162, -3.0186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3829,  -1.6524, -15.1422,  -4.8842, -17.4537,  -2.1286,  -6.4412,
        -21.2713,  -4.2896,  -5.1060,  -5.4477,  -4.5166,  -4.7237,  -3.1468,
         -1.0324,  -2.2282,  -1.3115,  -1.8833,  -2.3934,  -1.8982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7117, -3.0586,  1.2051, -4.0041, -1.5848, -3.1036, -4.5996, -1.7028,
         0.7998, -5.2549, -3.3129, -0.7411, -2.5943, -6.8016, -1.7175,  0.4991,
        -2.7279, -0.5803, -1.5238, -2.7685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6224,  -2.3121,  -5.1252,  -2.9393,  -0.5270,  -7.5029,  -3.2079,
        -13.4702,  -4.7387, -11.3856,  -2.2158, -11.1476,   1.0000,  -4.7510,
         -6.6549,  -0.2961,  -3.8856,  -2.7263,  -3.4833,   0.4853],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9755, -2.2224,  0.5168, -6.2481, -1.9711, -5.7755, -3.0172, -6.1346,
        -3.9878, -3.5512, -1.6771, -2.0829, -2.2914, -0.0269, -4.2097,  1.9975,
        -4.6536, -2.0125, -3.9781, -0.9955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1382,  -2.2374,   1.9015, -22.9634,  -1.6384,  -2.8297,  -6.4886,
         -2.2345, -12.5735,  -3.0684,  -4.9252, -18.2702,  -6.6524,  -4.9985,
         -7.9373,  -9.2655,  -6.7334,  -1.9413,  -4.4510,   0.6121],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8917, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4209,  -5.3936,  -1.9167,   0.9037,  -4.4295,  -1.2682,  -2.2360,
         -7.1137,  -5.0300,  -6.1771,  -5.1916,  -2.9364,  -3.5470,  -1.3907,
         -6.2855,  -3.4580,  -2.4024, -11.9933,  -3.8214,  -6.9438],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0829, -1.0470, -1.4557, -1.7169,  1.8223, -4.3637, -1.4137, -1.2397,
        -2.8469, -3.2516,  1.7937, -4.1650, -0.1611, -4.6063, -1.7225, -7.4860,
        -5.9065, -1.0330, -8.0047, -1.8952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1028, -2.7103, -2.1213,  0.1439, -4.7157, -2.3001, -3.0077, -2.5056,
        -3.3952, -1.1050,  0.4938, -2.8222, -0.9006, -3.7659, -2.0208, -7.3424,
        -4.7058, -0.5278, -3.7424, -2.1754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6514, -1.1499, -1.9193, -3.5895, -2.1685,  2.1136, -5.2905, -2.8533,
        -2.2642, -3.1754, -6.4289, -3.3988, -1.0548, -2.3999, -2.9280, -1.5283,
        -2.6693, -4.1504,  1.6306, -3.6189], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5084,  -2.4700,  -8.4899,  -3.5816, -23.6118,  -4.1350,  -2.0497,
        -27.3908,  -7.4637,  -8.3446,  -2.0279,  -2.1993,   0.4514,  -3.5302,
         -2.4213,  -3.8941,  -7.0786,  -2.7553,   0.2596,  -3.8801],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0487, -2.9453, -0.7523, -1.9907, -2.1531, -3.7785, -1.0987, -4.1243,
        -1.5548, -3.4739, -2.3661, -5.7832, -0.4109, -5.1718, -3.9210, -5.9831,
        -4.3762, -4.2861, -3.0873, -0.3765], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0617,  -1.9535,  -1.5160,   1.6073,  -3.4236,  -1.8379, -14.5220,
         -7.0272,  -6.8506,  -5.7267,  -2.0333,  -3.1931,  -0.8778,  -1.5567,
         -0.4057,  -2.1176,  -6.6238,  -1.4452,   1.0284,  -2.3496],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8618e+00, -7.5575e-01, -2.6725e+00, -2.1948e+00, -1.8812e+00,
         1.1828e+00, -3.4635e+00, -3.9657e-03, -1.6108e+00, -3.2004e+00,
        -4.3583e+00,  1.6691e+00, -2.3163e+00, -1.4437e+00, -1.2913e+00,
        -5.8913e+00, -9.9169e-01,  9.8068e-01, -8.6645e+00, -4.1421e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2729, -3.0611, -1.0871, -0.4939, -1.6503, -2.1321,  1.8473, -3.2601,
        -1.2224, -5.6267, -2.6117, -6.2077, -3.2169, -4.0398, -2.7833, -0.7292,
        -2.9832, -0.3210, -2.1475,  2.1040], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2351,  -3.4998,  -5.7723,  -8.8100,  -7.8659,  -2.3370,  -4.0339,
          0.8796,  -5.8700,  -4.2672,  -2.3553,  -5.2704,  -4.1958,  -1.2353,
         -4.5907,  -2.9874, -23.0290,  -5.7850, -11.0750,  -7.0717],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1239,  -1.6314,  -1.6571,  -2.4818,  -1.1459,   1.2424,  -2.0457,
         -2.6421,  -1.9740,  -2.2353,  -2.1814,   1.7388,  -3.5271,  -5.6863,
        -12.7221,  -4.5358,  -7.9694,  -1.3929,  -4.7475,   1.9530],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1122, -5.0761, -1.8074,  0.9407, -2.2866, -0.3515, -2.9171, -2.1351,
        -6.2640, -0.6431, -1.6082, -5.3622, -2.8114, -1.6217, -4.7873, -1.9602,
         0.4817, -6.6239, -3.2882, -2.5008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3311,  -1.9012,  -0.7874,  -2.3887,  -2.1212,  -2.2882,  -1.0422,
         -3.3240,   1.3903, -18.7546,  -2.3564,  -1.6890,  -1.2366,  -5.2783,
         -0.1045,  -2.5782,  -4.4062,  -1.3063,  -0.7398,  -4.5228],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6524,  -5.9334,   0.9918,  -5.1796,  -4.4297,  -3.9081,  -5.6248,
         -2.6433,   0.2926,  -5.7094,  -9.7635, -52.4505,  -4.0846, -10.3265,
         -6.6549,  -7.1030,  -0.8629,  -5.3868,  -1.3395,   1.5161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3789,  1.9754, -3.7254, -1.2203, -2.3246, -1.2711, -5.1311, -3.5858,
        -0.4128, -3.5117, -2.5953, -3.2901, -0.9360, -7.1191, -2.0268, -0.4989,
        -2.7355, -0.8779, -2.7742, -1.4172], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4248,  -5.6200,  -2.5759,  -1.5189,  -4.5047,  -2.5297,  -2.6966,
        -22.6103,  -2.6878,  -1.6003,  -5.6286,  -4.9258,  -3.7133,  -5.9362,
         -8.1233,  -7.7029,  -4.7737,  -3.4434,  -4.2678,  -1.6448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3637,  -1.8697,   0.4813,  -2.7009,  -0.7260, -18.6332,  -5.0926,
         -6.9538,  -6.6990,  -0.9494,  -7.6959,   1.5517,  -5.2769,  -3.2454,
         -3.2075,  -5.4505,  -6.4465,  -0.6363,  -9.0497,  -6.9816],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9039, -3.0460, -1.7906, -2.3164, -1.4089, -4.9693, -2.0788, -1.7262,
        -2.7313, -1.7427, -1.8374, -5.6234, -1.7004,  1.7839, -3.5560, -2.2942,
        -2.2514, -1.9213, -6.4246, -2.4591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2189, -0.9460, -2.8587, -2.1269, -1.7343,  2.8534, -6.2362, -2.9974,
        -2.5146, -3.0683, -2.3091,  2.8483, -1.6874, -3.7149, -9.3968, -7.3026,
        -5.1863, -6.6318, -2.2027, -3.5517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1414, -4.6822, -2.7440, -2.0333, -5.2587, -4.1499, -0.4474, -2.9933,
        -1.0562, -2.8315, -2.5363, -1.9202,  2.3873, -3.2555, -0.8419, -2.7535,
        -3.2753, -3.0895,  0.9250, -8.8669], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8839e+00, -3.3435e+00, -2.8960e+00, -3.4864e+00, -1.8835e+00,
        -5.0379e+00, -7.7864e+00, -4.9622e-01, -6.2959e+00, -2.7873e+00,
        -1.8573e+01, -6.4620e+00, -2.6087e+00, -5.4254e+00, -1.6365e+00,
        -3.9935e+00, -5.9773e-03, -1.1732e+00, -1.5246e+00, -1.2261e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7584,  -1.0002,  -2.9830,  -1.3865,   2.0290,  -3.2130,  -2.9886,
        -13.4809,  -5.8258,  -7.3503,  -1.5803,  -2.9103,   1.3910,  -7.1607,
         -1.1951,  -2.6691,  -5.2415,  -2.6336,   1.6863,  -5.3405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6641, -3.9252,  1.7598, -7.2557, -6.5562, -2.9429, -2.8980, -4.5320,
        -3.7687,  0.8629, -2.5887, -2.9760, -1.6778, -3.8622, -1.8013,  1.4047,
        -5.4580, -1.9554, -3.0336, -0.8033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7969,  -4.0941,   2.1884,  -3.6036,  -4.5584,  -2.0016,  -3.6107,
         -6.0685,  -5.1485,   0.3975,  -6.1646,  -5.6954,  -3.9780, -15.2302,
         -2.5229,  -3.6442,  -1.9872,  -2.0190, -20.1962,  -1.8593],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7911,  -2.6871,   1.1293,  -3.6531,  -1.0451, -10.7174,  -5.3774,
         -9.3356,  -6.0303,  -6.6104,  -1.0335,  -8.4857,  -1.7521,  -5.7403,
         -2.2041,  -2.4864,  -0.4105,  -5.4566,  -0.8650,  -0.1084],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2422,  -6.4576,  -1.5181,  -4.2190,  -0.4926,  -0.9917,  -1.7227,
         -3.1593,  -2.1228,  -2.4832,   1.4464,  -2.6902,  -0.6044, -23.7594,
         -6.6935,  -6.8123,  -7.5788,  -1.8692,  -3.3969,   1.6832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9608,  0.4923, -2.8229, -1.6065, -1.2870, -1.3807, -2.8182,  2.3269,
        -4.9339, -0.4004, -1.3770, -2.8148, -2.4614,  1.0692, -1.4772, -1.2887,
        -1.6308, -1.9733, -1.9264,  1.7995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2648,   0.0693,  -2.9508,  -1.1839,  -2.7140,  -6.3369,  -2.5557,
         -1.8602,  -3.6212,  -3.4665, -11.4983,  -5.0439,  -7.2347,  -5.3669,
         -6.2800,  -2.6611,  -4.7633,   0.7998,  -7.4294,  -2.9288],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8591,  -1.6930,  -1.8189,  -2.2698,  -4.2675,  -1.7583,   2.1659,
         -1.6850,  -2.9758, -22.9328,  -1.5157,  -7.9010,  -2.7864,  -2.5320,
         -0.3663,  -5.1487,  -2.5713,  -2.4907,  -6.2809,  -2.9998],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9297, -4.6615, -5.3525, -9.4129, -4.7949, -7.8410, -1.6214, -6.2665,
        -2.6679, -0.1352, -3.1914, -2.5869, -4.2757, -5.3735, -2.2008, -3.8778,
        -5.5743, -2.8347, -1.4693, -7.2432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3156, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.1373,   0.0541,  -5.9734,  -3.8350,  -1.5969,  -2.5309,  -4.1867,
         -1.8278,  -0.7691,  -4.7907,  -1.3715,  -4.9193,  -2.3310,  -3.2890,
         -4.6584,   0.3325,  -4.7540,  -1.9550,  -2.5983,  -3.6441],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2454, -11.7603,  -6.0524,  -0.3863,  -7.1702,  -4.7017, -12.4059,
         -8.9190,  -5.6236,  -6.5278,  -3.1125,  -4.1249,   0.7991,  -9.3277,
         -5.3583,  -3.7361,  -3.3446,  -3.0269,  -3.6861,   0.6686],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9218, -17.3904,  -0.7494,  -2.4140,  -6.3415,  -2.3622,  -3.3958,
         -4.0664,  -5.2550, -13.1307, -22.3180,  -8.3081, -10.6065,  -6.9387,
         -7.6794,  -2.7036, -12.4151,  -0.1899,  -4.3593,  -4.1408],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4026, -2.8207, -1.8784, -4.8688, -1.9202, -0.0513, -1.7873, -2.1363,
        -3.3287, -2.2338, -4.2679,  0.6148, -1.2159, -1.9809, -3.9931, -6.2110,
        -1.3471,  1.6317, -5.2775, -4.3608], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6533,  -1.9360,  -2.3701,  -1.7850,  -3.0906,   0.0827,  -8.3115,
         -7.1240,  -0.8708,  -1.4116,  -6.4393,  -1.1178,   0.3334,  -5.1327,
         -1.9218, -10.8190,  -7.6517, -16.9226,  -0.8675,  -4.1541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9278,  -2.6538,   0.9959,  -8.9630,  -3.3933, -18.4377,  -8.5808,
         -9.1970,  -0.1003,  -9.1197,  -1.8233,  -1.9027,  -3.3562,  -1.1643,
         -1.8750,  -1.7892,  -7.2634,  -3.8009,  -1.3944,  -1.6147],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8724, -1.4402, -4.2015,  1.6351, -2.6693, -2.5051, -2.2610, -2.5634,
        -4.8526, -2.0862, -1.2893, -3.9314, -2.1370, -2.8911, -3.3365, -2.8872,
         1.2991, -5.9011, -2.1264, -3.6671], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4242,  -2.6368,  -1.3580,  -2.0695,  -3.0470,  -1.9664,   1.9172,
         -3.9901,  -1.9448,  -3.4726,  -3.2945,  -5.9833,   1.2243,  -3.1500,
         -2.1228, -15.0972,  -7.3368,  -2.5515,  -6.1029,  -2.2413],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9825, -6.9463, -2.1197, -9.5229, -6.4134, -4.7121, -6.5456, -1.6033,
        -5.5024,  0.7144, -6.4412, -1.6882, -3.9092, -3.5259, -4.0873,  0.5435,
        -5.0019, -2.1708, -4.0161, -4.8108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6421,  -3.1521,  -2.0105,  -2.3738,  -2.8875,   0.9118,  -2.4588,
         -1.8889,  -7.8331,  -5.8062,  -5.9252,  -5.2980,  -2.3645,  -1.9164,
        -23.0917,  -6.3872,  -1.7286,  -2.4674,  -1.4199,  -4.9309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9515, -1.4449, -0.4566, -1.6313, -1.1016, -2.6545, -2.9133, -2.3394,
         2.1367, -3.5630, -1.1594, -4.8891, -0.3921, -6.6647, -2.8439, -1.8512,
        -2.4814, -3.6516, -3.1250, -1.7973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4749,  -3.9186,   1.1002,  -2.4215,  -3.3618, -19.9661,  -7.7553,
         -2.0552,  -7.3920,  -3.8446,  -2.9226,   1.5991,  -6.6868,  -1.7253,
         -4.3486,  -2.0693,  -4.9616,  -1.7480,  -3.6839,  -2.4643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5414,  -2.1064,   0.1534,  -3.7749,  -1.3143,  -5.2348,  -1.9462,
         -5.7128,  -1.1496,  -1.3167,  -4.3810,  -2.7672,  -4.5389,  -1.9953,
         -4.3541,   1.8553,  -2.3248,  -2.7255, -10.6468,  -5.7535],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7807, -2.6075, -0.2644, -2.4553, -1.8450, -2.0300, -3.4813, -0.7842,
         1.2195, -3.8692, -3.3682, -3.4983, -0.7428, -6.0860, -0.5331, -0.3442,
        -2.9490, -0.2277, -3.9189, -1.6585], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3660, -24.6758,  -5.4681,  -8.0895,  -5.6775,  -1.9443,   0.2365,
         -8.9798,  -2.8016,  -3.0872,  -7.2070,  -5.4557,  -0.5805,  -3.8613,
         -0.9150,  -2.8212,  -0.8641,  -5.0572,  -0.8604,  -0.9300],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1466,  -3.7887, -18.2006,  -6.4539,  -3.5955,  -1.8514,  -3.1713,
         -2.8878,   1.4341,  -3.0667,  -1.3228,  -1.5461,  -4.2272,  -2.1535,
          2.4974,  -2.4146,  -1.6577,  -4.0249,  -0.2426,  -7.1452],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1726, -2.0937,  0.2692, -1.2389, -1.4718, -2.5270, -3.1938, -1.7705,
         1.9709, -5.1264, -0.9515, -1.7458, -1.2266, -3.5799,  2.0134, -0.6278,
        -2.3213, -3.9528, -0.4031, -5.8068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6729, -0.7794, -2.9400, -1.9443, -5.3903, -4.7344,  0.7863, -3.9281,
        -1.0963, -4.0814, -0.6650, -3.6882,  0.7100, -2.4255, -6.5246, -1.6899,
        -0.9827, -1.8243, -2.4597, -0.1058], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5620, -10.7975,  -7.2626,   0.0827, -16.7304,  -2.5080, -11.4487,
         -8.0070,  -7.7277,  -1.0827,  -2.0253,   0.8858,  -4.6333,  -1.7014,
         -1.6848,  -2.5771,  -6.0207,  -0.8126,  -1.7011,  -3.0015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5124,  -6.1400,  -5.3949,  -7.3670,  -7.1160,  -4.4579, -11.9191,
         -4.3087,  -3.3716,  -5.0331,  -5.5216,  -3.9201,  -8.1105,  -7.1926,
         -2.7031,  -4.0246,  -3.7582,  -3.2138,  -5.2077, -15.1017],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6427, -2.0495, -3.6261, -3.8047, -0.5271, -2.9170, -1.4654, -1.1085,
        -2.5091, -5.5383, -3.6930,  0.6208, -2.8817, -1.4072, -3.3348, -2.5400,
        -3.6364,  1.3438, -3.4166, -0.9829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3058, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-24.4998,  -1.7329,  -2.6981,  -8.1656,  -4.4378, -24.4834,  -3.6132,
         -0.7007,  -1.7107,  -3.6462,  -4.5154,  -1.4604,  -0.4812,  -2.7813,
         -3.4490,  -3.0584,  -1.7281,   2.5783,  -5.0785,  -2.4209],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7581,   1.5318, -16.2008,  -1.9791,  -4.4209,  -7.7636,  -2.9580,
         -2.1679,  -4.4152,  -3.4888, -36.1859,  -5.2587, -11.0176,  -5.9024,
         -6.1242,  -2.0427,  -4.3061,  -1.8158,   0.6012,  -2.7242],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0263,  -1.6302,   0.2762,  -1.6578,  -1.5349, -16.3425,  -3.7380,
         -7.8795,  -2.5882,  -8.5191,  -2.3883,   0.1148,  -5.3517,  -1.6232,
         -2.1395,  -0.9559,  -5.3976,  -2.5817,  -0.9798,  -5.4775],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7540, -1.6853, -0.1336, -3.3323, -1.6156, -3.8186, -2.6860, -4.4974,
        -1.0400, -0.8901, -3.0930, -2.4676, -3.8543, -0.8823, -8.9391, -0.7236,
        -0.7707, -4.2523, -3.2230, -2.8045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3117,   0.3511,  -4.8414,  -2.7399,  -0.6159,  -4.2311,  -2.2379,
          1.7752,  -2.8642,  -1.6760, -13.2044,  -5.8342,  -6.0764,  -7.7494,
         -1.8462,  -2.7077,  -0.3963,  -3.1439,  -0.9233,  -5.1873],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6109, -3.1005, -5.4881, -4.5757,  0.3425, -2.7966, -1.5342, -1.7965,
        -4.1880, -1.9363,  0.7818, -3.5562, -0.6864, -3.6759, -6.1942, -2.2054,
         1.5802, -4.4718, -1.9013, -0.4597], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8983, -6.0842, -5.2418, -2.6070, -2.6111,  0.4574, -4.7768, -2.6088,
        -1.6298, -1.7210, -1.0003,  1.9406, -2.2981, -2.3852, -2.1971, -1.6465,
        -1.5005,  1.9085, -4.1831, -1.9450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9218, -2.2114, -2.1159, -4.2331, -5.7197, -2.4337,  1.5717, -3.0978,
        -0.8502, -2.5411, -2.3495, -3.4029,  1.5062, -5.4640, -1.1120, -2.6743,
        -3.5891, -0.9303,  1.5747, -3.8643], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5757,  0.9456, -6.1508, -0.9599, -2.6743, -3.6140, -0.6248, -5.4492,
        -2.0768, -0.8406, -1.3772, -0.6908, -5.6913, -0.5613, -2.9847, -3.2807,
        -2.0407, -1.1766, -5.8628, -2.0895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1071, -5.1234, -2.5812,  1.5453, -5.7502, -2.1400, -3.4325, -1.4031,
        -7.1219, -1.5950, -0.6394, -2.5032, -1.4827, -1.7199, -4.3979, -1.6093,
         2.2386, -1.8599, -1.5729, -7.0575], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2608, -1.2637, -1.6063, -3.2654,  1.3579, -4.6945, -2.5065, -2.1696,
        -4.2815, -3.2219,  0.5148, -2.1942, -2.3144, -2.4990, -3.5975, -1.2304,
         0.5025, -2.4525, -2.4923, -3.3829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7788, -2.4446, -6.4090, -5.0568, -3.9853, -1.4919, -1.2698, -1.3032,
        -3.3710, -3.4445, -2.2545,  0.4055, -2.7384, -2.0315, -4.3298, -1.4145,
        -4.2195, -1.7448, -4.8033, -2.4511], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4156, -1.5515, -1.2858, -3.1215,  0.4841, -3.3556, -1.7693, -2.6142,
        -2.8300, -2.7217,  0.3758, -6.2754, -1.5151, -6.0601, -2.1690, -4.5863,
        -3.8101, -1.0988, -1.7232, -1.3951], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6734,  -6.5593,  -6.5448,  -0.9015,  -6.2134,  -3.1355,  -0.7239,
        -15.7993,  -6.0865, -13.2238,  -5.4959, -31.5722,  -4.9030,  -6.1595,
         -8.1736, -11.4160,  -6.5831,  -7.7935,  -3.3802,  -2.8656],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1695,  -0.7250,  -2.7289,  -1.1528,  -5.5945,  -3.2147,  -1.4342,
         -3.5246,  -1.8090,  -2.0029,  -1.9157,  -3.7569,   1.9648,  -2.3403,
         -1.1792, -11.8062,  -7.6485,  -3.8275,  -5.0002,  -2.6368],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5920, -7.3419, -0.9322, -1.9333, -5.1428, -1.3707, -2.4240, -3.3439,
        -3.2821,  0.6371, -4.0990, -1.5170, -2.5877, -1.4878, -5.7681, -1.6495,
         0.1963, -7.7793, -2.1780, -1.7835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0581,  -3.4569,  -6.9963,  -1.4171,  -3.3531,  -0.8128,  -7.4601,
         -3.3648,  -1.9812,  -5.3366,  -2.3267,  -6.1549,  -6.4972,  -4.1822,
        -23.0348,  -5.8122,  -7.1168,  -0.1698,  -4.9873,  -0.8928],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9380, -2.7957, -4.5327, -7.6006, -2.2534, -0.2257, -2.2915, -2.4908,
        -2.7590, -4.6083, -2.6608, -0.1840, -4.0066, -4.9385, -1.8691, -3.5241,
        -6.2864, -2.6348, -0.0844, -3.2757], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4687, -3.3734, -2.0693, -1.1197, -1.1459,  2.7213, -3.1565, -7.1218,
        -2.8882, -4.7027, -6.8752,  1.1835, -1.1107, -4.3203, -1.3026, -7.0157,
        -1.4965, -0.8493, -2.5501, -1.5230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3601, -3.5174, -1.6773,  1.4813, -2.9445, -2.0664, -1.6942, -1.4557,
        -5.9386, -0.4203,  0.4946, -3.7214, -0.6962, -2.7112, -4.9456, -1.5527,
         1.5035, -2.4315, -4.0409, -4.7696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6180,  -3.4044,  -2.4432,  -2.6989,   0.2042,  -2.0621,  -3.8728,
        -15.5921,  -5.3989,  -5.3880,  -5.9400,  -6.1410,  -2.1508,  -4.4132,
         -0.9918,  -3.3122,  -2.0191,  -2.8042,  -5.5947,  -2.3008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6262, -1.8839, -3.1653, -1.3365, -5.1488, -3.3548, -2.0135,  1.3304,
        -2.2308, -0.8008, -5.8018, -3.4345, -4.2987, -2.6219,  1.2918, -2.4489,
        -2.5807, -4.1167, -3.6323, -2.7690], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5867, -3.1802,  0.0762, -3.9259, -0.7192, -4.7806, -2.8231, -1.1910,
        -2.3130, -4.1011,  2.6492, -7.3127, -1.1734, -2.4822,  0.4244, -5.9621,
        -1.9473, -0.4239, -3.4332, -2.8528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5418, -22.4026,  -2.2060,  -1.0419,  -5.9851,  -2.6646,  -5.5219,
         -4.0449,  -4.3640, -12.9892,  -3.9838,  -7.4324,  -1.4766,  -3.6220,
         -0.4181, -12.2826,  -2.1630,  -3.7997,  -5.5549,  -4.1735],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6034, -3.5456, -0.7376, -1.7816, -5.1756, -1.1166, -1.7934, -2.4959,
        -1.4975, -2.7818, -1.6567, -1.4258,  2.4574, -3.7812, -2.3594, -1.8758,
        -1.5293, -2.3286,  1.1023, -4.7237], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2282,  -4.0270,  -3.6812,   0.8900,  -3.7144,  -5.9003,  -2.9764,
         -3.3464,  -3.5770,   0.9312,  -3.8627,  -1.6717,  -1.4169,  -6.6082,
         -2.0666,   1.3803,  -2.7313,  -4.6115, -17.1099,  -5.2788],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3728,   1.2296,  -1.9214,  -6.1888, -20.8788,  -6.1564,  -9.6514,
         -0.8308,  -4.5496,   0.8128,  -5.7572,  -5.7216,  -2.8280,  -4.1672,
         -3.5923,  -5.6261,   0.2490,  -5.1316,  -1.9898,  -2.7899],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0434, -3.0593, -0.8759, -3.1657, -3.3930, -1.8227,  1.6174, -3.0105,
        -1.2392, -2.5552, -1.8216, -5.3531, -5.9476, -0.1467, -2.6676, -0.5322,
        -3.3442, -3.5061, -1.5027,  2.7996], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5638,  -1.6732,   0.9932,  -3.8988,  -2.6837, -17.2030,  -7.1027,
         -6.6848,  -6.7222,  -1.4123,  -1.7828,   1.2459,  -6.3859,  -4.9497,
         -3.4441,  -4.2640,  -5.8735,  -4.5976,  -3.7634,  -5.1512],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8975, -0.8806, -4.6451, -0.8095, -1.5214, -0.8908, -5.8114, -0.8235,
        -2.4377, -4.9703, -3.0106, -2.6939, -1.0567, -6.7073, -2.2331, -0.3676,
        -2.4246, -1.9083, -5.9578, -9.1422], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5975,  -5.2043,  -1.2082,  -3.2471,  -4.4086,  -2.3658,   0.2975,
         -3.5566,  -1.6602,  -3.3920,  -1.7393,  -7.0790,  -4.3691,  -2.1843,
         -5.8164,  -3.7866, -10.9525,  -3.3419,  -5.4963,  -3.3110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6710, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9021, -2.7305, -0.9054, -3.1269, -0.5706, -8.8863, -1.9855, -0.9362,
        -1.6369, -1.3137, -0.5164, -3.5949, -2.1985, -1.3111, -6.9364, -1.7996,
        -1.1969, -2.9311, -6.7055, -3.0574], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4330, -3.0930, -2.3861, -5.0674, -2.7474, -7.0117, -2.8580, -2.6933,
        -2.3132, -2.5499, -1.2904, -2.7997, -2.0146,  1.8925, -4.6764, -2.0776,
        -1.7610, -4.8346, -3.2883,  1.6643], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4236, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9069,  -1.4059,  -5.9065,  -9.8530,  -4.7568,  -6.8901,  -2.0245,
         -2.6440,   1.7043,  -6.8947,  -3.7073,  -0.7477,  -5.4186,  -3.8176,
         -2.2190,  -3.0556,  -2.6933, -23.9013,  -6.2577, -10.1713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0870,  -2.0164,   0.6309,  -4.0697,  -1.2790,  -1.9214,  -2.4725,
         -1.7219,   2.0934,  -0.7626,  -2.2647,  -1.2637,  -1.3345,  -4.3143,
          1.5832, -13.6021,  -3.6660,  -4.7159,  -6.0323,  -7.0395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8272, -5.8722, -2.4541,  2.0091, -5.8666, -3.8463, -3.0855, -1.0969,
        -5.2566, -2.3657, -0.4255, -1.4623, -2.3055, -0.9797, -2.3769, -2.9377,
         2.3975, -3.2500, -5.9761, -1.8241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1928, -14.6264,  -4.5697,  -5.2973,  -7.0852,  -2.1551,  -2.7052,
         -6.0282,  -2.6353,  -2.5926,  -1.2182,  -4.5526,  -2.2304,   1.0296,
         -2.4253,  -2.0745,  -2.8597,  -1.9689,  -7.9428,  -3.3443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8555,  -1.3667,  -2.2274,  -7.0078,  -3.2951,  -1.0617,  -4.9742,
         -0.4169,  -8.8193,  -6.1238, -13.9726,  -8.2962,  -3.4065,  -3.6780,
          1.0017,  -4.3579,  -2.1306,  -4.8129,  -6.7741,  -4.1013],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2470, -0.2316, -1.9482, -4.4128, -3.0579,  1.5400, -1.4186, -1.6217,
        -1.4096, -2.6596, -4.2774, -1.1959,  0.4273, -3.1050, -1.3886, -4.3519,
        -2.1121, -5.0090, -3.1068, -1.6938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2524, -1.4298, -3.9430, -3.5137, -3.0420, -0.5997, -5.1786, -2.1453,
         0.7288, -2.2440, -2.6124, -2.8262, -0.5730, -5.6858, -0.6729, -0.0197,
        -5.4189, -0.4905, -1.9913, -1.3064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3008, -6.7422, -1.7302, -5.4783, -6.8383, -2.9322, -3.8370, -1.0214,
        -4.5356, -1.1635, -0.6003, -4.2332, -1.3274, -3.2421, -2.5497, -1.2567,
         2.2920, -4.4178, -2.3482, -3.8878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7519, -0.9770, -2.7559, -4.2637,  1.9230, -3.6474, -4.6251, -3.0521,
        -4.2158, -1.6186,  2.2693, -1.3044, -2.1687, -3.0570, -2.0621, -5.0204,
        -0.1845, -0.7563, -2.6676, -1.6409], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2435, -0.9827, -1.7198, -2.1036, -1.2289, -5.4587, -3.6843, -6.9508,
        -0.8422, -2.4354, -4.8347, -2.1407, -4.2511, -1.4768, -5.2890, -2.3728,
        -3.7052, -2.9601, -0.4881, -2.1464], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0161,  -5.7004,  -2.8207,   0.4134,  -3.7876,  -4.5475, -18.1976,
         -5.1875,  -7.3437,  -6.7152,  -6.6558,  -1.8250,   0.8002,  -7.4067,
         -2.5620,  -2.8650,  -3.7222,  -1.4861,   1.4342,  -5.8236],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7640,  -4.5046,  -1.5080,  -1.4519,  -5.1667,  -1.8560,  -2.0789,
         -4.9958,  -2.9803, -11.0831,  -7.3703,  -4.7605,  -6.0947,  -1.5370,
         -5.5008,   1.1219,  -5.6852,  -2.0564,  -4.1915,  -1.6009],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8395,  -5.7020,  -5.4095,  -6.4730,  -1.6350,  -4.1866,   1.5870,
         -2.4478,  -1.1141,  -1.7460,  -5.9508,  -1.7018,  -2.1224,  -6.1345,
         -3.1836, -28.4612,  -6.5589,  -8.3473,  -6.7331,  -5.1055],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1936, -3.3458, -1.4537, -1.7529,  0.1093, -7.0152, -1.9046, -2.5337,
        -6.7801, -1.6300, -2.0424, -5.3906, -2.3413,  0.3424, -3.2817, -1.4864,
        -9.1820, -4.9291, -9.1495, -1.3146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1280,  -3.6789,  -2.4353, -18.1531,  -0.8574,   1.8046,  -2.8212,
         -3.0376, -16.7261,  -4.8476, -15.2689,  -2.5620,  -5.3485,   2.4922,
        -10.0547,  -5.7071,  -7.1347, -10.9758,  -1.5613,  -9.4297],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3431,  2.1910, -3.6252, -1.1474, -2.3497, -1.1626, -5.0293, -3.4991,
        -0.5334, -3.6135, -2.5247, -3.3436, -0.9165, -6.9811, -1.9107, -0.7735,
        -2.6513, -0.8622, -2.8388, -1.3912], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4664, -1.9050, -2.2016, -2.1365, -3.9771,  1.1367, -4.9469, -3.6578,
        -2.7460, -1.8700, -5.8515, -1.5600, -0.2642, -1.7006, -0.9091, -1.6435,
        -5.8518, -1.2879,  1.7348, -2.9499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5657, -2.5073, -4.3485, -6.7658, -3.1129, -0.2427, -2.7364, -1.6264,
        -0.8190, -2.0018, -4.2857,  1.8209, -6.5834, -3.1535, -2.8075, -0.7654,
        -5.3715, -2.6222, -2.1474, -6.9018], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0272, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3578,  -4.8398,  -0.5435,   2.0340,  -2.8210,  -0.9637,  -2.1714,
         -2.9329,  -1.6102,   1.7897,  -2.2298,  -1.7789,  -4.4008,  -1.2460,
         -5.4290,  -4.1001,  -3.1791,  -1.7973, -10.0495,  -4.7300],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5577,  -5.7976,  -7.0003,  -3.4687,  -1.5374,  -5.0776,  -3.0514,
         -3.3589,  -1.9635,  -2.1842,   2.1338,  -2.6118,  -3.5044, -11.9028,
        -10.7654,  -3.4937,  -6.7171,  -1.2299,  -2.2321,   1.9822],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4625, -5.1506, -3.5628, -0.7753, -5.0272, -3.8955, -2.7633, -2.7369,
        -3.7341,  0.4029, -2.6640, -3.4443, -4.8904, -3.2211, -3.7434, -2.4269,
        -0.5209, -2.7163, -3.7813, -3.1488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6356e+00, -1.5314e+00, -8.8896e-01, -5.0937e+00, -1.3689e+00,
         1.2353e-01, -1.7203e+00, -3.8233e+00, -3.3861e-01, -2.3752e+00,
        -7.6566e+00, -9.0663e-01, -5.8664e-01, -2.7316e+00, -1.0358e+00,
        -1.4736e+00, -5.8114e+00, -9.2077e-01,  5.1069e-03, -3.3332e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1164, -2.5773, -0.6728,  0.1026, -4.2285, -2.0764, -3.1634, -2.1812,
        -3.3683, -2.5415, -4.6243, -3.8093, -0.6550, -3.8856, -5.9812, -2.6323,
        -5.5955, -1.5484,  2.4158, -3.0559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5713, -5.2838, -1.9047,  1.0516, -4.5965, -1.9166, -2.3006, -2.3740,
        -4.9151,  0.9691, -6.5268, -1.4568, -1.6426, -0.1841, -5.7699, -0.0079,
        -3.3233, -3.8783, -1.8416, -2.4763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6475, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8970,  2.7755, -2.3683, -0.9887, -2.5056, -1.9736, -6.6636, -1.7700,
         0.0536, -3.1279, -1.4099, -3.8829, -3.3304, -1.4331,  1.0177, -3.8590,
        -2.5331, -4.6515, -0.9685, -5.1053], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1742, -4.9599, -1.8919,  1.2352, -2.9997, -1.9959, -1.9400, -5.5715,
        -2.1604,  1.3057, -1.7973, -0.7361, -1.8421, -1.4047, -8.9598, -2.2510,
        -1.3495, -3.3087, -1.2720, -3.2619], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2987,  -6.0125,  -7.9321,  -0.7414,  -9.1630,  -4.0595, -10.9997,
         -3.3778,  -4.0565,  -2.1765,  -4.4900,  -0.7592,  -0.6722,  -8.9189,
         -2.8493,  -4.4408,  -5.4861,  -5.2547,   1.2495,  -3.7743],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2974, -0.5577, -1.3863, -3.4430, -2.1265, -8.1139, -7.7467, -0.5985,
        -6.6766, -1.9781, -9.3585, -6.2992, -4.5409, -6.0849, -1.9508, -3.4129,
         0.0242, -1.4513, -1.5200, -2.8760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9549,  -1.8653, -26.8274,  -7.5655,  -3.7665,  -5.8555,  -1.1858,
         -2.4669,  -0.3083,  -4.8497,  -2.6399,  -1.2741,  -0.3771,  -3.9341,
          2.2190,  -3.3655,  -2.7542,  -2.3617,  -1.5955,  -5.2998],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8140, -2.2467, -7.4547, -2.5416, -0.9975, -2.0849, -0.0544, -1.0978,
        -0.6549, -2.3113,  1.8401, -3.2277, -3.3167, -3.2191, -1.6378, -4.9540,
        -3.9230, -1.3205, -5.1316, -3.0102], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6405,  -1.4011, -28.0022,  -2.6677,  -3.1208,  -6.7776,  -3.5121,
         -3.7286,  -8.6213,  -3.9215, -20.9551,  -5.6644,  -4.7039,  -9.7242,
         -5.0238,  -6.2605,  -1.5142,  -4.7720,   0.8104,  -5.1251],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4012, -2.1423,  0.3479, -2.7203, -1.6980, -2.7281, -2.6033, -2.3040,
         2.3304, -2.4875, -1.3042, -3.4467, -1.9054, -4.2389, -0.7546, -0.0315,
        -3.3082, -0.6909, -2.4772, -5.0925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7974,  -6.4371,  -2.8008,   0.9572,  -3.9516,  -4.2492, -17.3683,
         -6.3079,  -4.0714,  -6.7533,  -1.0588,  -3.8843,   2.1457,  -2.4567,
         -1.3363,  -4.9850,  -2.9318,  -4.1913,  -1.3462,   1.0462],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6717, -2.9238, -2.3290, -2.7878, -2.2350, -5.3804, -1.9444, -0.1947,
        -3.9567, -1.9394, -1.5440, -0.7109, -5.3566, -2.0903, -1.7280, -5.0485,
        -1.8977, -4.8399, -0.6228, -5.0685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7254,  1.3174, -2.2811, -3.0986, -1.4233, -3.2587, -0.8730,  2.0850,
        -2.4860, -2.0440, -2.6015, -3.0144, -2.3480,  1.4618, -2.1368, -3.9340,
        -3.8038, -1.3369, -6.4304, -1.8590], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0765,  -2.9331,  -4.4094,  -3.6803,  -1.9094,  -2.6749,  -2.0427,
         -2.1480,  -3.1355,  -1.5981,  -0.6163,  -2.0488,  -2.1626, -12.2763,
         -4.7994,  -7.1432,  -0.7042,  -5.0754,   0.5737,  -4.6790],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0543,  -4.9261,  -2.5080,  -1.8688,  -3.7273,  -1.2459,   0.1812,
         -2.8339,  -4.2255, -14.7322,  -5.9216,  -3.5739,  -6.4773,  -1.3001,
         -4.3479,   1.8022,  -5.5193,  -6.8228,  -0.5999,  -3.8512],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3635, -4.4056, -6.0374, -5.2325, -2.3292, -3.6215, -4.0876, -6.5422,
        -3.8718, -5.7548, -2.9563, -7.2201, -1.4384, -2.9614, -3.9456, -2.4306,
        -5.6887, -3.5865, -5.0443,  0.0140], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8579,  -3.9075,  -1.1910,  -3.5842,  -4.1330,  -4.7161,  -3.4088,
          0.0502, -13.8424,  -1.6620,  -2.2984,  -4.5863,  -1.8213,  -0.9839,
        -17.0751, -12.8695,  -5.8920,  -5.2178, -11.8635,  -0.9288],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9037, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7428,  -6.7526,  -4.3109,  -8.6162,  -2.9910,  -6.1344,  -0.5009,
        -13.3716,  -4.9673,  -3.9912,  -6.7628,  -3.7121,  -0.5320,  -4.2531,
         -3.6099,  -9.2176,  -6.3238,  -2.8470,  -7.8055,  -2.6401],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3541, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2189,  -2.5568,  -0.5085,  -3.4740,  -1.2152,  -2.7990,  -3.0760,
         -3.4130,  -8.7341,  -2.5372,  -2.1665,  -0.9949,  -5.7179,  -1.2047,
          1.0319,  -2.8913,  -3.8778, -24.4166,  -4.5193,  -8.0421],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7091,  -5.8987,  -0.6600,  -2.0834,  -3.5454,  -0.6447,  -3.0269,
         -3.1678,  -1.2620,   1.5725,  -2.6948,  -1.1928,  -2.7369,  -5.6873,
         -1.6758,   1.2999, -11.8382,  -3.7129,  -3.2161,  -3.5951],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4881,  -2.1464,  -1.7555,  -5.8195,  -1.2428,   0.2217,  -1.8852,
         -1.8320,  -2.1674,  -5.1101,  -2.7662,  -0.9637,  -3.7869,  -1.8711,
        -18.7720,  -2.0753,  -8.1905,  -0.4547,  -2.6079,   1.2686],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1259, -2.6768, -4.7634, -1.1265, -5.5913, -2.6861,  1.2338, -2.5377,
        -1.5817, -2.4735, -1.4056, -5.1353, -2.3665,  0.6470, -1.2734, -2.4630,
        -0.9142, -4.1832, -1.3285,  1.2532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.3604,  -7.2499,  -3.0564,  -3.5087,   1.5121,  -3.7659,  -2.6775,
         -1.2667,  -1.0273,  -7.9115,  -2.7793,  -0.2804,  -3.6460,  -1.8455,
         -1.4370,  -4.4263,  -0.6553,   1.2475,  -2.2263,  -0.1664],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1411, -1.7191, -3.6448, -3.1793, -4.8574, -2.9740, -8.1403,  0.4105,
        -4.2141, -3.6850, -2.5273, -4.1091, -4.2672, -1.4281, -7.3982, -2.9844,
        -3.7188, -2.9025, -6.3397, -4.0886], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8174,  -0.7457,  -6.7033,  -0.6478,  -2.5190,  -3.0136,  -0.2454,
         -2.4941,  -2.8829,  -1.7862,   1.3472,  -1.4722,  -1.2763,  -4.0736,
         -1.2116,  -6.8989,   1.3513,  -4.3103,  -0.6842, -33.8022],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1028,  -9.7339, -15.3720, -20.6205,  -3.0932, -12.9178,  -5.8270,
         -8.0515,  -2.7377,  -3.1900,  -2.8471,  -3.7601,  -3.1063,  -3.5364,
         -5.2094,  -3.3086,  -0.1447,  -2.8024,  -2.8018,  -0.8774],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0438, -29.5508,  -6.0442,  -8.0924,  -4.2080,  -5.7266,  -4.7146,
         -3.8630,  -3.4194,  -4.5613,  -3.7839,  -2.5968,  -6.8230,  -3.7142,
         -1.2272,  -5.8606,  -3.6203, -12.3989,  -7.8701,  -9.4059],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5834,  -2.1211,  -0.8283,  -3.5700,   2.6927,  -6.0164,  -1.4933,
         -2.9259,  -2.2365,  -2.7084,   2.4178,  -1.2271,  -5.1800, -20.6493,
         -8.2708,  -2.5491,  -6.2198,  -1.9546,  -3.1341,   1.2743],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8602,  -1.4799,  -3.6622,  -2.3982,  -1.6458, -12.3842,  -0.8208,
         -0.5253,  -6.3822,  -5.5745,  -4.2638,  -1.0241,  -3.7380,   0.6474,
         -3.6218,  -3.2465,  -4.3966, -17.4442,  -6.2558, -22.1611],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0726, -4.2241, -3.0686, -2.4885, -3.3915, -1.9246,  1.0571, -1.9555,
        -0.9966, -1.0304, -3.1079, -1.4601,  0.4588, -5.0220, -4.1478, -2.4697,
        -3.5440, -3.2734, -2.3181,  1.0108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0696, -2.3216, -1.1655, -5.4518, -1.1261, -2.7371, -1.5175, -1.7830,
        -3.3404, -1.5559, -5.2458, -1.4007,  0.3932, -1.8238, -0.6810, -2.9315,
        -3.6127, -2.4274,  2.3716, -6.1901], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0990, -1.8823,  0.8022, -3.3505, -2.2217, -2.0224, -3.2006, -4.5266,
        -3.2722, -0.5119, -1.8053, -0.6883, -0.7514, -3.1018, -1.0771,  2.4736,
        -4.6656, -1.4732, -3.1376, -0.6300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1690,   0.5796, -12.1304,  -3.1396,  -1.8027,  -5.4757,  -1.8414,
          1.3136,  -3.1714,  -4.9481, -15.3374,  -7.6742,  -8.3584,  -6.5101,
         -8.5191,  -6.4530,  -1.7251,  -3.1166,   0.9868,  -3.8499],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2408, -4.2378,  0.0693, -2.5218, -1.4100, -1.8268, -3.9784, -1.9335,
         1.2410, -3.6764, -2.0438, -3.5255, -2.6768, -6.4750, -0.9792, -2.0997,
        -2.2229, -2.6702, -0.7843, -6.4803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8128,  -4.1744,  -3.1799,  -9.4192,  -5.9967,  -2.1424,  -8.8932,
         -3.3568,  -4.3185,   2.1276, -27.6653,  -2.7298,  -2.6670,  -5.5934,
         -2.4738,   0.4012,  -8.0086,  -4.1959,  -4.3698,  -1.1377],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2659, -3.0721, -1.2795, -4.9107, -1.1883,  0.1906, -3.2280, -1.4798,
        -2.3047, -2.8070, -4.5745, -2.6875, -0.2187, -1.2616, -6.4710, -3.2170,
        -0.2036, -1.4627,  1.1920, -2.8813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1251,  -5.0322,  -2.3149,   1.4388,  -4.1989,  -3.8988, -17.1351,
         -6.3903,  -3.9348,  -6.6933,  -1.6619,  -2.5341,   2.5578,  -6.8373,
         -1.6213,  -3.0695,  -7.6842,  -1.8745,   2.4143,  -6.0726],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.6403,  -3.1677,  -1.2655, -14.0189,  -2.1835,  -9.0110,  -2.4117,
         -2.3551,  -3.5084,  -0.8023,  -2.6719,  -2.3554,  -0.7517,   2.0884,
         -2.6149,  -1.6473,  -1.2069,  -3.3590,  -2.7799,   1.3281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3934, -0.8819, -4.0130, -1.0196,  2.0496, -2.3176, -0.4745, -2.4483,
        -1.7653, -1.6895, -4.0114, -4.1259, -3.4523, -3.4699, -1.0833, -7.1865,
        -1.1061, -0.6587, -3.3146, -0.5239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8392,  -1.5737,  -5.0129,  -2.2301,  -3.6891,  -3.0801,  -4.5862,
         -8.3554, -14.6830,  -3.1476,  -6.4857,  -1.3840,  -2.8249,  -0.6219,
         -3.0634,  -1.3103,  -2.2502,  -2.7004,  -4.6619,   2.1989],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3836, -2.1032, -3.8976, -3.6432,  1.9164, -2.9331, -1.3785, -2.1854,
        -2.4881, -8.8681, -0.9167, -0.9991, -3.9304, -0.4057, -3.1157, -4.4445,
        -0.8852,  0.7529, -2.8120, -0.9868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7908, -6.6043, -1.1024, -0.7260, -2.1139, -0.5471, -2.7840, -5.0601,
        -2.1782,  0.1433, -3.8818, -0.7324, -1.9461, -1.0604, -1.4125,  2.5813,
        -2.8441, -1.4226, -3.3765, -0.8801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5906,  -2.0238,  -2.4675, -18.6430,  -5.7187,  -5.8855,  -9.8531,
        -24.6001,  -3.7204,  -5.5253,  -7.3632, -42.7865,  -4.8379,  -0.8672,
         -4.8710,  -6.4631,  -1.4979, -11.8069,  -3.4760,  -5.1324],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.8065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3101,  -7.1852,  -5.2318,  -4.7002,  -6.6009,  -5.8087,  -5.0864,
         -1.6412,  -4.0156,  -4.6625,  -7.2638, -11.1849,  -4.7731,  -4.2657,
         -7.5035,  -5.5070,  -5.7603,  -6.5174,  -4.5933,  -4.4838],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8149,  -0.8019,  -6.4207,  -4.9169,  -1.9366,  -3.1352,  -1.8297,
        -15.4649, -10.0758,  -4.4089,  -9.3019,  -5.4175,  -3.0832,  -5.6205,
         -2.2745,  -6.4083,  -0.7994,  -2.7687,  -3.7004,  -3.2807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1275, -0.6570, -1.5781, -6.0954, -2.3897, -1.2462, -1.5331, -5.1434,
        -0.7033, -0.3865, -4.4668, -1.2884, -4.0449, -0.9263, -7.5718, -0.9844,
        -0.3642, -2.4597, -1.1427, -1.9675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1024,  1.5055, -6.5055, -2.6453, -3.8864, -5.9922, -2.4932,  0.7452,
        -3.7491, -2.2195, -6.4524, -1.5121, -5.5402, -0.9549, -1.1387, -9.5449,
        -2.0850, -2.4939, -0.8797, -4.0916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4192, -2.2754, -4.1317, -2.0937, -3.9860, -1.7272,  0.5132, -4.9556,
        -0.7911, -2.2707, -2.9308, -3.4295,  1.6165, -3.6507, -0.3144, -2.8094,
        -1.8329, -2.4684,  2.9286, -5.3619], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2815, -4.4118, -0.7922, -2.6659, -2.0280, -2.5029, -0.4951, -2.0945,
         2.5419, -4.6309, -2.4934, -1.9751, -1.2661, -5.2083, -0.7761, -2.0433,
        -2.3789, -2.0492, -3.1691, -1.7733], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2774, -3.9307, -1.4564, -1.0941, -1.8701, -1.3745, -3.8041, -1.4787,
        -5.8342, -2.6545, -0.3531, -5.6792, -2.1780, -2.5284, -0.8315, -4.1113,
        -1.2341,  1.7038, -2.3865, -1.0986], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2236, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9889, -1.5480,  0.6074, -5.1200, -1.1402, -2.8364, -4.8254, -2.2481,
         0.2157, -4.9781, -1.6254, -4.4210, -0.6643, -7.2149, -0.8620, -3.1336,
        -3.2216, -1.4402, -2.0719, -4.2973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3199, -7.4760, -0.8858, -1.9479,  1.5337, -4.4578, -1.3292, -2.4491,
        -2.0583, -5.2576, -0.6807, -0.4800, -4.7937, -1.2128, -2.5662, -0.2042,
        -8.0032, -1.1405, -2.0624, -2.1244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5610,  -2.5918,  -3.3211,  -7.6808,  -3.2349,  -0.9809,  -4.3081,
         -6.6419, -14.6580, -10.1742,  -4.4041,  -7.1263,  -1.2044,  -4.7361,
         -1.7426,   1.1367,  -4.2104,  -2.5970,  -3.7608,  -4.6533],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4781,  1.2487, -2.2433, -1.3610, -1.7416, -2.2992, -2.0504,  2.7601,
        -3.2104, -3.2199, -3.1994, -3.1816, -6.7116, -1.9084, -0.5679, -3.8081,
        -0.4105, -0.9456, -3.0625, -0.7803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1919, -2.2430, -0.8129, -5.7651, -3.1233, -6.9594, -4.3611, -1.5912,
        -2.8512, -1.3981, -1.5388, -0.5216, -3.6734,  0.4726, -3.4386, -1.6954,
        -1.4407, -0.9337, -4.1386,  2.9273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8213, -0.7468, -5.0205, -3.8206, -0.2581, -4.0256, -1.4478, -1.8466,
        -3.7847, -1.7927,  1.6084, -2.5510, -0.5652, -3.7713, -2.3825, -5.5498,
        -7.5129,  0.2747, -2.4667, -1.6174], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6744, -3.2461, -3.8942, -1.5519, -1.5571, -1.4622,  1.6302, -1.8980,
        -1.7144, -2.9769, -1.2175, -1.9637,  2.0977, -2.9899, -2.6040, -2.3217,
        -0.8242, -7.2059, -0.3299, -0.9468], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0316, -2.2543, -2.3498, -4.7717, -3.6451, -4.5444, -9.4419, -3.9666,
        -3.3044, -2.7337, -3.0418, -1.7038, -6.7378, -3.0729,  0.6333, -3.9297,
        -5.0502, -9.1749, -8.8757, -6.5999], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8820, -2.9225, -2.6319, -3.5262, -0.7449, -3.8192, -1.3523,  1.2508,
        -5.8050, -1.4221, -0.9654, -1.6653, -3.9149,  2.1607, -5.1256, -0.6458,
        -4.5149, -2.1421, -7.7266, -1.5759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0540, -1.5280, -3.5651, -0.5532, -5.3844, -1.4094, -0.3905, -3.9303,
        -0.8954, -3.0756, -1.1188, -5.6952, -0.7966, -0.0673, -4.6608, -1.7737,
        -2.0559, -1.8953, -6.0766, -5.7405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9335, -3.4892, -3.1673, -3.8886, -2.5335, -2.1880, -2.3142, -2.8327,
         0.8711, -2.6072, -2.6747, -1.9175, -2.4598, -5.2051, -0.6164, -0.1812,
        -2.6601, -1.2119, -2.1422, -2.8182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8126,  -2.5975,  -2.5815,  -4.9010,  -4.0105,  -5.0080,  -3.6976,
         -4.0832, -12.8878,  -6.2563,  -7.6272,  -6.6078,  -1.0440,  -6.7269,
         -0.0592,  -7.7873,  -3.5368,  -3.1146,  -5.1942,  -2.3878],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9796,  -4.4744,  -1.0608,  -7.3612,  -3.7678,  -1.6849,  -2.9772,
         -7.9612,  -3.1919,  -1.5001,  -4.0494,  -1.7864,  -1.5746,  -2.6966,
         -1.5582,   2.2172,  -2.7550,  -3.2326, -11.7078,  -5.5993],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0126, -3.4137, -1.1186, -6.0775, -1.7157, -2.9444, -0.2989, -8.0480,
        -2.0843, -1.5886, -2.1947, -2.5187, -1.2734, -4.3389, -0.8561,  1.3806,
        -5.2489, -2.3476, -2.9298, -2.9589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0249, -2.3046, -4.2181, -6.5732, -0.9856,  2.4469, -4.0865, -1.2979,
        -2.8510, -1.8688, -4.9561,  1.4644, -1.3673, -2.2884, -1.2360, -4.9581,
        -1.0612,  1.9910, -1.6659, -0.4302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6090,  -7.0145,  -6.6512,  -6.2715,  -5.8208,  -0.8876,  -6.1916,
         -5.2998,  -2.7755,  -1.8670,  -2.7607,   0.1407,  -8.6543,  -7.0614,
        -14.3109,  -9.2924,  -8.1840,  -1.0263,  -2.4638,   0.7402],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2762,  1.6396, -4.1873, -2.0822, -1.9386, -4.0340, -2.4705,  0.6125,
        -4.5995, -0.2752, -3.0884, -0.7486, -6.4495, -3.4195, -0.9297, -6.9909,
        -2.2089, -1.9043, -2.9169, -6.8377], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9509,  -6.2898,  -0.4731,  -1.4341,  -3.0897,  -1.5406,  -3.2106,
         -0.8906,  -6.0172,  -1.8755,   0.2560, -11.1347,  -1.3486,  -2.6784,
         -0.0461,  -5.4588,  -0.4461,   0.4267,  -6.0226,  -3.4666],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5229, -2.3036, -0.9916, -1.3980, -4.3732, -1.4757,  1.5998, -2.4699,
        -2.0157, -1.1821, -2.9062, -1.7674,  1.6051, -0.8842, -4.5916, -2.5949,
        -4.6788, -5.0495, -5.1089, -0.7792], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4535,  0.4487, -5.6703, -2.4893, -2.5156, -1.8845, -4.6949, -1.6078,
        -0.7499, -4.8015, -1.4005, -5.1280, -0.9822, -5.7022, -0.5587, -0.7493,
        -3.1150, -1.1093, -2.6899, -2.9241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3924,  -2.9500,  -6.3042,   0.8554,  -1.6636,  -3.5963, -17.3023,
        -18.5851,  -2.2645,  -8.0269,  -7.3796,  -5.1952,  -2.5841,   0.5663,
         -7.2141,  -2.0727,  -1.4096,  -1.3885,  -5.4026,  -0.5544],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0314,  -2.9375,  -1.4471,  -3.2227,  -3.8090,  -1.6652,   0.4249,
         -3.1261,  -4.7565, -16.8323,  -7.5276,  -7.4799,  -5.3387,  -8.5459,
         -3.6497,  -4.6564,  -2.2412,  -1.4171,  -7.1766,  -2.6329],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-30.4744,  -3.0671,  -3.3044,  -9.1124,  -2.6343,  -2.2008, -10.1814,
         -4.3313, -14.1952,  -3.9215,  -8.3620,  -0.4506,  -8.9231,   2.4291,
         -9.1259,  -1.2098,  -3.1636,  -2.9486,  -4.8348,  -2.7180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8384, -2.1639, -3.1035, -3.6321, -2.8636, -4.2222, -2.1594, -0.8901,
        -2.2574, -1.0483, -0.3831, -1.5944, -1.0270, -0.0145, -3.5713, -2.7351,
        -4.6667, -1.9537, -8.6305, -0.0666], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5607, -0.7592, -2.9733, -2.0610, -1.9320, -2.8328, -4.5762,  2.3977,
        -3.0972, -1.7674, -2.3441, -4.5537, -1.9977, -0.0754, -1.8777, -0.5697,
        -2.0654, -1.6834, -2.1181,  1.5356], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9146, -5.5719, -1.8248,  1.7415, -3.6671, -1.2205, -1.5468, -1.4079,
        -5.8191,  1.4225, -3.4717, -7.0156, -1.9375, -3.8696, -3.7266, -2.1751,
         1.3029, -2.6969, -2.5417, -0.6686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2092,  -6.4440,  -5.7907,  -2.3558,  -3.0082,  -0.6082,  -4.4019,
         -3.3860,  -3.9041,  -4.8923,  -3.5366,  -0.0302,  -5.9470,  -3.1847,
        -12.1698,  -4.2906,  -6.7312,  -0.6479,  -4.7312,  -0.4397],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1176, -2.3581, -6.6934, -0.8169,  2.8605, -3.9798, -2.5281, -1.3089,
        -2.3718, -4.0271,  1.9978, -3.0276, -1.9579, -3.5682, -3.0610, -4.4898,
         0.8657, -4.6982, -5.5164, -0.5896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8329,  -0.6901,  -3.2020,  -3.2357,  -1.6626,   2.2781,  -1.7506,
         -1.0171, -13.4591,  -6.6817,  -8.8627,  -6.5912,  -2.4115,  -1.8646,
         -0.1639,  -2.4546,  -1.5738,  -1.9815,  -2.4865,  -0.9852],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0815, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9773,  0.3923, -6.1111, -0.4195, -2.7488, -2.4065, -2.1512,  2.3248,
        -5.9367, -2.4763, -4.3678, -1.3734, -4.8485, -7.4800, -1.7940, -3.5439,
        -3.0835, -1.3321, -0.8655, -2.7156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8318,  -4.3381,  -2.5575,  -3.2056,  -8.7278,  -4.2245,  -2.5885,
         -7.1418,  -5.1102, -32.6873,  -9.1520, -11.6360,  -6.4984,  -1.1775,
         -7.1127,   1.6838,  -5.4257,  -2.5871,  -2.7971,  -1.2234],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7893,  -4.4181,  -0.9216,   2.0292,  -2.5665,  -4.2646,  -4.1979,
         -2.7868,  -4.7473,  -0.8962,  -0.9792,  -1.7382,  -1.6298,  -1.5956,
         -3.3545,  -1.9282,   1.1849,  -2.5238,  -1.5446, -14.4434],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6668, -2.1832, -1.8400, -1.2357,  0.6405, -6.5853, -2.0029, -2.2762,
        -2.5551, -5.3375, -1.0621, -1.7907, -3.4771, -2.2572, -5.2391, -1.3929,
        -6.5434, -3.5058, -2.0906, -2.6687], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9308,  -5.8251,  -5.7980,  -5.8953, -22.0028,  -5.4713,  -9.0893,
         -4.4955,  -4.3971,  -6.8272,  -4.3216,  -6.9594,  -3.3339,  -4.6121,
         -1.8284,  -4.4818,  -5.5816,  -5.1279,  -8.6314,  -5.0315],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6606, -11.0244,  -0.3536,  -3.3383,  -8.9600,  -3.1145,  -3.6399,
         -2.4730,  -5.1768,  -2.6975,  -0.6163,  -3.1641,  -0.1840,  -1.7354,
         -4.6998,  -0.8699,   1.2125,  -3.0436,  -1.4042,  -4.4961],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1167, -4.8408, -0.9057,  2.0937, -1.8756, -2.3709, -3.0538, -1.2487,
        -3.7517,  0.8650, -3.9857, -2.7473, -5.4587, -2.7784, -2.6426,  2.4757,
        -2.7769, -1.6022, -3.8781, -1.8767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4150,  -6.6183,  -4.1494,  -5.8140,  -5.2955, -11.1735,  -2.2659,
         -4.4085,  -3.1969, -13.8443,  -7.5933,  -8.7998,  -9.3418,  -3.1899,
         -0.2296,  -5.3168,  -2.3964,  -3.2117,  -6.4048,  -2.8581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3145,  -3.3933,  -4.6353,  -1.7994,   2.1747,  -5.7807,  -1.2401,
         -2.9959,  -5.2400,  -3.8698,  -0.2497,  -3.8051,  -6.2013,  -0.9310,
         -8.0444,  -2.4817, -20.4362,  -5.3420,  -6.3970,  -5.1279],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4055, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8474, -0.8296, -3.6715, -3.8625, -3.2761, -1.0679, -3.9091, -4.0634,
        -0.0442, -0.6957, -1.1234, -1.0778, -4.5455, -1.4449,  1.1549, -1.6228,
        -0.7824, -1.9645, -3.7902, -0.5045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2807,  -4.6310,  -3.1314,   1.2723,  -3.6300,  -2.1805,  -3.6479,
         -9.7562, -13.2817,  -7.9924,  -4.4900,  -3.1506,   1.5150,  -7.3490,
         -2.1860,  -3.7022,  -4.4796,  -3.0246,   0.4858,  -6.0559],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5041, -1.7065, -5.4108, -2.9074, -2.3762, -2.3096, -8.3203, -1.5723,
        -1.4418, -7.3242, -1.4257, -2.1250, -1.0714, -4.9912, -0.2542, -0.8692,
        -2.9562, -1.9991, -5.1296, -1.1995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0840,  -5.6616,  -4.5111,  -6.0902,  -8.2465,  -4.8914,  -5.6237,
         -7.2245,  -7.6638, -14.7744,  -6.6051, -25.8348,  -5.0740,  -9.3243,
         -4.1242,  -7.8423,  -6.4406,  -5.1082,  -1.3973,  -2.8891],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2292,  -2.4626,  -6.0272,  -4.0068,  -4.3123,  -4.5187,  -5.5896,
         -5.5985,  -5.9623,  -6.4166,  -7.7319,  -2.9842,  -6.3034,   0.3230,
         -5.7101,  -2.6681,  -3.9894, -13.0286,  -3.9703,  -6.6475],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4721, -6.3150, -5.2819, -3.0954,  1.1472, -7.2061, -1.6353, -2.9816,
        -1.1108, -5.4914, -0.9714,  0.0348, -5.0117, -0.2048, -2.6456, -3.2157,
        -2.9491,  2.0540, -3.9186, -2.4879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9584, -3.9396, -3.9413, -7.2119, -1.7744, -2.3757, -2.3385, -1.5219,
        -4.2574, -2.5044, -5.8935, -0.7240, -0.6097, -5.7047, -1.2596, -6.7308,
        -5.7754, -1.3860, -6.1639, -2.6900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5380,  -6.1404, -11.5657,  -1.9721,  -2.8452,  -0.6608,  -1.3041,
         -0.9822,  -2.5813,   2.3843,  -2.1591,  -2.1976,  -1.6434,  -3.0525,
         -6.7317,  -0.7603,  -1.3540,  -2.2316,  -1.7319,  -1.4104],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2833, -1.2709, -0.4536, -2.4795, -2.5815, -0.5428, -2.2243, -2.2998,
         2.1883, -0.8908, -1.2786, -4.1124, -2.5077, -6.4256, -4.2145, -0.8834,
        -5.4462, -2.9461, -2.8868, -1.9251], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1966,  -3.2867,  -1.8023,   2.0247,  -6.5068,  -1.3914,  -9.7024,
         -6.4339,  -9.4220,  -6.3524,  -5.2434,  -2.2816,  -3.9300,  -3.2418,
         -3.5797, -25.8071,  -5.7480,  -9.8755,  -0.9343,  -2.6510],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4181, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8956, -1.6550, -3.8540, -4.8624, -1.8729,  1.3258, -3.2380, -3.4076,
        -4.0365, -1.4185, -3.6143,  1.9488, -4.1475, -2.5412, -4.1940, -1.0490,
        -4.0414,  0.6141, -4.5408, -2.0084], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4898,  -1.5430,  -2.9428,  -4.5637,  -2.6034,  -0.6207,  -5.1596,
         -4.2883, -19.1281,  -3.9877,  -6.1407,  -4.1964,  -7.8731,  -5.1279,
         -2.9026,   1.4776,  -6.8712,  -4.2965,  -2.7669,  -2.9911],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6706,  -9.2194,  -4.3130,  -4.8847,  -6.4749,  -1.6038, -10.1923,
         -2.7348,  -2.7293,  -5.0150,  -2.2772,   1.7976,  -3.8949,  -3.7737,
        -11.6041,  -5.6706,  -7.5510,  -5.5183,  -6.5558,  -1.7274],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2620,  -4.8801,  -6.3709,  -6.4623,  -7.1671,  -9.7583,  -1.9881,
         -5.1142,  -4.1839,  -4.2958, -12.1226,  -6.7543,  -4.8984,  -6.5838,
         -2.5781,  -6.6655,  -1.5811,  -3.1712,  -6.6248,  -5.4480],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0446,  -6.5038,  -1.7899,  -3.3431,  -2.7068,  -1.4038,  -3.0597,
         -4.5452,  -4.5851, -15.0071,  -5.8919,  -3.8883,  -5.1618,  -4.4254,
         -2.8135,   1.7761,  -2.2172,  -2.9714,  -1.3010,  -5.5784],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4917, -1.9100, -2.3819, -4.0881, -1.1944,  2.5556, -5.6303, -2.1435,
        -2.5160, -1.7175, -6.6705, -3.4275, -1.6425, -3.8783, -2.2578, -1.3694,
        -2.9198, -1.1984,  1.8929, -1.3085], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1897, -1.7901, -2.7371, -2.0448, -2.6127, -1.8455, -3.9850, -0.8498,
        -1.3272, -2.8891, -1.0938, -2.6357, -5.2923, -1.6392,  0.8496, -3.7888,
        -3.9607, -1.6713, -2.5858, -3.7911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6450,  -1.5659,  -3.2144,  -2.5830,  -4.8022,  -2.7288,   0.4465,
         -3.1825,  -1.6221, -21.3363,  -3.3425, -21.6895,  -2.3711,  -6.5632,
          2.5435,  -5.8490,  -3.7531,  -3.0617,  -1.2548,  -3.2594],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5917, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3477, -3.1260, -1.6584, -1.7343, -1.3869, -7.3701,  0.6395, -6.3367,
        -2.3156, -1.7580, -1.3992, -7.1983,  2.1153, -1.6488, -1.8284, -4.0809,
        -2.6533, -4.2844, -2.1249,  1.4525], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7026, -0.9524,  2.1474, -2.2957, -0.9658, -1.5673, -1.9003, -1.5335,
         0.3704, -6.0704, -3.1169, -2.9732, -3.9600, -6.7684, -2.7330, -1.0027,
        -5.5088, -2.2695, -1.6839, -4.1116], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2626, -2.8703, -1.8403,  1.6270, -4.1187, -3.2500, -4.5070, -1.3470,
        -5.6311, -1.7777,  1.2356, -3.8906, -1.7221, -2.3555, -1.0015, -6.0051,
         2.8171, -3.6345, -2.3070, -3.4353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8796,  -2.0555,  -1.7873,  -0.7362,  -3.6592,   3.0717,  -2.0572,
         -3.4360, -15.2063,  -6.9880,  -8.9623,  -1.6836,  -3.1129,   0.4454,
         -2.5984,  -4.2747,  -1.9750,  -4.5437,  -0.4009,  -6.8341],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6837, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.3555,  -6.1001,  -3.1621,  -4.6419,  -3.4223,  -1.2630, -14.5130,
         -3.1607,  -3.1994,  -1.3003,  -5.6446,  -1.0799,  -4.9083, -10.0049,
         -1.2265,  -2.4473,  -2.0832,  -3.5481,   2.0186,  -1.5401],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7875,  -2.4041,  -2.9967,  -0.1889,  -5.0175,  -4.0943,  -1.5380,
         -7.1418,  -4.1046,  -1.9525,  -3.6639,  -3.3947, -26.0141,  -6.8177,
         -8.3418,  -7.0095,  -3.1111,  -2.0221,   1.4901,  -3.6654],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8476, -5.8266, -2.0753, -0.5282, -2.0633, -1.2214, -2.3011, -1.3714,
        -7.0185, -2.7272, -0.8975, -2.0984, -1.6736, -1.4824, -3.0980, -1.1981,
         1.5412, -4.3309, -2.7271, -3.5966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1229, -2.2880, -2.3232, -1.8731, -4.3923,  1.5462, -3.5324, -0.9066,
        -3.0578, -0.6832, -5.1807, -2.5174, -1.9704, -4.7208, -1.8167, -3.2927,
        -2.9742, -7.5121, -1.9814, -2.2715], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.2069,  -3.2380,  -7.1682,  -1.4222,  -6.4280,  -3.3709,  -0.2546,
         -2.6675,  -2.8135, -14.3386,  -3.9603,  -7.8179,  -0.9389,  -5.6373,
          0.7824,  -3.7029,  -5.0193,  -4.2121,  -2.9919,  -5.2997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4708,   0.4890,  -3.2239,  -1.2591,  -4.8232,  -3.6106, -13.3938,
         -2.1454,  -1.3974,  -4.2838,  -1.7264,  -1.6716,  -2.3027,  -2.5777,
          2.5963,  -6.8246,  -2.1125,  -5.5819,  -3.1834,  -5.9032],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3962, -2.2238, -2.2492, -4.4333, -0.3078, -7.6245, -2.2596, -3.6677,
        -5.3230, -2.2816,  1.9148, -2.6390, -0.3394, -4.1688, -4.2138, -3.4807,
        -0.9165, -6.2043, -8.7682, -3.1435], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2306, -0.8707, -8.8007, -3.3612, -2.9306, -5.9277, -1.4441,  0.2473,
        -3.3605, -0.5102, -2.3208, -3.3235, -3.7214,  1.2675, -1.7496, -2.1354,
        -3.9501, -4.1239, -5.5754,  1.5992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5347,  -7.1581,  -0.5854,  -4.6593,  -0.8677,  -1.9667,  -6.1222,
         -2.7971,  -2.6460,  -4.5027,  -2.1992,  -9.6914,  -6.3426,  -7.1388,
        -12.7748, -33.6725,  -7.3291,  -5.5365,  -1.4935,  -5.1494],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8302, -1.4646, -1.8395, -2.0906, -5.2862, -1.2116,  0.0818, -2.3183,
        -1.8164, -2.9122, -3.0407, -8.6619, -1.7961, -0.5147, -3.3579, -2.9010,
        -2.1678, -2.6600, -5.2219, -0.2042], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7111, -3.6625, -0.1292, -1.5890, -5.4043, -0.9117,  1.2518, -1.2026,
        -1.4942, -1.2745, -2.7157, -1.5213,  1.2374, -3.8574, -0.9622, -4.4116,
        -1.3121, -5.0129, -2.0186, -3.1826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3510, -3.2047, -0.9628, -2.1575, -1.6429, -1.6043, -2.3116, -0.6854,
         2.5266, -4.3653, -0.9533, -1.3843, -0.7965, -5.3725, -0.3304, -4.4181,
        -3.8346, -1.1358, -4.3428, -1.8826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7308,   2.0277,  -2.1336,  -1.6658, -15.4805,  -2.9245,  -8.5017,
         -6.7168,  -3.1764,   1.0320,  -8.6660,  -2.6610,  -6.0820,  -1.2545,
         -3.3550,  -2.5570,  -3.5097,  -3.8567,  -2.3281, -13.3956],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.8364,  -3.1465,  -2.4770,  -0.8103,  -5.2738,  -0.4865,  -0.1675,
         -4.0260,  -1.3069,  -4.9144,  -4.4395,  -1.7247,   1.3117,  -9.8981,
         -4.2325, -28.3736,  -6.5091,  -7.3007,  -6.0687,  -6.2418],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0345,  -1.3507,  -0.7775,  -7.5515,  -2.6438,  -3.3866,  -7.8200,
         -3.3255,  -1.0898,  -4.1559,  -6.5204, -14.2105, -10.1347,  -4.1831,
         -7.1404,  -1.2708,  -4.5617,  -1.5580,   0.9725,  -4.1711],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6001, -3.4957, -4.5116, -0.1227, -2.4151, -3.6493, -1.4047, -0.9299,
        -1.2844, -2.6332,  1.1747, -3.6593, -1.1621, -2.7620, -1.4217, -5.8365,
        -0.3065, -2.5042, -5.1916, -2.7365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8497,  1.8717, -2.0077, -1.9100, -2.0582, -2.4155, -1.6613, -0.4972,
        -4.5547, -1.6709, -4.3696, -0.6283, -5.9802,  1.6882, -5.4004, -0.6450,
        -3.4066, -1.9823, -3.2770,  2.6843], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9035, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9050,  -4.8087,  -0.2277,  -6.0356,  -4.9486,  -7.2784,  -0.6573,
         -3.3349,  -5.8365,  -0.7678,  -2.3610, -11.1798,  -1.6473,  -2.5783,
         -0.8172, -16.0185,  -3.0680, -17.5743,  -1.5929,  -3.9093],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9208,   2.1439,  -3.5329,  -2.5645, -15.0757,  -2.1404,  -6.7282,
         -1.9762,  -1.8843,   1.5250,  -8.1882,  -2.8893,  -2.8423,  -0.7077,
         -5.9620,  -1.2729,   0.2598,  -9.2246,  -2.5437,  -1.9658],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9381, -4.5622, -5.7028, -2.1161, -0.1536, -9.9250, -2.3549, -3.4534,
        -0.6931, -7.0986,  0.0659, -0.8989, -2.6993, -1.2356, -2.4758, -1.0976,
        -6.8795, -4.6975, -2.5763, -1.7949], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1144, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2345,  -5.7203,  -1.3877,  -2.4471,  -3.6110,  -5.2385,  -1.9221,
         -2.8172,  -3.7793,  -3.3875,   0.9153,  -6.8463,  -1.7554,  -4.0371,
         -0.0372,  -5.2203,  -0.2536,  -0.0703, -13.7251,  -2.1080],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5259,  -2.3029,  -0.6189,  -2.0254,  -6.9355,   0.8840,  -3.8421,
         -4.6632, -19.0525,  -4.9401,  -3.5270,  -6.5874,  -0.8808,  -5.2005,
         -0.5119,  -2.3291,  -3.6662,  -0.9676,  -1.5129,  -1.3670],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.8829,  -3.0975, -31.6230,  -7.4178,  -7.8166,  -1.0473,  -3.7537,
          2.1327,  -2.5395,  -1.9504,  -2.0202,  -6.7464,  -1.6693,   0.1024,
         -2.9003,  -2.2785,  -3.5719,  -5.7258,  -4.1458,  -3.6089],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1928,  2.6661, -4.4531, -1.1158, -2.3956, -4.1468, -6.2343, -3.4411,
        -1.8717, -8.4416, -5.2288, -1.2233, -1.0542, -1.3579,  0.6898, -8.0138,
        -1.0700, -3.5026, -0.6517, -6.1855], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5303, -2.0299, -6.4378, -1.6299,  0.1796, -4.5568, -6.5231, -6.8470,
        -2.9896, -4.8769, -9.0359, -2.6413, -2.6815, -4.4100, -1.3591, -1.2410,
        -3.7293, -1.7086,  1.7291, -2.8710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7396, -5.5990, -1.8414,  0.2495, -3.4900, -2.2009, -3.2343, -3.4525,
        -1.5042,  0.4537, -4.5826, -0.5946, -3.0261, -2.0529, -1.9426,  1.9028,
        -2.2557, -1.4116, -3.0857, -0.4634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7080, -3.5923, -2.2665, -3.1743, -5.0484, -2.4135, -7.4543, -1.0551,
        -1.8583, -4.0993, -1.7573, -1.0120, -2.2150, -0.9328,  1.4998, -1.8914,
        -2.1709, -3.2752, -5.1186, -8.1694], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2459, -11.7706,  -2.8731,  -7.2236,  -2.6341,  -3.1812,  -4.3709,
         -4.1598, -14.1531,  -9.0498,  -3.7662,  -4.3009,  -1.7354,  -4.8614,
         -5.1108,  -2.3219,  -0.0891,  -2.0485,  -1.8574,  -2.6338],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.4107,  -3.7542,  -1.8547,  -2.7804,  -3.5006,  -3.0360,   2.3718,
         -2.6152,  -3.7199, -14.9604,  -5.2449,  -8.4947,  -6.2940,  -8.1897,
         -1.2499,  -8.1176,  -2.6357,   0.0477,  -4.8388,  -3.3069],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2747,   0.4188,  -3.0991,  -1.0432,  -2.2861,  -3.0488,  -1.8783,
          1.8030,  -4.1311,  -0.8642,  -2.4802,  -6.5747, -19.8681,  -5.4264,
         -7.4226,  -1.8112,  -2.6238,  -5.4125,  -4.8128,  -2.9269],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2683,  -2.7627,  -7.4530,  -5.3890,  -7.7519,  -4.1193,  -2.6934,
         -1.7152,   2.5833,  -5.4488,  -1.0406, -12.2569,  -5.2972,  -5.3753,
         -9.2953,  -6.8686,  -1.0613,  -5.7045,  -1.9790,  -3.4411],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4669, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3174, -3.1033, -2.7752, -4.2232, -1.0857,  0.7993, -3.6075, -2.4336,
        -1.6746, -1.1056, -7.7171, -2.0052, -1.6079, -4.2560, -2.0968, -1.7291,
        -0.2805, -5.5542, -0.1680, -0.2835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0096, -4.3743, -2.9782, -1.7997, -1.2763, -5.4628,  1.5915, -8.9733,
        -3.7420, -3.1314, -2.6281, -6.0776, -4.5087, -0.0962, -3.0800, -5.1136,
        -2.0619, -1.6554, -5.6354, -0.9218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3910, -0.1247, -1.7976, -1.8268, -2.4059, -2.8646, -1.5124,  1.6798,
        -2.6589, -4.1841, -5.6105, -3.2787, -5.2406, -4.3453, -4.7056, -7.4814,
        -3.4377, -0.4751, -7.6640, -1.7220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4511, -4.1733, -0.9218, -2.0506, -3.6392, -0.9813,  1.6768, -5.4825,
        -1.9445, -2.8308, -2.1513, -4.4960, -0.4629, -3.0494, -4.7514, -1.0012,
        -2.0560, -5.1121, -0.7951, -3.0885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4004, -0.5623, -0.0502, -1.8657, -1.3346, -4.1171, -1.1773, -5.5459,
        -1.5746,  0.8505, -3.9610, -1.1252, -3.1950, -0.6520, -5.2202, -0.5935,
        -3.1401, -2.8999, -1.9902, -1.9451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0152,  -0.1576,  -5.7565,  -1.8894,  -1.5183,  -1.2002,  -4.8864,
         -1.2776,   0.6078,  -4.0548,  -2.3553, -16.8357,  -2.7967, -12.8488,
         -1.4938,  -2.8757,   1.6850, -13.6275,  -3.8367, -12.1793],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7860,  -2.1721, -12.9012,  -3.2608, -11.3189,  -2.3451,  -5.3751,
         -1.5789,  -6.5795,  -4.1746,  -3.9566,  -8.6743,  -7.6910,  -1.8978,
         -5.1483,  -2.5049, -11.3898,  -5.0055,  -8.5257,  -1.1267],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7913, -8.0227, -3.2434, -1.9142, -4.4803, -1.7477, -1.3490, -2.9052,
        -1.4830,  1.7632, -1.5741, -1.9705, -1.6554, -6.3828, -1.6645,  0.0955,
        -2.6887, -1.1387, -3.2281, -1.9875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9477, -3.4516, -4.9212, -2.2951, -1.6399, -2.6125, -1.1030,  2.1638,
        -3.1650, -2.1671, -1.8082, -2.6618, -3.5784,  1.4890, -1.8718, -1.0619,
        -2.0384, -2.6125, -7.3913, -1.3774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8821,   0.4062,  -5.2374,  -1.8480,  -3.0174,  -2.5903,  -6.7726,
         -2.7822,   0.0450,  -1.1438,  -1.4233,  -2.3268,  -1.9472,  -3.4850,
          2.1438, -24.5926,  -3.0391,  -3.6896,  -1.4582,  -5.4211],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8651, -0.7811, -4.7975, -1.4607,  1.6967, -4.7398, -0.5426, -3.5129,
        -1.2035, -7.0173, -1.3316, -0.1047, -3.1304, -2.4755, -2.5194,  0.1028,
        -5.9726, -0.4376,  0.1140, -3.3862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8186, -1.4525, -5.1240, -1.7788, -1.7544, -2.5044, -0.9611, -1.9236,
        -1.3453, -2.1343,  2.6010, -2.2455, -1.6525, -2.2362, -4.2472, -1.7117,
         1.9159, -1.6382, -2.0913, -0.9920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7145, -2.8403, -6.9738, -0.4324, -8.2263, -2.9672, -3.8945, -8.3518,
        -4.7563, -0.8727, -3.5634, -3.5456, -7.4781, -8.3690, -5.3865, -7.3086,
        -3.2770, -5.3598, -0.3208, -2.5706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4001, -0.9536,  0.2517, -3.7703, -1.7475, -2.2620, -0.9036, -6.1181,
        -5.2296,  0.7808, -2.6130, -0.9545, -2.7982, -6.1571, -2.1598,  0.8166,
        -4.3599, -1.0933, -1.8649, -2.9062], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1409,  -3.9682,  -0.6061,   1.2612,  -3.2460,  -2.3108, -12.8924,
         -4.7147,  -9.6502,  -7.7585,  -1.2950,  -8.1772,   1.9661,  -1.6895,
         -1.1622,  -2.0931,  -2.4183,  -2.2202,   2.1160,  -1.8717],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6138,  0.1998, -4.8361, -3.0215, -4.4047, -2.2027, -4.2059, -1.0354,
         0.1429, -4.5204, -2.3688, -3.0528, -4.0178, -3.2047,  1.1050, -2.6248,
        -2.4274, -1.8644, -1.0205, -7.8904], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5252,  -2.9800,  -5.5297,  -1.1173,   0.3332,  -3.9023,  -1.7440,
        -14.9985,  -5.9975,  -6.4549,  -2.2034,  -4.2824,   1.6447,  -4.9120,
         -2.6693,  -3.9995,  -2.5273,  -4.7726,  -0.7992,   0.7100],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9187,  -1.6026,  -2.8874,  -2.4362,  -5.2837,  -0.6875,  -1.3663,
         -2.4119,  -0.6132,  -3.5747,  -2.5107,  -7.8083,  -1.0039,  -3.4799,
        -13.0127,  -5.2087,  -4.1734,  -4.9902,  -4.7343,  -1.3203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6512, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2726, -1.3129, -3.0083, -2.7089,  1.4914, -3.4589, -1.3683, -2.7053,
        -0.3997, -7.0705,  1.7888, -2.7380, -3.7533, -2.7252, -2.0981, -6.2609,
        -3.3029,  0.3210, -5.3831, -1.3584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9531e+00, -8.8582e-01,  1.4529e+00, -2.1006e+00, -8.6706e-01,
        -1.7090e+00, -3.5902e-03, -5.6837e+00,  1.8928e+00, -3.0059e+00,
        -1.5847e+00, -6.0703e+00, -2.3022e+00, -4.5806e+00, -9.4169e-01,
         1.3618e+00, -3.4736e+00, -4.3014e+00, -8.1453e-01, -2.0492e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2893,  -0.3434,   0.8093,  -3.0007,  -0.4671,  -6.5886,  -0.4382,
         -6.4188,  -0.5814,  -0.6200,  -3.8444,  -0.8017,  -2.2171,  -3.5522,
         -1.7475,   0.5467,  -4.4434,  -3.5535, -18.9295,  -2.8250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2277, -4.3499, -2.8781, -4.9485, -2.9583, -2.2779,  1.4750, -1.8459,
        -1.3905, -1.4492, -2.3417, -0.3044,  1.6627, -3.7956, -1.1960, -3.0166,
        -2.8125, -5.8763, -2.8174, -2.8044], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2462, -2.5227, -1.7007, -5.1793,  0.1338, -4.7377, -4.1926, -1.5244,
        -4.5204, -4.0138, -3.0756, -0.2158, -1.7332, -2.6593, -0.2742, -2.2987,
        -1.7754, -0.7451,  2.3201, -1.7552], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8432, -2.2722, -4.5740, -2.8638,  2.1471, -5.7878, -1.3747, -3.5231,
        -4.3004, -3.9103, -0.7269, -3.5654, -2.9757, -3.1049, -1.9539, -3.3909,
         0.0651, -2.9108, -4.7230, -7.2312], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5021, -2.4675, -1.4814, -6.4443, -0.0842, -0.8919, -3.1493, -1.5409,
        -3.7506, -4.5687, -2.6568,  0.3113, -7.3965, -1.6354, -3.8197, -1.1314,
        -6.6039, -4.1474,  0.5574, -4.9069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3996,  0.6074, -3.2589, -3.3426, -4.3642, -4.9535, -6.2926, -0.5320,
        -1.8515, -1.1482, -3.4299, -3.8828, -3.1095,  1.4462, -3.8637, -0.3724,
        -3.0973, -0.2057, -5.2713, -1.2252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0915,  -1.5690,  -3.4044,   2.2809,  -5.5483,  -1.7792,  -3.9488,
         -3.6008,  -3.1668,  -0.0377,  -5.1839,  -1.9198,  -1.4106,  -1.1911,
         -2.8209,  -1.3929,  -3.9023,  -4.6182, -15.8794,  -6.7875],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0456, -1.5437,  0.5548, -6.0269, -2.6593, -4.6686, -1.9166, -1.0257,
         1.8227, -1.3301, -3.0231, -9.7584, -5.3920, -4.8332, -6.1687, -1.9263,
        -1.8477,  1.5296, -3.3964, -2.4898], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1042,  -9.1321,  -2.5956,  -1.9751,   0.1894,  -5.3918,  -4.3157,
         -3.8970,  -7.5222,  -6.5626,  -1.3067,  -3.4060,  -2.7069, -10.3906,
         -6.4660,  -5.0512,  -6.2541,  -2.7355,  -4.6862,   1.3712],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5672, -10.3453,  -4.0851,  -6.8525,  -5.5294,  -3.6230,   1.8735,
         -7.3664,  -2.7033,  -1.4592,  -0.7600,  -5.8756,  -0.6326,   0.0406,
         -2.2524,  -0.2989,  -1.2972,  -0.0795,  -3.7347,   1.8606],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5616,  -1.4836,  -3.3164,  -2.3572,  -4.6785,  -2.5375,   0.4714,
         -3.1427,  -1.5464, -20.7444,  -3.2729, -21.8921,  -2.4566,  -6.6286,
          2.6265,  -5.6837,  -3.6798,  -3.1831,  -1.2677,  -3.1322],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8321, -3.0636, -1.6553, -1.7023, -1.3813, -7.5116,  0.6587, -6.3904,
        -2.4002, -1.8140, -1.3910, -7.4113,  2.6308, -1.5190, -1.8259, -4.1551,
        -2.5853, -4.1191, -2.0586,  1.2796], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4383,  -0.9323,  -2.2135,  -1.8561,  -1.1669,   2.9087,  -3.5389,
         -0.9361, -10.4615,  -3.0251,  -9.2890,  -0.4103,  -4.7966,   1.6493,
        -14.4622,  -3.2340,  -4.6039,   0.1600,  -6.0160,   0.1843],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9811, -10.9655,  -3.2605,  -8.4281,  -7.7404, -11.6367, -10.6527,
         -7.2994,  -2.8996,  -1.6500,  -4.3661,  -3.1890,  -0.0198,  -6.5909,
         -1.9022,  -2.7527,  -1.9094,  -7.7372,  -1.9642,  -2.0085],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7859, -1.8855, -5.6062, -2.4457,  1.6076, -2.6519, -5.3332, -9.8802,
        -8.4663, -3.0692, -5.8835, -2.4119, -2.5597, -3.5685, -3.6210, -1.0148,
        -2.7528, -4.1401, -1.3481,  0.7360], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7045, -1.3846, -8.7283, -6.7198, -4.1779, -6.1764, -2.1220, -4.5656,
        -2.9758, -2.8094, -2.2444, -2.5902, -1.5987, -6.7325,  2.4618, -4.1739,
        -1.5378, -3.7584, -4.5579, -3.3957], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5606, -2.2221, -2.1666, -4.3861, -2.4052, -7.7075, -2.7829, -2.9065,
        -3.8146, -1.7269, -4.6050, -4.8887, -2.7728, -8.5522, -3.5962, -2.5752,
        -2.8389, -5.0878, -2.3843,  0.6363], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.0596, -48.0747,  -7.8838,  -2.5746,  -7.0219,  -5.9230,  -4.9423,
         -2.2105,  -1.6762,  -3.6228,  -1.3318,  -3.3443,  -2.1226,   0.5265,
         -6.5930,  -0.9103,  -2.4117,  -0.4884,  -6.7702,  -2.0280],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1807,   0.1971,  -3.6987,  -1.6721, -19.3643,  -5.2051,  -6.4546,
         -6.7335,  -1.1062,  -3.6086,   2.5868,  -7.2089,  -1.7978,  -2.0590,
         -4.3856,  -2.9419,  -0.7490,  -3.3699,  -2.2525,  -2.0602],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4509, -1.3957, -3.9350, -1.4272, -4.4970, -2.4535, -1.8330, -3.8921,
        -0.5634, -3.1728,  0.0099, -2.6997, -0.5580, -3.9955, -2.0818, -3.6234,
        -0.7399, -4.9784, -1.3210, -1.6490], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2034,   0.8083, -10.2657,  -2.6237,  -1.4826,  -3.5346,  -4.5951,
         -2.1753,   0.6883,  -5.0352,  -2.1304,  -3.2905,  -1.9717,  -3.3957,
         -0.3844,  -0.4883,  -1.9320,  -1.3242,  -2.2687,  -5.5162],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9116,  -2.9111,   1.1160,  -2.6274,  -2.2416, -14.5530,  -4.0614,
         -9.1953,  -1.6180,  -4.6257,   2.3783,  -4.0566,  -2.8426, -11.2047,
         -4.7711,  -5.5797,  -0.7390,  -0.1993,  -1.4246,  -1.1627],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2493, -3.4476, -1.3257, -8.9194, -2.7142, -3.2220,  0.2290, -5.4934,
        -0.5077, -1.2910, -3.8842, -1.2529, -1.8402, -2.6544, -3.4009,  2.0853,
        -2.8685, -1.0539, -2.4121, -1.3380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7869, -9.0814, -3.5853, -4.2031, -2.1026, -4.2516, -1.1444,  1.5698,
        -6.4464, -1.0343, -2.3534, -3.0245, -2.1128,  1.8719, -3.7393, -3.6573,
        -3.8861, -0.5969, -7.3244, -0.6121], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5415, -4.2450, -0.7616,  1.9183, -3.2339, -1.2100, -4.8878, -2.2144,
        -6.4698, -1.7269, -0.6140, -5.3395, -2.1734, -2.8028, -2.3542, -4.3081,
        -0.8934, -4.5654, -3.5690, -2.0118], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9448,  -3.9846,   0.4879,  -1.5014,  -0.8574,  -1.1489,  -1.7365,
         -0.5461,   1.9011,  -2.3665,  -0.9665,  -2.5997,  -0.4738,  -6.0363,
          1.0267, -22.5987,  -1.9388,  -1.0850,  -3.8257,  -2.7067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8492, -1.8818, -3.6123, -1.3783, -1.7816,  1.9393, -2.1995, -3.2608,
        -2.0172, -2.1080, -2.4918,  0.3351, -3.0032, -3.9823, -1.6932, -2.0233,
        -4.3181, -1.7358,  0.1410, -4.0380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7428,  -1.5022,  -3.2483,   2.4075, -12.6664,  -1.5956,  -2.5847,
         -3.1769,  -5.3037,  -0.2171,  -3.7233,  -3.7944,  -1.4517,  -1.7963,
         -5.2395,  -2.0253,   0.1174,  -4.1032,  -2.0825,  -1.0417],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2907, -3.9758, -2.6473,  0.1645, -4.9801, -1.1233, -8.1836, -5.6111,
        -3.6724, -6.5845, -1.9026, -4.8539,  1.8146, -2.3720, -3.0320, -1.9255,
        -1.2062, -5.1170, -0.4026, -2.6432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0272, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3114,  -3.2534,   2.0342, -12.2519,  -1.7525,  -2.1103,  -4.8617,
         -2.5609,   1.7193,  -7.6561,  -1.2489,  -4.6792,  -5.4912,  -5.5849,
          1.1184,  -0.7891,  -2.5047,  -0.8678,  -3.1678,  -0.8217],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4767, -2.4279, -5.3714, -1.6866,  0.6602, -2.2052, -2.2964, -1.8208,
        -4.5445, -0.5816,  0.9344, -3.0512, -0.1718, -2.2566, -4.9419, -1.4301,
         0.0292, -3.8489, -2.1723, -3.7710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5775, -5.6134, -0.8404,  1.0737, -2.2977, -1.0606, -2.2603, -3.0003,
        -6.3690, -3.3001, -3.2985, -4.2694, -1.6922, -1.2083, -2.4678, -1.5725,
         1.6479, -3.2817, -2.2433, -3.9306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.9352, -3.2580, -1.3960, -4.6884, -4.9522, -4.5043, -2.3163, -6.2272,
        -1.4680, -1.7593, -6.4928, -1.4232, -0.4428, -4.0435, -1.1239, -1.4651,
        -3.7820, -1.3483,  1.3652, -2.7149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3813,  -7.2213,  -4.3161,  -2.4896,  -6.0842,  -2.6280,   1.0203,
         -5.8265,  -4.6955, -22.2415,  -5.4186,  -4.9819,  -8.3292,  -6.4708,
         -6.5322,  -1.0512,  -8.0715,   1.2538,  -5.5310,  -2.4439],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1360,  0.2446, -1.9668, -2.3467, -3.5883, -5.3196, -3.1852,  0.2959,
        -4.4576, -4.1335, -3.5415, -4.0868, -2.6336,  1.1366, -2.1680, -1.2872,
        -3.0768, -0.7495, -4.4635, -4.6985], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6410,  -5.8084,  -2.8173,   0.4341,  -2.8368,  -0.8072,  -2.8622,
         -2.8748,  -1.7386,   2.4750,  -3.3016,  -1.2052,  -9.0062,  -3.6439,
        -13.5507,  -3.0259,  -4.0893,   2.4832,  -2.2713,  -3.6348],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0438, -2.3752, -3.5010, -0.3688, -5.6993, -1.0544, -0.6740, -3.0719,
        -0.5653, -2.4308, -3.9255, -0.3580,  0.6675, -3.0501, -0.6620, -2.6008,
        -3.3702, -3.1827, -4.5853, -3.4088], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4335,  -3.5705,  -0.9173,   1.5759,  -4.1241,  -3.3184,  -1.0685,
         -5.5253,  -1.7893,  -1.4716,  -6.8364,  -4.7225,  -1.9159,  -6.8669,
         -2.4331,   1.9972,  -7.1795,  -4.2043, -40.4645,  -7.3412],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1251, -2.7496, -0.7907, -3.3376, -3.0218, -2.4398,  1.3508, -3.7479,
        -0.9224, -3.1743, -1.5999, -6.0380, -0.2127, -0.7755, -5.1158, -1.8185,
        -3.2418, -2.3361, -6.7721, -0.3405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9357, -4.3246,  0.8763, -3.5370, -2.6115, -2.5954, -4.5693, -4.3863,
         0.2674, -2.4334, -4.3520, -4.0323, -4.5882, -2.6834, -4.5811, -2.2151,
        -3.0847, -4.5780, -4.4536, -2.1930], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1431, -1.8644, -1.5042, -4.4428, -0.7471,  1.7474, -3.6360, -0.9326,
        -3.0580, -1.8696, -2.4952,  1.8644, -3.3920, -2.2139, -5.8525, -2.5512,
        -5.2129, -2.8648,  0.9151, -4.0124], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8137,  -1.7425,  -2.7362,   3.0435, -10.2840,  -1.1725,  -3.5014,
         -1.7529,  -3.5060,  -1.3445,  -0.8986,  -4.3354,  -1.8394,  -4.7599,
         -3.6471,  -7.3021,  -2.4292,  -3.5980,  -9.7942,  -4.2470],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5757, -2.2898, -3.3759, -2.1695,  1.3507, -2.1760, -3.1710, -0.8062,
        -1.3868, -1.2846,  2.7122, -3.8323, -1.9091, -2.9638, -4.1234, -6.7953,
        -2.1250, -2.2781, -3.6796, -0.9062], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2670, -1.7565, -1.2440, -1.5414, -6.3308, -1.2503,  0.9530, -3.2121,
        -1.4684, -2.4208, -4.9583, -0.9230,  1.1221, -4.0535, -0.1458, -2.6301,
        -5.6495, -1.5712,  1.6752, -1.5578], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7479,  -5.2323,  -4.9986, -17.2278,  -7.0049,  -1.0675,  -5.5366,
         -2.4528,  -4.0305,  -1.4427,  -1.9777,   1.2073,  -4.4706,  -1.6520,
         -1.8991,  -3.3950,  -4.1286,  -7.1574,  -1.8018,  -1.4799],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4803,  -4.1719,  -2.7195, -32.6283,  -5.9163,  -7.7317,  -2.4059,
         -2.7505,   0.4375,  -1.5158,  -5.8953,  -3.6528,  -2.6343,  -3.7811,
         -2.5151,   2.0951,  -4.1063,  -1.3061,  -4.2519,  -0.5457],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7281,  1.3945, -5.7005, -0.7153, -1.8469, -2.5736, -6.2729,  0.5375,
        -2.4823, -2.7144, -2.2026, -6.0142, -2.7600,  0.8716, -3.1327, -1.4306,
        -2.2536, -3.6848, -1.8147,  1.2532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9693,  -2.0139,   1.4826,  -1.5152,  -1.7095,  -2.6472,  -2.3518,
         -2.1654,   2.3275,  -2.9660,  -1.7695,  -3.9977,  -1.0124,  -5.6375,
         -3.2660,  -0.1191, -14.1254,  -1.6334,  -2.6701,  -5.4290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8487,  -4.2521,  -4.9898,  -1.2728,  -5.2217,  -2.7032,   1.1297,
         -3.8386,  -1.2867,  -4.3661,  -4.5849,  -2.5834,   1.0055,  -3.2739,
         -4.5902, -14.0439,  -7.8853,  -9.3470,  -7.8550,  -0.9392],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0946, -5.8824, -1.0040, -0.9808, -3.3889, -2.7490, -3.2617, -0.8094,
        -6.2977, -6.3424, -1.8882, -4.5443, -2.5745, -2.0720, -2.3180, -5.6914,
         0.0941, -4.1115, -2.4615, -0.7355], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7399, -3.9870, -0.6173, -6.0705,  2.9140, -3.8367, -1.4700, -2.1066,
        -2.7773, -7.2692, -1.3927, -2.1834, -4.1165, -0.9467, -2.4251, -1.2434,
        -4.1057,  0.9820, -4.0547, -1.3220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3384, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3614,  -2.7806,   1.6996,  -5.6817,  -2.9782,  -7.6651,  -5.9747,
         -3.2056,  -6.2773,  -3.0667,  -2.3047,   1.0103, -14.9792,  -2.9115,
         -0.7871,  -5.2669,  -1.6173,  -2.6829,  -1.8740,  -3.7015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7990e+00, -2.0747e+00,  2.3796e+00, -3.2796e+00, -1.0580e+00,
        -3.2432e+00, -5.3947e+00, -2.3884e+00,  4.4367e-01, -1.7094e+00,
        -1.4997e+00, -1.4392e+00, -4.7889e+00, -1.3870e+00,  9.6881e-04,
        -2.3706e+00, -4.9631e+00, -1.6022e+01, -6.5581e+00, -3.9695e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6918e+00, -4.6598e+00, -1.3351e+00,  7.0643e-01, -2.5731e+00,
        -3.5037e-01, -2.9229e+00, -2.0655e+00, -1.5431e+00, -5.2938e-01,
        -4.6820e+00, -2.2568e+00, -1.0939e+01, -8.6506e+00, -1.9331e+01,
         6.5042e-03, -1.3553e+01,  4.1564e-01, -9.9249e+00, -6.5921e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6236, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9756,  -1.7315,   0.4437,  -5.8181,  -2.3930,  -4.3198,  -7.0026,
         -2.0789,   0.5786,  -7.0774,  -3.9280,  -4.7709, -12.8132,  -6.3520,
         -6.7861,  -1.6771,  -7.4262,  -2.4336,  -8.2555,  -3.0048],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5411, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.7301,  -5.7979, -37.9511,  -5.3963,  -4.5580,  -3.2927,  -2.0470,
         -3.3891,   0.9032,  -1.5875,  -2.8629,  -3.3213,  -1.2884,  -4.8116,
         -0.6815,   0.9225,  -4.4258,  -1.8907,  -3.5587,  -1.0760],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7054,  -8.6515,  -1.4995,  -4.0881,  -2.6256,  -2.1700,   1.0014,
         -2.6485,  -5.2446,  -9.3020, -11.4190,  -4.6278,  -7.9768,  -7.5738,
         -1.2174,  -6.0163,  -3.8357, -18.6607,  -2.7349,  -2.7062],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8543,  -3.7036,  -2.9544,   0.9133,  -2.0891,  -2.3382,  -1.4919,
         -2.4448,  -1.1657,   0.9922,  -1.8972,  -2.6207, -21.8128,  -1.8242,
        -11.0884,  -1.9119,  -0.9251,   1.2936,  -3.3647,  -4.5002],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6355,  -5.0542,  -2.1897,  -3.4209, -13.3378,  -4.1561, -10.1196,
         -5.0904,  -1.2544,  -2.7005,  -5.0891,  -3.1993,  -0.8651,  -3.4096,
         -2.0983,  -2.0824,  -5.5293,  -2.2127,  -1.1573,  -2.8763],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7571, -6.0902, -4.7494, -3.3127, -8.7513, -2.6712, -1.3134, -4.6877,
        -1.9272,  1.5500, -7.7421, -0.8456, -4.6899, -1.2827, -5.4312, -2.7566,
         1.1270, -3.0378, -1.9336, -3.1965], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4138,  1.4003, -1.1021, -1.8570, -7.5507, -4.9861, -2.2621,  0.4126,
        -3.8676, -1.6344, -1.5650, -2.7054, -6.6050,  0.5348, -3.3289, -2.2355,
        -2.9894, -0.6082, -6.5032,  0.2974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6410,  -1.9003,  -2.5708,  -9.0036,  -0.9239,  -5.5792,  -7.7690,
         -1.4280,  -2.1970,  -1.3349,  -3.4320,   2.9940,  -2.8789,  -3.1099,
        -12.8739,  -4.9001,  -8.2950,  -5.0962,  -7.0043,  -0.5162],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0398, -29.5117, -11.0579, -10.0518,  -5.1328,  -7.5651,  -3.9984,
         -0.7713,  -3.7516,  -2.0462,  -2.9008,  -0.6291,  -5.7654,  -3.7008,
         -3.8371,  -3.9463,   0.0353,  -5.5034,  -0.1504,   0.7864],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0177, -5.3861, -5.2741, -6.6213, -4.3564, -4.3085,  1.5893, -7.0232,
        -2.0255, -4.5723, -0.5111, -5.3612, -2.1392, -1.0458, -7.4698, -2.1224,
        -1.7886, -0.8374, -3.0785,  2.8764], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3076, -3.5298, -2.0893, -3.9143, -6.3961, -7.0015, -2.0437, -2.2848,
        -3.2784, -3.0936, -0.8960, -2.7267, -2.8348, -1.2679, -7.6902, -3.5863,
        -2.8425, -3.5197, -3.5785, -0.9676], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7647,  -2.1816,  -1.2585,   0.8953,  -3.2381,  -3.0751, -11.9065,
         -4.7941, -19.5894,  -1.9974,  -9.8377,   1.6268,  -2.3266,  -5.2454,
         -7.4200, -15.4942,  -2.2925,  -8.6841,  -1.0454,  -7.8996],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8863, -0.5833, -3.7493, -1.4426, -5.2029,  0.1851, -0.7140, -3.8456,
        -1.9050, -2.4848, -3.7922, -1.5930,  1.9894, -3.5487, -0.8811, -1.5243,
        -1.6717, -3.0871,  2.5808, -3.1459], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0113, -16.2757,  -1.5274,  -4.9843, -13.9228,  -2.4521,  -7.6162,
         -3.3260,  -3.9579,   0.9679,  -8.7609,  -3.1752,  -3.2962,  -4.4774,
         -2.6172,   1.0316,  -3.5212,  -2.7790,  -4.2482,  -0.8806],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7287, -8.6286, -2.1296, -2.3189,  0.7840, -2.2529, -2.2099, -2.6900,
        -5.0783, -2.3997,  2.1069, -3.2838, -0.8305, -3.6945, -2.9371, -4.0616,
        -2.0929,  0.3879, -4.8876, -1.2283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5587, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0508,  -5.8530,  -0.9363,  -1.2487,  -2.9820,  -1.7638,  -6.5713,
         -4.0359, -23.9291,  -2.7687, -10.2969,   1.5206,  -0.7197,  -6.3936,
         -9.1605, -11.9306,  -3.0817,  -7.1876,  -3.4468,  -2.8327],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1835, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7953,  -5.8104,  -0.4584,  -2.2098,  -2.6345,  -2.2453,  -3.2566,
         -0.3707,  -7.2853,  -0.4916,  -3.4107,  -4.1362,  -2.1493,  -2.5129,
         -2.7604,  -3.3009,   0.4292, -12.6739,  -0.9890,  -2.3225],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5219, -6.0846, -0.5947, -2.9301, -4.1261, -2.4718,  2.1763, -3.2500,
        -2.5641, -3.8254, -0.3698, -5.2870, -2.7237, -0.4786, -1.4526, -2.0988,
        -3.2171, -4.8677, -2.8729,  0.6138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3973, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3258,  -0.0788,  -5.9390,  -1.7619,   0.2032,  -3.7294,  -1.6325,
        -11.2272,  -6.8823,  -2.6961,  -6.8229,  -3.7292,  -2.1629, -16.6745,
         -4.9865,  -4.9008,  -3.9061,  -6.6284,  -4.7041,  -0.7830],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5934, -2.9839, -3.6665, -1.0391, -2.7768, -0.3667, -3.0100, -1.4090,
        -4.9057, -2.3101,  0.0184, -3.6868, -1.6503, -2.8675, -0.7574, -5.7396,
        -1.0731,  0.2965, -3.3426, -3.2736], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5557,  -6.1317, -11.3326,  -1.4054,  -3.1099,  -3.6066,  -5.1997,
          0.0351,  -4.8751,  -2.9576,   0.3449,  -2.1758,  -8.3854,  -5.8527,
         -4.1726,  -3.6987,  -1.1942,  -1.1885,  -3.1540,  -2.9258],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.8039,  -7.0643,  -5.0653,  -3.1808,  -0.4369,  -3.8927,  -4.0268,
         -3.0469,  -0.7630,  -5.4824,   0.2013,  -0.7872,  -2.8608,  -6.6452,
        -11.7340,  -7.0817,  -5.8723,  -9.1515,  -1.7017,  -5.4885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1346,   0.5086,  -0.7877,  -2.5448,  -1.8392,  -4.6000,  -1.1804,
         -0.4429,  -2.6236,  -1.4304,  -4.1656,  -4.1396,  -0.2870,   2.4788,
         -2.9385,  -2.6047, -20.5204,  -6.0989,  -7.8772,  -6.3435],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0058,  -0.9984,  -1.7161,   2.3943,  -4.2054,  -1.4593,  -3.2179,
         -3.6922,  -2.6443,   0.6345,  -3.2102,  -2.2109,  -8.3045, -10.4866,
         -7.9120,  -6.5183,  -2.7140,  -4.7145,  -3.1423,  -3.9219],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1828,  3.1658, -5.2486, -3.7213, -1.9396, -2.9398, -5.6010, -2.4787,
        -0.8263, -5.6389, -0.5532, -2.0517, -1.6403, -1.4673, -0.1748, -4.7957,
        -2.0651, -1.3538, -1.5272, -1.2779], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4840, -2.0434, -0.4616, -3.2754, -1.3796, -1.5885, -3.7169, -1.7408,
         1.7010, -2.0593, -2.3515, -1.4951, -1.5641, -3.3029,  1.9393, -2.8662,
        -1.2448, -1.7249, -6.7163, -2.1290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6115,  -0.2918,  -1.8670,  -0.6004,  -1.8043,  -2.6274,  -1.2333,
          0.6642,  -6.1521,  -2.3114, -16.3298,  -6.9207,  -2.8087,  -6.5873,
         -3.9356,  -2.4531, -10.1239,  -4.2016,  -8.7232,  -2.0178],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7444, -4.9289, -1.0326,  1.0624, -2.2770,  0.0655, -2.2255, -4.6840,
        -1.7676,  1.3071, -3.7400, -1.2982, -2.2664, -6.3272, -0.4064,  1.1922,
        -6.5051, -1.9133, -0.9161, -3.5448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1475, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1555,  0.9868, -3.9221, -2.8362, -3.7955, -0.6201, -5.8686, -1.1815,
        -0.5620, -6.3976, -1.4353, -2.6917, -3.2144, -2.8153,  1.6766, -6.0781,
        -2.4056, -4.9197, -8.2380, -5.9449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.7889,  -4.7369,  -8.6392,  -4.2906,  -2.5807,  -2.3803, -22.5641,
         -6.1841,  -4.9926, -12.8365, -31.0745,  -2.7416,  -3.6011,  -2.4418,
         -3.6156,  -5.6790,  -3.8477,  -1.9900,  -4.9837,  -4.2183],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8046, -2.6946, -3.3616, -2.7621, -5.3489, -4.8693, -0.1182, -3.4853,
        -4.0094, -2.6833, -1.8925, -4.8868, -0.0987, -0.4817, -2.5738, -0.7701,
        -1.6063, -2.3357, -0.8771,  0.0917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0243,  -1.6237,  -5.3591,  -1.9894,  -0.8142,  -3.9877,  -2.4890,
        -39.6836,  -3.5062,  -7.8234,  -1.3403,  -2.0538,   1.7376,  -4.3132,
         -2.1653,  -2.3819,  -1.0709,  -5.1146,  -0.3517,  -0.3407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4295, -11.2691,   0.5265, -11.4395,  -2.5205,  -3.9144,  -5.3056,
         -1.9430,   1.8528, -11.4415,  -3.9084, -10.3118,  -3.9430, -14.5679,
         -2.8919,  -4.3635,   1.7077,  -3.1912,  -2.7149,  -2.3683],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2979, -19.4226,  -3.2857,  -8.4198,  -6.0601,  -2.9341,   1.5269,
         -8.3273,  -4.5416,  -7.1328,  -0.9668,  -2.5234,  -3.5554,  -1.8285,
         -4.7032,  -9.3602,  -1.6199,  -2.0871,  -3.5217,  -5.5761],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2212, -5.2668, -1.7958,  0.5828, -1.7021, -2.7088, -0.8118, -3.2502,
        -0.9610,  0.6174, -2.1085, -0.7177, -2.1003, -1.8089, -5.4703, -1.2244,
        -0.4098, -1.7885, -1.3168, -0.6653], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8064, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2274, -11.7272,   1.0265,  -4.3283,  -3.1500,  -5.3157,  -5.5746,
         -5.3019,  -2.1087,   0.3530,  -1.7262,  -2.0588,  -1.8750,  -3.0471,
         -1.2419,   2.6888,  -3.0166,  -1.6879,  -2.2585,  -6.1643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5938, -11.9363,   2.0759,  -1.9356,  -7.4253, -12.8811, -11.6171,
         -5.8767,  -8.8550,  -4.9829,  -6.8284,   1.0112,  -4.3281,  -2.7375,
         -1.7022,  -7.0587,  -2.3488,  -1.5713,  -7.0852,  -8.8781],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5534, -1.5410, -1.0893, -5.2508, -1.6597,  1.0350, -2.7807, -1.1315,
        -1.9235, -3.7017, -3.8979,  1.4597, -3.9948, -1.9498, -1.5142, -6.5810,
        -1.5265, -2.4209, -4.2488, -3.9197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9517, -1.5827,  2.0831, -3.3163, -2.6212, -2.3694, -1.6447, -6.4382,
        -1.5710, -1.9811, -4.2430, -1.8127, -2.8702,  0.0452, -5.6773, -0.6177,
        -1.8939, -3.6956, -1.4092, -4.2962], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8572, -2.6036, -5.7451, -1.6216,  2.5886, -3.7176, -1.5457, -2.1439,
        -1.9026, -5.3621, -0.1826, -3.1105, -3.0629, -3.6299, -3.0685, -1.9500,
        -7.3975, -1.5328, -0.6479, -2.8241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2518, -5.5809, -1.8885, -6.4459, -6.0001, -4.0711, -6.4737, -3.6561,
        -7.2682,  1.4534, -6.5605, -2.3676, -4.0549, -6.0837, -2.5540,  0.9814,
        -3.7928, -2.1182, -6.3664, -1.3682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4378, -2.8115, -1.5858, -0.9530, -1.9873, -2.3189, -0.7624,  1.7699,
        -3.8196, -2.2966, -4.8310, -4.1250, -2.4034,  1.3605, -2.3223, -3.6323,
        -2.8287, -3.5430, -3.2618, -0.8511], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0194,  0.5125, -2.8821, -0.3181, -1.0652, -1.6396, -4.0255,  2.7241,
        -1.6263, -0.8062, -2.9030, -1.0970, -4.9377, -0.7755,  0.3766, -3.2068,
        -2.0424, -4.4863, -1.1256, -5.6297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5778, -1.6685, -2.2276, -2.9370, -6.2345, -1.0788, -3.3783, -3.1049,
        -1.8112, -1.7960, -4.8411, -1.5982, -2.5018, -3.7061, -0.2513, -2.2299,
        -4.5253, -0.1739,  1.5056, -3.1234], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9023, -2.4484,  2.0292, -3.5277, -1.2315, -2.6971, -2.4151, -4.2667,
         1.4482, -6.5755, -1.4769, -2.3448, -2.1384, -4.1878, -1.4905,  0.6595,
        -3.1029, -1.9529, -2.4045, -6.0418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3695, -7.9594, -1.5911, -0.8061, -1.3656, -1.8408, -4.9701, -2.1035,
         0.2513, -6.3247, -1.5747, -6.4404, -5.2563, -1.5206,  2.1177, -1.8566,
        -3.6965, -2.7238, -2.7344, -3.2949], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1748, -3.4951, -1.6268, -4.4701, -2.3814, -0.5149, -1.2587, -1.7703,
        -1.6043, -5.2619, -1.4378, -6.3486, -3.3897, -3.3237, -9.0312, -5.8861,
        -9.6266, -4.9146, -1.4707, -4.3209], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3876, -2.1686, -0.8298,  1.9752, -3.6458, -1.9939, -3.7256, -1.0719,
        -4.3228, -3.1228, -0.0086, -2.7428, -1.5246, -2.4588, -3.6138, -1.9413,
        -1.5520, -5.4205, -1.0910, -4.3946], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7072, -1.7256, -1.5254,  1.9505, -4.1312, -0.9255, -3.4696, -0.9471,
        -7.0907, -1.4228, -0.8758, -3.7750, -1.3513, -4.0103, -0.0981, -4.9141,
        -3.4980, -1.1555, -3.4763, -1.4867], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1014,  -3.7614,  -1.3895,  -2.8533,  -0.5068,  -1.3650,   2.4523,
         -3.4491,  -6.6404,  -4.4657,  -2.3001,  -3.7551,  -3.0226,   0.7806,
        -14.7659,  -1.8317, -14.5924, -13.6975,  -5.0533, -17.1322],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9392, -6.3390, -3.3178, -6.3190, -0.9090, -2.9462, -3.5813, -4.1633,
        -2.9746, -7.1283, -3.2426, -0.2396, -4.2419, -3.4663, -4.0141, -5.7095,
        -3.3448,  0.2296, -3.1542, -3.0675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6861, -6.3922, -4.8051, -5.0260, -1.2861,  1.3149, -8.6930, -1.6481,
        -3.5852, -3.7059, -4.3417,  0.0651, -2.9017, -2.9698, -2.4500, -1.0198,
        -6.2259, -4.9973,  0.7518, -2.6155], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2764,  -1.8382,  -1.1815,  -3.4422,  -3.0123,  -4.3790,   1.6415,
         -2.0417,  -1.5454,  -1.4182,  -5.5480,  -0.7161,   1.1212, -10.2135,
         -2.6870,  -3.6360,  -4.3531,  -2.7900,  -3.8180,  -6.5205],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7025,  -1.6515,  -3.1209,  -0.4821,  -7.3422,   1.4650,  -4.5522,
         -0.4399,  -4.3102,  -7.2447,  -3.2833, -13.2983,  -1.8157,  -3.0215,
        -12.9579,  -2.2135,  -2.4416,  -1.5417,  -7.7164,   2.8439],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7553,  -3.5560,  -3.3532,  -0.6795, -17.0583,  -4.4726, -11.4773,
         -3.6307,  -4.3270,  -4.4422,   0.7948,  -4.4183,  -2.4546,  -4.7032,
         -1.9965,  -4.4074,  -0.3583,   0.5404,  -3.9800,  -0.8066],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5050,  -5.8286,  -1.0988,  -1.7689,  -4.8668,  -2.6555,  -0.1531,
         -3.6381,  -3.0544, -25.6389,  -3.3352,  -7.8177,  -2.8063,  -5.5154,
         -1.2945,  -0.7472,  -3.4984,  -1.7778,  -3.6976,  -1.7713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2455, -3.3418, -0.7431, -2.7573, -1.0616, -5.7620, -1.3094, -0.0466,
        -3.3494, -2.0267, -3.4439, -2.1194, -7.3036, -0.5224, -4.2074, -2.4839,
        -1.9407, -2.6412, -2.4588, -5.7920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7673, -4.2769, -0.0547, -3.2583, -2.4161, -1.1769, -2.4471, -4.5236,
         2.2146, -2.5174, -1.7069, -5.8073, -3.3491, -4.3484, -0.4424, -1.8770,
        -2.3858, -1.1306, -2.7738, -3.6594], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0811e+00, -2.0599e+00, -5.8749e+00, -1.2683e+01, -6.6350e-01,
        -5.4208e+00, -1.2029e+00, -1.0532e+00, -3.0257e+00, -2.2706e+00,
         2.3767e+00, -2.3692e+00, -4.6946e-01, -2.8063e+00, -3.5758e+00,
        -4.1983e+00, -2.1662e+00, -2.2505e-02, -2.3499e+01, -1.3747e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1826, -5.0951, -1.6712,  1.3514, -3.9487, -1.9491, -4.1432, -0.9544,
        -6.6781,  0.7614, -2.7147, -4.2756, -1.9078, -3.2206, -6.8570, -1.8986,
         1.4453, -2.9878, -1.5944, -4.5455], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8616,  2.0606, -2.8323, -1.9018, -2.9250, -2.8725, -7.9508, -1.2294,
         0.0183, -3.5893, -2.7207, -2.2897, -0.6547, -5.4728, -0.5045, -1.7261,
        -4.7209, -2.5405, -1.3408, -2.5682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1309,  -4.2180,  -8.5217,  -5.6565,  -1.0699,  -8.0667,  -3.5859,
        -10.1122,  -5.2676,  -5.8451,  -7.4115,  -2.0399,  -2.9331,  -0.4693,
         -4.6620,  -2.1501,  -3.3319,  -4.7244,  -3.0447,  -0.0129],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9616, -2.9658,  0.0679, -6.5001, -2.5954, -0.0374, -3.6416, -1.7890,
        -2.4457, -3.3249, -5.1710, -3.8825, -1.1677, -6.7396, -3.5955, -3.7655,
        -2.6103, -0.1844,  2.0450, -3.3186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1508,  -2.5537,  -1.7872,  -1.3488,  -3.5647,  -1.4872,   1.2488,
         -2.1328,  -2.5835, -13.3329,  -4.4960,  -8.4157,  -3.6581,  -4.7903,
          1.7027, -24.3052,  -5.0887,  -4.2095,  -9.9917,  -4.2342],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5988, -4.9694, -0.6785, -0.2857, -2.4552, -1.8019, -3.2200, -1.3298,
        -3.8019, -0.3557,  0.9733, -2.8838, -0.6871, -2.5091, -3.3402, -2.3095,
         2.7109, -2.4266, -2.2698, -1.2402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5288,  2.0008, -4.5797, -3.1390, -2.9418, -4.3082, -2.3649,  1.3489,
        -3.0720, -0.7796, -0.8481, -3.2479, -0.4509,  1.8077, -1.8028, -0.9332,
        -1.1183, -5.2096, -2.1840,  0.8500], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7398,  -5.4916,   0.2327,  -3.7994,  -2.8915,  -2.6364,  -7.2413,
         -7.0558,  -5.4494,  -3.7559,  -3.5537, -10.4554,  -7.5849,  -6.2671,
         -6.4855,  -0.9392,  -3.0018,   1.9569,  -2.9615,  -2.2334],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6552,  -1.3011,  -1.1541,  -6.3618,  -3.0222,  -1.6997,   1.5937,
         -2.5483,  -2.0871, -19.1675,  -2.5303,  -7.7214,  -2.5255,  -1.7968,
          1.7976,  -2.9683,  -1.9306,  -2.7634,  -2.9578,  -5.9280],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6149, -5.4277, -2.3764, -0.4149, -5.8913, -1.0254, -2.2253, -4.3348,
        -0.5282,  1.9087, -4.4487, -2.0542, -2.1623, -1.7187, -4.2573, -4.1827,
        -0.0748, -2.8053, -0.5406, -3.2671], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5062, -1.7440, -5.7438, -4.3014, -4.0316, -5.8947, -5.0650, -1.1049,
        -6.7124, -2.2892, -4.8019, -5.5645, -2.6906,  1.3353, -3.1351, -3.4849,
        -2.2466, -4.5598, -1.4197,  1.4353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3874,  -1.8158,  -4.9412,  -2.3979, -10.3574,  -6.3609,  -7.4415,
         -1.3755,  -3.8717, -15.2644,  -4.4685,  -1.7240,  -2.3870,  -0.5137,
         -6.2717,  -2.0078,  -0.6231,  -5.7297,  -1.9495,  -4.6218],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8189, -2.5461, -6.7455, -0.8831, -1.6327, -3.4323, -0.3410, -2.0971,
        -4.8542, -1.1116,  2.5904, -2.8196, -4.3847, -1.0587, -3.1254, -2.4912,
         1.9256, -4.7653, -3.3425, -1.7499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3782,  -6.3959,  -0.2604,  -2.2355,  -3.5357,  -2.1039,   2.5868,
         -1.4438,  -2.7478, -12.7654,  -3.3557,  -3.7990,  -6.8708,  -2.0492,
         -2.0514,  -0.4130,  -1.8175,  -2.0175,  -4.0913,  -2.3586],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2069, -3.9647,  0.9068, -3.5232, -3.5885, -2.2641, -0.3237, -5.9583,
        -0.0422, -1.2014, -3.1400, -1.5604, -0.5960, -0.7653, -2.0146,  1.3599,
        -4.0775, -2.7142, -3.8263, -5.4000], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7637, -2.3828, -3.8394, -3.7859, -1.0058, -6.5288, -0.8830, -3.1064,
        -5.5147, -2.3307, -0.4243, -2.5034, -1.0138, -3.3805, -1.5713, -5.3324,
        -0.9249, -1.1623, -2.6463, -0.4034], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9826, -4.4655, -6.2428, -1.7423, -3.6414,  3.0493, -4.2440, -1.4388,
        -0.9692, -2.9393, -2.5802,  1.4294, -9.7534, -2.0931, -3.0776, -2.8910,
        -3.5015, -1.3026, -4.2814, -2.7969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3701, -3.9606, -0.7624, -4.0517, -0.0700, -1.2876, -3.1827, -0.9685,
        -2.1480, -0.9174, -5.8788, -0.1593, -1.0096, -2.7337, -1.1347, -2.9239,
        -1.1023, -6.0704, -1.3778, -1.9917], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7535, -3.8152, -2.8118, -7.8029, -1.7093,  1.3877, -3.9647, -2.6048,
        -4.8304, -2.5105, -4.1875, -2.6354, -0.7921, -3.6144, -4.1169, -2.9264,
        -3.5156, -1.2113, -0.3718, -3.2253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2219, -0.3068, -0.2484, -2.6327, -1.3377, -3.5387, -3.0455, -1.9895,
         1.1561, -2.5265, -1.2864, -2.7457, -1.2544, -5.3183, -0.2106,  1.2594,
        -2.4572, -1.0203, -2.1200, -2.1403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2973e+00, -4.5718e+00, -6.7305e-01, -2.2728e+00, -7.9163e+00,
        -6.4047e+00, -3.5851e+00, -6.0683e+00, -6.0436e+00, -1.3968e+00,
        -2.1159e+00, -4.0716e+00, -3.5201e+00, -4.8773e+00, -2.5995e+00,
        -1.6786e+00,  1.3141e+00, -8.8724e+00,  2.2185e-02, -2.2528e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3936, -2.6022, -3.4550, -5.7924, -4.2383, -0.4811, -3.3520, -3.0958,
        -2.7765, -4.3198, -2.8417,  0.8308, -1.3032, -1.6760, -1.4444, -4.5810,
        -0.6498,  1.4754, -3.0611, -2.1785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4776,  1.7504, -8.8228, -2.6719, -3.2512, -1.2487, -2.9030, -0.1329,
        -3.9958, -2.3154, -2.4274, -3.6293, -2.3593,  0.4229, -3.3208, -1.4944,
        -5.3556, -3.4972, -4.4042, -2.3291], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8963,  -2.4742,  -2.2838,  -1.9497,  -1.4788,  -0.8209,  -5.8903,
         -0.8038,   1.7032,  -4.6895,  -1.1356,  -1.7673,  -4.8640,  -0.9055,
          1.8241,  -3.8234,  -2.4677, -29.6284,  -4.8740, -14.4286],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0725, -7.3628, -5.2345, -3.9755, -3.1972, -4.6537, -5.2350, -6.7406,
        -8.1426, -6.8084, -2.2002, -3.2783, -2.2606, -4.8258, -3.9830, -5.3985,
        -0.0164, -3.9644, -2.7813, -3.3712], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2563, -1.8505, -4.2830, -0.9767, -4.8806, -0.3204,  0.8117, -4.5102,
        -2.9521, -1.2100, -1.3296, -5.2522, -2.5858, -2.8314, -3.1017, -2.6443,
        -2.8879, -5.4052, -1.6414,  1.3492], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9975,  -3.0292,  -2.0331,  -4.9212,  -3.3760, -11.5268,  -4.4334,
         -0.4698,  -6.8764,  -2.7970,  -4.0102,  -2.7021,  -4.2204,  -1.7119,
          0.5568,  -2.1058,  -1.6233,  -4.8911,  -2.1826,  -0.8012],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8920,  -2.2034,  -1.2829,  -1.1517,  -4.8012,  -1.4926,   2.1097,
         -3.8054,  -3.1378, -15.2169,  -7.8554,  -5.1277,  -9.6712,  -6.2711,
         -4.3393, -15.4409,  -2.5794, -31.0038,  -4.9232,  -2.2866],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.1455,  -2.4863,  -9.8027,  -1.3616,  -4.3636,   2.1627,  -1.9555,
         -5.6489, -16.3366,  -5.3619, -13.6245,  -2.9900,  -3.4430,   1.6917,
         -3.0900,  -6.0983,  -2.2401,  -4.3417,  -4.6347,  -7.6795],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0175, -25.9417,  -0.5911,  -6.7326,  -1.1806,  -3.9030,  -4.8976,
         -2.5458,   0.5107,  -8.6752,  -2.2014,  -0.6658,  -3.8085,  -1.2720,
          0.6127,  -4.4094,  -2.8759,  -1.3920,  -2.1086,  -9.1014],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9908, -2.3424, -4.8805, -0.3143, -1.6857, -3.5633, -2.1968, -2.1992,
        -3.9232, -1.3059,  1.0053, -5.6420, -1.7695, -3.0551, -0.8155, -1.7261,
         1.8538, -3.3554, -1.4572, -3.0600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2572,  -3.6848,  -5.3386,  -4.0737,  -2.8766,  -7.2819,  -4.8933,
         -9.3679,  -7.1743, -11.6164,  -4.6736,  -4.7411,  -3.6768,  -5.3455,
         -5.2218,  -5.3047,  -1.6890,  -2.7606,  -2.5013,  -3.5559],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8779,  1.8446, -4.2432, -2.9613, -4.0799, -2.7112, -3.5139,  0.6258,
        -2.8902, -1.8969, -3.9631, -2.4298, -5.4748, -1.8273,  0.5568, -7.5496,
        -1.6530, -1.0298, -2.1668, -2.5844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.7524,  -4.5527,  -6.1933,  -2.3342, -11.9559,  -2.7371,  -0.4900,
         -4.5976,  -1.9280,  -2.7332,  -2.5161,  -7.2167,  -0.3908,  -0.8300,
         -2.6518,  -3.1335, -17.7614,  -5.1568, -10.1265,  -0.4673],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4232,  -2.4570,  -9.6576,   0.3034,  -6.6748,  -1.1113,  -1.8315,
         -2.3951,  -2.6872,   3.2143, -14.9864,  -2.7651, -21.5612,  -2.6846,
         -8.4995,  -0.4468,  -4.9658,   1.7292,  -4.4894,  -3.6586],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9610,  2.0886, -6.5560, -1.2159, -3.3333, -1.0056, -5.4401, -1.2043,
        -2.7618, -2.5461, -0.6407, -3.8124,  0.1829, -6.2233, -0.6797, -3.0865,
        -4.3750, -2.3992, -4.0149, -2.2512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4890, -1.1149, -2.7853, -3.1825, -6.8025, -3.1113, -1.2791, -2.7214,
        -0.7533, -0.7719, -4.3887, -0.5056,  1.7288, -2.8930, -1.5218, -5.0353,
        -1.9297, -4.8151, -4.7599, -3.4943], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4602,  -3.9851,  -8.3562,  -3.1218, -16.9698,  -7.1769,  -8.5866,
         -7.2450,  -6.3995,  -2.0286,  -4.2156,   1.4908, -19.8765,  -1.9173,
         -2.1112,  -7.5892,  -6.6576,  -1.7435,  -5.5412,  -1.9768],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0790, -3.2660, -1.9716, -3.0662, -4.5420, -1.4275,  0.4872, -4.8398,
        -0.4910, -2.4286, -1.6314, -3.9079,  1.5555, -4.5661, -3.7578, -2.4360,
        -1.6229, -2.8599,  1.0744, -4.4898], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2245, -7.7364, -0.9490, -7.1196, -0.4945, -3.5486, -9.2301, -1.9833,
        -3.2349, -4.5934, -3.1527,  1.6680, -4.9435, -2.0179, -7.1690, -4.4986,
        -4.2675, -6.3702, -1.9106, -3.6671], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3326, -2.8138, -1.0583, -5.4737, -0.2928, -1.0259, -3.1605, -1.3220,
        -2.0283, -2.6311, -1.4667,  2.3943, -2.3893, -2.4172, -2.1511, -1.3180,
        -5.3503, -1.9386, -0.3434, -5.3487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5906,  0.7661, -5.6542, -3.7724, -3.4286, -1.7758, -6.9168, -3.8153,
        -0.2509, -2.4246, -2.2777, -2.1712, -1.6994, -3.3856,  2.0060, -3.8374,
        -1.4885, -2.5564, -0.3438, -4.5208], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5968, -4.0772, -1.9920, -1.9202, -2.7012, -0.4548,  1.8871, -2.9205,
        -3.9937, -3.5566, -3.6722, -0.7625,  0.3919, -2.8082, -0.8859, -2.9220,
         0.4032, -6.5964,  0.5345, -0.8583], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6877, -2.4699, -1.0919, -4.6036,  0.4056, -5.1023, -0.4253, -2.4853,
        -3.4576, -0.6167, -4.1236, -3.7132, -3.9093,  2.1486, -1.4151, -0.4752,
        -2.1709, -0.3378, -2.4573,  0.8875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8050, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3130, -4.3820, -3.1402, -1.7705, -2.6943, -3.7525, -3.1500, -9.0902,
        -7.8463, -1.4040, -5.0886, -3.4917, -2.2426, -5.4055, -8.3585, -2.7153,
        -2.2912, -3.5979, -2.0589, -2.6992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7069, -15.8656,  -5.8893,  -8.2235,  -5.9748,  -6.5142,  -1.9728,
         -8.3232,   1.9207,  -4.9450,  -2.3457,  -4.1326,  -5.5112,  -3.7623,
          0.2469,  -2.6338,  -3.1030,  -2.3665,  -2.1877,  -6.0392],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4213, -0.9060, -3.5704,  1.1898, -7.2636, -3.2672, -3.6468, -0.4482,
        -5.2420, -1.2907, -0.0453, -3.1453, -0.8763, -2.1854, -2.3879, -1.6987,
         2.6707, -2.5823, -0.7217, -3.5484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6401, -1.0894, -4.1926, -2.4804, -0.6932, -5.4455, -2.5559, -2.4652,
        -5.6174, -3.4111, -3.0579, -3.1180, -2.1312,  0.5357, -3.4927, -3.9110,
        -2.8020, -4.4092, -0.2821,  1.6487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6353, -2.7666, -4.4709, -0.5590, -4.2619, -0.8475,  1.0949, -1.2699,
        -1.3934, -0.8600, -1.8251, -0.9233,  2.7117, -2.7954, -0.8665, -2.4034,
        -1.6587, -4.6710, -2.1770, -0.6938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3450, -2.1145, -1.6801, -1.8376, -6.2101, -1.9067,  0.8733, -4.3347,
        -1.8308, -1.3989, -4.1377, -1.0324,  2.3238, -5.1776, -2.0468, -4.6066,
        -3.5695, -1.7763,  2.1820, -4.5099], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0595,   2.8610,  -4.2468,  -0.9416,  -3.6908,  -0.1670,  -6.1999,
         -0.8537,  -0.6968,  -3.5323,  -1.8266,  -3.7966,  -0.1339, -11.4295,
         -3.3058,  -1.8885,  -3.5951,  -2.2078,  -4.4908,  -5.3203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1957,  1.7131, -4.8033, -2.7101, -4.7727, -1.4829, -2.8564, -0.6451,
         0.1875, -3.2680, -2.9529, -2.9922, -6.1764, -1.7473,  0.8597, -2.3865,
        -0.7529, -3.4554, -2.1797, -4.6699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7729, -0.4104, -2.0965, -2.1316, -1.7789, -2.5905, -2.3795, -6.3103,
        -0.3166, -2.1453, -3.4355, -2.0697, -3.3703, -5.6781, -2.9128, -5.5979,
        -8.1921, -5.1269, -1.2120, -1.7416], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8913,  -2.2272,  -1.7402,  -3.0070,  -2.6978,   1.5952,  -4.0821,
         -4.0866, -21.6079,  -2.4524,  -6.9182,  -0.7738,  -2.6340,   2.5523,
         -4.1269,  -3.8628,  -1.2593,  -2.8821,  -6.9571,  -1.9014],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2904,  -3.7732,  -6.6733,  -2.0902,  -1.5422,  -5.0605,  -3.6883,
        -16.1442,  -4.5310,  -8.4314,  -2.8157,  -2.3830,   1.5591,  -2.4727,
         -1.7952,  -3.2072,  -5.3956,  -1.3300,   1.7824,  -4.9115],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6865, -2.6813,  0.4824, -7.7919, -0.8218, -3.1732, -4.8485, -3.2260,
         0.0676, -3.3789, -3.6155, -4.4438, -2.0145, -3.2936, -2.8872, -5.8468,
        -2.9179, -8.1604, -2.7632, -6.1299], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6238,  -2.1320,  -3.8013,  -2.3693,  -0.9357,  -2.1341,  -2.0355,
          0.9967,  -4.2474,  -1.4699, -14.4544,  -6.1351,  -3.5089,  -6.6041,
         -3.0921,  -3.2714,   2.0255,  -3.8172,  -1.6125,  -2.6981],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8144, -5.5585, -6.4467, -6.4258, -5.5080, -1.4551,  1.6555, -7.7775,
        -3.0192, -1.1224, -0.6721, -5.4575, -1.6221, -0.2650, -9.4391, -5.4644,
        -4.4046, -4.5514, -7.7601, -2.0599], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6789,  -3.1343,  -1.3878,  -1.0598,  -3.5691,  -2.0069,   2.2359,
         -2.6266,  -2.5707,  -1.8128,  -3.4931,  -2.4022,   1.2231,  -3.1681,
         -6.0501, -11.7293,  -6.5204,  -5.7169,  -6.2218,  -1.8348],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9544,  -2.7111,   2.3359, -18.9571,  -3.5156,  -2.5042,  -3.6189,
         -6.1835,   1.7869,  -5.9120,  -2.7622,  -4.2047,  -1.9205,  -6.8034,
         -1.2503,  -1.7535,  -3.4049,  -1.6317,  -3.1111,  -1.1660],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4764, -1.2947, -5.9476, -1.0543, -0.5188, -3.7124, -2.2099, -4.5358,
        -2.2635, -6.1622,  0.4340, -4.8087, -4.5046, -2.3025, -2.0880, -1.8921,
        -6.0444, -0.3042, -2.0856, -3.1157], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8690,  -3.9974,  -4.1819,  -2.6255,  -3.2812,  -3.5122,  -2.7694,
        -19.3571,  -5.4365,  -9.2374,  -2.8662,  -5.0342,  -1.6992,  -0.6116,
         -4.1313,  -2.1274,  -1.6813,  -3.0411,  -2.0611,   2.8457],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4923, -4.0885, -2.8919,  1.5187, -1.9308, -4.5670, -3.3438, -1.7097,
        -6.0149, -1.9605, -0.1319, -3.6930, -1.8589, -2.0913, -2.9440, -2.1835,
         1.8330, -7.6715, -1.9193, -3.2128], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5950,  -2.6316,  -0.1746,  -3.5868,  -5.9855,  -3.5978,  -0.8645,
         -4.1394,  -1.8014,  -2.5556,  -1.7586,  -5.5674,   0.1200,  -1.9550,
         -4.2337,  -3.8985, -21.8896,  -2.1619,  -7.4681,  -0.7523],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0385,  -1.5989,  -3.3635,  -2.9489,  -3.6280,  -4.9808,  -3.8450,
         -2.9215,  -6.2072,  -1.7044,  -0.0703,  -3.4299,  -3.0564,  -2.7711,
         -1.2093,  -1.5382,   1.5588,  -5.9437,  -2.9746, -17.4372],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5604, -2.4553, -0.3483, -4.7549, -2.1865, -2.4433, -1.7625, -6.1601,
        -3.8103, -1.1444, -3.7502, -0.6562, -0.7425, -3.8203, -1.7103,  1.6923,
        -2.7351, -2.0753, -5.2051, -3.3131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5971, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0682,  -7.0013,  -1.8543,  -5.5233,  -4.1557,  -6.1387,  -1.4983,
         -1.6272,  -3.6318,  -4.1276, -13.9005,  -6.3038,  -2.5584,  -5.9066,
         -1.1019,  -2.8538,   1.4042, -18.2872,  -0.8413,  -3.3732],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4106, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5030,  -1.0550,   1.0508, -12.4516,  -3.1770,  -4.1527,  -4.5959,
         -6.2957,  -0.7864,  -2.2859,  -4.7611,  -0.9917,  -1.6732,  -4.5786,
         -1.3188,   1.7385,  -3.1019,  -0.1739,  -2.6899,  -1.6523],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1183, -3.0429,  0.7026, -4.7125, -7.3266, -2.5517, -7.0471, -1.1986,
         1.4345, -1.5055, -2.0920, -0.7971, -2.0437, -5.7451, -0.7812,  0.2988,
        -2.5976, -0.8589, -2.1469, -2.1066], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9374,  -1.1941,  -1.8112,  -1.0044,  -3.7770,   2.4251,  -3.0965,
         -3.3017, -13.1445,  -6.8268,  -3.0628,  -6.0279,  -2.7969,  -2.0892,
          1.5446,  -1.7645,  -0.8519,  -1.4190,  -3.9118,  -0.8024],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1613, -4.2151, -6.2292, -2.9501, -1.7940, -7.8163, -2.3679, -1.5801,
        -1.5030, -5.3133, -2.1508,  1.5781, -3.1208, -1.0786, -2.9041, -3.0924,
        -1.4354,  0.5660, -2.9648, -0.5976], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7565, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4651, -3.1546, -5.8824, -5.4770, -1.7435, -1.5179, -4.7297, -1.9406,
        -2.5872, -7.6551, -1.9748,  0.8087, -5.9454, -1.7302, -4.3876, -3.7922,
        -2.5137,  0.1228, -4.4287, -3.6216], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5684, -1.4270, -7.5927,  2.6704, -2.9942, -3.4167, -5.5878, -3.7163,
        -4.1782, -1.8301, -2.7271, -4.5392, -6.5635, -5.1604, -4.6574, -2.4945,
        -5.1145,  0.3779, -4.2190, -1.9175], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5164,  -0.5809,  -1.7611,  -2.4327,  -0.9404,   0.6942,  -1.9968,
         -1.5809, -13.9020,  -3.8584,  -7.8171,  -1.3063,  -2.4362,   1.7707,
         -6.4492,  -3.2724,  -2.1122,  -7.0902,  -1.9333,   2.4991],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2657,  -7.7283,  -7.3811,  -8.3977,  -7.1836,  -8.0777,  -7.2214,
         -7.3533,  -6.5416,  -7.3838,  -6.7332,  -7.2715,  -7.8208,  -7.6396,
         -6.9225,  -7.0941,  -8.4186,  -6.2080, -10.9816,  -7.1177],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8621,  -2.5282,  -1.5683,  -8.8774,   1.9331, -37.4380,  -1.9746,
         -4.5413,  -7.6978,  -2.7997,  -5.7032,  -7.0250,  -3.8661, -12.0444,
         -5.8134,  -3.5883,  -7.4536,  -2.3025,  -5.9147,   2.9771],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2251, -2.6095,  1.2333, -2.5561, -3.0826, -2.8234, -2.4885, -4.3027,
        -3.0101, -0.3052, -1.5490, -0.9400, -1.6674, -3.4819, -1.1176,  2.8850,
        -3.5487, -0.6726, -1.7635, -0.1574], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1467, -4.2689, -3.6671, -2.7514, -3.9391, -2.7923, -2.6248, -6.0848,
        -1.5786, -0.6350, -7.6586, -1.8542, -1.6292, -2.8821, -1.6571, -7.4918,
        -5.9790, -3.9523, -6.5164, -1.4469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0209, -4.3143, -1.7133, -2.6231, -6.3674, -2.5845, -5.0680, -3.2538,
        -2.4623, -3.9472, -3.1277, -0.7476,  1.4949, -3.9793, -1.2398, -3.9647,
        -5.1901, -3.0382, -0.0383, -3.3822], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2421, -6.6362, -7.1236, -1.2630, -6.0132,  1.3056, -3.8431, -1.9040,
        -1.7217, -2.7471, -2.9706,  0.9933, -3.8356, -0.6967, -2.5265, -1.8459,
        -5.0274, -0.9096,  0.2484, -2.3027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3476, -8.4121, -4.9647, -4.2044, -6.7935, -5.7207, -2.2566, -1.8580,
        -5.8826, -1.2212, -1.6579, -3.6935, -0.3227, -2.2928, -2.4269, -0.8516,
         1.6466, -1.6025, -0.8921, -2.5851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3913,  -1.8647,  -4.5114,  -2.8307,   1.8204,  -4.5495,  -0.6345,
         -0.9544,  -2.3691,  -0.6462,   1.8159,  -1.0534,  -2.1582,  -2.1754,
         -2.4785,  -1.8852,   1.6097,  -2.3470,  -3.8135, -14.3183],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0022, -4.9794, -1.2241, -2.6861, -3.2103, -0.9533,  1.8640, -4.0537,
        -1.1778, -0.4037, -2.7055, -1.9356,  1.5787, -3.5078, -1.2263, -3.1614,
        -1.0531, -5.2902, -3.2800, -0.3285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1897, -4.3770, -5.7884, -2.9366, -3.8915, -6.5923, -4.8880, -4.8337,
        -5.9289, -4.7775, -5.7799, -2.1805, -1.1440, -5.5890, -4.7637, -6.3689,
        -8.8738, -3.6426, -7.1868, -1.5710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7807, -6.7512, -4.4176, -5.9866, -2.4471, -4.8097, -1.0627, -5.4087,
        -2.3744, -1.6369, -2.5565, -4.1782,  0.5516, -5.5962, -2.1835, -7.4545,
        -7.7324, -3.9942, -7.4744, -1.5327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2337, -2.3972, -3.6154, -6.4739, -2.4072, -2.3389, -3.9238, -5.2411,
        -3.7069,  1.3381, -3.3754, -1.3692, -0.8867, -2.5984, -0.4212,  1.4113,
        -3.4387, -3.4902, -1.8583, -1.9386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6402,  -2.6614,  -2.7792,  -5.6944,  -0.5089,   0.5520,  -2.1998,
         -2.3112,  -0.7229,  -7.4064,  -1.3661,   2.3777,  -3.5432,  -9.0040,
        -14.1974,  -5.6428,  -5.4287,  -5.3998,  -0.9975,  -4.1983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9667, -4.6552, -0.8440, -1.8678, -3.2514, -1.0745,  1.9698, -2.3229,
        -1.9015, -2.9347, -5.0833, -3.9748, -0.3068, -2.3249, -1.4968, -2.8827,
        -0.4822, -2.9036,  2.9122, -3.9569], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5398,  1.5802, -2.5391, -0.5749, -4.0777, -1.4012, -6.5742, -3.0871,
        -0.0121, -2.1722, -2.7541, -2.1613, -0.1971, -5.8947,  0.2155,  0.0660,
        -2.4621, -1.6250, -1.3968, -3.1641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7570, -1.7209, -1.7968, -1.7311, -5.2164,  0.0361, -2.2033, -3.7108,
        -1.8639, -3.1559, -3.0971, -1.8996,  0.7458, -2.1755, -0.7249, -3.7618,
        -1.7573, -4.0293, -2.6006,  0.6495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9240,  -3.0234, -13.6280,  -4.6237,  -0.2260,  -5.1459,  -2.9685,
         -5.5066,  -5.4799,  -7.8422,  -4.9661,  -6.1790,  -1.4643,  -5.4018,
         -0.5454,  -2.8737,  -0.8546,  -2.5537,  -5.4627,  -2.5562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1413,  -3.0720,  -3.6914,  -3.7716,  -2.7149,  -2.5363,  -7.4470,
          0.4333,  -0.8120,  -9.8093,  -1.2488,  -2.0411,  -5.9759,  -1.7728,
        -39.2899,  -3.8400,  -6.8123,  -3.4703, -16.5244,  -6.8035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3723, -0.2373, -3.7672, -0.7502, -1.3750, -1.7617, -4.8540,  1.9421,
        -3.2449, -1.0488, -0.7368, -1.7338, -6.0440, -0.2942,  0.3742, -2.1858,
        -0.8311, -2.1329, -0.2524, -1.7592], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7272, -5.5914, -1.1776, -3.5858, -5.3016, -1.9682, -0.2245, -8.2418,
        -6.0106, -8.6132, -2.8418, -5.9616, -2.4224, -2.6103, -5.3538, -2.8209,
        -2.3579, -8.9915, -2.5307,  0.1395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2848,  -3.6077,  -2.7177,  -3.9292,  -0.7973,  -5.3990,  -0.5506,
         -0.0399,  -3.4070,  -0.3580,  -1.0224,  -0.5832,  -4.6450,   3.2271,
         -1.9062,  -0.9772,  -6.5949,  -5.6243, -14.9385,  -2.8340],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1731,  -4.9076,  -9.2130,  -4.8423,  -6.3665,  -2.3916,  -1.5906,
          1.6188,  -6.0614,  -1.4086,  -1.1012,  -3.4710,  -2.6268,  -0.5835,
         -2.9078,  -0.0236, -22.4122,  -2.0346,  -9.3466,  -0.8139],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4285, -3.8015, -5.6018, -3.7427, -0.0802, -2.0133, -1.9146, -1.2267,
        -4.6606, -1.2273,  1.8310, -5.1313, -1.2076, -4.1071, -2.4284, -2.2449,
         1.8632, -1.8978, -0.3368, -3.2171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2163, -6.2981, -0.1749,  0.6480, -2.4204, -0.3139, -1.4928, -1.9871,
        -0.3765,  1.3373, -1.9788, -1.0225, -3.2867, -1.0343, -8.7051, -1.5668,
        -1.9266, -3.8209, -1.6349, -2.0543], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6800, -2.3242, -3.4701, -1.9372,  2.5060, -8.6363, -2.5183, -3.0397,
        -1.0512, -4.6830, -0.8574,  2.4536, -3.5231, -0.5653, -1.6844, -2.6558,
        -5.3754, -2.9031,  0.1236, -2.6743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7208, -1.3836, -1.2539, -7.3301, -2.0066,  2.0358, -3.4939, -3.5690,
        -3.1874, -1.5373, -3.2472,  2.9926, -2.3775, -2.9794, -3.8422, -2.5799,
        -4.3316, -2.1589,  1.8400, -4.7626], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3447, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2822, -1.2773, -0.8468, -7.5313,  0.2870, -4.2553, -1.9729, -2.7572,
        -1.9021, -6.0915, -3.2582,  0.4295, -2.4142, -2.0535, -1.8566, -2.4864,
        -1.5899,  2.2671, -1.2678, -1.6995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5633,   1.8437, -15.5797,  -7.6936,  -4.6136,  -9.5516, -17.7835,
         -2.7709,  -5.4999,  -6.3415, -28.6649,  -8.7040,  -5.8729,  -8.2565,
         -6.3276,  -4.9737,  -3.6951,   2.3638, -15.2810,  -3.9075],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6806,  -4.1177,  -1.9010,  -5.4092,  -8.6432,  -5.7691,  -2.3823,
         -0.0596,  -6.3787,  -5.0972, -15.4269,  -4.1554,  -8.6088,  -1.5011,
         -0.8618,  -0.4866, -21.6357,  -2.2974,  -4.8959,  -6.9205],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8593, -2.0460, -4.9957, -0.9399, -1.7066, -3.1333, -1.2010, -2.0363,
        -0.9288, -7.3496, -0.0803, -1.8592, -6.4341, -1.0632, -2.2187, -0.4474,
        -4.2736,  3.1608, -2.6338, -2.5401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2003, -3.1511, -3.2168, -2.6946, -1.5425, -5.5531, -1.2625,  0.5528,
        -2.8001, -2.2237, -1.1046, -2.8720, -1.3548,  2.8281, -2.4594, -1.7443,
        -3.0426, -1.6656, -5.4261, -2.1778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8921,  -7.2378,  -0.9787,  -1.6715,  -2.4475,  -1.4820,  -4.0451,
         -2.6216,  -5.5578,  -0.2327,  -0.0809,  -5.6556,  -1.3005,  -7.3599,
         -6.0967,  -1.3930,  -6.1320,  -2.7655, -12.7244,  -7.3877],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8361,  -2.1444,  -0.8973,  -2.2630,  -3.8719,  -0.5510,   0.7501,
         -2.3645,  -1.3589,  -1.4572,  -0.7299,  -2.3106,   2.6989,  -3.8991,
         -3.3556,  -3.5253, -12.7020,  -3.8302,  -9.4821,  -7.5914],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0920, -0.2402, -0.1057, -2.4036, -0.4496, -3.2039, -0.7992, -6.5861,
        -0.8872,  0.3143, -2.9950, -2.8463, -5.0973, -1.0786, -5.8313,  0.1036,
        -2.3627, -6.4792, -1.0742, -1.7483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2401, -9.8143, -7.4314, -3.6237, -4.5433, -6.7581, -3.3736, -7.0541,
        -6.6659, -4.5961, -5.1150,  0.6965, -4.8152, -2.8768, -4.8414, -6.7180,
        -7.0331, -6.3816, -7.3222, -6.5889], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7375,  -1.5598,   2.9776,  -9.7402,  -3.0413,  -2.5790,  -1.5457,
         -4.5940,  -1.5977,  -2.6963,  -6.0076,  -3.7159,  -3.6970,  -3.1195,
         -4.4855,   1.1903,  -6.1655,  -9.2200, -13.6628, -12.1514],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1509,   1.4578,  -3.6183,  -4.3736, -13.4623,  -3.0901,  -4.9154,
         -1.6554,  -2.2979,   3.0021,  -4.0880,  -2.7995, -16.9298,  -5.4518,
        -16.5065,  -1.0673, -11.0432,   2.9179,  -6.1341,  -7.8280],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6133,  -4.0017,  -1.2459,  -5.6442,  -2.0676,  -2.9982,  -1.7177,
         -8.9375,  -4.3130, -18.7776,  -0.2703,  -5.4695,   2.0678,  -7.2253,
         -4.7349,  -8.8960, -12.3795,  -4.7958,  -7.1070,  -2.5362],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1932, -0.5849,  0.9212, -4.2333, -0.8982, -1.7821, -1.8295, -3.4790,
         1.4934, -2.2360, -2.8238, -4.7829, -1.0546, -4.7903, -0.1959,  0.7297,
        -4.4835, -0.9684, -1.9140, -1.8651], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4487,  2.3186, -3.7431, -1.3919, -2.6559, -0.2248, -5.7824,  0.2168,
         0.1652, -4.6046, -0.5907, -2.0433, -0.7143, -4.7412,  2.9606, -3.5815,
        -0.8553, -2.5924, -0.9857, -5.2744], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9769e+00, -1.9371e+00,  1.0313e-02, -6.4229e+00, -2.1996e-01,
        -1.0060e+00, -3.0304e+00, -3.9873e+00, -4.8189e+00, -1.0693e+01,
        -3.9312e+00, -6.1475e+00, -1.0191e+00, -3.0912e+00,  1.7388e+00,
        -5.4869e+00, -4.3753e+00, -3.9337e+00, -4.7127e+00, -1.9763e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3521, -1.7904, -2.8633, -8.1611, -2.8121, -1.9062, -2.5752, -0.5055,
        -1.3805, -3.8934, -1.1018,  1.8411, -3.1317, -1.9654, -4.3530, -2.6621,
        -5.7992, -0.1505, -2.3302, -2.3300], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0435, -0.1581, -5.3868, -4.3497, -3.9334, -5.2901, -4.5032, -1.2258,
        -4.5498, -3.2023, -6.9535, -6.9680, -5.6620, -6.0417, -2.5683, -2.9134,
         1.3460, -2.2536, -3.1093, -4.1153], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3963,  -1.5026, -19.5189,  -5.9933,  -7.0290,  -1.2666,  -4.8558,
          2.7518,  -5.9900,  -2.1317,  -6.0922,  -0.6767,  -4.3927,  -1.8755,
          0.2832,  -2.1460,  -3.1784,  -3.6244,  -0.8446,  -6.8167],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7558, -2.9751, -2.0930, -0.7773, -6.0744, -2.0420,  1.0277, -3.1768,
        -1.6415, -1.5271, -5.8162, -1.6828,  2.2216, -3.5292, -1.1231, -3.3529,
        -2.2096, -1.1778,  3.2342, -3.2478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2454e+00, -5.4141e+00, -3.6621e+00, -2.4570e+00, -1.2558e+00,
        -5.4243e+00, -2.0349e-01,  8.3388e-01, -3.0673e+00, -1.2873e+00,
        -3.6946e+00, -2.9102e+00, -4.9420e+00,  1.5634e-03,  3.7765e-01,
        -2.6501e+00, -2.9706e-01, -2.0071e+00, -2.6022e+00, -1.0193e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3300,   1.7038,  -5.5464,  -2.9312, -21.4953,  -7.1714,  -8.5764,
         -8.0010,  -6.3695,  -1.4293,  -2.4353,   2.5469,  -5.8351,  -1.2616,
         -1.2123,  -3.4813,  -0.6783,   2.7045,  -3.6058,  -2.5615],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9104,  -8.0877,  -2.0418,  -1.4223,  -4.5765,  -0.9765,  -2.3812,
         -1.3197,  -1.6778,   2.9507,  -2.6688,  -2.2031, -11.5661, -12.0722,
         -6.6353,  -6.2528,  -9.8482,  -2.6612,  -3.2843,   0.5773],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5487, -2.9948, -3.1317, -2.9795, -4.1072, -0.5595,  1.8444, -3.8427,
        -1.6535, -1.9351, -2.8751, -1.9493,  2.9731, -2.2208, -0.8103, -3.1428,
        -1.2705, -4.4133, -0.4076,  0.2807], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5491, -22.7487,  -6.6250,  -7.8794,  -6.5647,  -2.9989,  -4.0689,
         -0.6321,   0.2286,  -6.1323,  -2.7087,  -2.2943,  -1.5333,  -5.2070,
          2.1636,  -5.6940,  -0.9215,  -4.5271,  -0.0587,  -4.0458],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1736,  -1.2391,  -1.6911,   2.2510,  -2.8038,  -1.3358,  -8.3589,
         -3.6752, -10.1086,  -0.5509,  -6.1606,   0.7396, -21.3584,  -3.9380,
         -3.4974,  -0.5153,  -5.4951,  -1.0577,   1.1647, -12.3290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3883, -2.7745,  0.5484, -2.6273, -2.5022, -6.9182, -5.6249, -5.2580,
        -7.1668, -1.8696, -4.1721,  1.0759, -2.1045, -0.9157, -2.2029, -4.8785,
        -2.1267,  0.9752, -3.9744, -0.3796], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1556,  -5.6940,  -7.5643,  -4.0256,  -6.0082,  -5.0398,  -4.2331,
        -11.0388,  -3.8996,  -6.5970,  -7.2731,  -3.5639,  -3.8849,  -5.6154,
         -6.8795,  -4.4096,  -6.1795,  -5.5288,  -2.3160,  -5.7177],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5282,  -1.6217,  -1.3944,   0.9342,  -1.8102,  -2.8542,  -4.0764,
         -0.6688,  -4.6113,   0.1275,   0.1597,  -2.3940,  -3.1189,  -1.1333,
         -4.4646,  -2.0045,   0.7284,  -3.4670,  -3.0771, -17.5205],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1167, -4.1282, -0.3931,  1.0090, -2.5083, -4.6946, -3.3053, -0.3963,
        -6.1048, -1.7362,  0.1801, -1.5310, -2.8941, -1.8946, -4.0001, -0.5576,
         2.5201, -2.3066, -1.8622, -0.5733], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2653, -2.2575, -0.9678, -9.5194,  0.2282, -7.3940, -0.6581, -4.0216,
        -4.7970, -5.6202,  0.8273, -5.0609, -1.0780, -1.9879, -7.7400, -3.3574,
        -2.5167, -3.0824, -3.0123, -2.2023], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6647, -4.7031, -1.9642, -2.7286, -2.3869, -5.0798, -0.4572,  0.6312,
        -4.7536, -0.6962, -1.3351, -1.7529, -1.0727,  1.9937, -5.5906, -2.1651,
        -0.6713, -5.9539, -2.6034, -0.6119], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2476e+00, -1.4254e+00, -3.5201e+00, -1.6450e+00,  1.1649e+00,
        -1.2886e+00, -1.9463e+00, -4.1465e+00,  2.8455e-02, -9.1136e+00,
        -7.4428e-04, -1.3777e+00, -6.4049e+00, -4.7555e+00, -4.6900e+00,
         8.3318e-03, -2.5506e+00, -1.3408e+00,  1.0169e+00, -6.2084e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5304,  -1.2476,   1.1817,  -2.7779,  -0.9257,  -3.1538,  -0.2264,
         -5.5603,  -1.1153,   0.6934,  -3.6646,  -0.7447,  -2.5240,  -1.5180,
         -4.9540,   0.9338, -12.9657,  -1.4654,  -1.5198,  -4.3522],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0482, -2.9736, -6.9332, -2.1701, -0.7677, -7.8177, -3.3047, -2.9431,
        -3.5110, -5.6464, -2.1956, -0.1033, -1.7420, -0.2371, -2.0390, -3.8626,
        -3.5520,  0.4963, -2.6365, -4.8624], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5557,  0.6041, -2.8624, -1.4739, -2.3434, -0.9972, -5.5569, -1.3640,
         0.0387, -3.0515, -2.0725, -1.5966, -0.9700, -7.4859, -0.2320, -0.6606,
        -5.2765, -1.7897, -1.1862, -5.5744], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.7617,  -5.3036,  -8.1164,  -2.2757,  -1.0970,   1.4017, -13.9251,
         -1.1493,  -6.6284,  -8.5046,  -1.6177,  -8.5929,  -2.9349,  -1.5594,
         -1.8516,  -4.6108,   0.1843,   1.3534,  -3.3409,  -1.8953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1601,   0.5779,  -2.1140,  -0.7967,  -3.2065,  -1.1508,  -1.0969,
          2.1767,  -3.3183,  -1.1459, -19.1378,  -7.5286,  -3.8684,  -6.3265,
         -1.8035,  -4.2203,   0.7871,  -5.3881,  -2.2349,  -2.3348],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0604, -3.2067, -3.6099, -5.1195, -2.5805, -0.3900, -3.2640, -1.1301,
        -1.5596, -6.1447, -1.1911,  1.2594, -4.0193, -1.2435, -0.3365, -4.6527,
        -0.5618,  2.1560, -3.4861, -0.2890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6959,  -6.0545,  -5.6847,  -4.5555,  -3.8127,  -1.8051,  -4.7394,
         -3.1946,  -3.7471,  -7.6472,  -6.1639,  -2.9253,  -4.5581,  -4.5492,
        -11.1522,  -5.0832,  -6.5346,  -7.3105,  -2.4785,  -3.5038],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0049, -2.6098, -6.6436, -1.2695, -0.6504, -2.5825, -2.5843, -1.0292,
        -2.3600, -8.1281, -0.3638, -0.8214, -4.9787, -2.4216, -0.9634, -0.7605,
        -4.1547,  2.0905, -3.4210, -1.5512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0712, -6.4492, -8.3613, -3.5768, -4.9941, -4.5279, -4.7773, -5.2110,
        -4.4933, -5.2642, -5.8659, -5.5097, -4.5282, -5.4801, -4.9480, -4.9964,
        -3.8730, -2.9263, -6.7354, -5.6250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.0764,  -3.4117,  -7.4328,  -0.4548,  -1.6318,   2.9729,  -6.5821,
         -2.2841,  -1.7085,  -0.6508,  -4.5873,  -0.1410,  -0.9331,  -4.8061,
         -1.8092,  -6.4419,  -4.8299,  -6.1859,  -2.7892,  -2.0006],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.5127,  -3.4173,  -2.3075,  -4.4103,  -3.3373,   1.8820,  -2.7587,
         -3.0162,  -4.4884,  -0.9208,  -4.9609,  -7.2637,  -2.9769,  -1.3803,
         -8.8610,  -4.9655,  -2.9366,  -5.4008,  -3.7087,   0.8115],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8277,  -2.6492,   1.9804,  -4.8253,  -0.7259,  -2.8599,  -2.4724,
         -6.5153,  -2.4269,  -0.6876,  -2.4999,  -1.6856,  -0.4001,  -2.2831,
         -5.7078,   2.4163,  -1.8855,  -1.0720, -10.1492,  -5.2909],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5081,  -2.7459,  -1.9720, -10.7379,  -3.6324,  -1.3384,  -3.4982,
         -0.8455,   0.7833,  -4.8684,  -3.2928,  -1.8291,  -5.5624,  -0.9257,
          1.5804,  -3.8777,  -1.0984,  -2.5963,  -1.5905,  -4.9604],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1640, -8.4299, -2.2850, -2.0741, -3.2848, -4.1083, -2.7605, -2.9736,
        -4.3146, -2.5738, -2.3344, -5.1582, -2.7172,  0.8916, -2.2382, -1.6040,
        -2.6740, -1.0861, -3.5841,  2.5913], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4777, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4065, -0.5133, -5.4801, -0.8233, -0.2592, -5.0581, -0.7127, -2.3547,
        -1.3151, -1.9398,  2.7872, -3.3738, -0.3324, -2.0927, -2.1597, -4.3904,
         0.0972, -1.4172, -2.7576, -1.2675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.5124, -11.2768, -11.6104,  -4.1758, -20.6743,  -6.5050,  -4.7712,
         -3.0325,  -3.8123,  -4.3355,  -4.0239,  -4.4854,  -5.8587,  -8.3191,
        -10.9186,  -9.0217,  -7.4279,  -8.3634,  -8.2685,  -4.0159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6205, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5776,  -2.4114,  -2.0393,  -4.4768,  -3.3633,  -3.1568,   0.5112,
         -0.9169,  -1.7062,  -2.5344,  -4.3732,  -2.1213,  -0.0221,  -3.7322,
         -0.8176, -15.0475,  -4.4774, -10.6458,  -5.7455,  -5.1164],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8464,  -4.6351,  -1.2255,  -2.1262,  -5.2080,  -2.3232,   0.7710,
         -5.3029,  -3.0814, -21.9566,  -8.9633,  -9.4791,  -7.6233,  -5.1310,
         -2.2434,  -0.8020,  -6.1092,  -3.2018,  -2.1084,  -3.5998],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7916, -7.1231, -2.2464, -1.8987, -9.3395, -2.6612, -1.7823, -3.7760,
        -0.7674,  1.8072, -3.4678, -0.5158, -2.1068, -4.1463, -0.3276,  2.2299,
        -4.1693, -0.6607, -2.7727, -1.2134], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.4197,  -7.9437,  -4.9905,  -5.7009,  -1.4953,  -4.5659,  -1.0751,
         -0.1538,  -3.2560,  -0.7962,  -2.4567,  -3.6933,  -1.4640,   2.6874,
         -3.1460,  -1.3521,  -1.7904,  -2.8633,  -8.1611,  -2.8121],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9617, -2.1779, -5.6148,  0.4161, -5.6161, -0.8675, -1.7545, -1.0168,
        -3.1711,  1.0438, -2.5958, -6.6489, -3.5225, -2.3882, -3.5103, -4.0386,
        -0.0671, -0.6953, -6.7679, -1.8840], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5266,   1.6079,  -2.7480,  -1.5627,  -2.5751,  -2.3455,  -3.6755,
          1.0658,  -3.7352,  -2.7760,  -1.4786,  -4.0328,  -1.2369,   2.6140,
         -3.5213,  -3.0767, -10.2169,  -3.3282,  -3.6410,  -7.0165],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6603, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9690, -0.9439, -4.3241, -2.5661, -4.6761, -1.2938, -4.9453, -1.5159,
         1.9962, -1.8584, -1.2455, -3.7373, -4.5763, -1.7367,  0.1614, -2.7407,
        -1.0575, -1.6165, -3.4796, -0.0197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3130,  0.4859, -4.1033, -1.2081, -1.2931, -5.0369, -2.0150,  1.6178,
        -3.2605, -2.3036, -1.6404, -2.8202, -7.1676, -7.5205, -1.4770, -5.5616,
        -0.6505, -1.6705, -2.9842, -0.7177], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7867, -5.9285,  0.0616, -0.3120, -1.9399, -0.1943, -2.0575, -1.7279,
        -6.2139, -0.7617,  0.0443, -2.1469, -0.7934, -2.1521, -0.6302, -4.7009,
        -0.0084, -0.8575, -2.6546, -1.4762], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3602e+00, -2.1985e+00, -1.4235e+00,  1.3795e+00, -1.8282e+00,
        -1.2189e+00, -1.5270e+00, -4.8291e+00, -1.5599e+00,  1.0176e-02,
        -3.2510e+00, -1.6706e+00, -1.6446e+01, -3.0246e+00, -7.0220e+00,
        -1.8071e+00, -1.2000e+00,  1.9973e+00, -2.4707e+00, -1.9624e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9569,  -2.6241,  -2.4828,  -3.5488,  -3.8494,   1.9022,  -5.2657,
         -2.5051, -22.2225,  -5.8494,  -6.1973,  -6.2602, -12.6059, -15.3881,
        -10.6052,  -8.9056,  -4.2449,  -1.8881,  -2.0881, -33.7309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5158, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9445, -3.2475, -2.1534, -1.2736, -3.9462, -1.3964,  0.9331, -3.9296,
        -4.3243, -4.8020, -0.2590, -6.6438, -0.6106, -4.1961, -3.7500, -2.2040,
        -4.5111, -5.2669, -2.1581, -0.1418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3166, -7.0051, -2.5061, -3.1576,  1.9295, -4.3053, -2.5436, -5.1107,
        -0.5116, -3.5596, -2.6359,  2.5442, -3.2248, -0.7751, -4.5281, -4.8483,
        -6.5777, -7.8457, -4.3723, -1.4246], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.7159, -10.5962,  -5.5199, -20.5166,  -3.6869,  -7.4951,   1.5378,
        -13.3738,  -3.8930,  -3.2985,  -1.8078,  -6.0078,  -0.9480,  -6.9384,
         -2.7880, -16.1296,  -7.4216,  -3.1604,  -5.3443,  -1.8689],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5448, -1.1084, -4.9267,  0.2219, -0.5338, -5.0094, -1.3571, -2.2897,
        -2.0537, -1.3233,  1.8900, -1.9010, -0.5230, -2.1298, -3.6057, -1.2851,
         1.5709, -1.9115, -2.2351, -0.7800], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1667,  -1.3861,   0.7929,  -4.5901,  -5.7123, -15.3728,  -3.3446,
        -12.1605,  -2.3920,  -4.5205,   2.3708,  -6.5241,  -2.4665,  -2.4292,
         -2.5728,  -4.9133,  -0.9324,  -0.1430,  -2.4046,  -2.7585],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1614, -3.4691, -4.4081, -3.9424,  1.1237, -4.4112, -2.4338, -1.8247,
        -3.7944, -4.9620, -1.0916,  0.4619, -2.8643, -1.2073, -0.6787, -2.6383,
        -1.8054,  2.9806, -3.7922, -2.8742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4868,   1.8545,  -3.7830,  -1.4527,  -2.9326,  -4.0682,  -2.5260,
          0.2366,  -5.0341,  -1.4016,  -1.1320,  -4.6539,  -1.5345,   0.7612,
         -3.3821,  -2.4500, -10.6661,  -6.6208,  -9.3725,  -6.0907],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3877,  -0.8489,   0.9321,  -1.9077,  -1.5329, -13.5834,  -3.7071,
         -7.7485,  -1.2157,  -2.3860,   1.6479,  -6.5555,  -3.2748,  -1.9905,
         -7.2691,  -1.9016,   2.6294,  -4.7228,  -4.3107,  -2.6821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1866,  0.3300, -1.8529, -2.8106, -7.3046, -7.6537, -7.6614, -4.6342,
        -5.7120, -2.8962, -2.4376, -4.4613, -4.2654, -2.5295, -1.9848, -2.2465,
        -1.6728,  2.4062, -4.8908, -2.3785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0090,  -2.5734,  -6.0475,  -2.6118,  -2.0261,  -1.7732,  -0.8135,
         -2.2416,  -3.1837,  -5.7431,  -2.3551,   0.4323,  -2.0863,  -1.4550,
         -1.3397,  -8.0799,  -2.5057,   0.2457,  -1.9972,  -0.2860],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3019,   1.7839,  -3.4455,  -1.1240,  -2.3456,  -3.6606,  -1.2723,
          0.9188,  -2.1279,  -1.7422,  -2.1201,  -2.7591, -10.8345,  -2.9343,
         -1.7160,  -4.9809,  -0.3701,  -1.0430,  -3.3036,  -5.8087],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7957,  -2.7030,  -2.8856,  -5.3829,  -3.6156,   1.5148,  -5.1264,
         -3.5408,  -2.2225,  -2.8464,  -7.0369,  -5.8502,  -3.4427,  -3.0083,
         -3.8385,  -1.1628,  -0.5261,  -3.3501,  -0.4802,   1.2609],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3442,  -5.1650, -13.4561,  -3.1609,  -8.0053,  -2.9290,  -3.3336,
          2.8399,   0.0149,  -3.6457,  -9.2337, -11.1916,  -1.5768,  -7.1862,
         -2.5876,  -1.0470,  -9.9353,  -1.5793,  -1.4434,  -4.1128],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6299, -4.8715, -3.8158,  0.5837, -5.5853, -2.7012, -1.4125, -2.4980,
        -9.1025, -4.5642, -3.9762, -8.0536, -6.8209,  0.1638, -1.5311, -2.3941,
         3.1050, -3.4293, -2.1400, -3.8293], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5017,  -5.5821,  -8.6868,  -1.8584,   0.6291,  -3.6999,  -2.6129,
         -1.1629,  -1.6852,   0.0471,   2.0963,  -3.8153,  -2.2054, -14.1455,
         -4.2438,  -5.9924,  -6.1463,  -1.2002,  -4.0262,   1.8965],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9421,  -6.0421,  -2.3978,  -8.5098,   0.8737,  -2.7757,  -0.2793,
         -3.3499,  -2.3517,  -2.0528,   0.1336,  -3.2727,  -3.1491,  -9.6960,
        -11.2405,  -4.1532, -10.0082,  -4.6407,  -3.2088,   2.1952],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1443,  -2.5982,  -4.3701,  -3.3066, -15.8616,  -7.2884,  -9.0886,
         -1.0314,  -3.9787,  -0.7294,  -5.4641,  -4.6525,  -3.5852,  -3.2056,
         -5.2604,  -2.3700,   0.9550,  -1.7349,  -1.1602,  -2.0808],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9317,  -2.3827,  -1.2038,  -2.6416,  -3.6794,   2.5110,  -6.7641,
         -1.3706,  -2.0283,  -4.5397,  -2.6271,   0.5631,  -3.6068,  -1.5393,
        -18.6834,  -2.4684,  -7.4753,  -0.7817,  -6.6177,   2.7416],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.5151,  -1.9758,  -5.3038,  -0.5618,  -3.4177,  -3.4869,  -2.8965,
         -4.6929,  -4.7784,  -0.2509,  -7.4857,  -1.9970,  -8.8678,  -7.7500,
         -4.1487,  -7.3332,  -1.8595,  -4.4293,   1.0054,  -5.3297],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6421, -9.4888, -3.5850, -1.3855, -2.7529, -5.2930,  1.0514, -7.5677,
        -3.9471, -4.8007, -2.3670, -5.6315, -2.7920,  0.5023, -2.6033, -1.1957,
        -1.3619, -4.7001, -1.9913,  2.6865], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0303,  -2.7804,  -4.0630,  -0.6375,   2.3036,  -2.6096,  -2.4795,
         -6.7492, -10.4011,  -2.9595,  -6.7460,  -1.8300,  -2.5526,  -7.4476,
         -2.0970,  -2.1793,  -1.6468,  -8.8263,  -3.0603,  -2.0924],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8260,  1.9518, -4.1481, -0.9107, -1.8180, -4.6061, -2.8289,  0.6234,
        -3.0443, -0.2376, -3.2588, -3.1732, -0.2535,  1.9889, -2.5873, -0.7746,
        -2.4057, -4.3779, -2.7732,  1.5489], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0512, -6.4219, -2.1575, -3.6830, -2.5443, -2.4933, -2.0422, -2.0907,
        -3.0962, -2.4669,  1.0374, -4.7810, -0.6636, -4.5686, -0.7766, -4.2329,
        -0.4087,  1.1668, -3.0173, -2.1195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.6806,  -7.5170,  -6.2678,  -5.2429,  -2.8527,  -3.1072,   1.3297,
         -4.9642,  -2.8572,  -2.9746, -16.1319,  -6.7179, -13.2878,  -2.8699,
         -7.7473,  -4.0975,  -5.7679,  -5.4885,  -6.9567,  -5.6667],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9046, -3.2895, -0.0664, -1.9887, -3.9917, -3.1325, -3.3277, -1.2753,
         2.0235, -2.8319, -1.1645, -1.9118, -1.1886, -6.7913,  0.2013, -1.7353,
        -5.3314, -0.8837, -3.1878, -4.6070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2390,  -5.4654,  -1.9630,  -2.0458,  -3.8690,  -1.6980, -13.2540,
         -6.8498, -13.8337,  -6.0496,  -2.6198,  -5.3052,  -0.6913,   0.7336,
         -2.2002,  -2.1411,  -2.8707,  -5.6404,  -2.0648,   0.6538],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2270, -1.2273, -3.3366,  2.4244, -2.7336, -1.3259, -2.7348, -4.8950,
        -6.4419,  0.7156, -3.1452, -1.4507, -2.2095, -2.6259, -7.1303, -0.8888,
         0.2083, -3.8045, -2.1012, -0.4316], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8965, -4.8298, -3.4902, -4.9300, -7.5503, -5.9079, -5.9907, -7.7991,
        -4.9474, -1.9800, -5.1905, -2.9285, -3.1731, -4.6639, -1.4226, -3.6397,
        -7.2741, -6.0626, -8.2492, -5.2231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9575, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1350, -4.8551,  0.0407,  2.3462, -2.7800, -1.1743, -2.1303, -2.8600,
        -1.2724,  2.1628, -1.9924, -1.6133, -4.0017, -1.2459, -5.6442, -2.0676,
        -2.9982, -1.7177, -8.9375, -4.3130], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9361, -6.2227, -7.2703, -5.2173, -3.9303,  2.0970, -8.8282, -2.0305,
        -1.8121, -4.0721, -3.8857,  1.2122, -6.3962, -3.1525, -2.8468, -1.1467,
        -6.9239, -1.9479,  0.4268, -2.2061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5775,  -2.8541,  -1.5434,  -3.7660,  -0.0951, -11.8783,  -8.1089,
        -13.2212,  -4.4282,  -7.9626,  -1.2352, -12.8843,  -1.0845,  -5.8517,
         -3.6553,  -1.4296,  -1.7633,  -2.9048,  -1.4792,   1.5708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4576, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6255,  1.1537, -5.6606, -2.2491, -2.0072, -4.1203, -2.3618,  1.8932,
        -5.4617, -1.7631, -3.5903, -0.5359, -5.3145, -1.1714,  0.4851, -4.2521,
        -1.2532, -4.3138, -2.7788, -4.8033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8645, -7.3202, -7.0022, -2.4522, -2.7176, -1.8690, -0.5412, -2.1078,
        -1.5754, -1.4152, -5.5529, -1.6662,  1.4368, -2.9757, -2.4157, -7.7855,
        -5.1326, -4.9771, -6.4857, -0.7830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5786, -2.7009, -5.2648, -4.6272, -5.3864, -3.6242, -8.4934, -3.7616,
        -6.6732, -2.2051, -4.4249, -2.0794, -4.3621, -3.2922, -4.6061, -8.0576,
        -2.2112, -6.4869, -4.7136, -1.9538], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9901,   0.6949,  -4.6137,  -2.5096,  -1.4554,  -3.8451,  -2.5333,
          0.1756,  -3.9209,  -0.6945, -13.3060,  -5.2882,  -2.5215,  -2.4327,
         -6.0617,  -3.9300,  -2.8425,  -2.5570,  -1.2816,   1.2777],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3018, -2.1826, -1.8158,  0.5614, -4.2423, -1.4571, -3.7662, -0.0987,
        -5.2885, -0.9219,  0.0160, -2.2446, -0.9688, -1.3074, -2.9111, -0.9894,
         2.8321, -3.6607, -2.8552, -3.8855], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1473, -1.3077, -0.4943, -3.0096, -0.9105, -1.3406, -3.6407, -5.9338,
        -0.5093,  0.1356, -2.1358, -0.4292, -4.8821, -1.9787, -4.4698, -1.4680,
         0.6987, -5.4301, -1.9431, -2.4354], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6378, -3.7861, -2.1763, -1.9919, -7.1901, -0.4214, -0.6951, -1.5709,
        -2.7892, -1.4885, -4.2162, -0.6259,  2.0632, -3.2678, -0.8943, -2.9156,
        -2.7279, -4.5464, -1.9697,  0.1639], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3201, -1.9622, -0.8199, -5.2503, -0.4350,  0.6221, -4.0415, -0.5953,
        -2.3314, -3.8911, -0.9678,  2.1775, -2.6840, -2.9580, -1.8714, -2.7882,
        -1.4729,  3.2212, -2.9906, -1.5951], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4951, -1.4720, -3.4146, -0.6595, -4.0158, -1.3159,  1.2639, -0.9571,
        -1.0999, -0.6260, -4.7290, -0.5160,  2.7699, -4.3151, -1.3817, -1.4203,
        -2.3120, -4.6675,  2.9982, -4.8068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7650,  -3.8157,  -6.2533,  -0.5263,  -3.3711,  -2.7330, -21.0067,
         -2.1258,  -6.9849,  -1.0432,  -1.8958,   2.4687,  -4.6529,  -1.4585,
         -2.5663,  -0.5512,  -5.3417,   0.6473,  -0.6579,  -2.6557],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7829,  -2.6235,  -6.4495,  -1.1545,  -4.9751,  -3.2804,  -6.1467,
         -7.1904,  -3.6477,  -6.4663,  -4.1765,  -4.4127,  -3.1352,  -6.3656,
         -4.7790,  -8.0742, -15.1395,  -6.5062,  -7.9739,  -3.6646],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8246,   2.1602,  -1.5860,  -2.2820, -12.5570,  -3.0519,  -8.4157,
         -1.4498,  -3.7340,  -0.1423,  -6.4912,  -2.4775,  -2.6077,  -0.8275,
         -3.4678,  -0.2645,   2.2918,  -4.3675,  -1.4089,  -2.5774],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7541, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2198, -2.8766, -0.9947, -3.3794, -0.8098, -4.1238,  0.0590, -0.7907,
        -5.4003, -1.0648, -2.0510, -3.5681, -1.5944,  0.9115, -6.0340, -2.0237,
        -4.8656, -3.0625, -4.5286, -2.3049], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8010, -1.4010, -2.7576, -0.8694, -2.0582,  1.4477, -2.2337, -3.0142,
        -1.3275, -3.3369, -2.7744,  1.7346, -1.5723, -3.4656, -3.1612, -2.2500,
        -5.4044, -2.6680,  1.6284, -2.8471], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5589,  -2.6559,  -0.3144,  -8.3698,  -0.3410,  -2.0745,  -4.1049,
         -1.4186,  -2.7406,  -2.5223,  -3.0419,   1.0837,  -1.5934,  -2.6352,
        -16.8311,  -2.8658,  -7.1057,  -5.0280,  -4.7231,   2.1588],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7241,  -5.9325,  -3.9791,  -6.3565,  -5.0046,  -1.8434,   2.3911,
         -8.1181,  -1.6058,  -1.4672,  -4.9188,  -0.8985,   1.1307,  -3.4057,
         -0.2728,  -2.4263,  -2.2154,  -2.3775,  -0.4855,  -2.4301],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2733, -3.9355, -4.0275, -5.8246, -3.0890,  1.1737, -3.1453, -3.1735,
        -4.3386, -2.6625, -1.9762,  1.4865, -4.2459, -1.3474, -1.8827, -3.4518,
        -1.8654,  0.9409, -1.7957, -1.8057], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6033, -2.8522, -0.0812, -6.7786, -4.6151, -3.5435, -1.8421, -6.6482,
        -0.1114, -0.7199, -2.8033,  0.0420, -2.1407, -3.3771, -0.5388,  0.8837,
        -2.7835, -1.0185, -4.5509, -1.4909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3787, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.5442,  -9.5438,  -5.9786,  -6.4913,  -1.1371,  -2.4575,   2.9855,
         -3.2949,  -1.7842,  -2.8440,  -2.8429,  -3.7483,   1.1733,  -4.0209,
         -1.8208, -11.7921,  -4.7069,  -4.3697,  -5.5298,  -7.0640],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6916,   1.3629,  -7.0417,  -5.4390, -16.9456,  -4.8357,  -7.4563,
         -6.2047,  -9.8050,  -7.3999,  -2.1846,  -3.5751,  -1.2516,  -1.6048,
         -2.7158,  -1.2207,  -2.8789,  -2.6192,   0.0596,  -6.0226],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.0319,  -2.6072,  -3.9075,  -4.6613,  -5.2153,   1.4264,  -5.3991,
         -0.7486,  -2.2531,  -3.7439,  -6.7614,  -1.7002,  -1.4977,  -2.4600,
         -2.8807,  -1.0825,  -4.3311,  -0.6624,  -0.2889,  -3.6251],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8301, -2.0291,  2.3378, -3.4305, -1.1802, -4.0045, -0.1838, -5.0290,
        -2.8275,  0.7223, -5.1941, -2.3401, -1.1278, -1.7049, -2.5038,  2.7205,
        -2.9534, -2.7377, -3.5429, -4.5050], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0672, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2156,   1.1836,  -4.5518,  -1.7150,  -4.7006,  -5.7510,  -1.7245,
         -0.1201,  -7.4795,  -6.7548, -20.5288,  -6.0768,  -6.1741,  -4.9745,
         -7.5447,  -4.8281,  -2.4171,  -1.0157, -14.8243,  -7.8245],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5698, -3.1159, -7.3946,  0.1693, -1.0857, -3.1549, -1.6958, -2.8935,
        -1.0957, -5.3858, -0.3079,  0.1235, -4.0016, -2.7007, -1.4522, -3.5202,
        -2.0923,  1.0880, -3.4276, -2.1427], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2494,  0.2841, -3.3884,  0.0559, -2.0903, -2.1120, -0.7059,  1.6508,
        -3.5892, -1.6124, -1.8552, -0.9902, -5.3871, -0.0430, -0.6655, -1.9071,
        -0.8530, -0.0828, -4.8972, -1.6364], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5037, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4306, -4.6698, -2.2720, -3.7869, -2.4125, -3.0337, -1.8217, -4.8723,
        -4.9743,  0.3370, -0.9649, -2.0474, -1.2485, -2.4952, -1.6222,  2.7906,
        -1.1939, -1.8755, -2.9992, -0.8594], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3636,  -4.8906,  -6.7602, -30.6252,  -5.2090,  -8.0174,  -7.3300,
         -7.5619,  -3.1007,  -5.2641,  -3.5785,   0.4518,  -5.2499,  -3.5652,
         -2.4655,  -4.4901,  -0.4079,   2.5742,  -2.4460,  -2.3449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3077, -9.7195, -1.9691, -2.6612, -2.4486, -3.5976, -0.6427,  1.7932,
        -2.6769, -0.8245, -2.3956, -0.6797, -5.9387,  0.0151,  0.1013, -3.7299,
        -0.0170, -2.7224, -4.0463, -0.7575], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.8704,  -7.4090,  -3.1950,  -6.5189,  -1.8430,  -2.9606,   0.2569,
         -5.9698,  -2.3920,  -1.6242,  -5.2961,  -3.3564,   1.5013,  -6.9751,
         -3.5687, -16.8480,  -4.9836,  -5.5734,  -2.9119,  -2.6410],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7997, -2.0647, -2.0354, -4.9487, -0.0148,  1.9450, -2.3473, -0.3844,
        -1.7745, -0.8284, -4.2093,  1.6858, -5.0833, -3.1234, -3.5034, -1.3905,
        -3.5961, -0.6711,  0.8988, -2.2938], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8770, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2671,  -1.4793,  -0.4664,  -4.4791,  -2.2186,   1.9532,  -1.3297,
         -0.3815,  -8.0382,  -4.1052, -13.5437,  -3.5097, -10.3552,   1.4265,
         -1.7145,  -3.5823, -13.0012,  -4.1190,  -4.2173,  -5.1112],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0770, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.1862,  -1.8927,  -4.7875,  -6.5580,  -3.5485, -23.6031,  -5.4219,
         -1.4949, -11.9131,  -4.1953, -11.6611,  -3.1304,  -1.2545,  -9.9576,
         -3.5006,  -2.5376,  -2.8024,  -5.3349,  -0.4037,   0.6883],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2172, -1.4800, -2.3214, -2.5033, -4.1261, -1.4681,  0.8671, -3.4040,
        -0.9122, -3.9803, -4.1491, -2.6086,  1.8397, -5.4633, -1.7113, -1.8248,
        -2.0730, -1.3766,  2.4978, -2.5904], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0074, -4.1751, -8.4979, -6.3533, -9.9246, -2.3126, -3.3723, -5.0876,
        -4.3229, -7.9703, -5.7715, -3.2814, -5.9711, -4.0386, -4.6627, -3.4933,
        -3.9462, -3.0182, -4.8902, -5.1585], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3392, -0.7793,  1.8234, -2.8924, -2.7863, -2.1978, -5.2121, -1.8872,
         1.7674, -5.8029, -2.1432, -3.3416, -3.7248, -4.3645,  0.3594, -5.3898,
        -7.4655,  0.0173, -7.9684, -1.7497], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4933, -3.1158, -3.9927, -5.0333,  0.3059, -2.9335, -2.3917, -1.3001,
        -2.6984, -2.8788,  3.1279, -1.7376, -2.3956, -4.8784, -4.4162, -4.8573,
        -7.3409, -6.0241, -8.1489, -2.3142], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3574, -1.4272, -4.1398, -3.0602, -3.2794,  0.8818, -2.6589, -0.9567,
        -2.4015, -2.2016, -4.6466, -0.8455, -2.5792, -3.1884, -0.5805, -0.7405,
        -2.3300, -0.4505,  1.3575, -2.2332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2350, -7.8255, -0.0739, -2.0804, -4.6328, -3.2003, -1.5864, -6.2205,
        -2.1802,  0.2684, -2.8325, -1.2198, -1.3059, -3.6452, -1.0632,  1.3982,
        -2.4716, -2.4121, -3.8714, -1.3087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8320,  -1.1263, -16.1518,  -2.7219,  -0.5710,  -3.6126,  -0.7640,
         -1.0390,  -3.6240,  -0.5150,   2.2464,  -3.7466,  -1.4722,  -4.9396,
         -1.1868,  -4.3410,   0.0651,   0.9266,  -2.3931,  -0.1106],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8297, -4.9651, -1.8814, -0.1835, -2.3135, -1.2562, -1.2370, -4.6088,
        -0.6836, -0.2155, -5.4083, -0.3658, -3.5829, -0.7350, -4.6725, -0.4189,
         0.0157, -5.2865, -1.9395, -0.6666], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3548,  -1.6008,  -2.2250,  -2.1406,  -1.3671,  -0.4174,  -3.4754,
         -0.3628,   3.0648,  -2.2133,  -0.7984,  -1.8824,  -2.0661,  -5.3916,
         -1.3703,   0.3589,  -1.2620,  -1.1160, -23.1783,  -9.6194],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9814,  -2.7455,   2.3787, -14.4914,  -2.3304,  -2.2912, -16.1311,
         -5.4655, -42.4505,  -5.7606, -18.0188,  -9.0780,  -5.5075,  -7.4570,
         -7.2802,  -1.2425,  -6.8485,   1.2351, -11.4195,  -3.2748],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0080, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0515, -1.8829, -6.2835, -5.9122, -5.0000, -4.6317, -1.5849,  2.3422,
        -3.1419, -1.3407, -4.2600, -5.7802, -2.3081,  1.0788, -5.1984, -2.4811,
        -2.5349, -2.5089, -7.1676, -0.6656], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8134e+00,  6.1120e-03, -1.7701e+00, -6.3075e+00, -1.6675e+00,
        -1.4418e+00, -3.3928e+00, -1.7804e+00,  3.1189e+00, -2.7655e+00,
        -1.0678e+00, -2.3787e+00, -2.2871e-01, -4.6575e+00, -5.5474e-01,
         2.0907e+00, -2.7940e+00, -2.6926e-01, -2.4824e+00, -7.8734e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9579, -1.8586, -3.2730, -1.2767, -5.8400,  0.1401, -0.8944, -2.5100,
        -0.4823, -1.4904, -1.6542, -1.2703,  1.1364, -1.8638, -2.8537, -4.0507,
        -0.6364, -4.5314,  0.2456,  0.5322], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1045, -2.8761, -1.1071, -0.4255, -2.2495,  1.6394, -6.4460, -3.1168,
        -2.9679, -0.3806, -6.2672,  0.3058,  0.1289, -3.0132, -1.4403, -1.7381,
        -3.6211, -1.3864,  1.2914, -2.8349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9884, -4.6072, -3.0570, -1.8580, -1.2344, -3.3824,  2.8970, -3.2150,
        -3.0591, -6.5724, -6.4448, -4.0609, -5.3803, -3.4674, -0.9557,  1.8159,
        -4.3737, -2.1830, -2.9054, -0.7433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6807,  -4.6104,  -1.3781, -10.0810,  -5.9390,  -4.3676,  -6.2882,
         -4.8112,  -4.2836,   1.4889, -11.6059,  -3.4172,  -1.4691,  -3.0937,
         -6.3640,   2.0611,  -8.4722,  -0.7133,  -1.9004,  -2.6528],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0349,  -1.9816,  -0.0445,  -2.9219,  -2.3263,  -1.7357,  -3.2677,
        -13.3970,  -6.2536,  -6.0148,  -2.9114,  -0.9723,   2.5314,  -2.6168,
         -1.1226,  -0.8796,  -3.4276,  -1.4613,   2.5500,  -2.6602],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7844,  -1.5481,   0.7080,  -7.4636,  -4.4432,  -9.0620,  -3.8991,
         -4.5048,  -1.2389,  -5.0630,  -6.2404,  -2.2458,  -6.1119,  -4.0662,
         -7.3788,  -7.2360,  -0.9412, -13.7360,   1.9536,  -7.4348],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7875, -5.9481, -2.2184, -4.0885, -1.9559, -6.5065, -0.6576, -1.4490,
        -3.5319, -1.4951, -4.1943, -2.1785, -5.4075, -4.8064, -0.1110, -1.7802,
        -0.8667, -1.9674, -5.1382, -1.9700], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4427, -2.6067, -0.9285, -2.2804, -1.5338, -5.1234,  1.8844, -7.7876,
        -1.5828, -1.5486, -2.2024, -4.6234,  2.7316, -2.9930, -1.4518, -3.4533,
        -1.4840, -5.2988, -1.0916,  0.2586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.0053,   2.1098,  -3.0769,  -3.1331,  -7.4669,  -4.7409,  -4.4480,
         -6.0868,  -7.3837,  -4.7141, -25.8689,  -7.9789,  -4.8299,  -6.7644,
         -5.2427,  -7.2832,  -1.8071, -14.0545,   1.1696, -10.3082],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7568,  -0.3561,  -2.6880,  -1.5343,  -0.9451,   0.0895,  -4.1097,
         -0.8107,  -2.7778,  -1.9053,  -4.5836,   1.8942,  -4.9721,  -2.6255,
        -20.3874,  -2.9613,  -7.2477,  -1.0128,  -4.8595,   0.3163],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3468, -8.5631, -0.5908, -3.2967,  1.5425, -8.0762, -2.9811, -2.9385,
        -3.6058, -2.9112, -1.2566,  1.4778, -3.1991, -0.3052, -3.1208, -4.5009,
        -1.2218,  0.4153, -2.7320, -0.1977], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8163, -8.4360, -6.0688, -4.5868, -1.9248, -4.6980, -3.8832, -2.8861,
        -3.1554, -2.8589,  1.3790, -3.5620, -2.0229, -2.1319, -2.2479, -5.5072,
        -1.2106, -2.4278, -4.6865, -1.7358], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0014, -0.6361,  1.4983, -3.3089, -1.1770, -2.2795, -2.9229, -4.2377,
         1.4797, -3.2822, -2.5387, -1.4728, -3.5124, -2.7704,  1.2528, -2.4526,
        -1.7819, -2.3117, -1.0823, -7.4875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1373,  -1.2917,  -3.4793,   2.7233,  -5.8224,  -2.0914,  -3.7269,
         -1.4390,  -4.1190,  -1.3309,   1.3322,  -4.0011,  -1.2137,  -6.1139,
         -1.0647, -10.2444,  -0.5096,   0.1418,  -4.4329,  -2.8393],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8266,  -0.8445, -10.8336,   0.9103,  -7.9525,  -2.3009,  -3.6456,
         -4.0985,  -4.5806,  -1.0465,  -1.1715,  -1.1685,  -2.7495,  -0.7898,
         -5.4090,  -0.2040,  -3.4375,  -7.7275,  -1.5726,  -2.8573],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4078, -2.4814, -3.3388, -1.2513, -4.2006, -3.8647, -0.1380,  1.2859,
        -3.6648, -1.0128, -0.4957, -2.2157,  0.0872,  0.8699, -4.3257, -3.6289,
        -2.3442, -3.6552, -4.4404,  1.8665], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4040, -0.1320, -1.8003, -0.2133, -3.2391,  0.9599, -1.1471, -2.0219,
        -3.7371, -0.2989, -5.1171, -1.8643, -2.1838, -3.4793, -3.6808, -3.1214,
        -0.4159, -4.5382, -1.3604,  1.5115], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7687, -4.4376, -2.6003,  2.5747, -3.4561, -2.3242, -2.4734, -3.8545,
        -3.0769,  0.5724, -2.9483, -0.6214, -1.9566, -2.5306, -1.9788,  1.2444,
        -3.8436, -3.4620, -6.6323, -0.7273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3343, -6.6506, -6.4440, -2.9278, -4.9936, -1.7278, -2.9501, -2.1132,
        -2.9178, -4.5443, -2.5006,  0.8597, -4.2137, -2.3866, -3.2548, -2.8172,
        -7.6968, -2.0406, -2.7076, -6.8463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4485, -7.6047, -1.9429, -0.7720, -3.1474, -1.6235, -1.2878, -5.6491,
        -1.4676,  0.9585, -2.6141, -2.6022, -6.5299, -7.9562, -9.7787, -7.4496,
        -6.5966, -6.6484, -1.9625, -4.5707], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0109,   0.0668, -10.8575,  -5.8406,  -1.0339,  -0.7335,  -6.8595,
         -1.5493,   0.0845,  -3.0157,  -1.0082,  -2.0656,  -2.3456,  -1.4131,
          3.0407,  -5.5349,  -0.6138,  -3.3164,  -2.2128,  -5.5295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5357, -2.3356, -3.1009, -1.8707, -6.6012,  0.4030, -7.3565, -2.4372,
        -3.9253, -5.1556, -4.0526, -2.4854, -4.2712, -4.2326, -2.4749, -3.2772,
        -2.5451, -2.6254,  2.7618, -3.8000], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0219,  1.0600, -3.4374, -1.6097, -3.7561, -1.3677, -4.2002, -1.5216,
         1.6652, -3.0555, -0.0662, -1.1443, -3.0718, -3.8027,  1.4651, -3.1899,
        -1.0447, -1.6014, -3.3028, -1.1254], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1315, -3.3524, -4.9082, -1.2010,  2.1632, -1.9454, -0.2082, -2.9666,
        -2.3740, -0.3843,  2.6068, -3.8590, -1.6047, -3.3990, -4.3443, -8.8515,
        -4.6346, -8.2833, -2.6455, -5.5812], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6874,  -1.9121,  -1.9123,  -6.0375,  -3.6753,  -0.2708,  -4.8709,
         -4.7454,  -2.7351,  -8.2059,  -3.5333,  -3.4044, -10.7045,  -5.4257,
         -9.2168,  -4.5616, -11.7648,  -2.5236,  -1.4034,  -2.0445],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5416,  -2.8905,   1.8859,  -3.3028,  -2.2180,  -2.4763,  -6.3057,
         -0.5696,   2.7910,  -2.5013,  -0.2697,  -2.9321,  -1.1768, -11.9819,
         -2.5211,   0.3092,  -1.6536,  -1.1455,  -0.8994,  -3.3817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4968, -9.1769, -7.9684, -3.3238,  1.7095, -6.5175, -3.0484, -2.8380,
        -2.8427, -4.1441, -2.2698,  2.7380, -3.1970, -0.0863, -0.9940, -4.8751,
        -2.1562,  1.5108, -1.7284, -3.1565], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0181,  -4.1648,  -0.4813,   0.8456,  -4.0529,  -2.4428,  -9.2563,
         -3.4117, -18.0358,  -0.2415,  -4.9704,   2.6239, -30.2432,  -2.0393,
        -13.5825, -18.5176,  -7.2388, -14.2300,  -6.8912,  -8.9979],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6516,  -6.9289,  -6.4500,  -6.7426,  -7.2874,  -7.9435,  -7.3058,
         -7.4916,  -7.0060,  -8.0139,  -7.2386,  -7.7680,  -6.9058,  -6.8745,
         -7.6170,  -8.1875,  -6.8159, -12.1546,  -9.4106,  -6.8223],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0232, -7.3541, -2.1664, -3.0330, -0.7002, -3.7451,  0.0889, -5.4136,
        -7.6802, -3.6609, -2.0579, -4.6695, -1.7050, -0.2258, -4.2489, -1.5199,
        -6.1091, -8.1558, -1.6268, -6.5359], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8786,  -8.5630, -10.7341,  -7.8990,  -6.5406,  -2.6779,  -3.5571,
         -4.3292,  -4.3070,  -3.8143,  -3.1940,  -6.7674,  -4.0778,  -6.4677,
         -6.9866,  -6.1430,  -5.8360,  -6.5217, -19.4094,  -0.5431],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3374, -0.9262, -4.0034, -1.9023,  0.9464, -2.0615, -0.5097, -3.1153,
        -2.5428, -5.1726, -0.5651,  1.4457, -2.5750, -1.9372, -0.6947, -1.3406,
        -4.5302,  3.1170, -7.9871, -1.4791], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9456, -2.7523, -1.7064,  1.6117, -3.2545, -1.1283, -0.7938, -2.5298,
        -2.0677,  2.5631, -1.8062, -2.6051, -3.2495, -1.9601, -3.1833, -2.0960,
         0.8891, -3.6563, -1.3732, -3.5803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7233,  2.3379, -6.5409, -0.5961, -1.0357, -4.2387, -0.5092,  1.2150,
        -2.7851, -1.7259, -0.6088, -3.0940, -0.2061,  2.7056, -1.6079, -3.2180,
        -1.2449, -0.3354, -3.8142,  1.9787], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3742, -0.1754, -1.6876, -1.5664, -1.8724, -0.4073, -6.3526,  1.3106,
        -3.1972, -2.8498, -2.5668, -0.9631, -5.5369, -0.5715,  1.6725, -2.3677,
        -1.1677, -2.2270, -1.7919, -0.6118], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4876,  -3.4607,  -0.5007,  -5.6831,  -1.1741,   1.3982,  -2.0790,
         -0.5816,  -2.4868,  -0.4782,  -2.2271,   2.6643, -15.0377,  -1.1187,
         -1.9248,  -2.2351,  -5.6162,  -0.1565,   0.6779,  -1.5218],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7929, -2.4843, -1.0650, -6.5561,  0.5634, -1.8823, -4.5907, -1.0927,
        -2.0663, -5.2786, -1.2569, -0.2979, -7.9862,  0.5109, -3.2408, -3.3407,
        -0.6111,  2.2230, -3.5861, -1.2002], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1759,  -1.6989,  -1.6800,   0.5481, -35.6500,  -2.3312,  -1.0059,
         -5.8338,  -3.1011,  -3.5098,  -3.4538,  -3.4360,  -4.0311,  -3.9837,
         -7.0941,  -4.3477,  -2.1538,  -4.0776,  -2.2020,  -1.6898],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3773, -3.7443, -2.8240, -4.9295,  2.1578, -3.7098, -8.7796, -3.6029,
        -1.1215, -3.1872, -5.4731,  0.0302, -0.1268, -3.6794, -1.0089, -1.6597,
        -1.2654, -4.9757,  0.1111,  0.2386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1426, -2.6983, -1.5834, -1.7731, -3.5060,  0.0739,  1.6787, -4.0266,
        -4.1228, -3.6067, -0.6959, -5.9031,  0.3967,  0.4730, -2.8712,  0.0727,
        -3.4320, -1.6603, -5.7238,  0.0719], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4000, -1.8165, -2.7611, -1.9312,  0.0793, -4.4209, -1.2645, -3.7580,
        -2.4800, -3.1511,  2.3838, -2.7957, -0.7065, -2.1165, -3.7679, -0.7913,
         1.8406, -1.0303, -0.8525, -1.6338], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3857, -7.7739, -8.0859, -5.0862, -5.1849, -4.6640, -4.4354, -6.0540,
        -5.8443, -5.5509, -6.0366, -6.5118, -5.0021, -3.9939, -2.9941, -3.5678,
        -3.6019, -5.3311, -3.1125, -4.7633], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8546, -2.3708,  0.9274, -2.5312, -2.0966, -2.2236, -2.5306, -4.5296,
         0.2592,  0.1101, -2.8768, -2.1279, -3.9630, -1.0981, -5.4772,  0.2292,
        -1.5440, -4.5381, -2.1695, -2.1849], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.9947, -20.1205,  -1.5855,  -2.7930, -15.4392,   1.3659,  -4.5558,
         -4.0500, -16.1292,  -6.4477, -11.9763, -11.2021,  -9.8372,  -7.3664,
         -1.5270,  -8.1511,   3.1324,  -7.5300,  -1.6167,  -1.8010],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0202,  -4.2750,  -3.2118,  -8.1704,  -4.2932, -14.4508,  -1.5369,
         -0.7236,   0.1901,  -3.8959,  -1.2799,  -2.9901,  -6.7571,  -1.5741,
          2.3835,  -5.2375,  -1.3235,  -1.9163,  -2.4310,  -1.7629],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6250,  -2.5621,  -1.9155,  -4.5643,  -3.3565,  -4.9721,  -1.7021,
        -10.9752,  -6.9787, -11.0874,  -3.8202,  -3.4751,  -1.8905,  -1.8997,
        -20.1963,  -5.6455, -11.0325,  -4.4005, -12.9522,  -6.7289],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1714, -0.8376, -5.7689, -0.2847, -0.4116, -2.2282, -1.2464, -1.3411,
        -0.8754, -3.5297, -3.3354, -7.0758, -1.9577, -3.0454, -3.3317, -3.6048,
        -3.5571, -0.3816, -5.6056, -0.2560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1981,  -1.1249,  -6.9703,  -1.1296,   0.4959,  -6.1054,  -2.2881,
         -2.0490,  -3.6918,  -4.9549,   2.3937,  -2.6487,  -2.2096,  -3.2930,
         -8.9278,  -7.4687,  -8.3337,  -1.8792, -12.5112,   2.4214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3414,  -2.2289, -32.2577,  -4.6958,  -5.3659,  -7.2556,  -0.8177,
         -7.8408,   1.6144,  -4.2745,  -4.9354,  -4.6558,  -5.7142,  -5.8999,
         -1.8347,  -5.0309,  -3.1258,  -6.5345,  -7.2524,  -4.4434],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1132,  -2.5951,   1.7778, -17.8601,  -1.8148,  -2.9493,  -6.3633,
         -2.5133,   2.1516,  -3.1777,  -2.4639,  -9.1361,  -5.9764,  -5.2902,
         -6.3981,  -3.4968,  -3.3456,   1.7278, -19.8828,  -2.6695],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0561, -2.3677, -3.5643, -1.3065,  2.5987, -6.6074, -2.8698, -3.0440,
        -2.3060, -0.8910,  3.0680, -2.3925, -1.1639, -1.6880, -3.1047, -4.9664,
         2.0624, -3.0497, -1.6615, -0.9312], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2649, -7.1469,  0.1559, -3.2039, -4.0431, -1.1795, -0.9567, -1.6439,
        -5.9117,  2.0361, -6.7976, -5.5065, -1.8498, -2.6456, -5.1868, -0.5582,
         1.2714, -2.4109, -1.0534, -1.3323], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4614, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2916, -0.4399, -3.2069, -1.8386,  1.2455, -3.1269, -0.8512, -4.0451,
        -3.2329, -4.3538, -1.5873,  0.0361, -3.0024, -1.9906, -1.0076, -4.5590,
        -0.2334,  1.9476, -6.4174, -0.6524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4282,  -2.8538,  -4.8411,  -3.1109,  -5.5431,   0.3114,  -3.9885,
         -2.1946, -10.0505,  -5.9509,  -5.1885,  -6.6381,  -3.7324,  -2.6679,
         -1.4257,  -3.3951,  -2.9350,  -4.6349,  -8.7431,  -4.2627],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3129, -4.0348, -2.1165,  1.9069, -2.1106, -2.2806, -1.7713, -4.2845,
        -1.8796,  0.3578, -3.4576, -0.6357, -2.7409, -2.4328, -4.7047,  0.3345,
         0.6572, -2.5124, -1.2055, -2.4897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4266, -1.9999, -0.4557,  2.6510, -2.1343, -1.1106, -1.3853, -2.9757,
        -3.8823,  2.6775, -4.3734, -1.9321, -3.6522, -5.6422, -2.3989,  0.5657,
        -5.1712, -3.3309, -3.3913, -3.0164], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6636,  2.2292, -4.3957, -2.0912, -2.8411, -2.3833, -2.3863, -0.5488,
        -0.7734, -2.6843, -1.7031, -5.6940, -1.5827,  1.4842, -4.5906, -0.2815,
        -2.6127, -0.8603, -5.5167,  0.7863], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8555, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1964,  -2.6002,  -1.5060,  -5.0637,  -1.6799,  -0.2199,  -4.8095,
         -2.0177,  -3.9410,  -4.5716,  -3.9793,  -2.1536,  -4.3390,  -4.2213,
         -2.1586,  -2.3469,  -1.7597,  -1.0227,   1.3363, -10.5777],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1537, -2.1681, -3.7667, -1.9941,  2.0695, -2.5986, -3.0055, -1.5292,
        -2.0746, -5.8608, -2.9572,  0.9857, -2.9252, -1.0238, -5.4227, -5.4631,
        -2.1258,  1.9267, -3.4038, -0.0684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9169,  -3.1876,   2.4928, -14.0415,  -3.8664,  -2.2506, -10.0738,
         -4.3460,   0.4870,  -2.5421,  -2.8998, -24.5520,  -5.5070,  -5.2128,
         -7.4885,  -4.4510,  -2.3844,   2.2655, -17.1710,  -1.6309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3771, -4.6816, -2.7286, -7.2180, -2.6217, -1.2135,  1.8507, -5.1492,
        -3.4124, -3.1579, -1.6658, -5.4760, -2.4342, -0.9587, -3.6028, -1.6300,
        -3.7706, -3.7056, -5.6826, -0.3429], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8048, -2.6381, -1.7782, -1.5072,  3.0181, -2.3653, -4.1821, -2.7714,
        -4.2284, -4.2428, -3.0648, -4.6380, -6.4639, -6.6348, -3.0937, -1.2197,
        -4.2377, -0.4267, -0.1387, -6.2014], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3696, -5.2135, -1.8988, -4.8674, -3.1419, -5.9767, -1.4687, -3.2995,
        -3.1980, -3.4159, -1.6035, -3.4951, -1.8319,  0.5956, -3.6105, -1.9258,
        -2.4841, -2.9803, -2.1880,  3.0136], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8349, -2.9462, -1.7424, -4.5039, -0.9409,  3.0475, -5.2557, -2.2588,
        -3.2268, -0.3192, -2.3279,  2.7037, -3.6751, -2.1852, -2.7700, -0.5908,
        -5.6596, -0.4452,  0.1873, -3.1487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1157,  0.1000,  0.5461, -2.9647, -0.4753, -3.5419, -0.3181, -9.0476,
        -0.6707, -3.1585, -2.9639, -1.4752, -4.2939, -2.0214, -9.1041,  0.2349,
         1.0346, -3.2567, -1.8352, -2.6741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4326,  -7.1509,  -3.7631,  -5.4923,  -4.5548,  -5.9015,  -1.5827,
         -4.0512,  -2.7303,  -3.4240,  -3.7095, -12.2399,  -2.8822,  -6.8679,
         -6.3955,  -2.0799,  -2.0419,  -2.8384,  -2.6587,  -7.8765],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6509, -5.4542, -1.3462,  0.8568, -2.9408, -0.9630, -4.1791, -5.0217,
        -0.7097,  1.6963, -1.3277, -1.0483, -1.6210, -4.9642, -1.3725, -3.5564,
        -4.9547, -2.0864, -3.9844, -3.4627], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4942,  -6.0293,   1.3368,  -3.0280,  -3.3429,  -4.3064,  -6.3237,
         -2.4982,  -1.8266,  -3.2021,  -2.0556, -11.3308,  -2.2137,  -6.5460,
         -1.1906,  -2.9464,   2.0400,  -5.6147,  -1.7070,  -4.6443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3812, -3.9525, -0.6015, -6.4982, -0.3878, -0.6394, -4.8792, -2.0981,
        -4.5545, -4.8635, -8.4928, -2.7654, -1.9229, -5.5561, -3.5545, -1.6566,
        -5.1186, -0.6988,  1.4560, -3.1051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4319,  -5.9223,  -2.2237,   1.2900,  -4.3525,  -3.8971,  -1.3815,
         -8.2373,  -0.5603,   2.6539,  -1.8137,  -1.0349, -21.7312,  -5.7120,
         -8.1672,  -3.1862,  -0.5226,  -0.7668,  -5.1499,  -1.4181],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7294, -2.0387, -5.4263, -2.5297,  1.3696, -5.2890, -1.5754, -2.6075,
        -1.9522, -6.1334, -0.5882, -0.3480, -9.8691, -1.8688, -2.3793, -3.6035,
        -2.7838, -3.2970, -3.7642, -4.7049], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8737, -1.6799,  2.1761, -1.6596, -1.8814, -2.7868, -5.3013, -2.9431,
         1.3254, -3.6684, -4.0416, -2.0258, -1.0385, -3.6508,  2.4344, -2.0753,
        -3.2625, -4.8922, -1.8445, -5.3272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2508, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6262, -2.7130, -6.4509, -1.3921, -3.9492,  2.5923, -3.1875, -2.2662,
        -3.1174, -2.1574, -5.3829,  0.0744,  2.2884, -2.9201, -0.3180, -0.7120,
        -3.6597, -1.0500,  3.1493, -3.2057], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6832,  -4.1092,  -4.6917,  -1.5203,   0.9677,  -5.7566,  -3.6310,
         -2.9397,  -1.8009,  -6.9866,  -2.2833,   0.1970,  -3.3629,  -2.1384,
         -1.9308,  -0.8321,  -3.3286,   2.4081, -17.4015,  -3.1416],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1483, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1201,  -5.7700,  -1.7363,  -2.4380,  -7.4577,  -0.8460,   2.9286,
         -1.8265,  -1.7370,  -2.2049,  -2.3640,  -2.8495,   2.0660,  -3.2254,
         -2.4373, -15.8886,  -5.9526,  -9.0965,  -0.3975,  -3.6441],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5513, -1.2100, -4.7635,  1.7185, -5.1510, -1.0466, -2.1589, -2.8206,
        -1.4646,  1.2859, -6.2378, -1.0449, -3.0242, -2.1845, -6.6305, -1.7761,
        -1.0512, -4.5080, -1.1687, -1.8657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2375,  -9.6351,  -7.3134, -21.9561,  -4.8839,  -5.2777,  -3.1235,
          2.9293,  -2.6878,  -1.2008,  -2.9906,  -1.9794,  -4.6232,  -1.7620,
         -0.0433,  -2.6454,  -1.3648,  -3.2625,  -1.7084,  -7.2328],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4014,  -3.9774,  -4.0199, -11.0355,  -3.2052,  -5.7458,  -5.2617,
         -7.1803,  -1.4464,  -5.1909,   1.4983,  -9.1926,  -1.6725,  -4.0152,
         -6.5517,  -3.8243,  -1.3986,  -4.8594,  -4.6950, -36.2713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3061,  -8.0074,   2.0720,  -9.8202,  -1.4990,  -2.5687,  -4.6527,
         -3.3720,  -0.4631,  -5.7471,  -6.5791, -24.6858,  -5.1851,  -9.5807,
         -8.9678,  -6.3439,  -4.2835,  -6.1655,  -1.5063, -17.6419],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9130,  -3.8341,  -1.6176,  -5.0766,  -1.8860,   1.3720,  -4.9380,
         -2.2056,  -1.6505,  -1.5280,  -8.8884,   0.5843,  -3.9587,  -3.6303,
         -6.5601,  -5.4570, -10.0561,  -6.3476,  -6.7706,  -5.5067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.8057, -7.5476, -2.6438, -3.1529, -0.3504, -2.4165,  1.8842, -2.5762,
        -2.7371, -2.3138, -1.2028, -8.2891, -0.0435,  0.0300, -1.7676, -1.3839,
        -1.0762, -3.3205, -0.8825,  1.6419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7461, -2.8551, -2.8004, -0.4104, -3.0779, -0.4921, -6.0417,  0.0399,
         0.0909, -2.2775, -2.2096, -0.9277, -3.3003, -2.8835,  2.7798, -6.1947,
        -1.8763, -2.1773, -3.8885, -4.7818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1488,  -4.4992,   2.3003,  -5.1749,  -2.1386,  -1.2893,  -3.0230,
         -5.2112,   1.2001,  -3.1379,  -1.5870,  -4.6664,  -2.2306,  -8.1879,
         -2.6780,  -1.4488,  -2.4969,  -5.3321, -10.0889,  -3.3642],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6051, -6.3580, -1.0233, -5.0599,  0.1486,  0.6980, -4.8465, -1.5512,
        -2.5462, -5.1726, -1.5718,  0.2424, -3.0450, -0.2882, -3.1921, -2.5248,
        -7.0316, -1.9554,  1.6914, -3.8382], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3544, -3.3115, -1.3499, -3.9945, -5.6985, -1.7860,  1.5766, -4.6868,
        -0.6354, -2.1427, -3.3234, -1.6103, -4.3643, -1.8141, -3.3055, -1.2685,
        -5.9300, -0.5605,  2.5638, -3.4544], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9048, -6.5351, -6.0658, -1.3220, -4.3929, -0.1857, -3.4955, -0.7942,
        -2.6383, -4.9071, -2.6601,  0.3013, -3.8437, -2.2432, -7.2643, -6.3786,
        -2.6432, -6.5578, -1.3245, -4.5630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1417, -2.0276, -3.1854, -3.2653, -3.8789, -2.8362,  1.3098, -4.1293,
        -1.0664, -5.5112, -0.4706, -9.3325,  0.5488,  0.0437, -5.7001, -1.8922,
        -2.0571, -2.1204, -5.9469,  1.9687], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1527, -2.5859, -2.0289, -5.6045, -1.0562,  1.7220, -3.6451, -0.5827,
        -1.7483, -1.5193, -5.8289,  2.4290, -6.1657, -1.9326, -2.9995, -1.6963,
        -5.4662, -1.5483,  1.1094, -5.3528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9092,  -3.7007,  -0.3889,  -1.9164,  -0.5344,  -3.9803,  -5.5931,
         -1.8825,  -5.4856,  -4.2849,  -2.0292, -16.0690,  -3.0636,  -6.9978,
         -1.6526,  -4.4159,   2.3335,  -7.8072,  -1.3149,  -2.9172],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3548,  -1.6008,  -2.2250,  -2.1406,  -1.3671,  -0.4174,  -3.4754,
         -0.3628,   3.0648,  -2.2133,  -0.7984,  -1.8824,  -2.0661,  -5.3916,
         -1.3703,   0.3589,  -1.2620,  -1.1160, -23.1783,  -9.6194],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3496, -1.9293, -1.0128,  2.8417, -1.3808, -0.9388, -1.7778, -5.4267,
        -2.0145,  0.5662, -6.4051, -0.5508, -4.8979, -4.7803, -3.5202,  0.0387,
        -3.5385, -1.5079, -1.3022, -5.2726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8365, -6.0999, -1.3649, -1.2182,  1.5390, -3.5587, -1.7792, -2.5652,
        -1.5632, -3.8736,  0.2486, -3.7586, -3.8020, -1.7888, -1.7029, -2.3763,
        -4.4210,  1.3483, -3.0149, -0.6660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1243, -3.4167,  2.4261, -1.9248, -1.4400, -2.9254, -1.3574, -5.6149,
        -2.0567, -0.5048, -2.5496, -0.7803, -1.5399, -4.1847, -1.4354,  0.8634,
        -2.8913, -1.3948, -2.5135,  0.2068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6166,   2.9954,  -2.0882,  -4.6911,  -1.8648,  -3.4945,  -3.3067,
         -0.4823,   1.4764,  -2.7243,  -1.9475,  -3.8752,   0.2363, -11.0159,
         -0.4919,  -2.4968,  -4.4906,  -1.5219,  -2.2504,  -1.0958],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2263, -3.5802, -3.2117, -5.3925, -0.5654,  0.2957, -2.4089, -1.3458,
        -1.8253, -5.2387,  0.0819,  0.7318, -3.4407, -1.2839, -4.0436, -1.8865,
        -3.9979, -0.8343,  1.7142, -2.4661], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3100, -4.0193, -2.8028,  0.8625, -2.3168, -2.5752, -3.3170, -5.7653,
        -2.1097,  1.1206, -3.4558, -3.3898, -4.7744, -4.5504, -3.6286,  0.4409,
        -2.3304, -4.7317, -2.4886, -3.6957], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1567, -3.6308, -1.5567, -1.4238, -2.9356, -1.5586,  1.9893, -4.4621,
        -0.7303, -3.1016, -0.6555, -5.8483, -1.4685,  0.7563, -2.3373, -2.3327,
        -4.0506, -5.6515, -2.2270,  0.2269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8538,  0.0451, -7.5381, -3.3635, -3.8115, -1.4924, -3.8546, -2.0297,
         1.5509, -2.7011, -0.9391, -1.2550, -1.1130, -1.4665,  2.5950, -2.4934,
        -0.9918, -1.9727, -1.7684, -8.0075], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7889, -0.6350, -3.2661, -3.8586, -0.7604,  2.2530, -5.4746, -1.7526,
        -4.0184, -0.9304, -5.2079, -1.2610,  0.1573, -3.5443, -1.6142, -1.1503,
        -3.2578, -1.7042,  1.9081, -2.7000], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.2959,  -4.5595,  -2.5238,  -5.2865,  -2.3404,  -2.7332, -31.3701,
        -21.5083,  -2.3516,  -2.9302,  -7.6899,  -1.5858,  -2.6913,  -3.6270,
         -3.5104,  -7.5904,  -5.2537,  -3.2710,  -5.6148,  -5.9313],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0695, -0.4276, -2.0384, -4.5752, -0.7924,  2.3984, -2.7902, -1.1684,
        -2.3769, -1.8940, -3.3352, -3.7994, -3.2117, -3.8255, -1.6491, -1.0917,
        -2.2185, -5.4479,  2.8232, -5.4062], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8559,  -0.7148,  -4.1020,  -2.7593,  -3.4044,   2.5693,  -1.8091,
         -2.6666, -17.3537,  -2.4814,  -8.9243,  -2.4732,  -5.1183,   1.5082,
         -4.1286,  -8.1545,  -3.8332,  -5.4570,  -1.6089,  -3.7455],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6950, -2.9777, -3.0967, -4.6156, -2.9291, -4.5913, -4.1960, -2.7115,
        -9.8571, -6.7690, -4.2532, -7.0858, -2.7078, -4.7232,  1.9329, -3.4750,
        -1.6957, -2.8735, -1.5674, -3.3151], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2086,  -0.7503,  -3.0944,  -1.7189,  -2.3586,  -1.4460,  -4.1560,
          1.7962,  -3.2531,  -4.3414,  -3.6397, -13.2961,  -5.2997,  -1.8081,
         -2.7294,  -1.5206,  -1.5000,  -2.7451,  -7.1735,  -0.0598],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2086,   2.2650,  -2.7664,  -4.2661,  -9.7984, -11.1986,  -6.2072,
         -5.8453,  -3.9883,  -5.5767,   2.3456,  -5.7158,  -2.4763,  -2.8691,
         -1.3079,  -4.7566,   0.0957,   0.7004,  -2.4997,  -2.1036],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7790,  -1.4441,   0.9307,  -2.4679,  -0.9065,  -1.8088,  -2.6633,
          0.1330,   2.2431,  -8.6052,  -2.3518,  -9.1315,  -5.7409,  -0.5139,
         -7.4593,  -1.4485, -15.0717,  -5.3232,  -5.5873,  -6.9425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7363,  -1.3969,  -0.3852,  -4.8819,  -0.0177,   0.2001,  -3.5999,
         -1.8174, -13.1112,  -4.6722,  -8.0675,  -1.0640,  -1.0245,   1.9807,
        -15.4779,  -1.5873,  -0.8027,  -4.1962,  -2.4080,   2.7968],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3355, -1.4492, -2.4741, -2.0592, -4.7364, -1.1869,  0.4336, -3.2539,
        -0.2325, -0.9969, -3.9331, -0.4168,  1.7199, -3.2241, -2.8682, -4.9936,
        -6.3406, -3.7683, -5.0995, -4.5988], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0371, -3.2995, -1.9320, -5.1196, -1.7042,  1.5853, -4.4101, -0.4807,
        -3.2928, -4.7295, -1.7367,  1.6870, -1.6971,  0.0705, -1.0689, -1.5063,
        -2.1251,  2.9557, -4.8340, -1.9330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8304, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0066, -1.3980, -5.3774,  0.2890,  0.0135, -1.6450, -1.1211, -0.9232,
        -4.8234, -0.8453,  1.1429, -7.8341, -0.8681, -3.6435, -2.7109, -1.3248,
         2.4264, -4.2969, -2.3554, -1.9430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7644, -4.0116, -5.2781, -3.1080, -2.0782, -0.9498, -3.8414, -1.7858,
         0.9410, -5.1184, -0.4899, -2.8517, -4.4767, -0.3737,  2.3291, -5.4728,
        -2.7002, -2.2664, -7.4173, -2.3418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1683,  -4.1981,  -1.3695,  -4.7179,  -1.3823,  -3.9337,  -6.6669,
         -2.7186,  -4.0907,  -3.8150,  -4.7763,  -2.2938,  -0.2448,  -3.8361,
         -1.1961,  -2.6701,  -1.1343,  -5.1226,   0.3613, -10.7876],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3601, -4.3032, -2.0918, -2.8904, -1.3473, -0.6262, -3.1279, -2.8226,
        -1.7740, -4.5578, -0.1313,  2.6078, -2.7562, -2.5192, -2.9168, -4.8529,
        -1.6612,  0.9082, -4.2876, -0.7919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0682,  -5.5432,  -1.2752,  -9.0055,   1.4050,  -6.9737,  -3.2989,
         -3.4813,  -6.2959,  -3.7668,   0.4840,  -7.2340, -11.3356, -35.4695,
         -4.1551,  -6.2369,  -8.4398,  -7.2700,  -5.8996,  -2.9978],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7385, -15.2321,  -8.7580, -17.1898,  -3.2541,  -4.0966,   0.5026,
         -4.3943,  -2.6463,  -3.2069,  -0.0413,  -5.4789,   0.1235,   0.3519,
         -3.0321,  -1.5900,  -5.1260,  -4.1848,  -1.6686,  -2.8699],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3265, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1603,  0.0908, -0.0494, -2.6573, -2.1743, -1.3955, -2.2169, -1.1017,
        -0.2007, -1.8392, -1.3294, -4.3146, -0.8224, -5.1107, -2.2538,  0.8373,
        -3.4751, -1.2289, -4.5022, -4.1050], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6166, -4.8949, -0.9403,  2.6339, -1.5436, -1.0900, -0.6047, -3.4208,
        -0.3044,  1.9233, -1.9324, -0.6792, -2.1797, -1.7879, -5.7145, -0.6293,
         0.7629, -3.2144, -0.9291, -2.3499], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8988,  -6.3379,  -2.6337,   0.7586,  -5.7189,  -6.0194,  -3.2511,
         -4.9349,  -5.8540,  -2.9787,  -1.6697,  -6.0278, -11.4227,  -1.3956,
         -2.5194,  -1.4893,   3.2178,  -6.0131,  -1.6306,  -2.3039],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5062, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0721,  0.1790,  0.6729, -2.6214, -0.1211, -1.0642, -0.9031, -1.8282,
         2.9310, -1.7426, -3.3968, -1.0524, -5.4147, -1.3677,  3.0983, -1.1291,
        -1.0050, -3.1381,  0.6464, -6.6175], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2266, -5.1541, -2.2501,  1.1296, -4.1521, -2.3002, -1.0299, -4.1936,
        -0.4420,  0.0538, -4.9598, -2.0247, -2.2837, -2.4013, -4.6478,  2.9676,
        -4.5872, -2.4171, -1.5923, -5.6810], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3615,  -2.6360,  -2.7751,  -0.0195,   1.9673,  -2.3294,   0.4445,
         -3.3352,  -0.6278,  -2.9206,   0.5917,  -2.3168,  -3.2515, -16.1426,
         -7.8029,  -2.0085,  -6.4585,  -4.5027,  -1.9187,   0.5721],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.2335,  -1.8102,  -4.7220,  -6.5120,  -3.6117, -24.0958,  -5.4021,
         -1.4221, -12.3767,  -4.3282, -11.5752,  -2.9586,  -1.1778,  -9.7056,
         -3.4261,  -2.4265,  -2.6733,  -5.2594,  -0.2717,   0.6391],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4075, -1.5018, -2.2382, -2.3152, -4.0609, -1.4030,  0.9503, -3.4292,
        -0.8485, -3.9712, -3.9768, -2.4738,  1.9560, -5.5999, -1.6823, -1.7507,
        -2.1074, -1.2960,  2.7437, -2.5573], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0395,   2.4331,  -4.0084,  -0.6420,  -2.4739,  -2.3975,  -0.7452,
          2.3156,  -1.5602,  -1.0754,  -3.2149,  -5.1318,  -1.0006,   3.4008,
         -3.8426,  -2.2611, -17.8038,  -2.4946,  -9.2144,  -1.5772],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7896,  -3.0275,  -8.4481,  -0.5606,  -1.1840, -17.0253,  -5.6256,
         -5.6860,  -8.4987, -25.9150,  -3.0346,  -5.3511,  -3.3422, -46.2901,
         -4.5672,  -0.8473,  -4.9928,  -7.8454,  -1.1718, -11.1111],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0887, -3.8396, -0.8671,  2.4323, -2.8806, -1.0835, -1.6686, -0.6954,
        -4.7473,  1.6037, -9.5725, -1.2241, -3.1935, -1.3012, -4.5027,  0.6401,
         1.6492, -3.7104, -1.9321, -3.9629], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8137, -3.0853, -1.1618, -7.4645, -1.5447, -0.2930, -4.2403, -0.7610,
        -0.7648, -4.7056, -1.4143,  3.0469, -3.7919, -0.9552, -9.6419, -9.7205,
        -4.5958, -5.7477, -1.6210, -1.6192], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7791, -0.6888,  3.0059, -2.5565, -0.5841, -2.7118, -0.0057, -4.1273,
         0.9911, -2.4455, -3.3688, -0.4694, -2.4009, -1.5002, -1.3483,  2.7198,
        -4.7629, -2.4159, -0.2518, -0.9868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2464, -4.1872, -6.7150, -2.4640, -8.2187, -0.7293, -1.4915, -4.1527,
        -3.8249, -2.0692, -1.5236, -5.0752,  0.3423,  0.2945, -1.6280, -1.2919,
        -2.5372, -2.7081, -2.2816,  3.3851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3521, -0.0672,  2.8707, -2.2906, -1.3832, -5.5520, -3.4017, -2.0968,
        -2.0815, -6.2180, -3.3254, -2.0395, -4.2349, -0.9261,  2.4825, -2.6133,
         0.0929, -1.5559, -1.4660, -3.8443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6301, -2.0917, -0.0897, -6.3437,  0.6134,  1.6375, -2.4842, -0.8680,
        -3.7366, -1.8585, -6.8423, -9.0548, -2.8735, -1.3915, -3.7491, -0.4681,
        -4.9282,  0.4662,  1.1926, -2.0106], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1850, -3.0964,  2.6753, -9.5054, -1.0626, -2.6568, -1.7753, -4.3363,
         1.8977, -2.7915, -0.9196, -3.6244,  0.2620, -4.2984, -0.4411, -0.3983,
        -3.6377, -1.0674, -2.8115, -1.5262], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9173,  -2.4715,   0.2463,  -3.4323,  -2.2116,  -4.0778,  -2.5864,
        -11.4138,  -4.4463,  -1.0676,  -2.9000,  -4.9213,  -1.6907,  -4.7632,
          0.8126,   2.8079,  -5.8505,  -2.6376,  -2.3661,  -1.9879],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4410,  0.7076, -1.8404, -1.4537, -0.2024,  2.5489, -1.1680, -2.4522,
        -2.7278, -5.1114, -1.1421, -1.1978, -1.6989, -1.3914, -1.2663, -0.2466,
        -4.4178,  1.4050, -4.3176, -4.3403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0651,  -6.3265,  -3.1243,  -6.0025,  -4.8990,  -1.9085,   2.0305,
         -4.0932,  -3.6456,  -3.0715,  -6.6408,  -5.3228,  -8.2523,  -7.9325,
         -2.1555, -20.9703,  -6.2777,  -8.9425,  -6.3889,  -3.6929],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3604, -4.0519, -3.1178, -2.2764, -4.8573, -0.8660,  1.0865, -1.7778,
        -1.5767, -0.9663, -2.6420, -0.7989,  2.1164, -4.4962, -1.9806, -3.3676,
        -3.1390, -7.9240, -2.9240,  0.1684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7204e+00, -6.4931e+00, -3.1987e+00, -4.1523e+00,  1.3517e+00,
        -3.3582e+00, -2.7968e+00, -4.1899e+00, -5.2927e+00, -4.7590e+00,
        -1.2604e+00, -4.3188e+00, -3.0409e+00, -3.6973e+00, -1.0046e+01,
        -4.4808e+00,  3.8258e-03, -4.0678e+00, -1.1808e+00, -3.0159e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4954,  -0.9446,  -0.5735,  -2.5991,  -2.9891,  -4.3102,  -3.1803,
         -3.9283,  -0.9921,   1.5823,  -1.8282,  -1.0666,  -3.7045, -13.6049,
         -4.9046,  -2.0848,  -0.0374,  -5.2501,  -1.7289,  -4.6245],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6815, -4.4436,  0.2949,  0.2514, -2.5717, -0.6922, -2.8835, -2.4037,
        -1.1515,  2.7165, -3.8479, -1.4187, -1.8252, -1.9480, -2.4144,  2.6792,
        -4.7179, -1.1890, -2.6215, -2.3412], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3583, -1.3086, -2.5888, -0.8243, -4.1905,  0.1226,  2.1013, -5.3455,
        -0.6557, -1.0907, -1.0054, -2.4153,  1.5979, -0.5593, -5.5504, -1.0191,
        -1.6643, -4.9326, -0.4232,  2.3231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2525, -2.5867, -5.3842, -1.1236,  2.0496, -3.4716,  0.0630, -2.1597,
        -1.5383, -5.5592,  0.2386,  0.5265, -1.7720, -1.5958, -1.5557, -3.7651,
        -1.1013,  2.3659, -6.7939, -3.2378], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6346,  -7.1861,  -4.8490,  -6.1727,  -3.9027,  -5.0928,  -7.1357,
         -5.1698,  -5.4196,  -4.3456,  -8.7768,  -1.4987,  -3.5164,  -5.1151,
         -5.5778, -10.5250,  -7.4590,  -5.0382,  -4.0737,  -5.9906],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5740, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4714,  -3.0611,  -4.8716,  -1.5319,  -1.2355,  -1.5354,  -1.0943,
          2.6717,  -2.6909,  -2.7244,  -5.7997,  -0.9748,  -6.3058,   0.1165,
         -0.7288,  -6.8034,  -7.8381, -12.4762,  -2.5835, -11.4731],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2432,  -6.3317,  -4.1378, -11.0551,  -8.3537,  -7.1958,  -5.2355,
         -5.6893,  -2.6430,  -5.6538,   1.0609,  -5.1045,  -4.8640,  -2.6528,
         -4.0145,  -4.8569,  -1.6288,   1.2315,  -2.7099,  -1.5850],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4087e+00, -4.9405e-01, -2.9984e+00, -1.2055e+00,  4.7181e-01,
        -4.1515e+00, -3.7923e-03,  7.2541e-01, -2.9342e+00, -3.2537e+00,
        -4.6169e+00, -4.5562e+00, -7.9271e+00,  2.4177e-01, -1.8325e+00,
        -8.8503e-01, -3.2320e+00, -3.8393e+00, -3.0632e+00,  2.0797e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5455,  1.8585, -2.2138, -0.2664, -2.9799, -1.0643, -6.0769,  0.3876,
         0.4914, -3.7838, -0.0743, -2.5276, -4.4899, -4.4329, -1.4760,  1.1051,
        -2.8511, -0.1503, -1.7214, -3.0289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1428,  -6.5079,  -1.8977,  -2.8595,   2.0595, -24.0676,  -2.2047,
         -8.5499,  -6.6411,  -2.2839,  -3.0671,  -4.2269,  -8.3031, -16.6440,
         -3.9391,  -4.0919,  -5.7989,  -5.5937,  -5.6143,  -1.2422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8826, -22.0849,  -2.0811,  -2.6523, -10.0935, -11.1320, -35.4306,
         -4.1089, -36.1282, -12.5425,  -4.2298,  -6.6133,  -2.0028,  -4.5120,
         -2.9692,  -5.0001,  -1.7171,  -1.6784,  -2.6407,  -2.9378],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2756, -10.3259,   1.4097,  -1.4708,  -2.3755,  -1.8618,  -3.6186,
         -0.8024,   2.9440,  -0.9173,  -0.7786,  -1.3653,  -5.8528,  -1.7228,
          2.0768,  -3.2283,  -2.8527,  -3.6373,  -4.6985, -10.1675],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9414, -4.9169,  2.3199, -5.2825, -2.4992, -3.2270, -2.2390, -4.0778,
        -0.8496, -2.3824, -3.7819, -0.8906, -3.3475, -1.9749, -0.5815,  3.5438,
        -1.4135, -4.3715, -1.7553, -2.6659], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0815, -5.2662, -4.0236, -2.4312, -4.4219, -0.8300,  3.5744, -5.0960,
        -7.2819, -3.8062, -2.6981, -2.7441, -2.7167,  2.1584, -3.2520, -2.6306,
        -1.0554, -3.0228, -1.4892,  2.2900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3838, -0.2456, -6.2388, -1.4489, -4.6259, -2.1818, -6.2941, -0.8022,
         0.3330, -3.6757, -1.6325, -1.8971, -2.0127, -3.6902,  2.1203, -7.4924,
        -1.5541, -5.3198, -1.8723, -5.7394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9146, -1.4997, -2.5686, -0.6101, -4.8844, -0.3752,  1.7955, -5.7538,
        -1.4915, -1.6770, -0.4408, -4.4928, -1.0507, -2.3218, -2.4853, -1.5217,
        -2.9223, -3.0018, -3.9072,  2.2493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2719,  -3.3442,   1.7138,  -6.7102,  -3.5271, -16.7542,  -4.9045,
         -5.4278,  -2.8663,  -2.5565,   1.8259,  -8.8283,  -5.0539,  -1.9814,
         -2.5918,  -5.8492,  -3.8645,  -3.2578,  -5.8991,  -2.8300],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8773,  3.5071, -3.2266, -1.9758, -2.3160, -4.0519, -8.0681, -1.7480,
         0.0240, -2.3372, -0.7706, -3.0930, -2.3112, -3.1562,  2.2740, -5.6947,
        -1.5409, -0.4161, -7.2830, -1.7289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3660,  2.6409, -3.2812, -2.4781, -3.3011, -0.9802, -4.5676,  0.6129,
         0.6204, -3.3488, -0.0920, -1.9191, -1.9514, -1.0796,  3.4221, -3.3424,
        -2.8698, -3.3428, -4.4798, -2.7095], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1868, -1.5436, -8.5447, -6.0653, -2.8052, -5.9523, -4.3865, -0.9314,
        -1.7579, -3.7422, -4.4028, -2.4485, -2.2796, -6.0994, -2.2137, -0.4629,
        -2.3373, -0.1888, -1.4418, -1.3072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.4555, -7.3667, -1.8462, -3.4118,  2.0617, -6.1243, -2.7135, -2.2810,
        -1.4033, -8.2705, -0.0222,  0.1555, -2.0964, -0.5021, -2.2785, -0.8632,
        -4.8804, -0.0233,  1.4830, -2.9684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6404, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7337, -0.5251, -3.0634, -6.5571, -1.0075,  0.9875, -6.5113, -3.7046,
        -1.9580, -4.6775, -2.0942,  0.9173, -5.7783, -0.2534, -2.2466, -6.5177,
        -1.5746,  0.4817, -2.4971, -0.2067], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1539, -3.1278, -0.9548, -1.9561, -1.2770, -1.5407,  3.4377, -3.5713,
        -2.0614, -2.9703, -2.7868, -9.7745, -4.5817, -0.6951, -2.5488, -1.3941,
        -2.5939, -2.3270,  0.2057, -0.3665], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9983,  -0.1737,  -6.5059,  -0.7668,  -0.6360,  -4.6113,  -0.7912,
         -2.5240,  -1.5784,  -6.9981,   3.3554, -10.2791,  -1.3180,  -1.5965,
          0.1803,  -6.5256,   2.2310,  -3.1765,  -6.5477,  -4.3179],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5390, -6.7503, -3.7735, -3.1957, -3.1682, -0.6644, -1.5531, -0.6771,
        -2.0370, -3.3335, -1.8896,  1.7953, -2.8871, -1.2427, -3.3860, -4.0195,
        -1.9597,  1.7984, -3.7076, -3.4254], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5977, -7.0133, -1.5975, -5.8834, -0.9888, -2.4390, -4.9997,  1.6997,
        -5.3714, -5.3060, -1.9509, -4.2422, -5.3445, -5.3586, -0.4589, -0.4490,
        -2.8481, -1.2367, -1.6976, -8.5863], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0002,  -1.6789,  -5.1778,  -0.3261,  -3.6973,  -6.1017,  -2.2419,
         -1.7256,  -4.4522,  -3.0592, -13.0600,  -6.8909,  -8.6471,  -6.1857,
         -1.0501,  -5.1691,   3.1451,  -7.1453,  -1.1448,  -1.9634],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3047,  -3.3159,   0.0574,  -0.3786,  -2.6298,  -0.9801,  -1.3038,
         -1.7527,  -0.8513,  -0.1939,  -2.6325,  -0.2807,   3.0020,  -3.2388,
         -1.9414, -13.0801,  -5.1921,  -6.1219,  -5.8949,  -2.4175],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5218, -2.7296, -6.5320, -2.1826, -1.4118, -4.5821, -2.1740, -1.3250,
        -6.0725, -0.0675,  2.0837, -2.9613, -0.9480, -0.4243, -4.5894,  0.0804,
         1.6351, -2.8497, -1.1994, -1.7506], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4635, -0.8034, -1.7737, -3.4634, -2.0002,  3.0722, -5.5987, -2.7238,
        -2.5939, -2.8749, -5.8459, -2.2098,  0.0088, -2.8874, -2.6319, -1.3115,
        -2.7825, -4.5969,  3.1655, -3.1885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6140, -1.2609,  0.8512, -2.4307, -2.5772, -3.0840, -1.4588, -3.1908,
         1.3477, -1.9832, -2.6106, -1.8957, -0.6678, -3.2084,  2.1067, -7.5196,
        -1.7629, -4.8324, -4.1721, -3.5067], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8384,  -5.2567,  -0.5369,   1.0377,  -2.5089,  -2.3186,  -2.3762,
         -0.7278,  -6.7181,  -3.0942,  -0.2162,  -9.3793,  -2.2068,  -2.8821,
         -5.3655,  -2.1699,  -3.0612,  -4.2238,  -2.6169, -19.9643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4066,  -1.1871,  -4.8717,  -2.4184,  -1.5318,  -5.8552,  -4.3801,
         -3.0638,  -2.1263, -10.3259,  -0.4407,  -0.6659, -10.4767,  -1.6119,
         -1.3103,  -3.4095,  -3.1465,  -1.0627, -17.7567,  -1.4623],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9680,  -2.3318,  -3.9665,  -2.2903,  -0.5363,  -6.3122,  -0.7353,
        -13.2827,  -4.7825,  -4.8537,  -7.2297,  -9.8258,  -5.6007,  -1.8291,
          1.5110, -11.8762,  -4.4988,  -4.1521,  -3.0323,  -6.1026],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8571, -0.4827, -6.4942,  0.3869, -1.2823, -2.2955, -1.7816, -3.1360,
        -1.0241, -8.3520, -1.7223, -1.1592, -2.3725, -3.5620, -1.9693, -1.6621,
        -6.8872, -1.0198,  0.4922, -4.2924], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8508,  -2.2743,  -0.6133,  -4.5501,  -0.5876, -27.9709,  -8.3336,
         -8.7416, -12.4909,  -5.6884,  -6.1053,  -1.4471,  -3.2461,   1.7930,
         -9.4656,  -1.3329,  -2.2046,  -4.2555,  -2.6509,   2.7103],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7256, -1.6418, -4.1640, -6.4270, -3.4005,  0.2827, -3.5395, -2.4476,
        -3.2084, -1.2806, -5.6336,  0.1986, -0.6560, -2.9682, -1.7862, -2.8125,
        -6.6470, -1.7583,  1.8839, -3.3374], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6860,  -1.3918,  -4.9704,  -1.4926,  -7.2845,   0.1042,  -0.8473,
         -5.9230,  -1.8541,  -2.5053,  -6.5762,  -4.4824,   0.2909,  -2.9622,
         -3.8432,  -2.8353,  -3.6319,  -5.8811, -14.3504,  -6.5891],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2448,  -2.3126,   0.7088,  -2.9165,  -4.3592,  -0.7107,  -1.5515,
         -3.8648,   2.7887,  -4.3131,  -4.1570, -13.4994, -16.8840,  -3.9397,
         -6.7569,  -1.9524,  -4.7846,  -0.9828,   1.5464,  -5.5562],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0340,  -0.8895,  -3.7593,  -0.0596,   0.7315,  -2.8782,  -2.8825,
        -19.3145,  -5.5157,  -7.7529,  -0.7295,  -1.9293,   3.1435,  -6.7634,
         -1.2490,  -4.7007,  -0.7283,  -4.0239,  -0.0660,   2.2050],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1123,  -1.7902,  -2.6005,  -4.1646,  -6.1038,  -4.5149,  -0.5124,
         -3.7417,  -2.0380,  -8.8211,  -3.8798,  -8.7319,  -0.5841,  -4.8628,
          2.5181, -13.6203,  -2.4803,  -5.0342,  -4.8971,  -5.2216],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0275, -3.1792, -0.7862, -4.3100, -0.3598,  2.1126, -2.4588, -1.4367,
        -3.1867, -0.2358, -4.3727,  0.9163, -2.5466, -1.4487, -3.0178, -0.4224,
        -7.8397,  1.2843, -4.4610, -0.1972], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.8115,  -4.9032,  -1.0686,  -4.5237,  -2.9827,  -5.9531,  -1.8126,
          1.5223,  -3.3983,  -4.3919,  -2.0708,  -1.4806,  -1.4667,   1.8538,
         -4.8586,  -0.9210,  -2.1820,  -6.9054,  -3.4513,   1.2714],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6425,  1.7292, -2.9410, -1.7915, -4.3453, -0.0514, -5.4076, -0.1753,
         0.2995, -2.9108, -1.5285, -2.9718, -5.2641, -2.5224,  1.6740, -2.6561,
        -4.2059, -3.0129, -4.2346, -4.0223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0967, -1.7336, -3.5419, -1.4414, -1.8237, -5.0673, -5.5688,  1.1622,
        -1.8995, -0.1675, -1.2243, -1.6069, -3.6342,  1.1414, -6.7323, -1.0466,
        -3.7215, -1.7900, -1.0255,  2.6823], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7793, -1.9101, -2.9888, -2.0818, -3.7928, -0.9654,  2.3759, -2.7111,
        -2.0802, -2.9751, -1.2062, -4.7090,  0.8874, -0.9957, -3.4576, -0.9210,
        -0.6015, -5.3525, -1.9883, -0.5238], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.3846,   1.7499, -12.2289,  -5.6354, -18.7202,  -9.0732,  -6.6184,
         -6.0467,  -1.4061,  -9.6731,   3.6464, -10.7402,  -3.2414,  -2.8375,
         -3.3603,  -2.7731,   1.2488,  -4.5349,  -2.5178,  -3.1209],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3217,  2.1222, -2.7016, -3.5554, -2.8525, -1.9972, -0.4680,  0.8477,
        -4.0819, -2.5449, -3.2992, -2.3191, -2.3855, -1.1270,  0.4058, -3.0593,
        -1.0933, -1.8746, -3.2262, -0.0935], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4801,   0.7096,  -3.1034,  -2.6952,  -6.3531,  -7.4366,  -7.4982,
         -5.4296,  -4.6307,  -1.5484,   3.0649, -26.3736,  -2.1724,  -1.9672,
         -7.0766,  -2.4868,  -6.2559,  -6.5758,  -4.2617, -13.0967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3051,  -3.3736,   1.3964, -10.3259,  -3.0236,  -2.6908,  -3.9911,
         -3.0116,  -0.3011,  -3.3477,  -2.1825, -10.1695,  -5.5630,  -4.7862,
         -6.6684,  -1.6342,  -4.0186,   0.2956,   0.4215,  -2.7796],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1607,  -3.9781,  -1.2618,  -2.6439,  -2.4530,  -3.1041,   1.5833,
         -1.5022,  -2.5833, -16.2396,  -2.7375,  -6.8179,  -5.7607,  -5.0699,
          2.5767, -13.0516,  -3.8738,  -6.2729,  -0.6901,  -1.9356],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8488, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0635,  -2.0427,  -5.5913,  -0.6004,  -5.7859,  -3.6089,  -0.8456,
         -6.1453,  -9.0385, -22.4251, -10.4820,  -9.1885,  -7.0693,  -0.5760,
         -4.8261,   2.4520,  -8.9550,  -1.5284,  -2.7981,  -2.1224],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0906, -7.1302, -0.8593, -3.3399, -0.8898, -4.6453,  0.0204, -1.5422,
        -4.8703, -1.9232, -1.1114, -5.1357, -0.9189,  2.3179, -2.5660, -1.1488,
        -1.3148, -5.0888, -1.4558,  2.9152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9691, -2.0744, -4.4979, -1.5457, -0.0336, -2.7140, -1.2951, -3.2185,
        -1.7937, -7.2174,  0.5340, -4.6625, -3.7422, -1.6024, -2.5776, -3.6806,
        -3.6370,  3.0561, -3.6948, -2.3852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8259, -7.0584, -0.5631, -1.8105, -2.4431, -3.0523, -3.1141, -6.4843,
         0.5506, -5.5613, -1.8770, -3.4453, -0.7376, -4.7753,  0.4308, -0.0897,
        -4.3869, -0.8670, -2.1986, -1.0860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1949e+00, -3.1219e+00, -3.2942e+00, -4.8505e+00, -2.2175e+00,
         8.6497e-01, -4.1221e+00, -2.7840e+00, -3.1191e+00, -2.7673e+00,
        -5.7739e+00,  7.6292e-01, -5.0169e-03, -3.4912e+00, -2.0992e+00,
        -9.6770e+00, -8.5581e+00, -9.4894e-01, -8.1386e+00, -1.8155e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2659,  -5.1874,  -2.1688, -27.7475,  -5.2591,  -6.9533,  -4.8801,
         -7.5400,  -7.7517,  -5.6168,   2.5302,  -5.1152,  -2.0069,  -2.5515,
         -2.6542,  -4.6275,   2.1195,  -4.3763,  -1.1615,  -2.8514],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7033, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4608, -1.7999,  0.4531, -4.6339, -3.0811, -3.7185, -1.3852, -4.6408,
        -0.1182,  0.9331, -2.5235, -1.7521, -3.1376, -1.2514, -3.4606,  0.1851,
         1.7328, -2.7029, -0.4538, -2.1951], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2163, -2.6280, -2.0471, -1.5478, -3.2805, -1.6503,  2.3119, -2.1496,
        -1.5814, -8.2551, -2.6321, -8.9964, -1.6347, -3.4927,  0.9282, -6.6848,
        -3.3282, -1.0452, -1.7494, -6.2297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0385, -1.4566, -3.2717,  1.0989, -3.5156, -1.3075, -1.8449, -2.1277,
        -4.6224, -0.2340,  1.6254, -2.7429, -1.8936, -2.3744, -3.9963, -2.5823,
        -0.1917, -5.0376, -1.7905, -4.0674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2888,  -1.5643,  -0.3971, -22.1126,  -3.9761,  -0.9687,  -3.4047,
         -4.3472,  -2.6534,   2.9960,  -1.9716,  -4.8753,  -6.8337, -16.2430,
         -6.7596,  -8.9113,  -7.3160,  -0.9986, -16.5052,  -2.5762],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3447,  -1.4997,   2.9716,  -1.2597,  -1.5804,  -2.7508, -11.6439,
         -9.1110,  -1.2732,  -1.2654,  -5.2033,  -0.8319,  -1.4021,  -2.1860,
         -1.7470,  -4.0824,  -3.3068,  -2.3785,  -4.2312,  -1.6451],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1729, -5.0037, -2.3589, -4.8403, -1.8583,  1.0832, -1.8211, -0.3994,
        -2.8511, -4.0321, -2.3364,  1.5558, -2.0556, -0.9717, -4.0834, -7.1208,
        -0.6863,  1.5729, -2.2358, -1.5668], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6379,   2.1253,  -5.1709,  -2.1063,  -2.8886, -15.6823,  -6.8000,
         -8.0168,  -4.1479, -11.1574, -23.1992,  -7.5383, -15.3977,  -7.4141,
         -7.0678,  -8.5573,  -0.8952,  -3.0289,   2.7703,  -4.2967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9880,  -5.9724,  -2.9225,   1.3615,  -3.8766,  -0.1869, -18.2695,
         -6.8436,  -9.5204,  -5.6990,  -6.7957,  -1.0735,  -4.0341,   2.1153,
        -15.3360,  -1.0465,  -3.3732,  -5.3827,  -2.8673,   1.0600],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2767,  -0.8537,   1.9888,  -3.8708,  -1.8595,  -2.4198,  -2.9331,
         -2.5218,   1.2505,  -3.5119,  -7.0377,  -0.7532, -19.5466,  -3.6157,
        -10.8754,  -3.6102, -17.2648, -15.0989,  -9.1283, -12.0351],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7014, -2.2243, -2.6486, -3.8765, -3.0443,  1.1779, -6.7355, -2.1000,
        -4.2176, -0.9556, -5.3958, -1.0158,  1.2976, -2.2255, -1.0221, -1.3149,
        -0.0395, -3.7904,  1.3784, -2.8754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1165, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7026,  -1.0905,  -1.1797,  -1.3778,  -1.6886,   3.3707,  -1.7730,
         -3.4740, -14.5139,  -5.1394,  -6.0442,  -0.3227,  -5.5831,   1.9134,
        -17.2239,  -1.0050,  -2.2197,  -4.5736,  -3.1146,   1.5004],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3121, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4427,  2.8663, -2.6912, -2.2326, -3.8993, -2.3189, -4.2007, -6.9452,
        -3.9290, -3.4491, -3.5762, -1.7055, -1.5878, -4.3366, -0.9323,  1.4210,
        -2.8733, -2.3759, -0.9630, -5.6838], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3796, -9.1493, -7.4130, -7.8894, -6.3601, -7.2556, -7.6179, -6.6946,
        -8.2634, -7.2787, -7.3633, -6.7513, -7.4924, -7.0866, -7.3855, -7.6179,
        -7.0840, -7.0827, -7.6166, -7.2487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6797, -4.8772,  2.5787, -2.3566, -1.9707, -2.0089, -3.6060, -3.9071,
         1.3730, -2.2149, -2.3231, -1.8476, -5.4464, -1.8177, -1.1306, -3.9635,
        -2.2086, -1.8382, -5.5397, -1.3130], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3178,   2.7700, -12.4391,  -2.5396,  -1.7408,  -5.0823,  -1.9905,
          1.5561,  -5.8108,  -3.3797,  -4.0025,  -3.4890,  -5.8578,  -2.2346,
         -1.0950,  -9.3046,  -1.6855,  -1.4739,  -5.1880,   0.0922],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3569,  1.7416, -2.5210, -0.6711, -3.3107, -3.0329, -4.3350, -2.1187,
         1.5255, -1.5801, -0.8710, -1.1629, -3.8325, -0.0287,  0.0393, -2.6766,
        -1.3117, -1.0610, -2.3611, -1.3147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4440,  -2.1927, -11.6801,  -6.0918,  -4.5148, -11.6426,  -8.2367,
         -6.2189,  -2.6290,  -3.1598,  -8.6923,  -4.2491,  -2.0365,  -0.6693,
         -3.2067,  -0.7490,   2.9828,  -1.6956,  -1.3703,  -3.8063],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5458,  -6.2751,  -1.0677,  -3.9502,  -3.0802,  -3.1405,   0.8852,
         -2.7720,  -2.1142, -19.3835,  -5.8405, -14.8267,  -5.8907,  -4.0258,
         -6.1169,  -0.9670,   1.3045,  -2.6059,  -2.3677,  -1.7841],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9721, -4.2805, -0.0319,  1.9272, -4.7989, -1.4089, -1.4853, -1.1319,
        -0.7587,  2.7702, -3.4554, -1.6114, -4.2363, -0.2224, -6.5726,  0.4376,
        -0.0778, -2.4923, -0.1488, -0.8966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6989, -4.0228, -2.2548, -0.1973, -8.1574, -0.3681, -1.4817, -2.5872,
        -1.0561, -2.0020, -1.2352, -4.5970,  0.5557, -1.1221, -3.6826, -2.6345,
        -3.5407, -4.6470, -1.7658,  1.0752], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2710, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1690,  -4.7471,  -1.3542,  -1.2952,  -2.9926,  -0.9364,   2.3958,
         -5.2131,  -3.2505, -12.1806,  -3.2679,  -8.3183,  -1.6806,  -3.2161,
          3.3490,  -3.7281,  -0.8719,  -2.8696,  -1.9340,  -2.3629],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1012, -1.2933, -2.1014, -1.9640, -2.6412,  2.8969, -1.7977, -1.7444,
        -4.6568, -0.0272, -5.2883,  0.0298,  1.1067, -5.5378, -0.9428, -3.9178,
        -5.8890, -3.9890, -2.9291, -4.0010], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3035, -1.0853,  0.0470, -3.8617, -1.5296, -9.1481, -4.8270, -4.6108,
        -6.3971, -2.3596, -1.1993,  2.4858, -7.7344, -4.4062, -3.4141, -1.6251,
        -4.5430,  0.0411,  1.7150, -2.1618], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6494,   0.2196,   1.2665,  -2.8530,  -0.8759,  -2.4602,  -1.4023,
         -3.1029,   2.5670,  -3.0263,  -0.5198,  -2.4178,  -2.0724,  -2.8957,
          2.0366,  -2.7264,  -1.3360,  -6.5208, -12.0328,  -4.8384],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5881, -1.2792, -2.9095, -0.7662,  3.1451, -1.3312, -2.3136, -1.3018,
        -2.1110, -7.6522, -0.2920, -0.2633, -2.7378, -0.2820, -2.6709, -1.5119,
        -1.5666,  1.4283, -6.0302, -2.9190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3454,  -3.9786, -14.0871,  -1.8553,  -1.8489,  -0.4866,  -8.3402,
         -3.1786,  -1.7324,  -4.9724,  -0.4468,   1.0591,  -2.9452,  -2.3961,
         -2.4010,  -2.5264,  -3.9725,  -2.0572,  -4.9513,  -5.0700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6647, -1.0948, -1.9797, -3.8284, -1.7404,  2.4476, -4.9488, -2.2460,
        -2.5677, -0.9113, -3.5976, -0.5277,  1.1753, -2.2623, -1.1935, -2.3073,
        -1.2970, -2.1276,  3.1072, -3.4218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9992, -1.0990, -3.4221,  1.0785, -1.7509, -4.4432, -0.7670, -2.5711,
        -2.0438, -7.7084, -0.8443, -2.4287, -1.1888, -4.2347, -1.3378, -1.7203,
         1.2771, -4.8519, -3.2127, -2.1225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6195,  -4.7731,  -1.6925,  -2.0290,  -1.3148,  -6.1010,  -0.9756,
         -1.7512,  -3.2723,  -3.6371, -21.9875,  -2.4683,  -8.7547,  -0.4070,
         -3.2881,   2.4561,  -4.7724,  -3.1545,  -2.1919,  -1.3343],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5415, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2653, -1.6818,  0.0296, -0.8953, -0.4080, -1.5465,  2.7311, -3.9182,
        -2.7167, -1.9907, -1.7734, -4.8637, -1.8736, -1.1234, -8.1423, -1.5205,
        -2.0976, -0.9693, -4.5918,  2.6061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1671,  -1.9818,  -1.5036,  -4.7243,   2.1752,  -5.2524,  -1.7397,
         -3.1362,  -3.9104,  -2.2278,   0.9882,  -3.1932,  -2.9703, -13.8313,
         -9.4492,  -2.3377, -10.2509,  -6.5323,  -8.4029,  -0.8450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4144, -2.2276, -2.3417, -1.4575, -1.4141, -3.5362, -0.7519,  0.9099,
        -2.9612, -0.5319, -1.9734, -4.4781, -1.9632,  2.0674, -6.8799, -1.6602,
        -2.0478, -1.3195, -3.4564,  2.7211], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0578,  -2.8424,  -2.8405,  -2.4427,  -4.4834,  -4.8138,  -2.6944,
          0.7086,  -4.6606,  -2.3146,  -6.5823, -11.8171,  -4.4575,  -8.0129,
         -4.8479,  -1.9383,  -1.2063,  -1.8250,  -2.6773,  -4.6245],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9792, -1.6159, -5.5726, -2.2571,  0.8901, -1.8592, -2.3157, -2.1311,
        -6.3053, -0.1859,  2.9933, -3.5367, -3.3508, -8.5900, -5.9060, -6.1573,
        -7.5163, -0.6084, -4.0783, -5.6325], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5753,  -0.6896,  -7.9884,  -2.2089,   1.1702,  -5.3391,  -4.1936,
        -30.1452,  -5.5358, -10.9337, -10.3301,  -8.2707,  -7.0043,  -2.5696,
         -4.7203,  -0.5065,   1.4525, -10.7966,  -0.9453,  -2.7114],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4362e+00, -3.1377e+00, -2.0790e+00, -4.5996e+00, -4.2743e-01,
         1.4074e+00, -1.3296e+00, -2.2476e+00, -1.4087e+00, -5.7970e+00,
        -1.8356e+00,  5.1665e-03, -5.0563e+00,  1.8081e-01, -2.8154e+00,
        -2.9922e+00, -3.4288e+00,  8.5948e-01, -1.8834e+00, -8.2703e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7013,  2.5917, -2.3780, -2.0016, -3.0484, -1.3081, -6.6241, -1.8962,
         0.3736, -2.2907, -1.9553, -0.7737, -3.9972,  0.5239, -0.1381, -1.6799,
        -1.2968, -0.8360, -3.6506, -1.6660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4938,   0.5624,  -1.8183,  -1.7413,  -0.3414,  -3.8170,  -1.6813,
          1.7061,  -1.9976,  -2.3161, -13.9570,  -3.3970,  -7.8576,  -0.2475,
         -5.3091,   0.2930, -11.3331,  -2.2665,  -3.3965, -10.6810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7220, -3.9086, -0.6455, -1.1520, -4.9526, -1.6192, -0.1108, -2.6001,
        -2.2448, -8.3019, -5.8074, -3.9584, -8.1284, -3.8440, -4.0688,  1.2746,
        -8.6594, -1.7730, -1.8468, -0.8088], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6238, -1.5742, -2.8144, -3.4170, -5.2317, -2.0530,  0.2345, -6.3498,
        -0.8291, -2.2541, -4.2465, -0.0663,  2.7656, -4.2740, -1.7643, -2.0778,
        -1.5976, -3.8992, -3.3718,  0.6906], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2609, -0.9980, -2.2656, -2.3077, -4.2373, -0.3421,  0.0724, -4.3452,
        -0.7020, -1.3886, -4.7543, -0.8958,  0.4948, -4.2318, -1.6486, -2.3511,
        -2.9567, -0.7811,  1.2781, -0.8118], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8393, -2.7557, -3.6868, -0.1424,  2.1550, -2.7489, -0.7780, -1.2841,
        -1.3297, -3.1871,  1.7992, -3.7401, -3.2865, -1.8170, -1.9160, -5.2532,
         0.4055, -2.6147, -2.9901, -0.2444], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7583,  -4.1305,  -4.5724,   2.0976,  -2.0036,  -0.6452, -15.6381,
         -4.2546, -10.8997,  -2.2795,  -7.0407, -14.9932,  -7.5992,  -3.9938,
         -3.4260,  -7.7899,  -6.3758,  -0.5137,  -5.0053, -11.5674],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3392, -1.2837, -4.9857, -1.9796, -6.0287, -4.0792, -0.2260, -3.2917,
        -3.2493, -3.0208,  1.8066, -4.4524, -2.2452, -0.7846, -5.2059, -1.6241,
         0.8182, -4.9303, -3.9023, -6.2035], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3769e+00, -5.0415e+00,  2.5904e+00, -4.1940e+00, -9.4866e-01,
        -5.3872e+00, -3.0166e+00, -2.6062e+00, -7.7228e-03,  1.1231e-02,
        -7.0599e+00, -1.0336e+00, -2.9243e+00, -5.1107e+00, -1.9560e+00,
        -3.2764e+00, -3.6938e+00, -1.9302e+00, -1.7023e+01, -3.6642e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5804, -7.0825, -7.4916, -6.8740, -6.2065, -6.0424, -0.9507, -6.9262,
         2.6624, -6.2825, -1.2669, -3.7650, -2.5312, -6.8437, -0.1785, -2.5435,
        -5.0993, -2.1291, -2.2716, -1.5589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2840,  -8.8207,  -7.3480,  -9.1736,  -3.3954,  -9.5646,  -2.2617,
         -4.6790,  -0.6063, -11.3214,  -3.3882,  -2.7869,  -4.4256,  -1.0564,
          1.9993,  -2.8613,  -0.9908,  -3.4152,  -1.5017,  -5.7261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8816,  2.6646, -3.7250, -1.9970, -2.4705, -4.0838, -0.9997,  2.1533,
        -4.2190, -1.9974, -4.2690, -3.5121, -6.4985, -3.4353, -0.6627, -1.6243,
        -2.5411, -1.2363, -8.0406, -0.3139], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.6121,  -2.4106,  -2.0954,  -8.2500,  -7.1490,  -3.9022,  -6.4230,
         -1.5033,  -2.8238,   1.3765, -14.0958,  -2.5761,  -2.2484,  -3.3570,
         -5.0856,   0.8035, -11.4807,  -5.7615,  -7.2093, -12.5699],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5212,  -1.5220,  -5.5126,  -3.3497,  -7.6777,  -8.2255,  -4.0064,
         -7.3971,  -6.1278,  -1.8676,  -2.6675,  -7.5607,  -3.2736,  -3.5619,
        -22.7842,  -4.7582,  -5.5685,  -3.1841,  -5.3979,  -2.1917],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2288,  0.9588, -1.6829, -4.4922, -0.8095, -1.8178, -5.2351, -1.0599,
        -0.4175, -7.7211,  0.7083, -3.1759, -3.2359, -0.3791,  2.3534, -3.2557,
        -1.1714, -1.1584, -2.6884, -0.8583], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5793, -6.0574, -5.5598, -5.2677, -5.7570, -7.9984, -2.8496, -2.1592,
        -3.6486, -0.9104, -5.6413, -5.5246, -5.5825, -4.7558, -7.0848, -2.7405,
        -4.9437, -2.3744, -3.4504, -3.4805], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5689, -2.8429,  1.6800, -1.1307, -2.4661, -1.5683, -5.7700, -0.5425,
         1.5998, -2.7232, -0.9985, -0.9867, -1.8071, -1.7481,  2.1994, -2.0963,
        -1.4660, -1.8714, -2.8188, -0.9343], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7366, -1.9724, -3.0265, -5.5288, -1.2193,  1.1459, -3.1119, -0.8433,
        -3.0141, -3.8700, -6.3733, -7.5773,  0.2628, -3.0538, -2.2411, -1.5519,
        -3.9383, -1.1755,  2.5732, -1.3398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8949, -2.5585, -0.2330, -3.6807, -0.7999,  1.7960, -3.8351, -1.4041,
        -2.0204, -0.8184, -1.2329,  3.3874, -2.0775, -8.2346, -2.0940, -2.7473,
        -4.4795, -4.3877,  0.1573, -2.8951], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1806,  2.8130, -4.0083, -0.6547, -4.7464, -1.0527, -5.2416, -0.8184,
        -0.7152, -3.0135, -1.5066, -2.3806, -0.6799, -5.2002,  0.1457, -0.0679,
        -3.4981, -2.0313, -3.0440, -1.8212], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7470,   1.0842,  -3.2157,  -0.9493, -18.2052,  -2.3971,  -7.9748,
         -0.8260,  -3.5442,   2.9066, -11.4204,  -3.1593,  -1.6187, -16.3565,
         -5.7585,  -1.0936,  -3.9810,  -1.6421,  -2.4109,  -1.7662],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4152,  -8.0311,  -6.2489,  -6.4397, -10.1434,  -1.7398,  -6.1869,
         -1.7577,  -2.1120,   1.9319,  -6.7202,  -1.8163,  -2.0069,  -2.6225,
         -9.4913,   0.9196,  -2.7232,  -2.2040,  -9.2112,  -7.5960],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9962,  -1.8235,   0.9220,  -3.6749,  -1.0364,  -2.4210,  -2.9254,
         -2.9852,   1.9264,  -4.9475,  -2.2897,  -2.9002,  -3.2192,  -3.5746,
          2.4825,  -3.2765,  -3.8475, -13.7553,  -8.6312,  -4.6502],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3312, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7162,  -4.3963,  -2.3183,  -1.0854,  -5.9138,  -5.1666,  -1.5975,
         -7.5700,  -2.2093,  -0.4320,  -2.9747,  -3.6876, -10.7527,  -7.1586,
         -9.1298,  -7.4076,  -4.6408,  -3.9282,  -0.3070,   2.5542],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6484, -7.5828, -1.3027, -5.1678, -1.6246,  1.7157, -3.1846, -0.6955,
        -1.6092, -4.5941,  0.2253,  2.8705, -2.6774, -0.6485, -2.5520, -1.2607,
        -4.9701, -0.4950,  1.3205, -1.7171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0821,  -0.2054,  -5.4332,   1.7649,  -4.4397,  -1.7455,  -3.9389,
         -3.1469,  -5.7447,  -2.1624,  -0.5905,  -3.8803,  -1.7890,  -1.6327,
         -4.4878,  -0.8562,   0.0206,  -6.2959,  -0.7743, -15.7111],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5372,  -2.2998,  -0.9305,  -5.2337,  -7.1480,  -4.3633,  -8.1113,
         -4.1756,  -2.0667,   2.2310,  -9.8612,  -2.7211,  -2.3699,  -6.1292,
         -2.8606,  -0.3298,  -2.8813,  -1.9463, -14.4927,  -6.5065],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0225, -2.7343, -2.2087,  2.6918, -2.3731, -0.5741, -1.0729, -5.5656,
        -1.3353,  2.5651, -4.6995, -2.7083, -2.8752, -2.3404, -1.9341,  2.3062,
        -3.6605, -2.2209, -1.2342, -1.5580], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1320,  1.7151, -2.7936, -3.3610, -4.8494, -1.8359, -4.1731, -2.7616,
         0.6199, -3.6250, -2.6020, -2.2985, -3.5971, -5.2862,  0.0214,  1.1816,
        -2.6011, -1.5676, -1.8174, -3.0980], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4560, -23.8699,  -8.6870, -17.1003,  -7.0275,  -2.9958,  -6.9515,
         -0.4095,   1.8247,  -4.0852,  -0.8464,  -4.1292,  -4.4838,  -2.0883,
          1.9999,  -3.7913,  -1.1161,  -3.6139,  -0.9528,  -3.9585],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7479, -1.9531, -0.9765, -6.2718, -2.6797,  1.8071, -2.1844, -2.2887,
        -7.2090, -9.4218, -6.8037, -9.6855, -6.1003, -1.9167, -8.9282,  3.2878,
        -5.2287, -1.6601, -2.6097, -5.1839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7257, -2.6544, -7.5145, -6.3879, -0.6108, -5.5900, -0.3866, -2.4305,
        -2.6799, -4.9828,  0.6133,  1.0613, -4.5212, -1.2445, -0.4639, -3.9253,
        -1.7551,  1.8955, -4.8697, -1.9513], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5785,  -1.8783,   0.8419,  -6.2356,  -3.0747,  -5.6936,  -1.4094,
         -1.9396,  -1.6477,   2.4525, -11.1056,  -2.7048,  -4.4764,  -1.2040,
         -3.5393,   0.3766,  -1.6779,  -4.4471, -13.7662,  -9.2542],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5428, -1.1740, -5.3126,  0.6534, -1.1153, -2.7158, -2.5188, -5.1824,
        -0.2280, -5.8421, -0.3438,  1.7191, -2.3324, -1.0298, -1.6527, -0.0941,
        -8.9940, -0.4740, -5.9414, -1.1480], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4591,  -0.8225,  -4.1076,  -4.2136, -18.6019, -11.9887,  -9.7269,
         -1.5997,  -3.7141,   1.9134,  -8.9409,  -3.8386,  -8.3584, -14.5770,
        -32.3108,  -2.1375,  -7.1265,   1.7659,  -3.7677,  -1.4764],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6408, -2.9051, -1.4046, -3.2466, -1.7271,  2.1632, -4.1440, -0.9314,
        -5.4061, -5.4996, -1.6367, -0.5599, -5.3901, -1.3855, -3.1359, -0.3690,
        -8.1707, -1.7437, -1.2792, -3.7370], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8821,  -0.4495,   2.7506,  -4.4350,  -1.3380,  -2.5039,  -1.7341,
         -5.4960,   1.5067,  -3.9317,  -4.0765,  -6.4103, -23.9070,  -6.2001,
         -8.5387,  -6.9027,  -3.8304,  -5.2825,  -3.4731,  -7.3686],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0144, -4.5556, -1.1324, -1.6061, -3.1981, -5.9675,  0.6854, -0.3301,
        -2.5517, -1.6337, -2.0346, -2.7582, -1.4003,  2.1046, -4.0967, -1.6023,
        -2.3132, -1.1292, -4.0584, -0.1148], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3270, -6.1777,  2.1177, -6.7070, -3.8576, -2.3021, -3.6230, -4.2468,
        -1.6733,  1.6826, -2.4564, -0.9609, -1.9007, -1.8260,  0.3030,  3.1937,
        -2.4363, -1.5777, -3.3897, -5.0508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4786, -1.5147, -5.1572, -6.5208, -2.7223, -3.3345, -3.0300,  0.5159,
        -3.5629, -2.0538, -1.4226, -4.6000, -0.1462,  1.0706, -4.2027, -1.2093,
        -2.4587, -1.0029, -5.9326, -0.2874], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0481, -3.3920, -0.4249,  1.9014, -1.6182, -1.0998, -0.7316, -2.9767,
        -3.0879,  1.8585, -1.9998, -1.1681, -3.2233, -2.5204, -6.6909, -2.8083,
        -2.3241, -6.3304, -0.4917, -1.3178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0747, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5956,  -2.1466,   0.9653, -11.3860,  -2.1718,  -4.4322,  -1.2574,
         -3.6073,  -1.0974,   0.3606,  -6.0625,  -0.1436,  -2.4232,  -2.8866,
         -0.0524,   2.1492,  -2.9832,  -0.0898,  -2.6951,  -1.6336],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1249, -5.4790, -1.3585, -2.5010,  2.3787, -4.3520, -1.5380, -0.6563,
        -3.0274,  0.0647,  2.7336, -1.6116, -1.0926, -3.0881, -0.9388, -5.2420,
         0.2248,  1.0604, -2.7901, -0.5082], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2529,  -4.5758,   2.2573, -13.4171,  -2.1114,  -2.0451,  -1.1707,
         -4.9254,  -0.4543,   0.3891,  -2.0990,  -0.6065,  -0.8898,  -4.0192,
         -0.9881,   2.4945,  -2.3634,  -1.7727,  -0.6793,  -2.7047],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.6396,  -3.8766,  -1.0400,  -3.2722,  -2.7871,  -7.1868,  -1.2183,
          0.3543,  -1.4542,  -1.8209,  -0.8649,  -7.8659,  -1.0016,   2.1744,
         -3.7467,  -3.9802, -20.9894,  -5.9681,  -5.6080,  -5.8443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5729, -6.4837, -2.4417, -4.7321, -5.5396, -0.2997,  3.3310, -7.1292,
        -0.7136, -1.9975, -2.1208, -2.4701,  2.4914, -2.2032, -0.5304, -2.5755,
        -1.2908, -5.0293,  0.3920, -0.4758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0196, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2884, -5.5190, -1.4863, -3.2944, -2.4095, -0.1213, -1.2442, -3.2561,
        -0.1463,  3.2009, -3.8165, -1.6704, -1.4091, -0.2728, -6.6219,  0.3142,
        -1.1552, -6.4044, -0.8807, -7.5288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2896, -3.6137, -1.8119, -8.2096, -3.0323, -0.9197, -3.2172, -1.3107,
        -1.2144, -4.6064, -0.3774,  1.2320, -6.3620, -0.8782, -3.1372, -0.9357,
        -6.8037,  2.8497, -3.3561, -0.3885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3162, -3.1516, -1.7341, -0.5041, -1.9617,  3.1303, -4.0023, -2.9744,
        -2.9093, -3.6857, -6.1815, -1.7923, -1.4919, -3.3758, -1.0112, -1.7713,
        -1.5937, -2.3001,  2.1143, -2.6203], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3214, -1.2075, -2.4215, -2.2188, -3.0185, -1.3802, -5.2963,  0.3296,
         1.5482, -4.2239, -2.6709, -1.4270, -4.3012, -0.6322,  3.1124, -1.7618,
        -0.4434, -0.8067, -4.1357, -0.9128], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5427, -1.8290, -4.8863,  0.6132, -7.0469,  0.6157, -0.1367, -2.5024,
        -1.4680, -1.7900, -0.5454, -4.2798,  1.2615, -3.5841, -5.5857, -1.8671,
        -3.1301, -5.3289, -0.8693,  2.0476], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.7862,  -5.3638,  -9.6957,  -2.7450,  -4.4537,  -0.4854,  -3.5281,
         -1.1302,  -1.6383,  -4.2026,  -4.1795,   0.8788,  -4.7508,  -0.6527,
         -1.6118,  -2.7926,  -1.3739,   3.0323,  -1.6108,  -2.6897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3836, -3.7265, -0.6835, -5.4348, -1.0894, -0.1708, -6.4461, -3.0318,
        -2.1438, -4.2238, -4.1723, -0.4832,  1.6070, -4.0604, -3.0899, -1.1249,
        -5.2948,  0.0935,  1.1009, -5.5743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.4761,   0.6333, -17.7536,  -3.1820,  -3.3341,  -4.1756,  -4.2460,
         -2.2344, -12.8210,  -2.0938, -17.0953,  -3.7494,  -4.3261,  -6.2638,
         -7.0818,  -2.9841,  -4.7004,  -0.3296,   0.4744,  -3.8120],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3706, -3.2100, -0.1263,  1.2985, -3.6198, -0.8361, -3.5369, -2.6952,
        -6.9008, -1.5985, -2.5891, -9.2131, -2.8577, -2.0351, -1.1259, -6.2062,
         1.0691,  0.5567, -3.2906, -1.4520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1504,  -4.2057,  -0.4674,   2.4827,  -4.3059,  -1.1178,  -4.8117,
         -6.5301,  -2.7553,   0.6481,  -5.3063,  -2.1865, -12.1358,  -6.7390,
         -5.2474,  -6.6239,  -2.0821,  -1.9294,   2.1186,  -5.1306],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6693, -2.4417, -3.8617, -4.8265, -6.6398, -7.5050, -4.1660, -4.7469,
        -4.3270, -2.4165, -3.1737, -7.4240, -7.0394, -2.3344, -3.1200, -2.8805,
        -5.5830, -8.7345, -5.3365, -8.7579], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9992, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6595,  -0.8160,  -1.9163,  -0.1661,  -3.4354,   3.1066,  -6.4806,
        -10.7486,  -3.4624,  -2.9208,  -4.5803,  -0.5490,   2.4148,  -1.8757,
         -2.1470,  -3.2520,  -3.3823,  -5.4802,  -0.5600,  -3.2361],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7334,  -5.3730,  -6.1061,  -4.0665,  -6.4603,  -6.4060,  -2.8015,
         -3.3915,  -2.9150,  -3.9363,  -3.8498,  -7.4830,  -5.3790,  -1.6528,
         -4.3809,  -2.5633,  -4.5625,  -4.0380, -11.7566,   0.4437],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1178, -3.4855, -3.6789, -0.0921, -1.0056, -3.5573, -0.1733, -2.8218,
        -3.5112, -4.2237,  2.1650, -2.4515, -2.6685, -1.8126, -5.6071, -0.0468,
         2.6607, -2.9311, -1.1182, -2.6507], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0064, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9234, -16.9446,  -6.9971,  -1.8889, -31.5919,   1.1589,  -1.8481,
         -7.2055,  -7.6694,  -7.9313,  -1.7702,  -6.2410,  -2.9042,  -3.2548,
          2.3925,  -4.7158,  -2.1042,  -3.8265,  -4.1547,  -2.6474],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8401,  -1.8727,   1.1271,  -2.2561,  -0.7179,  -7.2377,  -5.0313,
         -2.8601,   0.3252,  -2.9561,  -7.1615,  -1.1759,  -7.9334,  -1.1276,
          0.7795,  -1.7585,  -0.6739, -24.4991,  -7.8172,  -4.5900],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0107, -1.6690, -1.0315, -1.1207, -6.0489,  0.1222,  0.1341, -3.0992,
        -0.9451, -2.0222, -5.5903, -2.4182,  1.2761, -2.7800, -0.9355, -5.3877,
        -3.4336, -6.5990, -5.7384, -9.6433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6271,  -8.1772,  -0.9598,   0.6994,  -2.5936,  -1.9094,  -2.1973,
         -1.6203,  -5.4485,  -1.1847,  -1.1316,  -2.3159,  -0.0170,  -1.1965,
         -1.2223,  -2.8601,   2.9763,  -4.0937,  -3.4732, -11.3847],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4449, -3.7921, -4.5271, -4.9529, -5.9191, -1.3786, -2.8909, -2.7818,
        -2.7390, -3.8052, -2.3774,  1.1135, -5.5770, -2.8663, -2.7848, -5.2573,
        -1.4755,  2.0698, -2.8611, -1.3544], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8801, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5578,  3.8885, -1.0385, -4.7713, -2.9196, -4.3698, -2.9540, -7.0160,
        -3.2166, -1.5697, -2.2122, -1.7592, -1.2646, -4.4762, -0.8541,  1.9054,
        -4.6924, -1.8803, -6.7096, -6.4445], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6856, -0.2839, -5.3367,  0.2413, -2.1560, -2.6787, -2.3274, -3.9865,
        -0.1456, -4.6179,  1.7966, -1.6580, -0.5005, -1.9290, -1.5737, -7.3964,
        -2.2113, -0.3824, -4.7991, -1.4782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6509,  -4.4408,  -2.7794,  -6.8869,  -4.0863,  -8.3834,  -7.1245,
         -6.1092,  -8.4295,  -7.4102,  -8.2189,  -0.0938,  -8.3853,   3.2336,
        -12.7563,  -2.5431,  -3.0276,  -5.9182,  -4.8059,  -1.1345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8008, -2.3024,  0.5149, -2.8444, -1.2059, -2.5628, -0.8471, -5.9899,
         0.5388,  0.7767, -1.8464, -1.7390, -0.9135, -4.2380, -0.8176,  0.6929,
        -3.6768, -2.1973, -7.5793, -6.6623], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2968, -1.5911, -5.1953, -2.2147, -0.5966, -4.6371, -3.7179, -1.4957,
        -7.4888, -2.6571, -3.5694, -1.3178, -4.2077, -0.1952, -0.8813, -2.4738,
        -0.5839, -1.8707, -5.0051, -0.0798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1842,  -1.0390,   2.7284,  -5.2578,  -1.9231,  -0.8822,  -4.4274,
         -2.2547,   1.7132,  -2.0623,  -0.6667, -12.7686,  -3.6235,  -8.8053,
         -0.6073,  -2.2159,   3.3294,  -3.4338,  -2.4191,  -1.3348],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6076,  -0.7178, -10.2881,  -1.4342,  -2.7896,  -3.3714,  -4.7252,
          3.3710,  -2.8760,  -0.3034,  -0.9602,  -2.3599,  -6.0544,   0.1604,
         -2.8389,  -5.6899,  -1.4191,  -1.0365,  -1.0461,  -6.9708],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8588,  0.0433, -3.5372, -2.1093, -5.0853,  0.4027, -0.3959, -3.5664,
        -0.9878, -2.4628, -0.2477, -5.6391,  2.7569, -6.3532, -1.6666, -1.9204,
        -1.1504, -4.7316, -0.1612, -1.5005], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6686,  -5.7813,  -1.0827,   0.5633,  -2.6705,  -1.2564,  -1.8306,
         -4.6108,  -1.3920,   2.8045,  -4.3714,  -1.2821, -12.3712,  -3.7723,
         -4.8159,  -5.2280,  -7.7895,  -0.8575, -24.0172,   3.0574],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.3403, -1.7729, -2.5212, -3.1625, -1.3520, -3.6265, -1.7306,  0.2736,
        -2.4296, -1.1016, -0.5574, -2.8697, -4.6919,  2.8847, -4.3486, -1.0727,
        -1.8254, -4.6382, -2.1319,  0.2160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8527, -1.3218, -7.4709, -0.4425,  0.4357, -9.9172, -2.7069, -0.9496,
        -1.1680, -4.5948,  0.7915, -2.9874, -3.4358, -1.7207, -5.5747, -5.3087,
        -5.4390,  1.0667, -6.5331, -1.5673], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2633,   1.2750,  -1.7228,  -2.4173,  -0.3403,  -8.7583,  -1.0732,
          0.5684,  -3.5223,  -0.7102, -19.6224,  -4.5383,  -3.1689,  -7.9735,
         -5.7264,  -7.1597,  -3.4651,  -3.9824,   1.9718,  -3.4722],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8929,  -4.6158,  -1.5555, -43.2469,  -2.8135,  -8.4979,  -0.7975,
         -9.5271,   3.1296,  -7.2423,  -2.5427,  -6.1372,  -4.5415,  -4.8881,
         -0.4476,   1.1050,  -1.9981,  -1.7337,  -3.7957,  -0.8757],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9565, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1470e+00, -2.5883e+00, -2.0118e+01, -7.3647e+00, -3.8826e+00,
        -5.7001e+00, -8.7343e-01, -5.8098e+00,  2.2522e+00, -1.3175e+00,
        -3.1145e-01, -3.7906e+00,  1.8034e-01, -6.2153e+00, -1.4079e-02,
        -4.4136e-01, -2.8063e+00, -6.6133e-01, -3.6170e+00, -5.3744e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7212, -2.3379, -2.7927,  1.1567, -3.0722, -1.4839, -3.9650, -0.8027,
        -3.9124, -1.4875, -0.2150, -4.6393, -1.9144, -3.0703, -0.6377, -5.2428,
         2.9479, -6.3336, -1.2459, -2.4220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4548, -13.7706,  -7.1078,  -3.9094,  -1.4642,  -4.1089,  -7.8454,
         -1.1183,  -7.8043,  -1.2791,   1.1757,  -4.9845,  -0.6584,  -5.1123,
         -2.8124,  -3.6639,   0.9539,  -4.2050,  -4.0600,  -2.4549],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5532,   2.1981,  -4.3980,  -0.9982,  -1.0481,  -4.5312,  -4.2698,
          0.3298,  -3.5016,  -1.2010,  -2.4078,  -0.8391,  -4.1153,  -0.1433,
         -1.5022,  -6.7552,  -0.1377,  -1.9616, -11.7839,  -4.5571],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8247, -0.7440, -7.3007, -1.9140,  0.2335, -3.3737, -0.6100, -1.8834,
        -2.5885, -0.8903,  2.4439, -3.5540, -2.6053, -2.8847, -1.9255, -3.5653,
        -2.2557,  0.1065, -3.0884,  0.0809], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8879, -1.7026, -1.8096, -2.4931,  0.8470,  3.2548, -3.3587, -1.5487,
        -3.0786, -0.6056, -5.4207,  0.7809, -0.6054, -2.0519, -0.2265, -1.8370,
        -1.8216, -0.5091,  2.8730, -3.6517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5158,  3.4158, -6.5732, -0.9490, -3.6037, -1.3502, -4.8151, -2.3545,
         2.0333, -3.7179, -2.3104, -3.0202,  0.1271, -5.8835,  0.3801, -0.4618,
        -3.8800, -1.3386, -3.1555, -0.6771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.4614,  -5.5232, -10.1740,  -7.2742, -12.4158,  -0.5848,  -1.3720,
         -5.2163,  -4.8114,  -2.4310,  -7.5351,  -3.7628,  -1.8882,  -3.3577,
         -3.4892,  -1.4919,  -3.8401,  -1.5069,  -3.9577,  -7.4314],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7993, -2.8186,  1.0327, -3.2712, -0.8245, -2.2678,  0.2198, -4.3476,
         3.7818, -5.2703, -0.7512, -1.3542, -3.6920, -7.6707, -0.6420,  0.0458,
        -2.5816, -2.8123, -1.8415, -0.6224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9243, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2080, -0.8550, -0.7481, -1.8542, -6.0406,  0.1001, -0.5908, -2.6469,
        -0.9316, -2.4321, -2.3935, -2.5103,  1.4644, -7.6006, -2.8437, -2.6365,
        -0.5153, -2.2316,  0.3814, -4.1430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6445,   0.7257,   1.2492,  -8.8279,  -3.5407,  -0.8406, -19.7866,
        -25.8915,  -2.4917,  -5.1542,  -2.2871,  -4.0607,  -4.6794, -11.1954,
         -5.8074,  -8.1481,  -4.9523,  -2.4399,  -1.3904,  -5.3707],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6706,  2.5194, -7.0160, -1.1876, -1.3229, -0.6433, -8.1087,  3.9880,
        -7.0928, -1.2763, -1.3807, -0.2181, -5.4077, -0.4210, -0.2865, -5.5905,
        -4.4161, -2.3108, -2.9657, -4.6221], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.9743, -2.6433, -1.0975, -2.4265, -0.0621, -2.5552, -0.4769, -5.2918,
        -2.9124, -3.3382, -2.6741, -3.9090, -0.9611,  0.8989, -3.0749, -2.1028,
        -1.6303, -2.0469, -0.2278,  1.6543], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7044,  -1.5436,  -4.6056,  -1.8548, -14.8418,  -7.2014,  -7.6992,
         -5.4774,  -1.3809,  -6.1390,   1.3822,  -2.0914,  -6.8271,  -2.1527,
         -3.7965,  -5.0544,   0.1872,   2.4647,  -1.9707,  -1.2389],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7756, -1.6267,  0.0474, -3.7618, -0.6045, -1.2729, -5.2830, -2.0789,
         1.4490, -2.3102, -1.4280, -3.9252, -4.2505, -3.1589, -2.3251, -2.3414,
        -3.6792, -0.7846, -4.5745, -0.1873], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6972, -6.2025, -1.9561,  0.7496, -1.3022, -0.7637, -1.6282, -7.8700,
        -0.9511,  0.2188, -4.6586, -1.7708, -7.4555, -4.5639, -4.8800, -5.6845,
        -1.8224, -2.4267,  0.2169, -4.3705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7476, -5.5452, -4.8437, -2.3482, -3.4923, -3.6444, -5.1572, -6.2169,
        -4.0664, -4.7694, -3.2828, -4.5088, -0.3124, -3.6678, -3.5373, -3.7037,
        -6.4231, -4.2886,  0.0185, -3.6556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2144, -3.0488, -0.9427, -2.0274, -1.1967, -1.5233,  3.3480, -3.3775,
        -1.9984, -2.9461, -2.5330, -9.9449, -4.6455, -0.7606, -2.5563, -1.3776,
        -2.6654, -2.2579,  0.3899,  0.0701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0357, -0.3501, -6.4421, -0.4664, -0.6514, -4.6105, -0.6728, -2.5038,
        -1.4905, -7.2331,  3.6117, -9.3957, -1.2530, -1.5681,  0.2569, -6.7031,
         2.3910, -3.2274, -6.6087, -4.1845], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4381, -3.9220, -2.1101, -3.1132, -3.9161, -9.3054, -1.2501, -0.4082,
        -1.9612, -1.3354, -0.9433, -2.3955, -1.6128,  2.7975, -3.8467, -1.7015,
        -2.4281, -0.3090, -5.6353, -0.9094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3519,  -3.6229, -20.0120,  -5.8326,  -5.6038,  -6.7248,  -0.9485,
         -3.7124,   0.2144,  -2.1046,  -6.3503,  -1.8092,  -2.5889,  -4.0249,
         -3.1539,   1.2695,  -2.6722,  -0.7085,  -2.5521,  -0.3427],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4401,  -3.7676,  -0.7081,  -1.9032,  -0.1517,  -3.7854,   2.6937,
         -3.2415,  -3.4273,  -3.9473,  -4.9199,  -4.1656,   0.5726,  -8.9379,
         -3.4743,  -4.1427,  -5.2829,  -2.7028, -37.6623, -18.6982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2106, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0304,  -8.2128,  -3.7839,  -6.4911,  -5.4960,  -2.0346,  -3.0043,
         -2.9487,  -2.4213,  -4.4601,  -4.7630,  -2.6757,   0.6572,  -4.5388,
         -2.3390,  -6.5595, -12.3128,  -4.3954,  -7.9396,  -4.7250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9666, -0.3301, -2.3347, -6.3095, -1.1898,  0.9788, -1.6725, -4.9563,
        -8.2661, -4.2633, -7.5594, -8.6872, -6.1069, -1.2395, -5.4984,  1.6899,
        -6.2610, -2.4347, -0.1644, -4.8458], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4070, -3.0261, -6.5424, -2.9404, -1.3039, -4.7334, -0.6828,  2.5714,
        -3.4134, -0.9641, -1.3738, -1.5730, -4.8541,  0.3082, -0.8440, -3.9795,
        -2.7034, -4.2308, -0.6976, -8.3510], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3791, -2.3720, -0.3544, -1.1949, -3.0507, -1.9818, -0.9582, -1.0893,
        -2.4402,  3.0330, -3.1228, -1.6437, -1.9020, -3.9730, -2.6404,  0.4913,
        -2.3849, -1.2696, -1.4143, -2.3419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6468, -11.7965,  -1.6849,  -2.2472,  -2.6621,  -0.4382,   0.7014,
         -2.8249,  -2.2259,  -1.1353,  -0.1333,  -4.7145,   3.2290,  -3.1770,
         -2.3810,  -3.2381,  -1.9021,  -1.9932,  -3.1668,   0.9985],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0073, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8302, -0.0349, -4.7868, -2.5650, -1.6839, -2.3493, -2.7941, -1.2495,
        -2.2402, -0.3225, -2.2184, -1.3362, -6.0195, -0.1201,  0.5300, -2.6267,
        -0.5655, -0.1434, -3.1922, -0.0108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6734,  -4.1755,  -4.8279,  -7.3205,  -5.8905,  -4.5459,  -5.9112,
         -3.6630,  -5.3634,   0.3254,  -3.3244,  -3.2154,  -5.1447,  -5.6327,
         -6.1376, -11.5470,  -4.3946,  -6.8795,  -5.0279,  -2.9601],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.4022,  -4.7560,  -6.2839,  -5.1640,  -6.1924,  -4.8015,  -4.2089,
         -5.8629,  -7.6605,  -3.4748,  -3.9043,  -5.0346,  -3.5055,  -2.9197,
         -6.5130,  -3.8262,  -8.3204,  -5.2934,  -4.0488,  -5.0668],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3199, -0.5410, -0.0196, -2.8966, -0.9756, -2.1496, -1.9896, -4.8434,
         0.1635, -0.3106, -1.8337, -1.7322, -1.5327, -4.8149, -3.0017,  1.4832,
        -3.7729, -1.5597, -0.8365, -5.0110], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1757,  -2.6804,  -2.4267,  -0.5489,  -6.2597,   0.4502,   1.6440,
         -3.1110,  -0.2295,  -1.7910,  -1.0798,  -2.7700,   1.4830,  -3.2735,
         -2.7081,  -3.3177, -12.6858,  -6.0544,  -0.8099,  -3.2982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4347, -3.4613, -1.8705,  0.7110, -2.8123, -1.5975, -2.5024, -2.1752,
        -1.6890,  2.9542, -4.4860, -1.1008, -3.2255, -2.1009, -4.8432, -0.9686,
         1.5983, -2.9492, -3.0460, -2.9287], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2565, -2.0309, -3.2771, -0.0220,  1.8149, -3.3235, -0.9411, -3.6646,
        -0.7641, -5.2202,  0.1821,  0.2998, -2.7171, -0.0791, -1.2325, -1.7661,
        -0.1926,  2.9399, -2.4758, -6.1979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7851, -2.9137, -2.0138, -1.2506,  3.2860, -2.2966, -4.0687, -2.7725,
        -4.3576, -4.0101, -2.9556, -4.4204, -6.4868, -6.3909, -3.0093, -1.0665,
        -4.0884, -0.1556,  0.0399, -6.2003], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1140, -3.0572, -4.2160, -2.9611, -5.7498,  1.1061, -1.2723, -2.1812,
        -2.5815, -8.8283, -3.2190, -0.8203, -4.2852, -2.7217, -3.8308, -5.4205,
        -2.4183,  1.3396, -2.5200, -1.2440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.6774, -5.4298, -2.5527, -4.2599, -1.5073, -5.7795,  0.1747,  1.0181,
        -2.3008, -0.7230, -1.2350, -2.3276, -1.0505,  1.8323, -4.3199, -0.3229,
        -2.1938, -2.7484, -6.0371, -2.8240], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2855, -11.4417, -37.1314,  -4.2113,  -6.1709,  -8.4804,  -7.4032,
         -5.7943,  -2.9784,  -6.5648,   3.8836,  -6.4706,  -2.8797,  -1.5732,
         -0.5710,  -4.1997,  -1.1203,  -1.2665,  -2.2545,  -2.2049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3566, -2.2867, -1.0578, -2.0752,  0.2075, -6.1306, -2.7695,  0.8825,
        -2.7115, -2.0488, -0.6164, -3.0381, -1.5459, -0.0153, -4.8202, -0.6780,
        -1.4911, -3.7182, -3.1193, -1.6186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2827, -2.4568, -3.7336, -0.3987,  0.6420, -4.5378, -1.3639, -2.7869,
        -2.4799, -4.1427,  0.3655, -0.3429, -3.5491, -0.3606, -1.5299, -3.2924,
        -2.1819,  0.6841, -3.6324, -1.9207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.3768,  -0.5835,  -1.6424,  -2.0206,  -4.2290,  -4.8550, -12.5139,
         -2.0713,  -4.6073,  -4.9510,  -2.4623,   2.2606,  -4.9544,  -3.1366,
         -3.2401,  -3.1110,  -6.2988,  -2.1925,  -2.0395,  -6.1438],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4185,  -4.7833,  -5.4644,  -4.2160,  -2.9093,  -2.8020,  -2.4273,
          2.6167,  -4.8043,  -4.1428,  -4.0045,  -3.0084,  -4.0063,  -9.1726,
         -3.2133,  -2.9473,  -4.1876,  -3.8490,  -1.1820, -10.5557],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4702, -1.6499,  1.1332, -8.0612, -4.1661, -2.0131, -2.7662,  0.3275,
         2.4221, -2.3968, -1.2867, -1.5629, -4.9888, -1.7895,  1.5480, -2.0867,
        -2.1494, -8.1571, -6.8442, -5.3267], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4582,  -4.6898,  -0.7424,   1.7871,  -3.7972,  -0.8934,  -1.6799,
         -7.7598,  -6.6985,  -5.4953,  -5.2090,  -2.9019,  -3.8184,  -1.2145,
         -5.5700,  -1.0394,  -1.1434, -10.8284,  -3.7835,  -6.6401],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3773, -7.3688, -3.0453, -4.0230, -1.6459, -6.2604, -6.3004, -3.6625,
        -5.3008, -1.9266, -1.5480, -4.1059, -2.3496,  1.6025, -7.6741, -2.6988,
        -1.5188, -1.4095, -3.7072,  2.9833], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6854, -4.3532, -1.8944, -2.1994, -1.0652, -1.5241, -4.3379,  0.1364,
         1.5849, -1.5538, -0.6143, -1.0527, -1.6857, -1.7386,  3.2943, -1.9651,
        -2.1700, -1.4635, -1.4445, -4.2431], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8488, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3923, -0.8266, -2.1707, -0.3553, -3.5127,  3.6773, -1.3114, -3.2946,
        -3.0519, -2.5096, -6.2921, -1.4161,  0.1748, -2.9302, -2.2787, -3.4145,
        -0.8345, -5.2143, -1.4255, -3.2559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7059e+00,  9.4476e-02, -1.6958e-01, -2.7314e+00, -9.8755e-01,
        -1.8808e+00,  4.5998e-03, -5.3837e+00, -4.2253e-01, -1.0423e+00,
        -6.9846e+00, -7.2695e-01, -5.1836e+00, -6.2713e+00, -2.5666e+00,
        -1.6570e+00, -3.7697e+00, -3.7037e+00, -1.1334e+01, -5.4391e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2211,  -0.7519,  -2.9620,  -1.2534,  -2.9754,   2.8239,  -2.0873,
         -1.7128, -12.9253,  -3.4499,  -8.4698,  -1.9786,  -2.9994,   3.0310,
         -6.5651,  -2.0791,  -3.1554,  -2.8723,  -4.0085,  -1.6572],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7117, -6.1233, -3.2365, -0.5403, -0.3448, -2.4567, -1.2248, -2.0655,
        -5.5921, -1.7038,  1.1152, -2.4553, -0.5083, -3.5991, -2.7910, -2.4768,
        -0.4011, -1.6319, -2.1784, -1.3147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1930, -1.4485, -2.7704, -3.0422,  2.0360, -2.2010, -0.9450, -3.7001,
        -2.0840, -6.9019, -0.8767,  0.8682, -5.5591, -2.2857, -1.5290, -1.9051,
        -4.2033,  0.1014,  1.9120, -4.6501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9485, -1.9208, -3.0735, -1.6544, -6.4386, -1.3660, -0.3188, -3.8828,
        -0.2514, -1.0892, -4.6454,  0.2759,  2.5388, -4.9445, -2.0170, -1.8263,
        -2.5980, -6.2299, -1.7529, -0.5925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6889,  -4.5168,  -0.6451,  -9.2476,  -9.5659, -13.9642,  -3.5092,
         -4.2211,  -8.8183,  -7.5631,  -2.3828,  -7.5056,  -1.9732,  -1.3666,
         -5.6051,  -2.6344,   1.2713,  -3.6554,  -3.4720,  -5.8231],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6952,  -6.2523,  -3.9153,  -2.1623,   3.1653,  -4.5182,  -1.2551,
         -4.1367,  -2.5995,  -0.7867,   1.9575,  -2.9477,  -2.9595, -13.0836,
         -3.2069,  -5.0097,  -0.9373,  -1.5811,   2.6277,  -7.3622],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6907, -3.1380, -1.1916, -1.0409, -5.3905, -0.4413,  1.7131, -2.0514,
        -0.6137, -2.2599, -0.9235, -4.8560, -0.1538, -0.0810, -3.8893, -0.3878,
        -4.0324, -1.1554, -3.5733,  3.3976], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.4779e-01, -2.2874e+00,  1.1876e-01, -1.4862e+00, -3.5393e+00,
         2.7227e-02,  2.3630e+00, -4.8304e+00, -1.1721e+00, -9.7976e+00,
        -9.3540e+00, -6.0987e+00, -9.8842e+00, -1.5166e+00, -7.5824e+00,
        -2.7587e+00, -3.5440e+01, -4.3897e+00, -7.4065e+00, -8.3302e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8076,  -1.9524,  -1.9952,  -9.5474,   1.9925, -10.5367,  -1.8718,
         -1.3674,  -4.6919,  -1.4384,   2.7110,  -2.0843,  -1.3508,  -0.7820,
         -4.9691,  -1.7331,  -3.4720,  -2.9997,  -2.1522,  -3.8386],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2327, -2.4081, -1.1904, -4.2983,  0.8363, -0.5986, -3.7434, -0.4919,
        -2.6168, -0.6883, -2.3264, -1.2792, -2.6186, -4.6827, -3.0465, -2.0857,
        -1.7885, -5.7256, -1.1362, -1.3398], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8015, -2.4158, -1.9879, -5.4640, -0.6452,  0.2680, -2.8476, -1.1564,
        -4.0008, -4.7637, -1.7020,  1.8266, -1.5875, -1.9729, -1.0238, -5.1561,
        -1.3956,  2.7560, -2.9835, -0.7366], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2591,  -4.6619,  -2.0619,  -1.9745,  -1.8014,   1.1886,  -3.7299,
         -1.2527,  -2.6313,  -6.9035,  -2.3290,  -0.6585,  -5.1063,  -3.8782,
        -19.0487,  -9.0108,  -9.3809,  -5.2194,  -0.8852,  -2.9668],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5245,  3.5140, -5.4810, -1.6449, -2.9160, -1.9992, -7.3465, -1.0367,
         0.8033, -2.0622, -0.6807, -0.9847, -1.2503, -6.4305, -2.6435, -2.2521,
        -2.9605, -1.4834, -4.1558, -4.9114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4602, -1.7547, -2.8700, -4.3857, -1.1658,  1.5362, -1.0369, -1.0281,
        -0.7881, -2.4404, -0.9354,  1.3042, -2.7139, -2.5101, -2.6909, -2.2388,
        -6.6524, -2.2315, -1.3857, -5.9600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3390,  -3.6506,  -3.9151, -11.7829,  -4.8788, -12.4163,  -1.3322,
         -4.8053,   3.2388,  -3.4180,  -3.0309,  -4.1092,  -4.7674,  -5.5026,
         -0.7077,  -3.4021,  -2.5649,  -3.0547,  -5.2641,  -3.2858],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2631e+01, -3.4008e-01, -3.3808e+00, -5.7918e-02, -4.8777e+00,
        -8.0931e-03,  5.5022e-01, -1.9271e+00, -2.6539e-01, -3.0861e+00,
        -1.3475e+00, -4.6718e+00, -4.5172e-02,  1.3720e-01, -3.2811e+00,
        -8.2157e-01, -5.3994e+00, -1.0547e+00, -7.1634e+00, -1.3792e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5525, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3764, -1.3512, -3.7758, -1.0722, -2.5245, -3.2964, -2.9056,  2.5401,
        -3.8590, -1.6740, -7.0826, -7.1732, -3.2424, -6.0039, -1.8181, -2.6327,
         2.4890, -2.4803, -2.1811, -2.6589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1108, -10.8273,  -5.7121,  -1.3427,  -3.7769,  -2.8994,  -3.5919,
         -2.3409,  -2.7292,  -3.5340,  -0.8740,   3.3022,  -3.7160,  -0.6496,
         -1.4150,  -0.3397,  -6.2794,  -0.4458,  -2.6870,  -4.4860],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8547, -4.0692, -9.2592, -2.8278, -4.2872, -6.2467,  1.9149, -1.9000,
        -0.5354, -3.4744, -5.0697, -1.6603,  3.0366, -2.2609, -0.5651, -3.7364,
        -1.3840, -4.9038,  1.0655, -5.4980], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5543,  -2.9724,  -1.5412,  -2.9694,   3.3383,  -2.3251,  -1.5701,
        -13.3262,  -5.5172,  -3.6569,  -5.8580,  -1.2824,  -1.2728,   1.9408,
         -5.9901,  -1.0596,  -1.4074,  -6.7609,  -1.8731,   2.9950],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5268, -1.3329, -5.3477, -1.2997,  2.6008, -3.3419, -0.3270, -2.8948,
        -1.3142, -1.4791,  3.3415, -4.1521, -0.2160, -1.9837, -1.3799, -2.3873,
         2.9629, -4.5030, -0.1434, -3.4641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5270,  -4.2793,  -2.8857,  -3.5489,  -8.1242,  -5.0764, -18.7595,
         -4.7255,  -8.1086,  -1.5727, -12.8505,   2.0922,  -2.9921,  -7.1010,
         -6.1062,  -4.3105,  -2.1434,  -2.7070,  -1.1930,  -0.5377],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5311, -0.9441,  2.2726, -4.1932, -1.8435, -0.5853, -4.5990, -0.6857,
         2.4032, -3.6720, -0.1122, -1.1355, -2.6052,  0.4314,  2.2601, -3.8636,
        -0.4346, -2.1524, -2.0146, -8.6946], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4731, -5.7443,  0.2330, -2.5065, -3.5614, -0.0099, -1.1396, -0.1972,
        -4.1728,  3.0469, -2.4083, -0.9217, -1.2313, -1.2879, -5.5005,  1.7557,
        -7.9112, -2.5726, -2.9464, -1.7015], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2484,   0.9913,  -3.5717,  -3.9853,  -5.0828,  -3.6190,  -1.5313,
        -10.7942,  -9.8290,  -2.0341,  -1.2003,   0.2681,  -6.9869,  -1.8101,
         -3.5580,  -3.6984,  -0.4709,   3.1054,  -2.8576,  -1.8547],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4037,  -0.5902,  -7.9978,   2.4190,  -9.5991,  -1.5585,  -2.6287,
         -2.4811,  -8.9788,   2.6487,  -3.1872,  -1.6365,  -1.5056,  -0.2025,
         -3.9050,   3.7328,  -1.7403,  -3.6034,  -4.9434, -10.1274],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2330,  -0.2647,   2.9536,  -8.9222,  -4.9401,  -7.6612,  -9.4566,
         -6.2784,  -7.7311,  -6.1583,  -3.7979,  -3.7788,   2.4564, -12.4997,
         -2.2691,  -2.5697,  -6.3083,  -3.6413,  -0.8081,  -3.0915],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4772, -1.4056, -3.8011, -3.3959,  2.1569, -3.2921, -2.0324, -2.0236,
        -2.5689, -4.6808, -4.2606, -2.1855, -1.5655, -3.8752, -0.6463, -5.5385,
         1.1796,  0.9222, -2.9049, -0.1283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0526, -10.4235,  -6.0723,   0.6749,  -7.8823,  -4.6771, -11.1652,
         -8.9943,  -4.6816,  -6.3159,  -3.3051,  -4.0942,   1.5511, -10.4023,
         -4.7812,  -3.6926,  -3.1585,  -2.7495,  -4.1569,   1.5442],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3299, -3.1182,  1.5389, -3.4190, -2.9116, -1.9316, -0.3779, -5.6367,
         0.9196, -0.1082, -2.3588, -0.5117, -0.9464, -5.2606, -1.7412,  1.4567,
        -9.5147, -7.2741, -5.1509, -2.9384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4411, -3.3752, -1.9737, -4.1779,  0.4364,  0.3615, -3.1007, -1.8549,
        -3.5541, -2.6225, -7.0250, -4.3652, -1.5272, -4.5611, -1.0958, -2.3006,
        -2.5808,  0.3500,  2.8241, -3.2337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9476, -6.7124, -5.7102, -1.1077, -4.0810,  0.5056, -4.0497, -0.5924,
        -2.3743, -5.4073, -2.5498,  0.8089, -3.3823, -2.1105, -7.0960, -6.7574,
        -2.4593, -6.2836, -1.1133, -4.2159], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5818, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6655, -2.1472, -3.3347, -3.1954, -3.3266, -2.5745,  1.5924, -4.5277,
        -0.9745, -5.5267, -0.5663, -9.7186,  1.1299,  0.2700, -5.5390, -1.6292,
        -1.9368, -1.8177, -6.4822,  2.6337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8770, -1.8393,  0.0407, -2.9156,  1.9215, -2.3747, -5.8935, -0.8716,
        -1.6354, -5.0537, -1.4513,  1.2273, -2.0753, -2.0112, -3.4840, -3.3624,
        -2.5129, -5.2620, -1.6028, -3.9453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4630,  -1.9165, -10.0665,  -2.5677,  -6.9837,  -2.6048,  -6.2907,
         -6.3024,  -1.2742,  -3.9604,  -4.9215,   2.5226,  -4.2905,  -1.9234,
         -2.3385,  -1.2899, -11.3164,   1.5044,  -3.1768,  -1.9470],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0185, -9.6981, -8.9809, -7.5420, -4.3055, -4.5461, -6.1107, -6.4758,
        -4.8643, -5.9949, -4.5560, -3.6310,  1.1139, -3.6168, -2.7524, -4.5379,
        -2.7756, -6.2472, -3.9649,  0.5326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8573,  -5.0459,  -5.4651,  -2.6132,  -0.8498,  -3.6659,  -2.0505,
          1.4724,  -4.1740,  -1.1379,  -8.4038,  -4.9456,  -9.0782,  -0.8487,
         -4.2054,   1.9692,  -8.7904,  -3.0732,  -2.1368, -10.3066],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5753, -10.4154,  -0.9148,  -2.1250,  -0.6683, -10.2348,   0.4819,
         -6.7131,  -0.2767,  -3.9460,  -5.0677,  -5.7175,   0.9760,  -4.7435,
         -0.6709,  -1.5137,  -7.8985,  -3.4961,  -1.8727,  -3.2885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5006, -6.9661, -1.5243, -8.9705,  3.6843, -4.0623, -1.9141, -4.3130,
        -0.2491, -5.6844, -0.0840,  0.3930, -3.3978, -0.2887, -2.4320, -5.9354,
        -0.7774,  2.6777, -4.0026, -0.3244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2332,  -3.9260,  -0.6062,  -2.7849,  -0.2504,  -4.7373,   2.7416,
         -3.2147,  -1.6508,  -2.1921,  -5.0466,  -1.1606,   2.4646,  -1.9839,
         -1.3810, -14.5330,  -2.2192,  -8.0271,  -6.1492,  -2.8893],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7750, -21.2899,  -3.7055,  -4.1849,  -1.1150,  -3.6218,   3.0923,
         -3.2599,  -1.4948,  -3.4753,  -3.4119,  -3.0513,  -1.6142,   1.5073,
         -4.5977,   0.5525,  -3.6235,  -2.8191,  -1.3749,   0.9003],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1413,   0.7753,  -1.3103,  -0.7176,  -0.8184,  -2.9321,   0.5389,
          2.8661,  -3.1674,  -1.4059,  -2.9937,  -1.2406,  -8.1654,  -2.3845,
         -1.8231, -10.1341,  -3.7600,  -1.0719,  -3.9709,  -0.9067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7866, -2.3398, -2.6400, -2.2854, -4.0253, -4.8144,  0.2689, -0.5831,
        -2.9038, -1.4331, -2.4725, -0.4215, -5.3637,  3.7024, -1.7732, -0.1487,
        -1.8452, -4.6842, -0.9299,  1.9181], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4154, -2.1654,  1.1619, -3.9126, -1.3565, -0.9094, -3.8410, -0.9773,
         2.6340, -2.7475, -0.6341, -0.9702, -1.0725, -3.5315,  3.3645, -9.7113,
        -2.0531, -2.3782, -6.1030, -1.5448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2812, -0.9696, -4.2272,  2.8475, -2.5952, -1.4866, -1.7243, -1.4848,
        -5.4738,  2.2366, -3.9067, -3.5425, -5.0568, -1.2591, -3.9822, -3.2969,
        -1.9585, -2.1851, -3.3720, -2.2188], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3284e+00, -2.9507e+00, -1.6730e+00, -2.0829e+00, -1.0570e+00,
        -5.3965e+00,  3.1112e-01,  1.3537e+00, -2.0619e+00, -2.6329e+00,
        -1.6011e+00, -5.5769e-01, -6.3091e+00, -3.9321e-03, -7.0139e-01,
        -4.7121e+00, -1.1292e+00, -1.9453e+00, -7.5155e-01, -3.4788e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5299, -12.4916,  -6.0444,  -6.3070,  -4.7426,  -4.0765,  -6.8056,
          1.3083,  -3.1152,  -2.2595,  -1.6448,  -2.7754,  -2.8665,   0.2636,
         -2.7260,  -1.8874,  -2.8220,  -0.7908,  -5.1689,   0.4311],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1680,  1.7676, -5.0584, -0.7470, -2.7865, -0.5793, -4.8834,  3.6211,
        -4.1388, -1.2993, -4.2384, -0.4510, -3.7087, -0.4669,  1.7128, -0.8465,
        -1.2861, -1.8531, -0.5550, -5.7311], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1390, -4.4127, -0.9542, -1.4972, -1.8730, -0.5641, -1.2349, -3.1738,
         0.8404,  1.3747, -3.1908, -1.3411, -1.8259, -2.5505, -7.3929, -3.8082,
         0.3664, -6.6261, -4.6230, -1.6331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8903, -3.8448, -2.3487, -0.1642, -4.6038, -2.0457, -2.8408, -3.3193,
         0.2293,  2.5228, -6.6226, -0.5294, -2.3447, -5.2519, -2.1171,  2.1143,
        -4.0514, -0.8601, -5.8761, -5.0914], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8149,  -5.5520,   1.3651,  -2.0878,  -0.7507,  -1.6062,  -7.4612,
         -1.5478,   1.7183,  -4.6694,  -1.5726,  -4.5374, -10.3556, -13.5563,
         -6.0313,  -1.2001,  -5.6101,  -0.3193,  -6.2688,  -1.2440],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8813, -6.8065, -2.3899,  0.2518, -2.2007, -5.5127, -4.3310, -2.4086,
        -7.0030,  0.9476, -1.9545, -3.2653, -0.8470, -3.2174, -2.2460, -1.4764,
        -4.1897, -3.3427, -0.9921, -3.3132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8906,  -6.5909,  -2.0757,  -0.4677,  -3.0858,  -1.9173,  -4.1703,
         -1.1916,  -5.3030,   0.0373,   1.4951,  -1.6177,  -1.6976,  -1.8488,
         -1.9426,  -0.1364,   3.0002,  -1.9588,  -1.4435, -18.3295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6062,  -0.0696,  -1.3620,  -3.9936,  -2.0491,  -2.9656,  -2.0738,
         -1.8102,   2.5668,  -2.6655,  -2.1390, -10.3816, -10.2978,  -5.0690,
         -5.4406,  -7.5767,  -7.7615,  -8.2240,  -0.3755, -13.2207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6913,  -3.3710,  -1.8436,   2.6109,  -2.3159,  -0.3249,  -9.5224,
         -7.3345,  -2.2562,  -5.6971,  -2.4533,  -4.4551, -16.5652,  -2.6832,
         -3.1670,  -2.6110,  -1.5278,  -5.3779,  -1.0729,  -5.2748],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6170,  -3.0081,  -1.1331,  -0.9981,  -1.6339,  -2.4756,  -2.3000,
         -0.9285,   3.3261,  -4.8148, -11.1437,  -4.3979,  -0.5277,  -3.6540,
         -0.2527,   1.3508,  -2.3343,  -0.4519,  -2.1860,  -2.5627],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.4115, -2.1912, -1.7868, -1.7685, -3.2299, -5.1577, -1.7348,  0.7131,
        -2.5122, -1.2193, -3.1327, -0.7300, -1.0331,  2.0743, -2.7127, -0.4223,
        -2.9948, -1.2123, -4.2516, -0.6276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6047, -3.1084, -0.4926, -9.3302, -3.6931, -1.6737, -5.6852, -2.4454,
        -3.6192, -1.5010, -3.9478, -7.7737, -6.5564, -3.6951, -5.6505, -1.5093,
        -6.0145,  0.3618, -6.8879, -2.7860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9239, -1.3635, -2.1656, -1.7592, -7.2925, -1.9441, -0.6604, -2.5545,
        -1.8957, -4.0669, -0.9845, -3.0717,  3.8058, -5.0573, -2.6666, -1.8218,
        -7.0640, -1.6226, -1.0375, -1.2410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6482,  -1.4733,  -1.9550,   2.7733,  -4.8028,  -0.8126,  -2.7410,
         -2.3995,  -4.2101,  -6.0133,   1.8222,  -2.0495,  -0.7653, -15.8614,
         -2.7818,  -7.8133,  -2.1467,  -1.8697,   1.5575,  -5.7400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3953,  -3.6176,  -0.4849, -16.7503,  -6.1048,  -5.0369,  -5.6603,
         -5.9369,  -4.2626, -13.6943,  -5.1549,  -9.6522,  -7.3530,  -6.2775,
         -4.2880, -11.1426, -12.0313,  -8.0225,  -4.1708,  -2.1030],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7580, -3.9987, -1.9677, -4.0923, -2.0318, -4.6963, -2.3937,  2.4339,
        -2.2660, -2.4319, -4.0446, -1.0692, -4.1753, -1.0875,  0.7078, -2.4243,
        -1.2747, -1.6021, -2.7206, -1.7449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9061, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2937,   3.0233,  -4.3934,  -2.1671,  -3.0621,  -4.2618,  -6.6009,
         -2.8102,  -0.7443,  -3.0804,  -2.9764,  -1.7646,  -6.6640,  -1.1658,
         -2.6240,  -3.5318,  -2.7856, -11.2753,  -6.3830,  -4.6525],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8785, -2.6481, -2.2385, -4.9651, -1.5239, -5.3648,  0.3111,  1.1702,
        -3.3153, -0.5132, -4.9267, -4.4703, -3.8562,  1.0651, -2.8745, -0.9653,
        -1.0622, -2.4861, -5.1379,  0.0910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2301, -3.8304, -2.1122, -2.5571, -0.8525, -5.4574,  0.2827,  0.2363,
        -3.9172, -0.8877, -0.7296, -1.0387, -3.3691,  2.6123, -7.5610, -3.2888,
        -2.2698, -3.6046, -3.8790,  0.2530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3692, -1.2036, -2.9013, -1.6298, -2.2787, -0.2487,  1.7508, -3.6936,
        -2.8258, -3.7144, -0.5050, -5.4467,  0.1715,  0.9345, -5.7870, -1.1254,
        -2.0846, -3.2760, -2.7294,  2.5829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5256,  -3.6157,   1.6191,  -6.0685,  -0.7958, -12.4277,  -6.3727,
         -6.1362,  -6.4644,  -2.9345,  -3.0729,   1.9189,  -4.5172,  -2.1505,
         -2.4408,  -5.0624,  -2.9004,   1.3604,  -4.8953,  -3.4541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5897,  -1.6843,  -2.5158,  -1.7749, -22.6880,  -2.8899,  -6.8318,
         -0.8532,  -1.3401,   2.7664,  -6.5801,  -2.5550,  -0.6836,  -6.7542,
         -2.2645,  -0.3088,  -1.9879,  -1.7074,  -2.4354,  -1.4863],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5232, -4.1148, -1.6997, -4.1675, -0.8048, -4.7004,  1.2315,  0.5078,
        -4.7157, -0.5914, -3.7293, -6.5807, -0.8282,  1.5230, -5.4728, -2.9558,
        -3.6008, -1.4030, -6.4632, -5.1847], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5614, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0113, -0.9025, -5.1777,  3.2701, -2.7588, -1.6798, -1.5859, -0.7772,
        -6.8133, -0.4310,  0.6541, -2.1795, -0.4085, -2.2067, -1.4139, -0.6910,
         2.4534, -4.8466, -0.8110, -2.8701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2777, -0.9986, -2.1621, -2.8517,  3.3852, -1.7266, -2.3016, -4.8832,
        -3.9836, -5.6527, -5.7248, -6.2482, -7.6864, -2.2338, -4.3101, -0.9059,
         2.5313, -3.7462, -1.4733, -4.0976], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5692, -16.3845,  -3.0305,  -5.0499,  -1.9521,  -1.3340,   2.9895,
        -13.5856,  -0.5223,  -2.9233,  -5.2580,  -2.4248,   1.5657,  -3.5909,
         -1.1977,  -1.0073,  -1.6103,  -8.1500,  -2.1054,  -0.9063],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1646, -4.4424,  0.0555, -2.5986, -1.7743, -1.4821,  1.7543, -2.7231,
        -3.0683, -3.7486, -4.3018, -3.2742, -1.4023,  2.0646, -2.6747, -0.7026,
        -0.7759, -4.0084, -2.9691,  1.7433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5682, -2.3618, -3.5403, -1.3438, -3.3455, -3.3169, -5.6994, -7.2143,
        -6.9658, -2.1570, -1.0733, -1.9851, -6.3859, -1.6035, -2.5722, -4.9274,
        -1.3056,  2.5341, -2.7665, -0.5930], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2755, -2.7015, -3.3257, -0.9978, -6.5794,  0.9807,  0.9398, -4.0675,
        -2.3590, -3.0760, -6.7801, -1.4272,  1.5726, -2.4774, -1.5907, -4.0900,
        -4.6677, -3.0961,  0.7419, -1.3948], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3729,  -5.7722,  -7.6919, -27.0319, -11.9335,  -6.4912,  -7.4262,
         -9.3599,  -4.4989,  -6.1730,  -1.5775, -13.3879,   0.8784,  -4.7663,
         -1.8628,  -1.5646,  -2.8708,  -4.5725,   0.2395,   1.1089],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5142,  -7.3774,  -3.4818, -11.3406,  -2.4662,  -3.0076,   3.0249,
        -12.0538,  -6.0844,  -6.1152, -16.0915,  -2.4343,  -8.1058, -88.2696,
         -2.6215,  -3.7990,  -3.3264,  -0.7583,  -0.7079,  -2.0832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.0307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5726, -0.1370, -1.5792, -3.7112, -6.6201,  2.7751, -4.8127, -1.5081,
        -1.1953, -4.8521, -0.7904, -3.8167, -3.5218, -0.4772, -4.3268, -1.6911,
        -0.1103,  1.2468, -1.3713, -1.6814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6415,  -4.5590,  -0.6222,  -3.5759,  -0.5056,  -4.8794,   0.0335,
         -0.7332,  -5.6237,  -0.4412,  -3.7440,  -0.4097, -11.4431,   0.2772,
         -1.4876,  -6.5865,  -1.4084,  -2.9301,  -7.5412,  -7.3741],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2098, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0254,  0.0506, -1.8031, -1.9143, -0.6721,  3.2411, -2.4327, -0.1966,
        -2.8832, -5.8549, -1.3369,  1.3934, -3.0250, -0.8269, -2.0865, -1.9171,
        -6.4594, -1.9396,  0.0976, -5.3282], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6577, -1.9249,  0.4065,  2.3400, -2.4548, -1.2006, -1.1183, -4.0466,
        -2.1036,  2.7648, -6.9004, -0.5748, -3.1131, -0.8724, -5.0962,  0.3774,
        -0.7641, -2.9003, -0.9097, -1.2105], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0745, -3.6379, -0.6830, -4.1692, -2.6613,  1.4501, -5.4550, -0.9322,
        -1.3577, -3.5959, -2.2436,  2.6704, -5.6346, -0.7603, -3.0531, -1.8503,
        -7.2346, -1.0470, -0.4636, -2.0125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.0005,  -1.8937,  -2.4099,  -5.8956,  -2.1660,  -1.3379,  -2.8652,
         -2.2519, -14.5757,  -3.2965,  -7.2833,  -3.2098,  -5.9291,  -0.0215,
          1.8764,  -2.7558,  -0.8124,  -1.9938,  -1.0161,  -5.9741],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9398,  -2.4631,  -2.4312,   1.0364,  -4.1395,  -4.7900, -10.9886,
        -11.9341,  -2.4597,  -6.0669,  -1.1545,  -0.1409,   0.3335,  -4.2400,
         -0.3479,  -1.7309,  -1.7196,  -1.4106,   2.3587,  -4.3611],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1795, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6619, -6.6499, -2.0165, -4.1519, -4.6231, -5.8485, -1.6112, -2.9445,
        -2.4210, -1.9883, -1.1203,  1.7431, -2.9423, -0.7618, -0.5671, -3.1309,
        -1.4539,  2.1653, -2.0060, -0.7411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4551, -2.2749, -3.2607, -2.0572,  2.8200, -4.1248, -0.1210, -3.8790,
        -1.7639, -3.8522, -1.4856,  1.2877, -7.0561, -0.6467, -2.9590, -4.7795,
        -3.3413,  0.1140, -3.3306, -3.5182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2842, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3965, -4.6320,  3.4194, -3.8326, -3.8808, -2.6473, -2.3722, -1.0173,
        -1.0780,  3.2755, -2.4549, -1.4539, -3.0791, -1.7181, -5.3572,  1.5527,
        -5.6929, -4.4947, -2.6770, -0.7750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0156, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9649,  -2.2552,  -4.5912,  -6.0198,  -0.4467, -12.2130,  -9.9160,
        -15.9013,  -3.6107,  -1.0752, -11.9613,  -2.9948,  -2.0862,  -3.5207,
         -1.5199,  -1.1585,  -5.4192,  -2.7945,  -2.3260,  -3.7153],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4649, -0.4082,  0.8739, -1.9949, -0.8311, -1.3394, -0.6704, -8.2549,
        -0.0461,  0.4223, -3.6825, -2.6850, -1.8513, -3.2541, -1.4676,  2.2894,
        -1.7833, -0.6308, -0.6896, -2.7707], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1989, -0.7211, -4.6450, -0.3555,  1.7726, -2.6037, -1.4787, -3.5730,
        -3.0025, -4.1841,  0.1685,  1.6012, -3.4044, -2.9213, -1.3166, -0.7372,
        -0.6349,  3.8287, -2.3359, -1.7741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7780,  -3.1994,  -3.2017, -10.4698,  -4.9558,  -2.1365,  -2.1082,
         -2.5418,  -0.8782,  -3.9581,  -1.1545,  -0.1256,  -3.2935,   0.2782,
         -2.1691,  -1.7425,  -6.6792,  -0.1901,   0.6815,  -4.1537],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4061e+00, -1.7933e+00,  2.0109e+00, -2.5106e+00,  1.2371e-02,
        -1.9290e+00, -9.1269e-01, -2.2394e+00,  3.5506e+00, -4.0410e+00,
        -1.3827e+00, -1.9983e+00, -2.9297e+00, -2.7240e+00,  6.8062e-01,
        -5.0109e+00, -2.0968e+00, -4.1033e+01, -2.4142e+00, -8.5345e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0264, -3.1043, -9.4534, -7.0604, -3.6269, -6.2491, -3.2970, -0.8124,
         2.4054, -5.5599, -4.2702, -4.5238, -3.2283, -0.4817,  2.2825, -4.4418,
        -1.2236, -2.3319, -3.8758, -0.5904], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8917, -7.3514, -2.6092, -3.5085, -3.8365, -3.2378, -1.0067,  0.0193,
        -2.0500, -1.0172, -2.5087, -2.6681, -2.0687,  2.7787, -6.4821, -1.4935,
        -4.9439, -3.1813, -7.0234, -2.9954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7571, -5.8909, -2.3177, -1.4694, -3.6127, -6.1223, -1.5896, -3.1765,
        -3.0419, -3.0233,  0.7380, -3.8579, -1.3959, -1.5121, -2.3608, -6.1367,
         0.6775, -0.9568, -2.3395, -1.1556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9228, -3.3040, -0.2559, -3.7876, -2.0600, -2.1138, -6.0980, -2.5322,
         2.3573, -6.8974, -1.8899, -4.0059, -5.7153, -0.7459,  2.7600, -1.8647,
        -1.2160, -2.5626, -2.9675, -1.9900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0185, -0.2649, -2.7211, -4.3015, -1.1190,  0.1354, -5.1397, -1.9458,
        -0.6870, -4.7338, -1.2180,  1.5679, -2.6662, -1.1970, -4.0706, -1.6567,
        -5.4058, -0.6529, -1.4409, -4.2737], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5569, -2.7375, -5.9450, -4.8320, -3.1021, -1.4470, -2.7178,  3.3418,
        -2.2234, -0.1967, -3.5836, -3.5206, -4.3875, -0.3695,  1.4756, -3.2884,
        -0.7670, -1.0985, -3.1076, -0.5413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8905,  -2.1342,   0.6109,  -2.7905,  -4.3654,  -0.9799,  -1.7722,
         -3.9447,   2.5387,  -4.2502,  -4.1379, -12.9552, -17.7543,  -3.6313,
         -6.7738,  -2.0036,  -4.6941,  -0.6634,   1.6626,  -5.3331],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4587, -4.2751, -2.1672,  0.1331, -3.3285, -2.4565, -3.1329, -2.1809,
        -4.7242, -0.4412,  1.4845, -3.3680, -1.4899, -3.1292, -0.9196, -5.1384,
        -0.8427,  1.6337, -2.3391, -1.3432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2242, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6081, -1.4712, -2.3625, -0.6990,  0.1661, -4.6296, -1.1092, -3.9397,
        -0.3169, -5.6499,  0.2144,  1.3370, -2.4932, -0.5826, -1.9198, -2.5153,
        -4.1185,  0.1878,  1.3725, -1.4533], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3198, -2.8585, -2.9632, -4.6652, -1.3035,  1.2304, -3.9554, -0.2155,
        -4.0750, -0.4916, -6.6822, -0.8059,  0.3831, -7.1228, -1.5938, -2.5576,
        -5.5702, -2.6264, -0.6029, -6.6999], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5168, -0.4252, -7.6330, -2.2029, -4.4105,  0.0697, -5.3802, -3.3815,
        -1.8020, -5.6692, -2.1676, -2.4175, -0.7247, -8.0586,  3.6115, -4.3201,
        -0.3284, -4.2157,  0.0934, -6.0384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4694, -1.5097, -3.6526, -1.5467, -3.5664, -0.0609,  1.2538, -2.9637,
        -1.3084, -2.2454, -0.7515, -5.2502, -0.7037,  0.5408, -2.9133, -2.1089,
        -1.5689, -1.0745, -7.6150,  0.6901], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3405,  -6.5353,  -0.8379, -30.6912,  -1.3550,  -1.4766,  -5.8650,
         -4.5609,   2.0558,  -3.6258,  -2.1111, -13.2989,  -5.1483,  -5.6265,
         -6.5058,  -0.8816,  -4.8697,   0.1619,   1.2039,  -3.9253],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3855,  -1.7860,  -2.8371,   0.1166,   1.9530,  -3.2783,  -0.9358,
         -2.0503,  -4.6756,  -2.1432,   1.2624,  -4.1265,  -2.3162, -53.7196,
         -6.4481,  -9.3093,  -5.4851,  -2.2424,  -5.1817,   2.5698],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8006, -1.8948, -3.5854, -1.3703, -3.7274,  0.6400, -0.1962, -4.6214,
        -1.2708, -1.9163, -5.7754, -1.8000,  2.4743, -3.3913, -2.6905, -0.6845,
        -3.0959, -2.7720,  1.7340, -5.6423], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6737,  3.5005, -2.9773, -0.7742, -1.9741, -1.0827, -2.2275,  3.9121,
        -5.4051, -2.9957, -2.8481, -1.5141, -5.2427,  0.7958, -0.6097, -2.7939,
         0.1122, -2.1973, -1.2128, -2.7733], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2190e-01,  3.2531e+00, -4.6070e+00, -6.0476e-01, -3.6503e+00,
        -1.5305e+00, -5.1530e+00, -2.2241e-01,  1.5802e-01, -2.2990e+00,
        -6.5345e-01, -4.2666e+00, -1.6596e+00, -8.3982e+00,  1.1511e-03,
         9.7621e-01, -4.3524e+00, -1.9630e+00, -3.6162e+00, -1.6913e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9286, -3.5727, -0.7215, -2.5095, -1.0468,  2.4531, -2.6415, -2.6175,
        -3.6642, -2.8744, -4.1948, -0.4937,  0.2417, -2.6053, -0.8401, -1.8277,
        -2.2183, -0.0633,  3.3241, -3.3068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.8202,  -5.8732,  -4.6262,  -5.1674,  -0.6757,  -4.1252,   3.6483,
         -2.2976,  -1.4093,  -2.4239,  -3.1442,  -0.0387,   1.7123,  -5.4587,
         -0.7954,  -4.1487,   0.3437,  -9.6389,   0.6358,   0.0816],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8337, -2.2133,  2.9197, -3.7734, -1.2619, -4.1488, -0.8447, -4.1899,
         0.3615, -6.2300, -0.6614, -4.5447, -2.0066, -3.5337, -3.4043,  1.5610,
        -3.0520, -3.0007, -4.1717,  0.1098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6047, -3.1084, -0.4926, -9.3302, -3.6931, -1.6737, -5.6852, -2.4454,
        -3.6192, -1.5010, -3.9478, -7.7737, -6.5564, -3.6951, -5.6505, -1.5093,
        -6.0145,  0.3618, -6.8879, -2.7860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3269,  -1.9422,  -0.4814,   1.4879,  -4.6031,  -1.6340, -14.3633,
         -6.4169,  -3.8748,  -4.9937,  -1.7458,  -1.2856,   0.9805,  -3.4039,
         -1.0448,  -1.8902,  -4.4682,  -0.4604,  -0.7915,  -6.9140],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8026,  -4.7098,  -1.1977,  -3.1116,  -1.0848,  -6.2444,   0.8068,
         -0.2949,  -4.2998,  -1.7543,  -1.2156,  -2.8690,  -2.1369,   0.9957,
         -3.7307,  -3.2090,  -3.8891,  -0.1834, -10.1132,   0.5153],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0660, -2.6687, -0.6727, -0.5294, -3.3955, -0.4329,  2.7870, -5.0405,
        -0.6992, -1.6679, -3.1800,  0.7539,  0.7597, -1.9314, -0.7832, -2.1342,
         0.0216, -6.7286, -0.4365, -3.0562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5677, -1.1838,  2.5439, -3.1473, -0.3585, -3.1362, -4.4350, -1.3297,
         2.5608, -2.5529,  0.0300, -1.9730, -2.0581, -8.3344, -3.9019, -1.1073,
        -6.1262, -3.1741, -1.1198, -7.6471], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3189, -2.7477, -2.1237,  3.0492, -3.3358, -1.2608, -2.6466,  0.4941,
        -6.6708,  1.0243, -1.4490, -4.0068, -0.0990, -1.6517, -1.9706, -8.8986,
         0.1003, -8.6069, -2.4189, -0.0726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5625,  -1.6478,  -4.3206,  -0.1919,   1.3354,  -2.1818,  -2.4710,
         -1.3733,  -2.2686,  -0.5057,   2.5985,  -8.2735,  -2.8980,  -6.8682,
         -3.8530, -10.7605,  -2.4795,  -4.7930,   2.3213,  -2.5447],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9448, -12.5461,  -2.1134,  -7.2353,  -0.5471,  -3.9318,   4.0240,
         -5.6225,  -2.4741,  -3.8996,  -1.7969,  -3.9452,  -1.5565,   1.2191,
         -1.1910,   0.1170,  -2.6334,  -1.4655,  -0.4623,   2.5139],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1452,  -0.4297,   3.0761,  -3.3689,  -1.5487, -19.3099,  -5.8155,
         -7.4452,  -5.6251,  -1.8172,  -0.8033,   1.2288,  -7.5669,  -4.0534,
         -4.5101,  -2.7662,  -6.4646,  -1.7003,   1.3243,  -2.2458],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6760e-01, -1.2478e+00, -4.0781e+00, -2.8005e+00,  2.9366e+00,
        -4.2819e+00, -2.3747e-01, -5.6219e-01, -1.7658e+00, -5.1824e-03,
         2.5488e+00, -9.3567e-01, -1.7528e+00, -1.9083e+00, -2.4380e+00,
        -1.3060e+00,  2.3806e+00, -2.2176e+00, -3.9720e+00, -1.5270e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8941, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2325, -4.9502, -1.1517, -2.2918, -2.6052, -0.9194,  2.7659, -3.3522,
        -0.6687, -0.1096, -2.5756, -1.5060,  2.3448, -3.1514, -0.8935, -3.1613,
        -2.2505, -4.7599, -2.0603,  0.9941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1220, -1.0767, -4.1457, -2.3655, -1.7099,  1.6921, -2.1965, -1.0551,
        -0.7769, -5.0818, -1.5107,  1.8091, -2.5442, -1.7458, -1.5024, -0.6558,
        -6.7949, -1.3193,  0.1306, -4.4061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0105, -2.6493, -1.0555, -3.4831, -2.6958,  0.0458,  1.4853, -3.3589,
        -0.4113, -1.5131, -1.9882, -0.9898,  1.9190, -1.7147, -1.2239, -2.1512,
        -5.2488, -2.1896,  2.8908, -4.5921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7516,  -4.4924,  -4.2889, -15.6750,  -6.3970, -21.8379,  -5.7602,
         -9.1000,  -8.3450,  -9.1081,  -5.7476,  -5.4672,  -6.1378,  -4.1602,
         -3.7111,   1.8930,  -3.1932,  -2.5200,  -2.6032,  -3.7667],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1408,   1.0006,  -0.5886,  -2.4604,  -0.7394,  -3.2988,  -2.5054,
         -4.7286, -38.0771,  -1.3370,  -4.0502,  -0.2154,  -1.4418,  -1.5632,
         -0.4428,   2.9013,  -3.8004,  -1.8968,  -9.7553,  -2.5405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0559, -3.9456, -2.6238, -5.1188,  2.8004, -3.6746, -9.2117, -3.7181,
        -0.9696, -3.0215, -5.2367,  0.4956,  0.0323, -3.3705, -0.7896, -1.5721,
        -1.1495, -4.6611,  0.5637,  0.1584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8635, -1.7772, -4.2963, -0.6351,  1.7171, -4.5103, -1.7377, -1.9279,
        -6.7491, -0.7014,  2.3142, -3.0275, -3.5706, -4.9002, -0.7701, -7.2931,
        -0.1782, -0.6822, -3.6559, -1.5862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1747,  -6.6243,  -3.8167,  -4.9736,  -3.1943,  -6.3751,  -1.6860,
         -4.4489,  -3.9288,  -2.2560,  -3.7791, -10.1844,  -2.5242,  -1.9774,
         -4.1931,  -3.7626,  -5.3300,  -7.3827,  -6.7746,  -5.0368],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8445,  -7.0264,  -3.6604,  -5.0795,  -6.7325,  -3.7970,   1.0248,
         -4.1989,  -4.3311, -22.1339,  -8.7767,  -6.9138,  -6.2504,  -1.8652,
         -3.4777,   1.8821,  -2.5758,  -1.3100,  -2.9953,  -5.2807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7710,  -2.0411, -11.0212,  -3.9815,  -3.6534,  -9.5684,  -2.3551,
        -14.4724,  -3.2598,  -4.7004,   2.5065,  -5.9412,  -2.6031,  -2.9385,
         -6.3867,  -4.3625,   2.0407,  -3.3091,  -0.7226,  -3.0264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4889, -2.0523, -2.0032,  1.8426, -1.9721, -3.4154, -0.5325, -1.8898,
        -3.5902, -2.1704,  2.3584, -2.0255,  0.0780, -2.0007, -4.7032, -0.6303,
         0.0548, -3.3751, -3.3980, -2.5760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6838, -3.7637, -1.9116, -2.0807, -3.0706, -4.1884,  0.9411,  0.1413,
        -2.5793, -1.1865, -0.3483, -1.3973, -3.4993,  3.4684, -5.1431, -0.5584,
        -2.7357, -1.3614, -4.3048, -8.2867], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4407,   0.3223,  -5.0802,  -2.5111,  -0.5311,  -7.1864,  -0.7977,
          1.7041,  -3.7806,  -3.8680,  -7.6526,  -4.6515,  -7.7554,  -9.3846,
         -4.6896,  -7.4757,  -3.2789,  -0.9676,  -0.6500, -12.4596],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1742, -3.5949, -5.9787, -7.2106, -0.8297, -0.0107, -2.4292, -1.3457,
        -2.6299, -0.4466, -6.2688, -0.8735,  0.7730, -1.8358, -0.6370, -1.2595,
        -5.3018, -2.9841,  2.6237, -2.8426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8887, -4.1900,  0.8716, -0.8015, -0.3901, -0.1955, -2.7073, -0.9755,
         2.9452, -1.7324, -0.5519, -1.1186, -4.3313, -0.7831,  1.9247, -1.5665,
        -3.6865, -4.3332, -3.8058, -4.3969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9532,  -7.9595,  -8.3465,  -4.6911,  -6.1466,  -1.3071,  -3.5950,
          1.8896,  -2.9816,  -0.6523,  -1.6229,  -3.2965,  -1.5670,   1.7492,
         -5.0880,  -3.6634, -16.6217,  -9.1210,  -5.4872,  -6.9436],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0041,  1.3908, -1.9499, -5.7012, -2.2107, -1.4981, -1.6863, -4.7861,
         0.8481,  0.6650, -3.9504, -1.1355, -3.9118, -0.6010, -6.2297, -0.0379,
         0.5411, -2.0542, -0.5463, -1.6254], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0242, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3867, -2.7889, -0.4518, -5.9673,  0.6601, -1.2294, -2.2324, -1.5081,
        -2.4605, -3.1790, -5.1597, -0.8216,  1.3860, -1.6858, -1.0162, -0.4064,
        -1.9869, -2.0629,  1.9934, -4.9978], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1923,  -4.3069,  -4.2915,  -3.8009,  -4.4736,  -4.0796,  -5.3853,
         -6.3551,  -5.9453,  -4.2488, -12.6014,  -7.1009,  -4.4651,  -4.1237,
         -4.5892,  -4.0627,  -7.0435,  -3.2780,  -4.4881,  -7.2142],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3363, -5.3363, -2.4362, -2.4912, -3.2666, -4.7056,  0.4582,  2.2794,
        -4.3041, -0.3340, -0.2618, -4.6423,  0.0241,  3.0456, -4.8665, -0.6879,
         0.1974, -3.0013,  0.6727,  1.4032], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9020,  -4.4347,  -0.7948,  -4.5201,  -6.7501, -11.3228,  -0.6030,
         -2.1890,  -9.7001,  -1.1509,  -1.6333,  -3.9252,  -5.5552,   1.4924,
         -3.1746,  -0.4310,  -2.5101,  -0.8817,  -3.4416,  -0.6277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7048, -1.9519, -2.4230, -1.9864, -4.3968,  1.4192,  1.7473, -5.8279,
        -0.1112, -3.7787, -1.9369, -9.6221,  1.0777, -1.3101, -4.3365, -0.9919,
        -1.7653, -4.8253, -4.8813,  3.1063], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6841,  -8.9581,   0.0968, -23.0493,  -6.3911,  -2.2676,  -3.3486,
         -4.0459,  -9.2904,  -4.6159,  -7.2660,  -1.1342,  -2.0600,  -1.8007,
         -4.1261,  -1.9485,  -3.2553,  -5.3401,  -2.2491,   1.3621],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9760, -5.4690, -6.2409, -8.8995, -4.8903, -7.0659, -5.3102, -4.5562,
        -5.9572, -4.7097, -4.2849, -6.3998, -7.6444, -8.7082, -1.8177, -5.5256,
        -4.1994, -7.5608, -3.9342, -5.4012], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.7276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0453,  -1.3445,  -1.0851,  -0.2790,  -4.1459,  -0.3304,   2.8294,
         -1.8711,  -2.2779,  -3.0779, -19.4281,  -4.0899,  -6.6420,  -0.7852,
         -3.1742,   0.0581,  -7.3767,  -1.5839,  -0.2094,  -3.9810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2769,  3.0449, -2.9614, -3.5513, -3.1712, -1.5952, -4.0249, -1.7025,
         1.6851, -3.0848, -1.9636, -3.7804, -6.5878, -0.8367,  0.9098, -4.4382,
        -2.3393, -7.7125, -7.5261, -5.9389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7332,   1.4102,  -2.0741,  -1.8024, -13.5506,  -4.2234,  -8.7553,
         -0.7351,  -4.6205,   4.0104,  -3.2615,  -7.8869,  -5.4873,  -6.3250,
         -3.1473,  -3.5638,  -0.6780,  -2.4985,  -7.3760,  -4.5866],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1563, -2.5560, -3.5047, -2.1742, -2.5924, -1.3485,  0.8448, -2.2657,
        -0.5225, -2.3240, -0.1022, -9.0995,  1.3876, -1.5186, -3.3384, -0.5553,
        -2.2608, -0.0192, -5.4725,  1.2830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  -4.3392,   -3.1147,  -31.8219,   -4.1127,  -11.3678,   -1.8633,
          -4.5355,    2.1893,  -25.1027,   -1.8488,  -26.8156,  -12.6918,
         -13.3579, -102.4593,   -0.8275,   -2.4346,   -6.2560,   -3.6565,
         -16.8933,   -8.8435], device='cuda:0', grad_fn=<SumBackward1>) tensor(-14.0077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3701, -3.4497, -1.9559, -4.1153,  0.6210,  0.3494, -3.0393, -1.8504,
        -3.5585, -2.5166, -6.9739, -4.3556, -1.5313, -4.5701, -1.0788, -2.2744,
        -2.3868,  0.5402,  2.9681, -3.2373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4762, -3.4584, -3.9860, -1.1778, -7.0070,  0.3365,  0.2863, -2.3068,
        -0.1035, -0.8295, -1.8757, -5.6417, -3.6964, -2.0979, -3.5405, -0.7801,
        -2.1632,  0.2786, -5.4957,  3.1047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1815, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6575, -4.4048,  1.0412, -2.3564, -2.3745, -3.9438, -1.7227, -3.1365,
         0.7339,  0.6359, -5.5994, -1.0046, -1.0836, -5.3967, -0.4757,  2.9982,
        -2.1840, -0.1079, -0.6606, -2.2869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2997,  -6.3425,  -3.5250,   2.4626,  -4.1179,  -2.3774, -16.3765,
         -7.2057,  -4.4300,  -5.5106,  -0.5298, -11.6478,   3.7672,  -4.1047,
         -3.0368,  -1.8498,  -5.4191,  -3.6804,   1.8013,  -6.2527],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0143,  -2.9381,  -3.4998,  -4.9769,  -2.9720,   2.4459,  -3.1560,
         -0.6848,  -3.1921,  -3.6056, -10.9417,  -3.1766,  -0.3683,  -3.1013,
         -0.3351,  -2.4941,  -5.7656,   0.2309,   2.3010,  -3.7138],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8544, -0.1075, -9.6546, -1.4387, -1.2266, -5.4797, -3.0547,  1.6944,
        -4.7860, -2.7523, -3.4537, -5.8530, -4.8919, -2.3443, -0.5515, -8.8316,
        -8.3630, -4.6767, -1.2257, -8.7679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4331,  -3.2964,  -0.6062,   3.3153,  -1.4855,  -0.3387,  -0.9334,
         -4.5643,  -0.0916,   3.0171,  -2.9408,  -0.7067,  -1.0750,  -0.3146,
         -5.1428,  -1.2239, -16.5840,  -1.6170,  -4.4780,  -6.2405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.0817,  -0.9543,  -1.7690,  -5.1187, -25.3366,  -2.2116,  -9.0680,
         -1.6001,  -2.1304,   3.3752,  -8.9651,  -3.1013,  -1.8014,  -0.1078,
         -7.0391,   2.1842,  -5.6640,  -4.7512,  -3.6347,  -3.6407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7590, -1.0814, -9.7392, -5.0182,  0.1054, -3.3600, -0.5236, -3.3494,
        -0.0899, -2.8584,  3.7014, -3.8522, -1.3210, -0.9779, -1.9702, -4.7417,
         0.1135, -1.9091, -3.0372, -2.2340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5912, -0.8339, -2.6511, -2.7165, -3.3147,  2.1553, -2.4562, -0.3089,
        -2.6282, -3.4749, -0.5742,  3.0254, -1.8106, -1.8900, -1.8736, -7.1220,
        -2.5925, -1.8069, -4.0657, -0.7989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9165, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7980, -3.0265, -1.6594, -4.6373,  0.1241, -0.4506, -3.9667, -1.8259,
        -0.5569, -4.0151, -0.1476,  3.2144, -1.6666,  0.1310, -0.9869, -0.8099,
        -1.8754, -1.1536, -4.3345, -0.0055], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2595, -5.4870,  0.8050,  2.5916, -2.2133,  0.1172, -1.8784, -1.8106,
        -2.1528,  0.5450, -2.3116, -2.7848, -0.9509, -5.2913, -0.1860,  3.0499,
        -2.3795, -0.3683, -1.1876, -3.2805], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8105, -2.8813,  0.3954, -1.9752, -1.8776,  0.6411,  1.8640, -3.4940,
         0.4722, -2.5849, -4.2346, -1.0743,  2.3423, -1.4621, -0.0195, -0.7900,
        -4.8758, -1.6191,  1.8896, -3.2877], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0172, -6.8948, -3.3607, -2.2895, -3.1045, -6.9561, -2.3117, -1.0650,
        -4.6388, -0.2472, -0.9987, -2.8377, -1.5359,  2.8456, -4.8341, -1.0735,
        -5.0513, -1.5066, -3.8324, -1.2943], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6266, -2.0086, -2.8044, -2.6481, -2.5031,  0.0836,  0.9700, -2.5529,
        -0.5579, -3.4911, -1.8781, -5.9023, -4.2900,  0.5541, -3.5130, -1.7250,
        -1.9762, -2.0839, -4.0061,  1.5565], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3521,  -6.4879,  -1.0442, -27.4097,  -3.5585,  -8.4234,  -0.4831,
         -5.1371,   2.9725,  -3.9382,  -2.8010,  -3.0213,  -0.3520,  -4.8419,
          0.9615,   0.7213,  -2.5416,   0.1078,  -1.2544,  -0.9295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.8388,  -7.9279, -10.3152,  -6.3733,  -2.6590,  -2.7887,   1.4832,
         -3.5092,  -1.2979,  -1.4752,  -7.1427,  -1.3136,   0.1873,  -4.2072,
         -2.4172, -14.4634,  -6.8313,  -7.8963,  -7.8895,  -6.3326],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.3079,  -3.3318,  -3.7486,  -2.7812,  -3.4690,  -1.3353,  -7.7974,
          1.6361,  -0.7263,  -4.2377,  -2.0434,  -4.0540,   0.5903,   3.2422,
         -2.2056,  -1.8042,  -3.1212,  -4.6070,  -3.7800,   0.8766],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1550, -1.5051, -1.0631, -1.5716, -0.3003,  2.5354, -4.8892, -2.1707,
        -2.8970, -2.5511, -4.7115, -0.6159, -0.3654, -5.4762, -0.2746, -1.5270,
        -1.6129, -0.3839,  2.4523, -2.0495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6012,  -2.7834,  -2.2598,  -2.6626,  -1.4385,  -5.2086,  -2.8125,
          1.7446,  -1.4887,  -0.1041,  -1.8031,  -4.1658,   0.4010,   3.0449,
         -7.6398,  -3.5111, -19.3472,  -1.8767,  -7.7569,  -0.7259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7526,  -3.7387,   3.4843,  -9.3259,  -2.5685, -13.8311,  -8.8225,
        -25.4386,  -1.0715,  -6.0340,  -0.8124,  -2.4447,  -1.9880,  -3.5475,
         -0.3625,  -5.8471,   1.5176,  -0.7578,  -3.6606,  -0.6885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.2258,  -3.4681,  -6.8559,  -0.3857,  -1.5894,   2.5199,  -5.0677,
         -1.8595,  -2.3772,  -0.9462,  -4.9757,   0.7058,   1.9257,  -2.3384,
         -1.1013, -22.6572,  -2.7869,  -9.4331,  -1.4114, -24.0061],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9979, -6.1518, -2.9965, -3.7636,  0.0808, -5.5930, -0.7923, -2.8273,
        -7.5605, -2.4277, -2.6610, -2.3732, -4.4007,  0.4878,  0.9470, -4.8683,
        -1.1461, -0.6758, -3.0764, -1.4347], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8116, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3237, -4.5444, -4.9993,  0.1030, -2.2320, -2.7513, -0.8565, -2.5847,
        -1.9865,  3.5061, -1.7018,  0.0450, -2.5426, -0.0115, -3.3265,  3.5861,
        -1.0972, -2.3253, -2.8103, -3.3818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3354, -3.4059, -1.3039, -1.2782, -0.3720, -3.8174,  3.9597, -5.5363,
        -1.9927, -2.1633, -6.2734, -6.7668, -0.3497,  1.4885, -3.2098, -0.1870,
        -2.2958, -6.5946, -0.7821,  2.8514], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4236,  -1.7226,   1.8366,  -5.2272,  -1.0163, -12.1768,  -4.4864,
         -3.8564,  -6.6866,  -8.2085,  -7.8468,  -7.9358,  -2.8628,  -2.9275,
          0.1272,  -2.9697,  -2.5219,  -2.5231,  -8.4620,  -2.1143],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0116,  -2.2256,  -0.9150,  -1.4357,  -0.3327,  -3.4615,  -1.6623,
         -6.5102,  -1.8081,  -2.9485,  -3.0087,  -3.2672,  -3.4473,  -0.4806,
         -5.7305,   0.1127,  -2.9756, -10.7292,  -0.2923,   1.0982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4984,  -1.3559,  -2.3942,   0.1091,  -2.3279,  -2.3811,   0.4047,
          3.4047,  -2.7280,  -5.1711,  -1.9118,  -3.2851,  -3.6701,  -0.5300,
          1.1685,  -1.8526,  -0.6186,  -3.1397,   0.3199, -10.4621],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4460,  -1.6310,  -9.7505,  -2.2737,   1.2964,  -2.1786,  -2.1771,
        -13.4286,  -2.0537,  -7.3001,  -2.5513,  -4.0361,  -0.0727,   2.2885,
         -4.7489,  -1.6203,  -1.2416,  -1.3340,  -3.1118,   2.6532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8601, -2.1598, -0.7152, -3.0219, -1.7102, -5.5201,  0.5095,  0.0610,
        -1.9873, -0.0665, -2.2021, -6.4607, -0.7978,  1.1145, -1.5787, -4.8824,
        -8.6054, -4.3226, -7.6710, -8.7183], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8436,  -5.2230,  -2.5033,  -2.9670,  -1.9047,  -2.6641,   4.0849,
         -4.6959,  -3.1477, -15.6525, -23.4852,  -6.8432,  -6.7667,  -6.3869,
         -6.1150,  -5.3896,   1.7486,  -4.3411,  -1.9873,  -4.5232],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3524, -0.8149, -2.4444,  3.5844, -7.6643, -1.2103, -3.2393, -2.3519,
        -5.5160,  0.3698, -2.2997, -2.9495, -1.4514, -0.7969, -2.6153,  0.4893,
         2.9303, -3.0730, -2.8705, -1.8393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2539, -0.7325, -4.8732, -2.0917, -4.2487, -5.1611, -3.9799, -1.1796,
        -7.6567, -4.1810, -3.0998, -1.1830, -5.5976,  0.3169,  1.6047, -2.5584,
        -1.7413, -2.6589, -1.3773, -6.9755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1055,  -6.2510,  -1.3887,  -4.8338,   3.6249, -12.7032,  -1.8032,
        -14.2184, -10.0107,  -8.0686, -17.2504, -10.2840, -19.6299, -23.3957,
         -8.8869,  -9.9271,  -6.2341,  -0.7161, -15.3026,   1.4472],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2014, -3.0674,  0.8535,  1.5727, -1.9200, -0.3593, -0.6004, -1.6467,
        -7.3469,  2.9571, -2.5200,  0.3418, -2.0653, -1.4248, -3.3331,  2.9894,
        -1.4186, -0.8208, -1.2840, -2.7664], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1655, -10.7582,  -2.2089,   1.4368,  -2.6231,  -3.0141, -10.1394,
         -9.2780,  -3.1598,  -6.7337,  -1.5154,  -2.4592,   3.1980,  -4.0225,
         -0.1294,  -2.6455,  -4.1963,  -0.8022,   2.6454,  -3.5116],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0541, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2156,  2.5778, -2.9172, -1.1916, -2.4192, -0.3304, -3.1129,  3.3073,
        -3.8472, -0.9266, -5.1031,  0.4598, -5.0191,  0.3023, -1.6818, -2.2344,
        -3.1196, -2.4319, -2.7712, -4.2806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1114, -2.5125, -0.3465, -1.7440, -0.7680, -1.1988, -0.3171, -4.5234,
         2.9575, -1.4811, -3.3624, -4.8995, -1.8385, -9.4061,  0.1474, -1.1730,
        -3.5196, -0.3452, -1.4336, -2.2379], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1050, -2.9047, -3.8342, -0.6627,  0.6664, -4.1931, -0.8532, -3.9537,
        -4.4121, -0.6917,  0.8352, -2.5933, -0.7458, -1.6758, -2.7902, -5.9773,
         0.7919, -0.5833, -2.4760, -0.5507], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5084,  -5.5606,  -4.1396,  -3.2629,  -6.7294,  -0.9485,   2.9311,
         -4.5009,  -4.2129, -15.4996,  -7.9670, -11.4174,  -5.4207,  -1.2550,
         -3.6233,   3.7232,  -8.4972,  -0.3705,  -2.6918,  -1.5854],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2268, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.1995,  -2.7051,  -3.7722,  -1.3511,  -0.9809, -11.2461,  -5.1782,
         -1.4026,  -1.9130,  -0.8490,  -2.0158,   0.0355,  -2.8074,  -0.3510,
         -3.9462,  -3.5828,  -3.6232,  -2.8705,  -5.3491,   0.3565],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9225, -2.1028, -4.5923, -2.1572,  1.1181, -9.1100, -2.4218, -9.6539,
        -6.5803, -7.4696, -6.5840, -1.0491, -3.3163,  4.1305, -3.6231, -0.8849,
        -2.4752, -1.2097, -4.5937,  0.4977], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0410, -2.4877, -0.7933,  3.0348, -7.7150, -1.6772, -1.5261, -1.8631,
        -5.9361,  2.2811, -5.5605, -0.2535, -0.9216, -0.1164, -5.6380,  3.5063,
        -3.3803, -2.3322, -3.8237, -1.2028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9624, -3.7410, -0.1810,  1.4048, -3.3647, -1.0992, -0.4583, -1.3336,
        -3.8265,  3.1980, -4.0692, -2.9142, -2.4038, -1.1842, -6.9181,  0.3368,
        -0.2709, -3.7612, -0.9243, -2.1494], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8259, -3.5897, -2.9784, -8.6745, -2.7649, -7.4033, -3.5503, -2.4971,
        -3.0383, -9.9877, -5.8504, -2.5497, -5.5071, -4.6806, -2.1480, -3.3866,
        -3.6301, -5.0599, -6.1593, -4.0557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2393,   3.7434,  -8.8494,  -5.8747,  -6.4247, -10.8081,  -0.0523,
         -8.4965,  -1.0608,  -3.6419,  -1.1458,  -3.2992,  -2.6503,  -0.5878,
         -1.4843,  -2.9805,   2.3264,  -3.4245,  -1.7474,  -1.6683],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8623,  -6.4622,  -1.8273,  -4.2297,   0.1216,   2.2175,  -3.2644,
         -3.1508, -10.1615,  -7.4535,  -4.5575,  -5.8602,  -1.1435,  -4.4674,
          1.8240,  -9.9944,  -2.7031,  -3.2773, -10.8643, -11.2076],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7774,  2.8546, -2.5373, -3.6868, -2.8152, -2.8478, -6.0866, -1.7444,
        -0.1356, -1.9155, -0.2934, -4.4593, -1.6269,  0.0300,  3.0817, -4.0037,
        -1.9718, -9.8600, -3.2866, -8.4821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4640, -4.2987, -1.4693, -1.7610, -2.6605,  0.2794,  2.3313, -2.8305,
        -0.5919, -0.9349, -3.4271,  0.1433,  2.8898, -3.1773, -0.3445, -1.6776,
        -3.5869, -1.6956,  2.2048, -2.9335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9846, -2.7891, -6.4833, -1.3677, -2.1858,  3.2513, -6.5954, -2.1474,
        -1.3487, -3.8302, -0.6270,  2.9647, -3.9707, -1.1380, -2.8286, -5.5690,
        -4.9519, -1.8808,  1.5600, -5.5218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0494,  -3.5222,  -5.7928,  -4.4925,  -6.7437,  -0.4575,  -4.9588,
         -3.9165,  -4.8609,  -7.2372,  -3.1608,  -6.9235,  -5.9387,  -7.4212,
         -2.9351,  -2.7635,  -3.5561,  -1.7426,  -3.5667,  -4.6092],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7870, -0.7800, -4.3470,  3.2495, -2.2545, -1.2394, -1.5852, -3.9580,
         0.1949,  2.2112, -2.9545, -2.6719, -2.5797, -2.3991, -4.0741,  2.8927,
        -3.4201, -1.7914, -1.5962, -5.8941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9198,  -6.5370,   0.1514,   1.0129,  -4.1990,  -3.9571,  -2.5957,
         -1.7053,  -2.1178,   3.2382, -10.4352,  -3.4793,  -5.6682,  -1.6931,
         -4.6453,  -0.1961,   2.5976, -12.7557,  -3.1290,  -3.0297],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3189,  0.7319, -3.2881, -2.5074, -5.1184, -4.0005, -6.7014,  0.9727,
        -2.3726, -2.0004, -3.4255, -4.0723, -3.3574, -0.1602, -5.0634, -3.4371,
        -5.4076, -5.2759, -2.4324,  1.8097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4351, -0.5420, -2.2036, -1.5112,  2.1974, -3.0828, -0.6339, -0.4745,
        -2.3220, -1.6232,  3.1054, -1.5303, -2.3199, -3.1911, -1.8409, -2.8616,
        -1.6879,  1.2054, -3.1206, -1.4201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0120, -1.9993, -0.0621,  3.0774, -4.5939, -1.0706, -3.1156, -0.9114,
        -6.2906,  0.9928, -0.2475, -4.2275, -1.6628, -1.1426, -2.8070, -2.0145,
         1.0030, -3.7799, -3.2649, -3.9010], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5728, -0.6100, -6.2091,  1.2006,  0.2097, -2.4876, -0.8093, -0.7458,
        -1.7115, -2.0285,  1.1554, -3.7548, -2.2789, -8.2422, -5.3269, -3.4689,
        -6.0992, -1.1996, -2.2435,  1.0373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3558,  -0.5595,  -2.3703,  -6.2180,  -1.3521,   1.9666,  -5.3885,
         -3.3182,  -2.8805,  -2.0711, -18.8983,  -6.9245,  -2.6755,  -3.9716,
         -4.1859,  -1.1586,  -3.8128,   0.3307,   1.1298,  -2.8937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4804, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6733,  -1.8777,   3.0010,  -2.7604,  -0.9465,  -3.1905,  -2.8423,
         -4.1566,  -1.6593,   2.0350,  -1.7457,  -1.4740,  -1.1632,  -6.6346,
         -3.9380,  -0.5952,  -2.9973,  -1.3363, -13.3540,  -6.7301],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2022, -1.1447, -2.4150, -0.0491,  1.6230, -5.4285, -0.4607, -3.8472,
        -2.2421, -5.6401,  0.2965, -0.5670, -3.3273, -1.0381, -1.6873, -4.5061,
        -1.8905,  1.2546, -3.3467, -0.5714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8959e+00,  9.5389e-03, -1.6375e+00, -2.0428e+00, -6.9543e-01,
         2.1980e+00, -4.9009e+00, -7.9632e-01, -1.3326e+01, -3.7235e+00,
        -1.0365e+01, -4.1161e-01, -3.1793e+00,  2.5161e+00, -6.9464e+00,
        -1.8226e+00, -2.0261e+00, -3.1444e+00, -2.5058e+00,  1.8402e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1803,  -8.8252,  -0.3201,  -6.8349,  -2.7638,  -4.5568,  -0.8186,
         -2.6537,  -1.7296,   0.2223,  -8.3273,  -2.4530,  -3.1691,  -4.7107,
         -2.2612,   0.7404,  -2.7090,  -6.3436, -36.1641,  -8.0137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7135, -2.6931, -2.2103, -1.0136,  1.7238, -2.7113, -4.2538, -4.9125,
        -6.9713, -7.8360, -4.4139, -5.5888, -3.8722, -3.1018,  2.1484, -5.9258,
        -1.7623, -1.7868, -0.6989, -5.1330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3549,  -3.8110,  -4.0084,   2.1971,  -2.9466,  -0.8693,  -1.3604,
         -4.2100,  -0.9558,   2.2110,  -1.3709,  -2.9647, -20.7086,  -1.8800,
         -7.3112,  -1.6011,  -1.4789,   1.6434,  -4.1783,  -1.8091],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7652, -4.9527, -1.6876,  0.3925, -2.4765, -0.8084, -0.5351, -5.1627,
         0.9167,  0.7882, -3.0929, -1.1149, -2.5849, -2.7132, -2.4188,  1.8834,
        -1.7341, -0.8967, -1.1425, -3.4571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0031,  -4.4750,  -5.1966,  -1.6143,  -2.8288,   4.2303, -11.5089,
         -0.5459,  -2.2546,  -3.7268,  -2.4132,  -0.8916, -11.1464,  -4.9886,
        -13.0135,  -8.0241,  -8.7133,  -7.2972,  -4.9587,  -2.2840],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2973, -5.1186, -2.8239, -5.8061, -1.1160,  0.4013, -4.5572, -0.7485,
        -1.8818, -4.2916,  0.0741,  3.1878, -3.3140, -1.6736, -8.1565, -5.6250,
        -5.7067, -5.3271, -1.1534, -0.3839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2243, -13.7614,  -4.4107,  -5.8680,  -2.1770, -15.9481,   2.6430,
         -1.5613,  -3.9154,  -1.0965,  -2.7301,  -2.1441,  -3.9690,   1.0159,
         -1.5994,  -1.8466,  -0.0855,  -3.7095,  -1.2562,  -0.6416],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5657, -13.7592,  -8.6045,  -5.3491,  -6.1091,  -4.5162,  -4.7512,
         -1.8194,   1.8047,  -6.5317,  -3.9894,  -2.9964,  -5.9535,  -6.0751,
         -0.3306,  -3.9690,  -3.1277,  -8.6885,  -3.2920,  -5.1295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5816, -2.5122, -7.0165, -3.9418, -9.1890, -0.8003, -2.6385,  2.4320,
        -8.9785, -0.7276, -3.4652, -1.6551, -6.3353,  1.2690, -0.2628, -5.0686,
        -1.3833, -3.1869, -0.8411, -4.2372], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9761,  0.0324, -1.5786,  3.5273, -1.9924, -1.0349, -1.7813, -1.4268,
        -1.0687,  4.2501, -3.7808, -2.0109, -2.2557, -0.5052, -4.4933,  1.1008,
        -1.0608, -2.4011, -1.7986, -2.1759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3830, -2.9679, -2.9982, -5.8586, -3.4675,  2.5414, -3.6339, -0.6376,
        -3.4166, -4.0612, -5.9144, -2.4199, -1.6212, -5.8071, -4.2685, -1.1697,
        -4.1129, -2.1568,  1.3808, -4.1245], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1848, -1.1417,  2.8827, -1.2104, -3.6654, -8.2745, -8.0825, -3.5539,
        -6.0424, -5.5725, -6.2537,  1.5938, -7.9311, -1.1694, -0.5084, -5.6324,
        -1.1226, -0.2039, -4.1613, -3.5892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2840,  0.5396, -3.8901, -1.2156, -2.6700, -3.4069, -2.4247,  2.1252,
        -2.0069, -1.5580, -1.4101, -1.1164, -6.1207,  0.9057, -0.5163, -2.3751,
        -0.6443, -0.4087, -1.5764, -0.4430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5615,  -3.3497,  -3.0244,  -5.3898,  -4.1700,  -4.3327,  -1.6530,
         -3.4460,  -3.1031,  -3.7299,  -1.9602,  -3.9343,  -0.8627,   1.2464,
         -3.7379,  -3.4663,  -4.4278, -13.0597,  -6.5289,  -0.8625],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6086,  2.3639, -4.1197, -0.6098, -1.8384, -1.4656, -4.2310,  0.3638,
        -0.8522, -2.1947, -2.1624, -4.1727, -3.0974, -2.1163,  2.5700, -2.6794,
        -0.6977, -0.9558, -3.7894, -2.1420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6856, -5.4643, -5.7576, -8.6483, -8.4768, -0.4727, -3.8633, -3.2999,
        -4.2099, -8.3204, -7.5113, -1.6992, -4.0137, -1.5718, -7.9530, -7.6823,
        -3.8083, -6.4506, -5.0834, -1.9191], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1648, -3.9135, -1.9772, -1.4797, -1.6341, -0.4461,  3.8235, -1.7579,
        -1.7492, -9.2519, -5.5503, -2.5359, -4.8659, -1.3830, -3.5287,  1.1958,
        -6.9433, -0.7527, -0.5130, -6.8354], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3967, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8357, -0.6899, -3.1116, -1.4956, -2.7424, -0.5858, -3.7899, -0.4783,
        -0.2653, -2.6904, -0.2684, -1.9726, -1.4863,  0.3038,  1.9680, -3.0230,
        -0.3725, -3.7641, -1.1561, -5.0992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0616,  -2.4125,  -3.9350,  -3.8592,  -5.4200,  -5.1583,  -5.1641,
         -0.2068,  -4.8055,  -2.2216,  -6.8079,  -8.7629,  -3.3579,  -7.0634,
         -5.3329,  -2.0842,  -2.8265,  -5.6528,  -3.1004, -16.8091],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4965, -4.6329, -3.1228, -4.3696, -2.9309, -4.4280, -5.1686, -5.1037,
        -1.9280,  0.1745, -2.6075, -3.0398, -4.1310, -1.3408, -5.6437, -0.4571,
         1.6989, -2.5162, -2.3260, -3.8376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2527,  0.1539,  2.6634, -2.1612, -0.8223, -1.6146, -1.7069, -4.4990,
        -0.3558,  1.0218, -4.4021, -0.4485, -2.9456, -2.4864, -4.3279,  2.6474,
        -1.9686, -0.8484, -2.0129, -3.4632], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.5771,  -5.9097,  -3.9779,  -5.5354,  -1.2675,  -1.7038, -11.0308,
         -2.0586,  -0.6332,  -6.2510,  -3.0777,  -2.0542,  -1.0474,  -5.4706,
          1.3644,  -1.0131,  -1.2250,  -9.8456,  -4.2597,  -8.9399],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5915, -0.9700, -5.3271,  1.2557, -9.6529, -3.6414, -2.4101, -3.6375,
        -0.2939, -5.6024,  3.5560, -2.8736, -3.0297, -1.2304, -2.7090, -1.3002,
         2.0061, -3.4992, -1.9109, -1.4450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4700, -1.6303,  2.3462, -3.3207, -2.2419, -2.5555, -1.0306, -6.8870,
        -1.8653, -0.1135, -3.2742, -0.1733, -4.1662, -1.3372, -3.3853,  3.5329,
        -4.7730, -1.8010, -1.5723, -0.6788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3133, -3.8922, -0.7200, -3.9101,  0.8514, -8.0881,  0.4314,  0.3110,
        -3.7127, -1.2039, -5.3106, -2.8734, -3.8771, -0.0797,  1.9262, -2.4525,
        -0.2015, -4.6510, -0.9144, -5.1556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3765,  -4.6135,  -1.9205,   0.2219, -10.3517,  -1.0781,   2.5645,
         -2.7265,  -3.4590, -10.5852,  -7.0431,  -4.1928,  -4.6606,  -2.6688,
          0.0250,   3.0217,  -4.7719,  -0.3030,  -3.8597,  -2.1201],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1783, -3.1879, -6.5119, -2.8579,  3.1493, -5.5146, -4.7411, -4.8855,
        -3.2545, -4.1154, -3.9581, -0.0846, -7.7592, -3.3483, -0.7288, -6.5636,
        -1.3576,  1.5024, -3.5968, -4.0340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9159,  -8.1973,  -8.2908,  -4.5620,  -6.0780,  -1.1835,  -3.3659,
          2.0100,  -3.0265,  -0.5515,  -1.5602,  -3.2555,  -1.4007,   1.8740,
         -5.0696,  -3.7231, -16.7841,  -9.2835,  -5.4006,  -6.9447],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0693,  1.6281, -2.0178, -5.6862, -2.1940, -1.4787, -1.7118, -4.7412,
         1.0371,  0.7674, -3.9210, -1.0617, -3.9240, -0.5469, -6.2187,  0.1344,
         0.5337, -2.0344, -0.3597, -1.5829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0965, -2.1304, -0.0387, -1.2819,  0.5614, -3.4594,  4.2747, -4.5404,
        -0.5294, -1.6974, -1.2435, -2.5441,  2.8199, -9.4780, -0.2208, -1.8136,
        -0.9851, -5.5944,  3.2795, -5.5094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4017, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8504, -2.5238, -0.4641, -7.0906, -1.0066,  0.5533, -5.5337, -2.7740,
        -2.2416, -3.3867, -0.9712,  1.0015, -2.1391, -1.3728, -2.4563, -0.5010,
        -4.7291,  0.6259, -2.2938, -2.1727], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3192, -0.4667, -0.7485, -0.0711, -4.9570,  3.0081, -2.7449, -1.3020,
        -3.3466, -4.3927, -1.9532,  2.2821, -3.5092, -1.1143, -1.2526, -1.2541,
        -7.4033,  1.1044, -0.5003, -2.2376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2504e+00, -5.5109e-01, -2.6096e+00, -4.6029e-02, -6.4931e+00,
         1.1872e+00,  5.0639e-03, -3.3927e+00,  2.4814e-01, -1.1532e+00,
        -6.9022e-01, -3.4776e+00,  3.3203e+00, -1.0696e+00, -1.1504e+00,
        -1.3622e+00, -4.1758e+00, -1.4748e+00,  1.4599e+00, -1.5480e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3554,  -3.3697,  -0.7434,  -2.6905,  -0.6883, -11.0356,  -5.0479,
         -4.7142,  -8.3594,  -5.7699,  -6.6221,  -1.2555,  -8.4087,   1.1452,
         -4.6359,  -3.4025,  -0.9416,  -0.9767,  -5.2463,   0.6133],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4768,  1.2976, -4.4615, -3.3330, -3.7103, -4.0005, -3.1960, -2.6201,
        -0.7272, -7.7459, -4.2336, -1.6394, -1.9859, -5.6203, -1.1379, -2.6666,
        -3.5776, -1.1309, -2.5048, -1.5911], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.5947, -6.3209, -4.1196, -3.2473, -1.9016, -0.6328,  1.7479, -4.9996,
        -1.3337, -3.4384, -2.1099, -3.4236, -2.3360,  0.5728, -2.1936, -1.4728,
        -1.2917, -4.2578,  0.1560,  3.2184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.8965, -3.3471, -0.7971, -1.4102, -2.1955, -4.1128,  0.6879,  1.4242,
        -2.2174, -0.8535, -0.9827, -4.8514, -2.0270,  2.1703, -4.3086, -0.8074,
        -3.9306, -1.8240, -6.7933, -0.9390], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2022,  2.7199, -3.3197, -1.8071, -3.4239, -2.2302, -1.1294,  3.4303,
        -4.9134, -2.2804, -3.8671,  0.1510, -6.0418,  0.6882, -1.6638, -5.3230,
        -1.3318, -3.4416, -2.2797, -2.9496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0942, -2.4570, -3.7193, -1.9240,  1.8484, -2.7439, -1.1752, -2.5943,
        -1.1973, -6.5486, -1.5417,  0.2028, -2.9177, -0.2970, -1.9670, -4.7228,
        -0.7772, -4.6614, -4.4850, -3.0252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8419, -6.6516,  1.2331, -0.0174, -2.6695, -1.1814, -1.2205, -3.5795,
         0.8744,  2.5106, -2.3454, -5.0590, -3.0521, -1.0089, -2.8990, -2.0614,
         1.4996, -2.7341, -1.8999, -1.0955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6100, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9752, -6.3014, -5.6559, -0.7927, -3.9392,  4.2060, -3.7095, -1.2991,
        -2.2148, -0.6084, -4.0455,  0.5069,  0.1244, -4.0285, -1.9085, -1.7676,
        -1.8545, -4.2604,  2.7065, -2.4003], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2278,  0.6916, -3.9773, -2.0464, -4.7024, -2.3186, -5.9878, -1.2699,
        -1.9552, -2.3759, -0.1711, -1.0863, -3.3982, -1.7657,  2.9432, -4.0051,
        -1.0970, -2.1704, -3.7396, -5.1558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7538, -8.4813, -0.7629, -5.2597,  2.3082, -4.0921, -1.5520, -3.1462,
        -0.9905, -4.1984,  0.7889,  2.9422, -2.8620, -0.3060, -1.0451, -2.5633,
        -0.7619,  3.1889, -5.0491, -1.4589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6020, -10.6081,  -9.2008,  -3.7247,  -4.7545,  -1.3154,  -3.0550,
          3.4400, -39.0862,  -2.6546, -18.9040,  -5.7174,  -1.6841,  -8.6405,
         -3.5083,  -3.9172,  -2.3297,  -4.1147,  -4.0255,   1.3040],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3451, -10.1244,  -3.2550,  -0.6450,  -1.6437,  -1.3141,  -4.5831,
         -0.8866,  -1.9699,  -7.6711,  -0.5034,  -3.6939,  -0.9560,  -3.3761,
          0.5886,  -6.9801,  -4.2106,  -1.8730,  -4.0182,  -1.2337],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5346,  0.0962, -2.8232, -0.2105, -5.7204,  0.6534,  0.7948, -1.4141,
        -0.6274, -1.5985, -2.3189, -0.5784,  3.4850, -3.9318, -0.7205, -3.6408,
        -0.8940, -4.3644, -5.7529,  1.1223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1724,  -2.6788,  -9.6099,  -5.8256,  -4.9399,  -7.1322,  -1.3801,
         -2.6255,   0.4792,  -3.3047,  -0.6880,  -3.1382,  -2.1667,  -2.7660,
          2.2287,  -0.9251,  -1.0221, -13.5795,  -6.4793,  -5.5188],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8768, -3.1637, -0.5874, -2.5311, -1.7025, -1.2838,  2.4947, -2.0565,
        -0.9542, -4.3824, -1.7588, -3.9207,  0.4503,  2.7152, -2.8383, -1.5137,
         0.0296, -2.2410, -1.7277,  0.9528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1071, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8169, -9.0269, -4.2443, -0.6921, -3.3186, -0.8717, -2.8869, -1.7458,
        -0.2558,  1.6653, -2.9359, -3.5438, -2.4914,  0.1105, -5.3142,  0.1878,
         0.4508, -2.8652, -1.8086, -2.4388], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5396, -1.6646, -6.2966, -2.6051, -6.1432, -5.1925, -2.0591, -3.5261,
        -1.0657, -1.6182, -2.1443,  1.5745, -3.1079, -0.1137, -1.8515, -3.9637,
        -0.9208,  2.7437, -2.1990, -0.7384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0978,  -2.5508,  -3.6307,  -1.6480,  -4.6453,   0.7712,  -0.7804,
         -9.2876,  -2.2064,  -2.2618,  -1.2665,  -3.8634,  -0.1178,  -0.5888,
         -3.3273, -20.0871,  -6.6673,  -9.3346,  -0.7450,  -3.8185],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3942, -4.1643, -1.7596, -1.7927, -3.4363,  0.4620,  2.3740, -2.2609,
        -3.8175, -3.4152, -2.6732, -0.0587,  1.0501, -2.5976, -0.4601, -2.8241,
         0.0125, -6.7333,  1.6521,  0.5159], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6760,  -1.1199,  -3.3220,  -2.5294, -10.0926,  -6.5490,  -4.6221,
         -6.0448,  -2.7454,  -4.6115,   2.6034,  -8.3600,  -2.1265,  -2.6326,
         -6.3159,  -2.8501,   1.9767,  -4.1737,  -0.7594,  -3.9568],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6860,  0.3551,  2.5788, -3.3827,  0.2098, -1.0729, -0.2793, -1.7865,
         2.8957, -3.1891, -1.7509, -3.3623, -1.7730, -3.6348, -1.6385, -0.9478,
        -5.1978, -3.5382, -1.5261,  0.0257], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.6277,  -5.6714,  -1.7761,  -3.9310,  -1.9284,  -5.7280,   2.0837,
         -8.7948,  -4.1904, -13.3745,  -2.7731,  -7.5757,  -1.1790,  -1.4250,
         -2.2288,  -2.8236,  -2.7252,  -1.7374,  -1.1316,  -3.6216],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1080, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5232,  -0.0676,   2.9085,  -4.5387,  -2.4691,  -0.6644,  -4.4167,
         -0.4018,   0.4889,  -3.0788,  -1.6631,  -1.6420,  -4.4446,  -0.9980,
          1.7403,  -3.4016,  -4.1840,  -4.5356,  -7.6418, -10.1174],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4642, -0.8717, -5.7939, -1.7671, -3.2547, -3.7801,  0.1422,  3.0961,
        -2.8397, -0.9528, -2.9609, -1.3621, -2.4895,  3.1933, -2.1169, -1.6313,
        -2.7839, -0.8555, -4.4256, -0.9071], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.7370,  -2.8472,  -4.3116,  -6.8310,  -1.8016,   2.0612,  -7.5617,
         -4.2094, -20.0218,  -7.1607,  -6.9690,  -3.3161,  -1.2688,   1.8764,
         -3.3876,  -3.2315,  -2.4047,  -2.5284,  -9.3149,  -0.5270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4588, -0.6282, -2.9124, -0.3572, -4.5791,  3.5471, -2.2212, -4.0800,
        -4.9190, -0.8241, -5.9733, -1.6065,  0.8792, -2.8372, -3.2627, -2.4784,
        -2.5604, -2.5675, -1.6951,  1.3103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0862,   0.2492,  -2.6393,  -0.7206,  -4.8134,  -0.3714,   1.8805,
         -2.3291,  -3.3492,  -2.1035,  -5.2401,  -0.8638,   2.8079,  -3.2305,
         -1.2937, -13.2560,  -5.1807,  -7.7269,  -2.8318,  -3.7142],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2807,  -2.1719,   3.1724,  -4.1754,  -2.4394, -15.5359,  -5.2008,
         -3.4274,  -6.7580,  -4.8541,  -0.2425,   2.7014,  -2.0128,  -0.2771,
         -1.7457,   0.1426,  -2.2637,   3.5729,  -3.9491,  -2.5228],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.4645,  -2.4740,  -7.8469,  -0.2474,  -2.2893,   3.1888,  -6.0980,
         -1.2484,  -1.2083,  -3.1732,  -4.5541,   1.7375,  -2.0221,  -0.7370,
         -2.4697,  -2.3374,  -1.4439,   2.8508,  -1.5187,  -1.4034],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0863, -1.0530,  0.3160,  3.9573, -4.7113, -0.6868, -2.3114, -2.1439,
        -1.5519,  3.2633, -2.4395, -3.1370, -4.8187, -4.7773, -2.8871, -5.5812,
        -8.9029, -1.0950, -2.4041, -4.8884], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5860,  0.3417, -0.6340, -1.5232, -0.1343,  3.2473, -4.3231, -0.6149,
        -1.9228, -1.7835, -5.9987, -0.4189,  0.1866, -1.2771, -0.8296, -2.6209,
        -0.5560, -4.6365,  1.7724,  0.6047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.0359,  -2.6828,  -0.2591,  -2.9125,  -2.0480,  -7.8571,   0.6727,
         -0.5207,  -2.8420,   0.4954,  -2.4872, -17.7058,  -4.2977,  -2.0426,
         -4.3983,  -1.2861,  -1.2043,  -2.9547,  -3.1720,   2.3913],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7943,  -5.3256,  -6.0844,  -4.0293,  -6.3550,  -6.2508,  -2.3895,
         -3.3679,  -2.9311,  -4.0698,  -3.8498,  -7.3381,  -5.3476,  -1.5359,
         -4.5373,  -2.6158,  -4.5349,  -3.8641, -12.4188,   0.7427],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2463,   1.1536,   1.5006,  -2.6227,   0.2029,  -7.0400,  -0.1581,
         -5.7196,   1.0710,   1.5121,  -3.3998,  -0.1241,  -1.4045,  -3.0414,
         -0.7311,   2.1298,  -4.5355,  -3.6703, -23.0502,  -3.0547],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6121, -4.2759, -0.9091, -1.6502, -2.2935,  1.5363, -2.1816, -0.7072,
        -4.2617, -0.2290, -8.4370,  0.3768,  0.7101, -3.7473, -0.5838, -4.0079,
        -0.9186, -4.0497, -0.3094,  0.7337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1435, -2.2552, -3.7567,  0.5423,  2.5335, -1.2376, -0.6640, -4.9445,
         0.3268, -6.5471, -1.0946,  0.5050, -5.2861, -1.7276, -1.7935, -2.1134,
        -0.8574,  2.8026, -1.8982, -1.3540], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4285,  -2.1327,  -1.3562,  -1.0553,   3.7297,  -1.6737,  -0.7422,
        -12.6373,  -6.2569,  -3.6376,  -5.5761,  -1.4632,  -2.2342,   1.6377,
         -2.8075,  -2.5964,  -1.9374,  -6.8385,  -1.8361,   1.9976],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0683,  -1.1334, -10.2757,   4.2943, -19.8887,  -2.2468,  -1.7834,
         -3.5368,  -2.4129,   2.7689,  -3.6886,  -4.1419,  -2.9336,  -3.1754,
         -6.2591,  -1.6730,  -0.8576,  -1.7574,  -1.9008,  -0.6073],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4589, -5.8361,  0.6735, -3.0676, -4.4617, -3.0229, -5.7838, -1.6680,
         3.2684, -6.0971,  0.2931, -2.1733, -1.9616, -5.7393,  0.7681,  1.0062,
        -2.6041, -0.5965, -3.4425, -0.7281], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9123, -1.7846, -3.7858, -1.3787, -1.0965, -2.3176,  0.6085,  1.1428,
        -1.9731, -0.2112, -3.7258, -1.7204, -0.3894,  3.5445, -2.5108, -0.5741,
        -3.0189, -4.0905, -5.4084, -1.2061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6404, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4677, -4.5596, -0.5254,  1.7484, -1.9840, -1.0111, -1.4149, -2.9999,
         0.9936,  3.6901, -1.8661, -0.8158, -0.6868, -2.1639, -1.0263,  2.3065,
        -6.1247, -1.0797, -1.8997, -1.8820], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5226, -1.6320, -1.8160, -7.9413, -1.3062,  1.2160, -5.3586, -1.3814,
        -4.1694, -3.1684, -2.1761,  0.8740, -4.3151, -3.3069, -2.2934, -3.9051,
         0.4564,  0.6263, -7.4890, -1.2100], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0059, -6.9518,  0.5317, -4.9310, -0.6177, -2.8800, -0.5643, -6.0902,
         0.6619, -0.6516, -2.7384, -0.5487, -2.0537, -2.4023, -1.4222,  3.3416,
        -3.2685, -0.6868, -3.1196, -0.8410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0106,   1.7532,  -2.9720,  -0.2822,  -1.5108,  -1.2896,  -5.8628,
          0.2885,  -0.0898,  -3.5864,  -0.3632,  -1.6770,  -3.1080,  -4.5197,
          3.7488,  -2.1298,  -0.5261, -18.5216,  -4.1142,  -8.5715],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7173, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8280, -2.9707, -3.4263, -1.1692,  1.8414, -5.3098, -0.3489, -2.3742,
        -3.7871,  0.5236,  0.9809, -6.1955, -0.5981, -1.5904, -1.0308, -9.8173,
        -1.3002, -0.3865, -1.9706, -0.2927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1525, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5511, -1.6240, -1.5668, -0.0591, -5.1984, -0.0730,  0.0279, -2.7906,
        -3.0613, -1.0302, -2.0465, -0.6453,  2.7802, -3.5785, -4.4239, -7.6990,
        -9.2622, -2.6117, -6.7095, -1.4707], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1535,  -1.1249,  -2.6134,   2.0616,  -2.4156,  -3.3613,  -2.4968,
        -13.0590,  -6.9763,  -6.2911,  -4.7470,  -0.5360, -19.6169,  -2.3351,
         -7.0252,  -0.8409,  -3.9426,  -0.5139,   1.8312,  -3.6747],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9513, -5.2746, -3.8079, -3.4881,  2.8867, -2.5445, -1.3252, -2.9070,
        -2.4499, -7.4718,  1.3505,  0.0991, -2.2350, -0.5485, -3.9094, -2.4270,
        -1.5339,  3.3378, -2.8720, -1.7200], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8508e+00, -7.1630e-01, -2.4314e+00, -4.0290e+00,  2.9971e+00,
        -3.2771e+00, -1.7363e+00, -4.2533e-03, -6.1748e+00, -3.3541e-01,
        -4.8051e-01, -2.4449e+00,  3.1522e-01, -3.5500e+01, -6.8784e+00,
        -9.3049e+00, -5.2533e+00, -5.7136e+00, -3.0529e+00, -5.7661e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3793,  -2.3767,  -2.7916,   0.9222,   3.1761,  -1.7044,  -3.3717,
         -3.5342,  -0.0949,  -5.3347,   0.8291,   1.3022,  -2.8750,  -6.4828,
         -2.5175,  -0.6657, -11.5258,   1.3563,  -2.4646,  -7.1715],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9277,  2.9024, -2.4686, -0.7252, -0.5570, -3.5651,  1.2566,  3.1294,
        -2.1199,  0.7862, -1.7867, -1.0554, -6.2395,  0.8739, -1.6198, -2.1971,
        -0.1958, -1.2992, -2.1154, -8.9114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8977, -1.7956, -0.0548,  3.2025, -2.1857, -0.1111, -2.8451, -0.7145,
        -5.6706,  0.7775, -1.3299, -3.4514, -1.4363, -3.0688, -2.8640, -2.5716,
         1.0167,  0.6379, -2.3327, -0.0622], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1605,   0.6737,  -2.8394,  -4.2683,  -2.1598,  -3.4697,  -2.5847,
          2.0704,  -5.9543,  -3.5310, -11.8376,  -3.3125,  -6.8863,  -3.1551,
         -2.6818,   1.6100,  -3.5773,  -2.6013,  -1.8014,  -4.8373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9053, -1.1854, -2.6218, -9.2556, -3.0503, -2.2607, -0.9470, -6.4515,
         1.7145,  0.8911, -3.2282, -1.3797, -2.8184, -1.7921, -2.5829,  3.0545,
        -3.2561, -3.1667, -1.0706, -4.2161], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8003,  -7.4635,   1.0629,  -0.2742,  -2.9954,   0.2165,  -1.0962,
         -5.1140,  -0.1634,   2.8623,  -4.5057,  -3.0505, -16.2895,  -7.0008,
         -3.2566,  -5.4802,  -1.2536,  -3.3927,   1.5859,  -5.7387],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2074, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3871, -22.4037, -28.9031,  -5.0060,  -5.3256,  -5.3166,  -0.7565,
         -4.7306,   0.0934,  -4.9048,  -1.0391,  -1.3759,  -4.9714,  -1.0944,
          2.8192,  -2.6932,  -0.4677,  -8.7028,  -3.7348,   0.5559],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5670, -2.8228, -1.9837, -3.6837, -0.7195, -2.3535, -0.3768,  1.6636,
        -5.2245, -0.5536, -0.7231, -0.9582, -4.7377,  3.3960, -4.5081, -0.0508,
        -4.4633, -1.4771, -7.1069,  0.2183], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5914, -3.5795, -3.4875, -5.3330, -3.3601, -2.5675, -2.1358, -2.8858,
        -3.3107,  1.2737, -7.8593, -2.0839, -6.6599, -2.0392, -4.1493, -1.3420,
        -1.4491, -2.5428, -1.2503, -4.1337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6003,  -6.0151,   1.7605,   1.1154,  -5.6204,  -1.7329,  -2.3245,
         -3.6584,  -3.0992,   0.7671,  -2.8220,  -2.1105, -12.4471,  -4.5822,
        -10.1359,  -1.1697,  -6.1097,   2.1417,  -5.6239,  -3.7442],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8385,   2.3905,  -7.4395,  -1.8944,  -2.3105,  -6.0808,  -1.9068,
         -0.9657,  -1.3456,  -0.5297,  -5.6123,   0.4340, -13.6354,   0.4781,
         -1.9510,  -9.3560,  -4.5489,  -5.7818,  -7.1581,  -5.7287],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5988,  -4.3772,  -2.5582,  -2.9843,   0.8842,  -5.8582,   1.2898,
          0.5710,  -3.2916,  -3.5197, -13.2963,  -6.8728,  -2.3252,  -6.6468,
         -3.9721,  -0.5097,   1.3755,  -3.7844,  -2.6191,  -2.8212],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.2625,  -4.0263,  -6.0805,  -2.4224, -22.9033,  -6.2272, -11.8146,
         -5.2896,  -6.3159,  -1.7075,  -3.7033,   4.1566,  -4.2257,  -1.5290,
         -1.4246,  -0.8004,  -4.5370,   2.5760,  -6.8575,  -0.2604],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0279, -4.2497, -1.7422, -2.4392, -2.8092, -5.8637, -0.2607,  1.9155,
        -2.4293, -0.1092, -1.8302, -0.6621, -1.0109,  3.5385, -4.1492, -1.3061,
        -1.2609, -1.1503, -4.2333,  0.6856], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2022,  -4.5334,  -0.7712,   1.8345,  -6.0191,  -5.1791, -16.6448,
         -4.6979,  -6.1833, -13.3870,  -3.1481,  -3.1009,  -3.7589,  -2.4648,
         -2.9955,  -4.3909,  -2.4502,   1.8933,  -8.9397,  -2.8127],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7572,  -2.2959,   0.6293,  -0.7137,  -2.6177,   0.2627,   1.7025,
         -5.2719,  -0.9457,  -1.6654,  -3.9202,   1.3107,   2.7831,  -3.1543,
         -0.3171, -18.2386,  -6.0672,  -5.1194,  -5.9694,  -1.9250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8155,  0.4361, -3.0830, -0.8283, -3.3478, -0.6197, -5.3372, -6.4786,
         2.0560, -3.9631, -2.7209, -2.7223, -1.2139, -4.8606, -3.7731,  0.9608,
        -3.1163, -3.1188, -2.9304, -0.3768], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3925, -3.3305,  0.5758, -1.7589, -1.6520, -1.4006, -3.8525, -2.1745,
         1.7691, -4.3434, -1.0799, -2.3312, -0.7484, -5.6085, -1.0353,  1.3129,
        -2.1466, -0.1721, -1.0064, -5.6043], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1006, -0.4250, -5.4533,  2.2062, -9.2549, -1.5984, -0.9623, -1.1776,
        -4.8238, -0.6722, -4.0958, -1.8514, -6.0991, -9.4950, -6.6826, -2.4916,
        -1.9931, -4.7508, -1.4661, -1.8709], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0160,  -3.7843,  -1.5942, -15.0441,  -5.6723,  -6.6578,  -1.3187,
         -1.6407,   0.9843,  -6.6181,  -0.4693,  -2.6630,   0.0733,  -4.7941,
         -0.2294,   0.1651,  -3.5329,  -2.8778,  -3.2026,  -4.9685],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6030,  -6.0167,  -2.3482,  -5.3846,  -6.4166,  -2.5970,  -5.3774,
         -3.8545,  -0.2786,   2.6872, -16.8239,  -2.1540,  -3.5747,  -6.4859,
         -2.9884,   2.2688,  -8.2971,  -4.4381,  -6.8622, -18.0896],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9754, -0.7869, -8.3333, -2.5959, -3.0123, -1.6420, -3.9953, -1.5208,
         0.7936, -3.8201, -0.7690, -0.1937, -7.2294,  0.2161,  2.6107, -1.5438,
        -1.4567, -2.9521, -3.0804, -6.4247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5850,  -2.3589,   0.4736,  -1.3357,  -3.3463,   0.6959,   2.8672,
         -4.9019,  -1.2371, -10.6832,  -9.8794,  -5.9274,  -9.8339,  -1.2300,
         -7.2439,  -3.5652, -34.2392,  -4.4409,  -6.9306,  -8.6291],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1277,  0.8440, -2.3844, -1.6162, -3.2202, -4.5495, -0.9397,  2.4799,
        -7.9192, -4.7942, -5.0171, -4.5292, -0.5233,  2.2663, -0.4492, -2.7763,
        -0.6969, -6.2121, -0.1822, -1.2552], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0801, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0284, -5.4458, -0.8968, -4.7848, -1.2223, -3.7659, -0.9731, -1.2614,
        -1.3200, -4.8963,  0.6786, -1.4223, -8.3864, -1.7726, -0.5164, -2.9963,
        -4.3411,  3.8321, -5.1292, -0.8018], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3956,  -4.7327,  -3.7167, -10.5573,  -6.8191,  -3.1909,  -5.6828,
         -1.9112,  -5.5455,   3.4523,  -6.3261,  -1.8867,  -4.4541,  -1.5904,
         -6.0299,  -0.4846,   1.8461,  -0.7451,  -1.5495,  -2.8920],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7520,  -5.2670,  -1.9083,  -2.0241,  -5.4057,  -7.9562,  -1.3416,
         -2.3331,  -4.3675,  -1.7185, -14.8989, -10.0103,  -7.4936,  -3.6929,
        -28.9476,  -8.9109,  -6.9261,  -3.8167,  -5.0661,  -2.3348],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3059,  -0.7084,   3.6247,  -4.3577,  -1.2419,  -3.8525,   1.0233,
         -5.2027,   1.6625,  -2.9985,  -3.0407,  -0.3780,  -4.1520,   0.0457,
         -7.1100,  -0.2899,  -1.2998,  -3.5099,  -5.2192, -14.2983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2478,  -6.4687,  -6.1610,  -5.7404,  -3.4581,  -2.8336,   1.6009,
        -26.3684,  -1.5219,  -2.8426, -14.6203,  -6.8495, -32.5426,  -9.0751,
        -35.9093,  -7.9787, -10.1882,  -5.9363,  -4.7755, -11.6938],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-10.2305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0550, -4.9860, -2.9116, -4.6028, -1.0194,  1.2343, -1.7801,  0.0597,
        -2.5924, -3.9104, -1.7960,  1.6448, -2.1834, -0.8948, -4.1795, -6.5774,
        -0.0225,  2.4546, -2.0612, -1.4394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7633,   2.4627,  -5.4439,  -1.9685,  -2.5645, -16.7244,  -7.3436,
         -7.6113,  -4.1884, -10.7938, -27.4040,  -7.6071, -17.0261,  -7.5280,
         -7.1894,  -8.2336,  -0.8195,  -2.4029,   3.4783,  -4.2252],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0991, -2.3789, -1.3926,  3.0880, -4.0635, -0.3092, -1.0872, -3.4311,
        -4.6895, -0.1607,  0.8956, -2.9826, -1.6272, -1.8859,  0.1223, -7.2872,
        -1.4043, -0.5595, -4.9605, -0.3038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2657,  1.1478,  0.4427, -4.4452, -0.8735, -2.8316,  0.1846, -1.5933,
         3.1929, -2.3396, -0.7664, -1.3600, -4.5020, -1.7442,  3.2734, -3.6986,
         0.6748, -1.2340, -2.0355, -3.3893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6355,  -3.3245,  -4.1168, -11.6180,  -2.3421,  -3.8179,  -3.2730,
         -3.6054,  -3.1359,  -2.3272,  -2.7917,   2.8718,  -9.4797,  -1.7747,
         -5.1931,  -3.8336,  -4.0866,  -0.7414,  -7.6428,  -0.5212],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4801, -3.9599, -2.2973, -3.7328, -2.2020, -4.3010,  1.9894, -2.8321,
        -3.6032, -2.3600, -4.0943, -4.1441,  0.7993,  1.6874, -0.9809, -0.7628,
         0.2460, -4.1296, -0.0225,  2.9558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1874,  -3.8436,  -2.1312, -17.9593,  -4.0878,  -5.8658,  -1.6363,
         -0.2283, -19.5831,  -6.6430,  -0.9897,  -3.7979,   0.4647,  -4.3446,
         -0.7110,   2.1814,  -3.5746,  -3.5322,  -3.3052,  -0.4257],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0100, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4114,  -0.5899,  -5.5953,   1.2725,   0.2245,  -2.6307,  -0.9789,
         -0.4842,  -2.6164,  -1.0202,   2.2624,  -3.1657,  -0.5816,  -1.8891,
         -1.7009,  -6.0021,  -3.0980,  -0.8451, -10.7166,  -0.7915],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0748,  -4.6259,   1.2802,   3.0601,  -1.9308,  -1.7830,  -0.2580,
         -3.0889,   0.2254,   2.5532,  -4.0403,  -0.3368,  -4.3542,  -0.0344,
         -5.7639,  -0.9086,   2.3085, -11.0259,  -0.7273,  -7.0437],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7392,  -0.7700,  -0.5026,  -4.8325,  -3.5562, -20.3801,  -3.3432,
         -6.8513,  -1.2648,   0.0477,   1.5174, -22.5706,  -2.0355, -28.2268,
         -6.2644,  -1.4052,  -4.0796,  -2.7674, -16.4365,  -3.5264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5762, -2.8531,  3.3617, -4.7436, -1.2057, -2.7638,  0.1305, -4.5175,
        -0.5303,  0.8029, -3.9678, -0.4809, -0.4329,  0.5694, -6.3039,  3.8419,
        -2.6631,  0.2411, -4.1450, -2.0497], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8050, -1.0749, -0.6771, -4.6755, -1.8124, -3.4915, -3.9591, -3.9336,
        -2.6814, -4.3580, -4.1451, -1.7195, -2.1540, -1.3584, -0.2516,  2.0843,
        -9.0335, -0.6597, -2.9303, -4.1091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5841, -0.1745, -0.3716, -5.2838, -0.2815,  1.8211, -2.1325, -0.2915,
        -4.1959, -3.4799, -6.7981, -2.1948,  2.1627, -1.5547, -0.0591, -1.0562,
        -0.4634, -6.4200,  2.8099, -7.6086], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.0204,  -9.9994,  -1.3692,  -4.1751,  -4.1826, -39.4693,  -9.2761,
         -5.2240,  -5.5075,  -1.3569,  -2.3853,   3.5067,  -9.7191,  -0.2235,
         -1.8126,  -5.6957,  -2.8674,  -2.3541,  -8.4530,  -4.1276],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0622, -3.9193, -1.9470, -2.9018, -3.8747, -9.1302, -0.7054, -0.1872,
        -1.9034, -1.0223, -0.7907, -2.2428, -1.0417,  3.2398, -3.6812, -1.5054,
        -2.3006, -0.2112, -5.5300, -0.2283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2973, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0147,  -3.7445, -20.2312,  -5.8521,  -5.3607,  -6.6375,  -0.8349,
         -3.3035,   0.8532,  -2.0194,  -6.4798,  -1.2917,  -2.5233,  -3.9321,
         -2.9476,   1.9151,  -2.4654,  -0.4769,  -2.5666,   0.1967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2948, -2.4828, -1.2140, -9.9433, -3.6143, -7.8211, -5.9740, -6.7628,
        -0.6998, -3.3457,  1.4287, -1.8651, -5.2673, -1.5273, -3.3326, -6.7912,
        -5.9931, -1.6878, -2.3434, -3.8291], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4339,  0.4706, -5.9579, -1.0336, -3.4477, -1.6549, -6.8771,  1.6018,
        -0.5076, -2.3392, -0.3555, -3.2181, -5.5199, -0.0406,  1.5909, -3.3853,
        -0.3318, -1.7604,  0.2071, -3.3324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7729, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0077, -1.6369, -3.8329, -1.2111, -4.7103, -1.0234,  0.9665, -2.4619,
        -1.5049, -0.6066, -1.0592,  0.2981,  3.6150, -2.4425, -1.8648, -4.1591,
        -0.8526, -5.1234,  1.0178,  1.7029], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0189, -2.0817,  0.1574, -4.1312,  3.4995, -2.8072, -1.8021, -2.3071,
        -5.0859, -0.6140,  1.0521, -3.1280, -1.7382, -3.1969, -2.1400, -5.6777,
        -7.9623,  0.5767, -2.3882, -0.7732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7039, -0.9861, -3.9541, -0.6783, -4.9247,  0.7616,  2.1672, -3.1349,
        -0.0277, -1.7279, -3.3437, -1.0569,  3.1189, -1.5198, -0.1070, -0.5598,
        -2.4876, -4.9602, -0.2335,  0.6426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5778,  -0.5570,  -2.0254,   3.5757, -13.5231,  -3.2779,  -1.5282,
        -19.8728,  -7.9160, -32.1436,  -7.4125, -23.6165,  -6.0312,  -1.4159,
         -2.8587,  -0.7723,  -3.2235,  -4.8253,  -0.6904,  -1.4448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8841, -1.3592,  2.8035, -2.3076, -1.8564, -4.0279, -0.9341, -1.7411,
         3.7053, -3.7749, -1.8728, -5.0116, -1.5971, -4.5904,  0.1873,  2.1380,
        -6.2080, -0.5653, -0.1991, -1.4541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3140,  -0.1726,   2.4596,  -3.7017,  -3.1834, -10.3507,  -5.7607,
        -17.9201,  -2.6403,  -4.5689,  -1.8416,  -0.5141,  -5.1114,  -5.3387,
         -8.2613,  -3.6129,  -9.6558,  -0.8442,  -6.8792,  -0.3294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4048,  -3.1955,  -0.8490,  -0.5252,  -3.2605,   0.2612,   0.3417,
         -3.0194,  -2.2938,  -5.1447,  -1.0268,  -3.8436,   0.4055,   0.8900,
        -10.3335,  -0.6377,  -5.0374,  -4.9524,  -5.6377,   1.0947],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9142,  -3.8831,  -8.1480,  -4.3992,  -5.7716,  -0.9167,  -3.6073,
          1.0928,  -3.3873,  -0.9731,  -3.0902,  -6.4506,  -2.1055,   0.3656,
         -4.6534,  -2.7620, -15.5167,  -2.9233,  -5.8367,  -1.4667],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1639,   0.9755,   1.1346,  -2.6696,  -2.1527,  -3.9902,  -6.9920,
         -6.2663,   0.0770,  -9.4880,  -1.7948,  -1.8933,  -1.5586, -11.6841,
         -3.3250,  -1.3639,  -4.4865,  -1.2023,  -1.3483,  -7.3677],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2974, -3.6773, -1.1351, -7.2636, -2.1366, -1.0307, -6.8617, -1.5041,
        -0.8187, -3.0597, -0.5130, -1.7733, -2.7596, -0.5528, -0.6743, -2.0786,
        -0.7583,  2.2230, -1.9708, -2.4227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0647,   0.0748,   3.3113,  -7.2449,  -1.4653,  -1.7456,  -3.5626,
         -1.6776,   2.3920,  -4.7690,  -2.8352,  -6.9118,  -5.7559,  -4.7324,
         -6.4916,  -2.8235,  -2.0694,   3.1225, -31.2958,  -2.6849],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6528,   1.8460, -16.6475,  -4.4006,  -2.8098,  -2.6788,  -3.3474,
         -1.3602,   3.4060,  -3.1804,  -0.3430,  -2.3724,  -1.0456,  -5.3832,
         -2.1279,   0.2168,  -2.5483,  -4.4231,  -4.0754,  -1.4450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8894,  -1.3133,  -0.4152,  -1.7490,  -1.6731, -10.1329,  -1.7989,
         -0.8114,  -1.1397,  -1.2174,  -1.9310,  -0.0602,  -8.7624,   1.0082,
          0.5128,  -4.4904,  -1.8861,  -2.5318,  -3.1621,  -1.1387],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8130, -1.6649,  2.2492, -3.5487, -1.0305, -0.1813, -2.6365, -1.1970,
         3.9931, -4.3907, -3.8425, -3.2268, -1.5849, -0.8900, -5.0230, -2.5537,
        -1.0013, -2.3253, -2.4497, -4.3785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5724, -3.4174, -1.0392, -2.4104, -2.1398, -4.7402, -0.3604,  1.5550,
        -1.6150, -1.0197, -1.1250, -5.5755, -1.8589, -1.5071, -5.2333, -3.0342,
        -2.0378, -2.8001, -0.6974,  2.7171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8373,  0.4291,  2.4952, -1.2494, -0.0135, -1.9629, -3.6966, -1.6892,
        -0.0511, -1.6534,  0.1083, -2.7388, -0.4884, -7.1941,  0.8307,  0.2400,
        -1.8053, -0.5668, -2.6928, -2.2297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6019, -1.5679, -3.6728,  0.4784,  0.1399, -6.1609, -1.9845, -2.3019,
        -2.3491, -2.9812,  0.3560, -0.2137, -3.4203, -1.2827, -1.3655, -1.7402,
        -0.1607,  2.7321, -3.5544, -2.3220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6113, -0.0288,  2.9142, -3.2270, -1.1499, -3.2416, -1.1266, -3.3608,
         0.7018, -3.3449, -1.1489, -2.2724, -4.5570, -0.3135,  2.9075, -3.0199,
        -1.6907, -0.5828, -2.6461,  0.4241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5640,   0.2324,  -4.9137,  -1.6515,   1.7318,  -1.4598,  -2.2935,
         -4.4465,  -0.1318,  -2.2278,  -3.4093,   1.9745, -15.3570,  -2.0237,
         -2.1717,  -7.4420,  -1.2056,   2.2887,  -3.2406,  -2.1517],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9033e+00,  3.2436e+00, -6.9740e+00, -1.1961e+00, -1.4608e+00,
        -1.1461e+01, -5.5076e+00, -2.0578e-02, -5.1915e+00, -5.4748e+00,
        -3.4618e+01, -5.9297e+00, -7.8510e+00, -1.0621e+01, -5.2129e+00,
        -3.1603e+00, -1.6337e+00,  2.0454e+00, -5.1305e+00, -3.6763e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4469,  -2.7300,  -1.6090,  -2.4978,   0.4354,   3.1758,  -6.2994,
         -0.8325,  -1.0465,  -1.6111,  -4.2297,   3.3165,  -4.5162,  -4.5590,
          0.1911, -20.1661,  -6.3152,  -0.3918,  -3.4395,  -1.5997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6384, -4.1474, -2.9434, -4.4020,  3.4069, -2.5269, -0.0590, -1.8599,
        -1.0330, -2.2565,  3.8503, -4.5849, -5.0425, -3.3355, -1.5984, -2.7641,
        -0.0290,  1.3961, -1.8207, -0.3897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3790, -0.5164, -1.0171, -4.5781, -0.7930, -5.3115, -0.0994, -5.3113,
         1.1955, -1.0222, -2.7998, -0.4641, -2.4619, -2.0390, -1.8937,  2.5610,
        -2.2664, -0.3438, -2.5776, -0.9553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9507,  -2.6613,  -5.2509, -12.6456,  -6.4425,  -7.1498,  -4.8040,
         -1.6905,  -1.0960,   3.3609,  -3.6503,  -1.4522,  -1.8749,  -7.2676,
         -2.0748,   2.1529,  -2.4686,  -3.1584,  -4.7295,  -3.4826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5772, -3.7540, -2.0213, -3.2251,  1.5328, -2.5789, -2.4939, -2.6460,
        -3.2503, -4.9618,  0.4468, -1.9489, -1.3726, -1.9313, -2.1830, -3.4304,
         2.5104, -4.4439, -1.3044, -3.8049], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2029, -2.7383,  1.8932, -3.3949, -1.8247, -4.4889, -2.2770, -1.9057,
         0.7124, -5.5412, -1.0950, -3.8402, -3.1904, -6.1406, -0.8568,  0.0295,
        -3.5868, -2.0677, -1.2623, -3.8473], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6460, -6.1810, -2.0421, -1.7357, -6.2654, -2.4006, -1.3652, -3.0604,
         1.1367,  2.9103, -4.9533, -3.7435, -3.6085, -2.0179, -4.9419, -0.8543,
         1.2212, -4.2618, -2.3739, -2.3533], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1210, -2.9367, -1.2754, -0.7776, -1.7330, -5.3943,  1.5521,  0.3357,
        -2.9160,  0.2756, -2.3772, -0.1505, -1.6117,  3.8586, -1.7952, -2.4509,
        -1.3367, -1.5052, -0.4463, -2.8368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7065e-01, -1.7890e+00, -3.8284e+00, -2.5681e+00, -3.7536e+00,
        -3.1352e+00, -5.7883e-01,  2.5421e+00, -3.0595e+00, -4.8242e+00,
        -6.1735e-01, -6.7096e+00,  1.0725e+00, -1.3394e-03, -3.4407e+00,
        -1.1168e+00, -4.0510e-01, -3.4185e+00,  1.2059e+00,  2.8594e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3859,  -3.8564,  -2.8318,   0.6472,  -5.0820,  -2.4173,  -4.2297,
         -4.2152,  -5.3972,  -1.3272,   1.4472,  -3.2837,  -0.5437,  -1.0093,
         -5.0288,  -0.6675,   0.6606,  -6.5585,  -0.5446, -12.4520],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2510, -0.1723,  1.5751, -2.7925, -2.3239, -3.8117, -2.9324, -4.0461,
        -2.4718, -1.3247, -6.6270, -0.3536, -2.0934, -2.1739, -1.7435,  3.3788,
        -3.7936, -1.7438, -3.1448, -2.8431], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6768,  0.7541,  1.6119, -3.7152,  0.5376, -1.4402, -2.3160, -5.8489,
        -0.7678,  0.2366, -1.6494, -3.7323, -2.7497, -4.5800, -0.6265,  2.0854,
        -9.9139, -3.8832, -3.2178, -3.0815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3683,   0.9934,  -0.0805,  -2.7141,  -2.0618,  -3.1410,  -0.0922,
         -6.3384,   0.7994,  -1.9803,  -3.6742,  -2.2964,  -2.4275,  -2.2735,
         -3.2242,   2.5290, -10.8141,  -0.4724,  -2.1796,  -3.9953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1456,   2.0606,  -6.8471,  -1.8313, -14.0682,  -8.0164,  -9.9605,
         -3.1911,  -4.2908, -13.3944, -13.0071,  -8.9323, -10.1371,  -6.9070,
         -6.7296,  -2.5592,  -2.9993,  -2.0918,  -6.6936,  -6.3836],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1508,  -7.6082,  -3.1289,  -7.0136, -10.5013,  -2.2944,  -2.5112,
         -4.6299,  -4.8421,  -6.3065,  -8.1971,  -5.6345,  -5.6013,  -7.5110,
         -3.5025,  -4.4728,   2.5803,  -6.4900,  -2.6653,  -4.1070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6122,  -1.5477,   1.7993, -11.1160,  -2.8149,  -3.0982,  -0.3643,
         -5.5664,  -1.5431,  -1.0598,  -2.4522,  -2.2714, -20.6365,  -6.6947,
         -8.1838,  -1.3865,  -2.1984,   2.6035,  -2.7750,  -0.7019],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7083, -3.2599, -1.6805, -3.2214, -1.7052, -5.5750, -5.1494, -3.1747,
        -4.3144,  0.5161, -2.3108, -5.1137, -1.0728,  2.1768, -3.4606, -2.1407,
        -4.4764, -4.0187, -7.2071,  0.3899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8116, -1.3321, -3.3998, -3.3558, -0.3748,  1.7704, -4.9886,  0.5944,
        -2.3092, -4.9327, -0.0483,  2.0920, -2.1673, -1.8889, -4.4675, -2.3443,
        -0.4618,  2.4158, -0.8800, -1.2083], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3775,   3.0040,  -4.5014,  -4.7628, -19.7649,  -5.8182,  -7.8764,
         -6.6718,  -0.2100,  -3.1377,   3.4945,  -6.2099,  -1.7119,  -0.7507,
         -5.7618,  -1.1650,   2.8844,  -9.7225,  -5.1397,  -3.4477],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8824, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6978, -0.8860, -4.0489, -4.2272, -1.2620,  2.7739, -3.8419,  0.5006,
        -6.9398, -2.8420, -5.5825, -1.9105,  1.1911, -5.7049, -2.9478, -0.1502,
        -1.0405, -2.7221, -3.1310, -3.3168], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3040,  2.4085, -1.8186, -1.1874, -1.2425, -1.3735,  0.0162,  0.7355,
        -2.2679, -1.5025, -2.7787,  0.4382, -6.8415,  1.4886, -0.4516, -2.9878,
        -0.5085, -3.4417, -2.7210, -3.7868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3054, -3.3229,  0.3754, -0.7850, -2.4138, -0.0060,  1.2109, -3.7930,
        -0.7475, -0.7103, -2.8020, -1.5127,  3.3418, -2.9731, -2.0430, -1.7981,
        -3.7895, -5.1421, -0.9700,  1.6590], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.5687,  -5.2751,  -1.7567, -12.0799,  -5.8339,  -6.1471,  -5.7607,
         -1.7351,  -7.0535,   1.6990,  -3.3237,  -2.6300,  -3.8413,  -4.5557,
         -4.6641,  -6.6130,  -0.5164,  -3.9754,  -2.3684,  -3.8203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8406,  -2.3411,   1.9070,  -6.7076,  -3.0396, -11.9369,  -5.2252,
         -7.1191, -10.6907,  -6.3949,  -1.5844,  -2.9572,   0.2476,  -5.1673,
         -2.4643,  -1.4957,  -5.2321,  -4.6313,   2.5577,  -3.7311],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0387, -1.6199, -5.3255, -2.3104, -1.2257, -4.6145,  1.4054,  1.0441,
        -1.9539, -0.7774, -2.7250, -3.1483, -0.2311,  1.0037, -3.5033,  0.7138,
        -2.2908, -5.5521, -5.0584,  0.4250], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8028,  -4.3880,  -4.9721,  -7.3844,  -2.1883,  -5.7236,  -2.0363,
         -2.3210,   0.2308,  -2.4510,  -1.0645,  -3.0883,  -3.6402,  -0.3454,
          4.5674,  -1.3833,  -7.3959,  -8.3433, -11.8723,  -6.9248],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2922, -4.8673,  0.2460,  1.4566, -4.9704, -1.9778, -2.2517, -5.5566,
        -1.5427, -1.6790, -2.1270, -2.5842, -2.7761, -2.2494, -6.0851, -1.8938,
        -0.0725, -3.0269, -0.4319, -3.3006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8524,  -8.9493,   0.8507,  -3.5687,  -1.3377,  -0.7677,  -2.2141,
         -0.9548,   1.8057,  -4.2142,  -0.4488,  -9.8023,  -2.4683,  -8.0281,
          0.3093,  -3.1155,   2.8432, -16.4539,  -4.1506,  -2.2922],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8517, -2.3194, -3.3351, -2.5432, -5.7277,  0.1425, -0.6153, -3.9712,
        -0.7823, -2.5675, -0.8971, -3.7508,  1.4265, -4.6966, -3.3825, -0.1934,
        -8.6697, -5.0249, -3.3644, -5.7072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4696, -3.5995, -3.9394, -2.9312, -4.6485, -6.3625, -5.8769,  0.4227,
        -3.0870, -1.6034, -6.3928, -6.6819, -4.1888, -5.5353, -2.4433, -3.9857,
        -1.4624, -6.6057, -2.2026, -2.2817], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2111, -7.9479, -3.8998, -3.6040, -3.4150, -1.7241,  2.5904, -2.5104,
        -1.0276, -1.4029, -0.1444, -2.5483,  1.5974, -4.8583, -1.4721, -0.7664,
        -2.6933, -2.7169,  2.6132, -4.3081], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5905, -5.8402, -1.1788,  0.0256, -4.0966, -2.6989, -4.7929, -3.3451,
        -7.2116, -3.1221, -1.0062, -4.7996, -2.8592, -6.7634, -3.5657, -5.1436,
        -1.1893,  0.2840, -6.4538, -1.4313], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6168,  0.7256, -4.7674, -1.8044, -2.5551, -4.0797, -4.2709,  0.4351,
        -3.5946, -1.6948, -5.8884, -8.3273, -4.2031, -7.6259, -2.5785, -4.6440,
        -6.4609, -7.1227, -3.5674, -4.4839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4443, -3.9932,  1.3311, -1.7030, -3.1595, -1.0469, -2.4408, -0.7448,
        -2.2459,  2.9449, -2.3105, -1.5815, -2.6798, -1.0975, -7.1406, -0.5203,
         1.4250, -2.7412, -0.5817, -1.4644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7241, -0.9804, -2.7137, -6.6964,  0.3855, -0.1217, -2.4711, -1.0563,
        -1.5119, -3.7341,  0.4196,  3.1390, -2.7409, -2.3949, -1.9501, -1.2282,
        -5.0262, -2.3803,  1.9831, -4.8306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4218, -1.4842, -1.6185, -0.5139,  4.0138, -2.2848, -0.6579, -4.7596,
        -0.7435, -3.3218,  1.4618, -1.8872, -4.3233, -1.9107, -6.6280, -3.7816,
        -7.3267, -1.2508, -1.4418, -3.8671], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1018, -5.7598, -3.3094, -0.9373,  3.2067, -3.5312, -2.8195, -4.3913,
        -2.1084, -2.5399,  0.4797,  0.9356, -1.9863, -5.3029, -2.5353, -2.2605,
        -3.5751,  0.7295,  0.5745, -1.7270], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1738e-03, -1.1273e+00, -1.8773e+00, -5.4244e-01,  3.8289e+00,
        -1.8980e+00, -9.2352e-01, -3.8546e+00, -3.9377e+00, -2.7290e+00,
         1.8568e-01, -3.8159e+00, -1.6644e+00, -3.6735e+00, -2.7758e+00,
         7.3001e-01,  4.0805e+00, -2.9200e+00,  1.5161e-01, -1.1443e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1952, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3776, -2.5738,  0.9350, -4.6213, -4.2005, -3.2113, -4.3733, -0.1989,
         1.1042, -2.3625,  0.4584, -0.2693, -1.9701, -1.0258,  3.3947, -4.8883,
        -0.0768, -4.6767, -0.8142, -5.6341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3052,   4.1080,  -4.5919,  -1.4749,  -2.0368,  -0.7833, -10.0063,
         -1.3561,  -1.8229,  -3.6950,   0.1820,  -1.5872,  -3.0652,   0.0571,
          3.4385,  -2.7859,  -3.3637,  -2.7764,  -1.9154,  -7.2553],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8669, -0.5700, -1.9887, -0.8166, -6.1684,  1.8150,  0.6331, -3.1796,
        -1.2821, -1.3227, -3.0719, -0.1447,  2.8273, -5.5527,  0.0319, -1.8348,
        -3.3329, -3.6931,  3.0371, -7.8264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2137,   0.1219,   2.6789,  -5.5316,  -1.2489,  -1.5160,  -1.6376,
         -1.9359,   3.8208,  -3.7598,  -1.7248,  -2.4333,  -0.6584,  -2.7992,
          2.9663,  -3.5773,  -0.6192, -16.3309,  -3.5461,  -8.6508],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0679,  0.5215, -4.2795, -2.5633, -1.4767, -3.5724, -0.3258,  3.1974,
        -3.6563, -0.6980, -2.0857, -3.9350,  0.3123,  3.0898, -2.7965, -1.0658,
        -3.3140, -5.7140,  0.2880, -0.2234], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3100, -0.6456,  1.7588, -4.8904, -1.2094, -3.6689, -4.6914, -2.4434,
         0.4361, -2.9904, -1.4567, -2.8300, -2.5980, -4.9035, -0.4322,  0.9800,
        -2.6430, -0.8328, -3.0666, -6.3818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2886,   1.4670,  -1.3708,  -0.5421,  -1.0172,  -6.7102,  -0.1430,
          3.0544,  -5.3471,  -1.8388, -34.3004,  -7.7865,  -6.2300,  -6.8789,
         -1.1869,  -2.6033,  -1.3609,  -4.0377,  -1.0550,  -1.5933],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9596, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2133,  2.3461, -4.8317, -1.6521, -2.1219, -4.3948, -2.1161, -0.8759,
        -3.0253, -3.0878, -9.2348, -5.9593, -3.1123, -7.2618, -3.7068, -2.2646,
         2.4434, -8.4012, -3.2207, -2.8609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3493, -4.5572,  0.1509, -4.0271, -0.3054, -0.0125,  2.3395, -2.9971,
        -2.0451, -1.0857, -1.5643, -5.7979,  1.4546, -1.8443, -3.0040, -0.2097,
        -1.1231, -5.1275, -0.0082, -0.7129], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4557, -2.8057, -4.2077, -3.2830, -4.6008,  1.3956, -0.2484, -2.8041,
        -1.5072, -2.8051, -0.5521, -3.6324, -4.0304, -6.7071, -1.0259, -1.7618,
        -2.6157, -6.2289,  1.6858,  1.4202], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4516,  -3.1296,  -1.9925,  -4.7869, -48.0084,   1.0181,  -2.0410,
         -2.6539,  -1.0598,  -6.7087,  -0.7530,   2.1529,  -4.0164,  -2.1745,
         -3.2356,  -0.5100,  -5.8991,   0.2689,   1.2057,  -2.3255],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3050, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7135, -0.2022,  3.2749, -3.5026,  0.2854, -2.0889, -2.2366, -6.8660,
         1.5821, -0.8078, -3.4267, -0.9445, -1.4083, -4.1076, -5.3687, -1.7334,
         0.9473, -2.9157, -1.9277, -2.1894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3592,  3.0577, -3.6195, -0.3267, -1.5679, -3.9298, -2.3482,  1.9886,
        -3.4887,  0.3802, -3.1235, -2.6801,  0.9537,  3.1633, -2.3695, -0.4552,
        -2.1245, -3.6262, -1.9851,  2.3330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9705, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1720, -6.6711,  1.4168, -0.5652, -2.8156, -1.1331, -2.4926,  0.2401,
        -4.2075,  2.5084, -4.1938, -4.4371, -1.8644, -4.1335, -4.7962, -1.1350,
         3.0881, -1.6914, -0.3624, -0.5802], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9695, -5.0022, -4.7218, -1.5941, -2.1259, -3.8062, -0.4073, -6.1981,
        -1.1724, -4.1809, -0.5144, -4.0561,  0.5353, -0.8242, -4.2290, -2.0666,
        -3.1158, -2.7331, -1.1457,  3.5709], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4802, -1.7202, -2.2362, -2.7218, -1.2039,  0.9941, -2.7116, -0.5704,
        -3.5562, -0.6203, -5.8793,  1.4036,  2.0541, -2.2002, -1.0506, -1.9557,
        -3.6656, -1.6075,  1.1945, -3.5093], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7172,  1.2533,  4.0168, -5.6555, -1.4496, -3.0414, -2.1064, -7.4527,
        -0.7195,  1.0527, -2.0382, -0.5710, -0.9170, -1.3022, -6.5775, -2.7500,
        -1.7675, -3.0410, -1.3307, -4.2781], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1196, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5176,   0.1187,  -1.7325,  -3.2841,  -0.7036,  -3.3705,  -0.6195,
        -13.0346,   4.1456,  -3.4920,  -1.2901,  -1.6054,  -4.2292,  -1.4499,
          1.9954,  -6.6495,  -3.1070, -34.1153,  -6.4587,  -8.0489],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5138,  -8.0331,  -4.3342,  -5.4138,  -1.1077,  -1.5099,  -1.1593,
        -17.5642,  -3.0118,  -0.3620,  -4.8990,  -3.1477,  -0.8175,  -5.1751,
         -2.3254,  -4.3773,  -1.8024,  -4.0368,   2.5716,  -5.8868],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3442,  1.6800, -4.9881, -1.8575, -4.5357, -4.8092, -2.3690,  1.6449,
        -4.4600, -3.3190, -2.7047, -3.1843, -3.3705,  2.4702, -3.1707, -2.8587,
        -2.9268, -4.1536, -4.4331,  1.9414], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0584,  -1.0293,  -2.5751,   4.2500,  -4.4134,  -1.7090,  -3.4258,
         -0.7178, -15.4004,  -3.1444,   1.0023,  -2.7384,  -1.8438,  -4.4813,
          0.5649,  -4.1341,  -4.0129,  -0.0444,  -5.3443,  -6.0408],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3795,  -2.5104,  -2.8035,  -3.5593,  -2.2142,  -3.5464,  -7.6225,
         -5.1014,   1.2891,  -4.7410,  -3.5787, -10.7753,  -4.3291,  -8.6382,
         -2.9081,  -2.2380,   0.8151,  -3.0947,  -3.8391,  -3.0783],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.5595, -3.3736,  0.7120, -3.0718, -1.1464, -6.8473, -0.9381, -0.3050,
        -2.9335, -2.0110, -2.5938, -3.5116, -4.1805, -2.3507,  1.0618, -3.0703,
        -1.3625, -2.4816, -3.2996, -1.6606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4398,   4.1754, -19.1530,  -3.9727, -11.5018,  -6.7303,  -9.8562,
         -3.3228,  -4.8665,  -0.9634, -15.1032,  -3.1613,  -1.5860,  -7.9351,
         -3.7507,  -2.3658,  -4.9246,  -5.9987,  -3.8840,  -3.3662],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4827e+00, -1.1199e+00, -4.2245e+00, -1.8232e+00,  2.2431e+00,
        -2.0215e+00,  3.4105e-02, -3.7020e+01, -2.9404e+00, -9.7984e+00,
        -3.5149e+00, -4.2276e+00, -6.1420e-01,  1.5781e+00, -2.9836e+00,
        -2.2013e+00, -3.9224e+00, -4.8884e-01, -5.8101e+00,  1.1977e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9423, -7.1568, -3.9716, -3.9525, -3.2591, -6.5194, -5.6946, -6.4404,
        -6.0870, -6.4452, -2.9732, -5.0256, -3.4456, -0.3274, -6.0495, -4.2840,
        -4.3326, -5.1352, -5.6640,  0.4452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.5630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2999, -2.8151,  0.5587, -1.4284, -4.9285, -3.4423, -2.3063, -1.5921,
        -1.0837,  3.7353, -2.9328, -3.7941, -4.2672, -2.6098, -0.0070,  3.5857,
        -2.3094, -1.2000, -4.4322, -1.9023], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7292, -4.6122, -0.6639, -1.7068, -2.4067, -4.3953,  3.8949, -2.0779,
        -1.4804, -2.4019, -0.6476, -5.7571,  1.4087, -0.5096, -6.3628,  0.0655,
        -1.1807, -4.4443, -0.3581,  1.2710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5961, -0.3385, -1.1501, -3.1983, -0.7384,  3.7494, -5.5091,  0.3645,
        -3.8279,  0.6637, -6.0428,  1.7199,  0.9518, -3.1194, -1.4410, -1.6617,
        -4.7701, -1.0563,  1.4061, -5.9673], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0728,  1.9529, -7.5816, -0.9610, -2.3654, -0.6484, -0.2389,  3.6028,
        -2.7964, -2.2124, -2.4872, -1.5327, -6.0110, -1.4577,  0.4497, -3.1204,
        -3.2817, -0.9408, -3.0851,  1.0064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2758, -1.1618, -3.1873, -0.7412, -1.5284, -3.1057,  1.0756,  2.6842,
        -1.4516, -2.4313, -1.2717, -3.9427, -0.2187,  3.3212, -5.1589, -0.5996,
        -2.1449,  0.2750, -1.6609,  3.2448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6668,   3.0246,  -3.2303,  -1.5150,  -1.9857,  -2.9600,   0.8117,
          2.9480,  -1.4399,  -0.9152,  -2.3590,   0.2513,  -2.3670, -38.9394,
         -6.4822,  -9.8302,  -4.2071,  -4.9283,  -6.1038,  -0.6629],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5949, -4.8886,  1.7489,  2.7765, -2.0582,  0.0077, -3.5914, -2.8490,
        -1.8301,  2.8357, -3.1699, -1.3328, -2.4782, -3.1296, -3.0512,  2.3855,
        -2.6665, -2.0358, -1.1957, -3.0550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.3234e-01, -7.1250e-01, -3.1494e+00,  2.1072e-03, -1.0647e+00,
        -1.9735e+00, -6.2558e-01,  4.1620e+00, -3.2804e+00, -9.2288e-01,
        -2.2300e+00, -6.6401e-01, -4.8944e+00,  6.2605e-01,  1.5343e+00,
        -3.1616e+00, -1.8634e+00, -2.1828e+00, -2.9106e+00, -5.3298e-02],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4087, -0.8655,  2.3295, -2.0375,  0.2126, -1.4583, -4.8762, -1.0587,
        -0.3308, -3.7624, -1.1342, -3.2966, -0.6088, -0.1503,  3.4711, -3.6458,
        -2.2425, -3.5255, -2.0054, -3.6895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9613,  -4.3094,  -2.6964,  -5.4588,  -1.7131,   0.5798,  -6.5119,
         -4.6632, -35.2330,  -4.3836,  -9.3200,  -0.7705, -11.7633,   4.0084,
         -3.4011,  -4.2990,  -3.3901,  -2.9347,  -3.4207,  -5.0758],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1145,  -3.9716,  -1.8430,   2.2909,  -2.1665,  -1.5910, -12.3158,
         -6.7890,  -7.5501,  -6.1274,  -0.9717,  -1.4066,   3.7655,  -2.8253,
         -0.6621,  -2.4707,  -1.0665,  -1.5270,   1.2378,  -3.2555],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5041, -4.9452,  0.4527,  3.6398, -8.1862, -0.9792, -2.8252, -1.4244,
        -5.6719, -1.2193,  0.9632, -4.0585, -2.3038, -1.4767, -0.1509, -0.5661,
         3.7946, -3.9115, -2.1376, -2.1729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.6958,  -2.5758,  -1.9671, -12.1484,  -2.4289,  -7.9639,  -0.4502,
         -3.6372,   2.0488,  -4.1840,  -1.3081,  -3.7591,  -2.4853,  -7.4945,
         -0.5022,  -1.0288,  -3.4094,  -3.1300,  -1.7265,  -1.8672],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8457, -3.6554, -0.3390, -2.7521, -1.1239, -5.3039,  0.7763, -0.4101,
        -4.5068, -0.6907, -1.7282, -3.5557, -1.7780,  1.8762, -3.2095, -1.4698,
        -1.9471, -0.7642, -4.5195,  0.5888], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8812,  -1.7090,  -1.8150,  -2.7612,  -0.3098,   3.2484,  -3.0653,
         -0.3859, -12.8340,  -3.7216,  -8.5297,  -6.3457,  -4.2828,   2.2673,
        -10.3122,  -4.3168,  -2.5818,  -3.8719,  -7.9094,  -1.9841],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7813, -2.0321,  3.0740, -8.5122, -0.5605, -2.4533, -1.2276, -3.9404,
         2.5606, -2.3851, -0.3939, -3.6446,  0.4357, -4.0025,  0.2559,  0.4540,
        -3.7267, -0.6895, -2.7929, -1.4213], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5473, -2.0358, -2.4350,  2.5143, -3.4549, -0.5532, -1.4720, -3.8869,
        -2.8963,  0.1288, -4.3887, -0.9677, -4.1786, -3.2738, -2.0044,  3.0219,
        -2.0308, -1.9992, -2.2997, -1.8575], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1168, -3.5766,  3.6580, -5.9161, -2.7803, -2.0952, -1.9977, -5.1572,
         0.7860,  1.6112, -2.2213, -0.8523, -0.1851, -3.1083,  0.1411,  3.7358,
        -2.5667, -1.5314, -1.1603, -1.9131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3995,   4.2639,  -2.1285,   0.0910,  -8.2861,  -4.1104, -21.3479,
         -0.4548,  -4.2958,   4.0322,  -6.6998,  -3.0324,  -9.5738,  -6.6363,
         -4.6477,   0.3484, -13.3631,  -3.2033,  -5.5593,  -4.5931],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9468, -8.0872, -6.9558, -1.5979, -8.3273,  2.9808, -8.2296, -3.1913,
        -3.4938, -4.7859, -2.1896,  2.0732, -5.5532, -5.2211, -3.5066, -5.2110,
        -4.1942, -1.4904, -1.6197, -5.2653], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9650, -6.7634, -1.1091, -0.2681, -2.5054, -4.4657, -2.7606, -2.7363,
        -5.3523, -0.8792,  2.1783, -2.5679, -0.0455, -2.6926, -0.6009, -4.2544,
         1.7231,  0.7585, -3.1229,  0.2827], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3828, -4.1211,  0.8776,  2.5259, -3.5682, -0.3148, -0.9161, -3.6546,
        -0.7155,  2.0591, -4.5565, -0.2915, -0.7713, -4.5669,  1.0865,  3.0122,
        -1.6904, -1.0656, -1.3870, -2.3132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1228,  -5.1557,  -1.9954,   2.8632,  -2.9841,  -2.1446, -40.1728,
         -3.7771,  -9.6871,  -0.1134, -12.4509,   3.7657,  -3.6706,  -6.6793,
         -4.9629,  -5.5158,  -2.9190,  -4.0763,   2.7965,  -2.7611],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5217, -1.7862,  3.3873, -3.3459, -0.5567, -1.8956, -4.3881, -3.0663,
         0.7223, -5.9734, -2.9792, -3.4778, -1.7514, -5.0010, -4.2703, -7.2247,
        -4.1454, -4.0481, -3.1616, -6.3443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6585, -0.2458,  3.0328, -1.7662, -0.3011, -2.6101, -4.6268, -2.6566,
         2.3094, -3.0446, -0.5997, -2.4301, -3.4974, -1.0750,  3.4691, -2.1155,
        -1.0101, -3.5210, -0.4401, -5.9078], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4837, -3.0679, -1.2201, -3.0208,  0.5977,  2.3777, -1.8922,  0.4540,
        -2.1531, -2.5345, -6.2295,  0.3955, -0.4739, -2.7789, -0.2992, -1.0456,
        -3.3440, -0.0259,  2.5708, -3.8682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1502, -4.4723,  1.7369,  2.2592, -3.0018, -1.5320, -1.8320, -2.2108,
        -5.5023,  1.0768, -1.0660, -2.9082,  0.3365, -0.9162, -1.5050,  0.3508,
         3.4057, -4.4547, -0.1243, -2.1171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.5868, -16.1998,  -1.3773,  -2.4041,  -4.8074,  -2.5994,   0.0998,
         -3.5061,  -2.6184, -13.4997,  -5.7383,  -8.6470,  -5.6638,  -6.9477,
         -3.0801,  -2.2610,   0.6346,  -2.4839,  -2.7926,  -6.1663],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3236, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5084,  1.7774, -2.0664, -0.5958, -2.3883, -2.2579,  1.1455,  3.0365,
        -5.1834, -0.5039, -3.8998, -1.9257, -6.3304,  0.5435,  1.4254, -2.6654,
         0.1240, -3.0056, -2.8349,  1.3296], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2135,  -0.4684,  -5.5453,  -1.9926, -13.6905,  -6.9231,  -4.0292,
         -6.4479,  -2.1738,  -1.4488,   1.8611,  -4.4279,  -1.1166,  -0.9888,
         -1.7933,  -0.7637,   2.4032,  -2.0097,  -0.7618, -14.5886],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7113, -1.1878, -1.8989, -2.8305,  0.9213,  2.7211, -4.3498, -2.6928,
        -8.4544, -5.0141, -3.6173, -5.2461, -3.8237, -1.3386,  3.3569, -9.4437,
        -2.2117, -1.7131, -5.0188, -1.5421], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6333,  -2.6749,  -7.7526,  -3.6402,  -3.3613,  -4.6416,  -4.3825,
         -1.3900,  -0.2116,  -7.1945,  -3.2673,  -3.3264, -11.5519,  -2.7513,
          3.3445,  -0.8249,  -3.6327,  -5.4944,  -5.8783,  -8.1591],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4771,  -2.8734,  -0.5981,  -2.8764,  -4.5198,  -2.7290, -38.8367,
         -5.4798,  -3.1558,  -2.6158,  -2.8437,  -2.4559,   1.1420,  -4.4753,
         -1.9762,  -2.9507,  -1.5495,  -6.4187,   0.9128,   1.1581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8277, -6.5522, -6.3304, -6.5079, -8.0865, -2.9718, -9.2582,  1.0731,
        -3.0894, -4.4679, -2.7750, -0.6336, -4.2676, -1.2206, -0.7196, -1.4268,
        -2.4141, -2.2113, -3.7974, -1.6681], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0273, -0.1024, -4.9366, -2.1302,  1.1680, -1.3466, -0.6733, -2.4488,
        -0.5038, -4.3933,  1.4116,  1.4072, -2.2895, -1.2851, -2.1026, -1.0452,
        -3.6071,  2.7678, -1.9097, -0.1017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0385,  -1.3114,  -3.5514,  -1.7010,   1.4205,  -3.1373,  -1.1797,
        -10.1554,  -4.5325,  -8.1663,  -0.1265,  -1.8077,   1.4273, -10.1973,
         -1.3637,  -0.8305,  -4.9890,  -1.4778,   2.5612,  -3.2996],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9894, -3.1922, -3.2512, -0.4917,  1.5499, -1.9158, -0.1230, -2.1241,
        -0.1551, -0.2435,  3.4017, -4.7756, -2.5572, -3.8339, -2.3077, -9.6418,
         0.1763,  0.3604, -2.8287, -1.1747], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7317, -7.5316, -5.9403, -1.2010, -3.7648,  1.7595, -3.9795, -3.4409,
        -2.1821, -4.4730, -1.9007,  1.8230, -2.7584, -0.5991, -3.1453, -0.8318,
        -4.9863,  3.7361, -1.3862, -2.0787], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4534,  -0.4155, -17.8514,  -2.2954,  -2.4537, -21.9783, -20.2948,
         -7.0960,  -6.3539,  -5.1922,  -6.1526,  -6.2299,  -7.8821,  -2.8599,
        -13.8450,  -0.5181,  -1.7531,  -6.0766,  -3.4173,  -2.6419],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9014, -6.3260,  0.6040,  2.1677, -2.6627, -1.4011, -0.9960, -3.1968,
         0.1448,  1.3063, -2.8082, -0.9086, -3.9277, -2.9577, -0.4391,  1.1839,
        -1.3101, -0.5323, -0.8296, -2.2664], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3705, -1.9350, -0.1996, -1.3701,  3.9306, -1.5393, -0.9102, -1.8537,
        -3.0442,  0.3307,  3.1527, -1.9649, -1.1618, -1.0989, -5.1701,  0.1473,
         2.9355, -6.1739, -0.0267, -1.7231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8890,  -0.3306,  -2.8153,  -3.9535,   0.6296,   3.1538,  -4.7734,
         -2.4854,  -1.8801,  -7.7296,  -1.9009,   0.5291,  -4.5703,  -2.8256,
        -31.7601,  -7.9685,  -8.3899,  -5.1717,  -0.9170,  -2.1982],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4569e+00, -3.9171e+00,  3.1226e+00, -3.1739e+00, -1.7260e+00,
         3.1570e-02, -6.0775e+00, -7.3983e-02, -3.6515e-01, -2.4226e+00,
         3.8319e-01, -3.4871e+01, -6.8632e+00, -9.3734e+00, -5.2689e+00,
        -5.7241e+00, -2.9670e+00, -5.7128e+00,  8.6931e-02,  4.0164e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4591, -2.3910, -1.0454, -6.8211, -9.3411,  1.0644, -1.2368, -0.0221,
        -1.7738, -2.8128, -1.4698,  3.7716, -3.7104, -1.4607, -1.2711, -0.4889,
        -5.6773, -0.1047, -0.1525, -3.7959], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9936, -20.3876,  -4.8855,  -0.2966,  -2.7465,  -0.8427,  -1.3296,
         -4.1075,   0.5941,  -1.4788,  -5.3503,  -0.6485,  -2.2629,  -0.2515,
         -1.1986,  -0.0474,  -3.3358,  -1.4848,  -2.8695,  -3.2318],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9735,  -2.6313,  -0.8070,   3.6328,  -5.8945,  -2.6568,  -4.6367,
         -1.9249, -18.1937,  -2.5796,  -0.9436,  -3.8045,  -1.3004,  -1.2701,
         -4.1004,  -2.8672,   3.3521,  -3.4092,  -0.7914,  -1.0061],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3926,  -4.6409, -10.8005,  -1.1321,  -4.3588,  -2.3560,  -2.4112,
         -3.8237,  -4.0452,   1.0237,  -3.6617,  -2.4375,  -3.8093,  -4.1594,
         -2.9118,   0.6262,  -3.0771,  -1.7154,  -2.3098,  -3.8218],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1807,  -2.5052,  -1.9713, -14.6953,  -4.0230,  -7.2441,  -1.8902,
         -2.5203,   2.2121,  -2.5140,  -2.4237,  -2.8581,  -7.4058,  -4.1232,
          1.4812,  -5.1398,  -0.9431,  -9.0316,  -4.7332,  -4.3905],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2982, -0.8135, -2.9369,  0.2446,  3.8722, -6.4555, -0.9953, -1.9053,
        -1.5384, -6.7370,  3.7454, -2.5534, -1.1305, -1.0842, -0.9266, -6.1262,
         3.0971, -3.1786, -3.4540, -3.0032], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1101, -2.8552, -4.6915, -5.1138, -0.0612, -2.0170, -4.7120, -2.2303,
        -1.1726, -2.4686, -3.1268,  3.0775, -5.0957, -2.2639, -1.6148, -1.8051,
        -4.6483, -0.8156,  1.6694, -3.6931], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1094,   1.1570, -10.5069,  -2.6710,  -0.3501,  -3.8174,   1.6104,
          2.9819,  -2.7181,  -0.1970,  -0.8718,  -0.7619,   0.1205,   3.5199,
         -2.8204,  -2.7179,  -3.0263,   0.2683,  -2.3750,   3.6032],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5189,  -0.9153,  -3.2897,  -1.6010,  -4.4482,  -0.2228,  -5.1435,
          0.7557,   1.5679,  -2.2630,  -2.0477,  -0.0778,  -1.9179,   0.6036,
          3.1754,  -2.7053,  -2.6018, -20.6201,  -4.7020,  -8.9869],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3442,  1.6800, -4.9881, -1.8575, -4.5357, -4.8092, -2.3690,  1.6449,
        -4.4600, -3.3190, -2.7047, -3.1843, -3.3705,  2.4702, -3.1707, -2.8587,
        -2.9268, -4.1536, -4.4331,  1.9414], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5747,  0.1955,  3.0371, -1.9355,  0.0746, -1.5764, -1.0717, -3.0813,
         0.9193, -4.0641, -2.7005, -3.3189, -3.8647, -4.7461, -2.7561,  0.5042,
        -7.9214, -3.1613, -1.1262, -6.3893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4654, -1.7109, -2.8794, -1.4278, -1.8929,  3.5750, -2.1771, -1.2448,
        -3.7551, -2.4672, -5.0753, -0.7814,  0.6063, -8.5400, -1.0587, -1.8217,
        -2.4377, -3.3426,  1.3615, -2.1218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4903,   2.5418,  -5.5261,  -2.7657, -39.1450,  -6.0918, -14.8475,
         -0.9245, -14.9814,   4.2540,  -6.5348,  -5.8548,  -4.4885, -19.5798,
         -5.6074,  -5.6984,  -1.4944,  -1.0982,  -0.3875,  -2.9190],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2579,  3.3590, -6.1273,  0.1795, -3.9410, -4.5175, -1.7849, -0.5809,
        -9.1653, -1.4961, -3.0681, -0.2726, -2.0608,  2.0397, -5.0689, -0.0566,
        -1.4162, -0.5715, -4.2860,  3.7271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2254,  0.5908, -8.5949, -3.2383, -2.0382, -1.7208, -4.9922,  0.5436,
         2.6932, -3.5773,  0.4129, -1.5079, -1.2777, -0.7851,  3.2676, -2.1943,
        -1.1920, -4.5067, -4.8523, -3.7866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2478, -1.6851, -4.0045, -0.7539, -1.9014, -3.6834,  1.6183,  1.7401,
        -5.5708, -2.5451, -0.7517, -5.4884, -0.8065,  2.3032, -2.3793, -1.9006,
        -3.2004, -1.3993, -8.2744, -4.4373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1048,  -3.5220,  -2.5772,  -0.4245,  -3.8007,  -6.5806,  -0.6799,
          1.7667,  -3.7842,  -1.8864,  -1.0199,  -5.3111,  -0.6267,   2.5503,
         -5.8689,  -4.3475, -10.0470,  -6.3384,  -3.8524,  -5.2493],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7188, -3.1548, -2.5675, -4.2125,  0.0253, -0.3270, -1.3148,  0.3923,
        -0.9642, -4.8398, -0.8241, -0.6462, -4.3094, -0.6789, -4.3402, -1.0387,
        -7.0227, -3.2625,  1.1545, -1.6551], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4281,  -0.2181,  -5.0670,  -1.1808,  -3.4866,  -4.3221,  -2.7173,
          0.3448,  -8.6576,  -5.6939, -14.6023,  -3.3300,  -8.7610,  -6.2624,
         -3.7696,  -6.8619,  -2.1254,  -4.0797,  -6.2369,  -0.1713],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8443, -1.1537, -2.2214, -1.2910,  3.4252, -6.5315, -0.6779, -2.8419,
        -1.9429, -1.4373,  2.8922, -6.1288, -2.6592, -3.8900, -1.8308, -5.2838,
        -0.8270, -0.7324, -4.6191, -0.4163], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4284, -1.2308, -4.2336, -2.9233, -0.6673, -3.2903,  0.5002, -1.9652,
        -3.4742, -0.7056,  2.6036, -1.8596, -0.1714, -2.8594, -0.4734, -2.2773,
         0.4714, -2.5080, -2.7373, -2.9570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3131, -1.6380, -4.7596, -0.3446,  3.3546, -3.5406,  0.8061, -2.5071,
        -3.5730,  1.7188,  3.3961, -1.1456, -0.6177, -2.2551,  0.2325, -4.3889,
         1.3456, -2.2114, -5.3464, -6.0654], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3920, -1.5209, -1.2213, -1.6899, -3.9498, -0.7627, -1.6786, -3.1868,
         0.4973, -3.5987, -2.1703, -6.5990,  2.0767, -0.7674, -1.8866, -0.8588,
        -3.6126, -0.8313,  2.2365, -5.2833], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6397,  -5.0414,  -2.1784,  -0.3564, -15.5355,  -4.9827,  -2.7971,
         -6.5048,  -3.2939,  -0.8479,   2.5871, -11.2890,  -3.1804,  -2.9676,
         -6.9096,  -2.1818,   2.1532,  -3.3830,  -0.4216, -10.1879],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0193,  -9.6866,   1.8314,  -0.9583,  -3.0140,  -0.6600,  -1.8791,
         -4.0090,  -1.9733,   3.3704,  -2.7583,  -1.7585, -14.1662,  -6.0472,
         -3.0304,  -4.8184,  -0.7030,  -7.9384,   3.5034,  -3.8718],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6560, -3.3402, -0.2798, -9.5290, -4.7365, -8.4204, -6.6747, -6.8790,
        -6.4846, -1.3247, -0.1722, -1.6217, -7.8400, -2.1867, -0.7672, -3.1082,
        -0.2957,  2.2346, -3.4223, -1.8750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8559,   2.4020,  -2.2130,  -1.5984, -19.0266,  -3.3578,  -7.6193,
         -4.7762,  -3.6756,   3.8102,  -3.3376,  -3.5578,  -5.5833,  -4.0671,
         -4.7803,  -4.6961,  -1.4136,  -4.8998,  -4.9103,  -2.0046],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9492,  -3.1006,  -2.6988,  -4.2063,  -4.5719,   1.2747,  -1.0421,
         -4.9783,  -1.9251,  -4.1742,  -1.1205,  -4.5618,  -1.3134,  -0.4515,
         -4.6592,  -3.1433, -31.3065,  -3.8635,  -7.0419,  -0.9648],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.0155, -27.2256,  -4.1858, -10.1649,  -5.6829,  -5.2838,  -2.2846,
         -1.0962,  -0.2810,  -4.9468,   0.0946,  -1.3106,  -3.1876, -12.0342,
          3.4891,  -8.4998,  -1.8357,  -2.8186,  -1.4060,  -4.9463],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1632,  2.9506, -2.4526, -0.7364, -0.5492, -3.5253,  1.4868,  3.3055,
        -2.1179,  0.9556, -1.7334, -1.0801, -6.4086,  1.0503, -1.4896, -2.1820,
        -0.0568, -1.3588, -2.1959, -8.8686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0018, -1.8564,  0.2070,  3.4089, -2.1470,  0.0212, -2.8551, -0.4924,
        -5.7551,  1.0557, -1.2263, -3.4225, -1.3733, -3.0786, -2.9765, -2.5396,
         1.1980,  0.7465, -2.3286,  0.0308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8560,  -1.4503,   3.0889, -12.5789,  -0.9565,  -2.8222,  -2.9762,
         -5.0495,   3.2249,  -5.1635,  -2.7176,  -0.5455,  -3.1306,  -4.3336,
         -0.3659,  -1.0834,  -2.7271,  -0.2594,  -1.1397,   0.0133],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2347,  -0.1115,  -1.6607,  -1.0411,  -1.0834,   0.3705,  -4.5847,
          1.5710,  -9.5175,  -1.1050,  -1.2072,  -6.7868,  -2.3637,   0.1838,
         -3.8928,  -1.3700, -16.8541,  -7.0032,  -4.3448,  -5.2493],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4235,  0.0425,  2.6868, -2.0671,  0.0729, -1.6064, -3.2460,  0.5798,
         2.4314, -1.7612, -2.3100, -3.1442, -4.0082, -3.9184, -1.2365, -0.1648,
        -2.3122,  0.0441, -2.1679, -0.1239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0830,  0.4584, -1.9735, -1.2873, -0.7882, -2.8506,  0.4048,  3.2234,
        -2.7350, -0.2890, -2.0182, -0.9285, -6.5266,  0.3115,  0.9642, -2.4346,
        -2.2273, -0.3199, -1.8803, -0.9340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2299,  -4.1836,  -5.9212,  -0.8321,  -1.9716,   1.3508,  -4.1131,
         -2.7505,  -3.5659,  -5.1277,  -5.6553, -13.0014,  -4.5262,  -8.3112,
         -2.5491, -18.8285,  -9.2195,  -5.6035,  -5.1325, -11.9250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4667, -2.3761, -0.5444, -5.2004,  2.0828, -3.7291, -2.0004, -3.0512,
        -2.1802, -8.1543, -0.2737,  0.0902, -3.7859, -0.3223, -1.2221, -3.2239,
        -0.1400, -8.3128, -3.2290, -1.0469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4043, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4445, -1.2743,  1.6068, -4.5890,  0.4877, -0.8991, -2.0479, -0.6085,
         1.3126, -3.3340, -1.1706, -1.8692, -0.6579, -4.8002, -1.1571,  1.3773,
        -3.9331, -0.7060, -2.0243, -0.9531], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0498e+00, -7.7375e-01, -8.6691e-01,  4.0713e-01, -4.7976e+00,
         3.1041e+00, -3.1946e+00, -2.1009e+00, -1.8009e+00, -4.6731e+00,
        -3.5198e+00,  6.6260e-01,  2.0456e+00, -3.9747e+00,  6.3016e-01,
        -1.1699e+00, -1.5649e-03, -9.5550e-01,  2.3632e+00, -6.1083e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4920, -5.3086,  0.3711,  0.4671, -3.6680, -2.5241, -3.0584, -0.6632,
        -6.3974, -2.2159, -0.1932, -3.8832, -2.3602, -4.3561, -0.9791, -3.9037,
        -0.5733,  1.6770, -3.7463, -0.1966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2864, -3.8550,  0.2905, -4.9496,  0.6148,  2.0698, -3.4465, -2.3207,
        -2.5489, -0.3642, -4.1734,  3.6168, -1.2613, -2.5843, -3.5390, -3.8409,
        -5.1291,  0.1589, -6.4841, -1.0512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3523, -2.3158, -1.1308, -7.2607, -0.8321, -4.5276, -3.0688, -2.3020,
        -2.1061, -2.0443, -8.0905, -1.1944, -0.2868, -7.4740, -1.5468, -1.8198,
        -3.9951, -1.4715,  1.6149, -2.2978], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1730,  -0.5023,  -2.0641,  -2.8680,   0.6045,   2.5837,  -3.2104,
         -0.4232,  -1.8008,  -1.5878,  -4.0312,   4.2122, -11.9952,  -2.5414,
         -1.4401,  -2.5707,  -6.1468,  -1.0128,   1.3438,  -2.7711],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9709, -1.6722,  0.2037,  0.6239, -3.3975, -0.7936, -3.5260, -0.2415,
        -5.1127,  0.9724,  1.7978, -3.2383, -1.0435, -1.8301, -3.4847, -1.3193,
        -8.3841, -3.6461, -0.3269, -2.0548], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0222, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2074,  -4.3713,  -0.7729,  -8.8443,   1.5630,  -0.6928,  -5.2969,
         -1.4245,  -1.6579,   0.1841,  -5.2774,   1.9117,  -1.9219,  -3.3437,
         -4.8068, -31.6165,  -3.4725,  -6.2461,  -1.3881,   0.5408],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9071, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0792,  2.8525, -4.0301, -1.8160, -3.1479, -7.8211, -5.2309, -0.0751,
        -2.0704, -0.8146, -2.1331, -2.0995, -6.7162, -1.5815,  1.1775, -5.1978,
        -2.5966, -1.2135, -2.6156, -2.0767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8918, -3.7151, -1.2648, -9.4190, -2.4846, -0.8510, -3.3218, -1.0246,
        -1.0848, -4.5570,  0.5794,  1.5356, -5.6978, -0.2175, -3.1408, -0.7686,
        -6.9331,  3.7393, -3.2183, -0.0995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.5721,  -0.5534,  -0.9545,  -5.4440,  -0.8480, -14.7779,  -4.2483,
         -1.7868,  -8.2370,  -1.6056,  -1.6600,  -4.9184,  -2.3467,   1.4632,
         -4.7998,  -2.8732,  -4.3724,  -2.5221,  -1.0041,   2.2446],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9878, -7.7704, -6.8515, -3.3594, -5.8614, -2.2452, -1.5105,  3.3358,
        -4.0073, -1.1101, -0.5690, -1.1923, -5.6305,  0.2298, -2.1012, -3.3276,
         0.0332, -1.0024, -1.6133,  0.5204], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3295,  -5.9105,   0.1664,  -0.2123, -10.9363,  -1.2448,  -2.7958,
         -1.0220,  -3.7784,  -0.1099,   2.8866,  -2.9720,  -1.6802,  -2.4987,
          0.0682,  -7.6006,   0.5991,  -1.4182,  -4.6444,  -1.3821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2908, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6595, -2.5240,  4.0307, -1.9677, -1.8975, -1.8878, -4.6986, -0.4269,
         2.9912, -2.7392, -0.7941, -0.5710, -4.8679, -1.2217,  2.5758, -1.9025,
        -0.4173, -2.9505, -2.2960, -5.0830], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1127, -4.3733,  0.0802, -5.0160, -0.9489,  1.4722, -4.9544, -1.2334,
        -3.0663, -2.7396, -5.3395,  1.4344, -2.8824, -4.5494, -2.8037, -2.6096,
        -2.1167,  0.1173,  2.7749, -4.2957], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6886,  -8.8979,  -6.1601,  -5.5836,  -2.9640,  -0.6487,   1.1353,
        -11.1805,  -6.2319,  -6.1928,  -2.3791,  -6.5050,  -0.1912,   0.2548,
         -2.9654,  -1.0926,  -2.4705,  -2.3159,   0.1624,   2.8029],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6756,  -0.1988,  -5.4261,   2.5711,  -2.9371,  -0.5626,  -1.4277,
         -1.8881,  -3.1103,   2.9722,  -3.5634,  -3.0613,  -1.6133,  -1.1225,
        -14.3953,  -2.9444,  -2.0883,  -2.7751,  -1.7211,  -0.1649],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8316, -1.8401, -1.3746, -4.5412,  0.6876,  2.1192, -1.7959, -1.3575,
        -0.9128, -4.3630, -0.5713,  1.7897, -6.2289, -1.3932, -6.4507, -0.6144,
        -7.7420, -2.6951,  0.7090, -2.8326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5584, -12.3525,  -6.2874,  -3.9847,  -5.8850,   1.0977,   0.7956,
         -4.2326,  -3.6489,  -3.7162,  -4.8267,  -3.2040,   1.9264,  -2.5348,
         -3.0677,  -3.4963,  -1.0647,  -2.5333,   3.1716,  -3.2090],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4667, -7.1492, -1.2243, -5.6560, -2.9220,  1.8736, -4.2545, -0.7121,
        -0.2455, -5.9749, -0.5101,  0.0886, -2.6565, -0.1458, -0.6341, -2.6764,
         0.4111,  3.2366, -4.1285, -1.7024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9840, -0.9797, -3.4346, -0.8625, -1.5590, -3.0282, -0.0667,  3.7274,
        -4.6835,  0.5305, -2.6930, -0.3694, -4.5168,  0.6020, -0.9232, -2.6755,
        -1.6748, -0.8973,  0.1795, -5.8360], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3588, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6582,  -4.3306,  -6.4937,  -6.2640,  -7.4085,  -1.0377,  -3.9161,
          0.8608,  -4.6038,  -3.0263,  -3.3893,  -2.4361,  -6.6294,  -3.0998,
        -11.6359,  -4.2264,  -8.9567,  -7.2091,  -6.2714,  -6.7766],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8137, -5.4817, -4.2258, -1.8968, -5.0944, -3.6833, -1.0486, -0.4101,
        -3.1836,  3.2751, -5.2849, -2.1957, -3.8201, -2.9225,  0.0320,  3.0416,
        -2.9389, -1.2273, -4.4866, -4.7270], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1651,  2.7701, -3.2140, -2.4565, -3.5452, -1.8162, -2.5158,  2.6849,
        -3.6327, -4.9147, -1.1298, -0.5076, -3.1937,  0.9567,  3.3935, -5.7448,
        -1.7446, -2.5074, -2.7070, -0.0518], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2537,  -3.5270, -10.5322,  -3.7996,  -3.5985,  -0.9711,  -4.4413,
         -4.3387,  -2.6379,  -0.7383,  -5.7144,   0.2759,   0.8412,  -5.6513,
         -2.1827,  -2.9044,  -1.8551,  -4.1431,  -2.3133,  -0.1931],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1857e-01, -4.1234e+00, -8.3215e-01, -5.9987e+00,  5.3663e-01,
        -1.3919e-03, -1.1587e+01, -3.2705e+00, -2.9831e+00, -3.1491e+00,
        -3.4263e+00,  1.6720e+00,  1.1753e-01, -3.9473e+00,  2.6367e-01,
        -1.7201e+00, -6.5082e+00,  2.3018e-01,  3.0981e-01, -1.8577e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9678, -1.3987, -0.2769, -1.2811, -2.2041,  1.1352,  1.4191, -3.2894,
        -0.4239, -2.8740, -0.9896, -6.7520,  0.9329,  1.2138, -3.3661,  0.0579,
        -1.8704, -0.4750, -3.2640,  4.3517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9193, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7425, -1.7507, -0.0612, -1.4435, -1.7380,  0.4282,  2.9911, -4.2917,
         0.2535, -2.5543, -1.8803, -3.4931, -0.4781,  1.2730, -3.2860, -2.2459,
        -1.6102, -5.1987, -1.9155,  3.0431], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9583,  -5.0678,   2.5012,   0.7279,  -4.9612,   0.1320,  -2.0034,
         -3.2041,  -0.8228,   4.1023,  -2.1494,  -0.6338, -12.1052,  -6.0441,
         -2.8038,  -6.6079,  -0.5929,  -0.5408,   0.6821,  -6.1315],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4077,  -3.0813,  -1.1838,  -1.8803,  -4.3833,  -2.4567,   2.2562,
         -3.6450,  -2.5883,  -2.3675,  -0.6211, -10.0558,   1.0995,   0.3092,
         -2.4605,  -0.6951,  -4.9432,  -2.7706,  -6.5898,   2.5636],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0000,  3.4024, -3.9895, -1.9901, -4.1727, -0.0628, -3.9176,  1.6303,
         0.8672, -3.4248, -0.4293, -2.3332, -0.5626, -4.2648,  1.3460,  2.1456,
        -2.4355, -1.3596, -2.2344,  0.5837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0465,   3.2511,  -3.1475,  -1.8238, -13.2954,  -7.2447,  -4.4035,
         -5.6868,  -3.5021,  -5.1018,  -3.2584,   1.4410,  -4.1085,  -1.0468,
         -2.0830,  -5.2904,  -1.1806,   1.9224,  -3.4846,  -0.0776],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9584, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4567, -2.4039, -2.1460,  3.8854, -1.6393, -1.5912, -2.5013, -0.2669,
        -7.0011, -1.4761, -0.2897, -2.0328, -0.0826, -1.9999, -2.1597,  0.9441,
         3.2142, -5.0452, -1.8170, -4.4045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7009, -2.6265, -2.2120,  1.7239, -3.1067, -1.2971, -1.8686, -1.8001,
        -3.4629,  2.9512, -4.8623, -1.3817, -2.3872, -1.8529, -5.8688,  2.8286,
        -8.4982, -0.9057, -1.7138, -3.2595], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7602, -3.6420, -2.6512,  3.7631, -3.6594, -0.6559, -2.6889, -0.7304,
        -5.3851,  1.5243, -0.7638, -3.7073, -0.2848, -2.7831, -1.1127, -5.3001,
         0.8884, -0.1772, -4.5683, -0.6449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6597, -2.3058, -1.1082, -6.7331,  4.1379, -3.7093, -1.8507, -3.1067,
        -1.9134, -4.1431,  2.0355, -5.0621, -3.7582, -0.6143, -3.4142, -1.8736,
        -4.8909,  3.8833, -8.8785, -0.6779], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.5280,  -5.5350,  -9.5812,  -1.5649,  -2.4600, -11.4597,  -6.4645,
         -2.0892,  -1.8351,  -1.6832,  -3.8523,   0.4873,   2.4021,  -3.7115,
         -1.2053,  -2.7354,  -0.8498,  -3.1331,   4.4391,  -2.5666],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3463, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9883,  -1.4723,  -2.4241,  -3.9558,  -1.0071,   0.8562,  -7.6153,
         -3.4246,  -6.2180,  -5.4544,  -2.2038,  -4.7071,  -2.9472,  -3.9312,
          0.4190, -18.0120,  -4.0777, -17.9978,  -9.7839, -18.8081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2670,  0.1376, -4.8169,  1.4558,  1.5343, -3.1088,  0.4470, -2.4937,
        -4.0431,  0.5257,  2.6592, -5.1070, -0.4910, -2.5621, -1.9736, -4.9479,
         1.1009,  0.6881, -2.4523, -0.8097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.6469, -17.7346,  -1.5280,  -0.7052,  -4.0954,  -1.7490,   2.9278,
         -2.2547,  -2.5829,  -6.5224,  -5.9492,  -2.3699,  -5.8180,  -1.5195,
         -1.4794,   2.1592,  -3.4884,   0.0827,  -3.4310,  -4.0354],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5408,  -4.1532,  -3.1824,   0.6517,  -7.0267,  -1.2816,  -6.5499,
         -4.1471, -17.6798,  -0.6939,  -3.7375,   0.8370,  -4.8170,  -7.0193,
         -5.0068,  -4.9222,  -4.2029,  -2.7647,  -5.1721,  -2.3998],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9723, -2.7486, -2.8646,  0.8531, -2.4949, -3.0202, -5.7010,  3.1040,
        -1.8718, -1.8655, -1.3206, -1.5689,  0.2440,  3.3111, -3.3044, -1.9971,
        -3.4195, -1.9446, -7.8365, -0.8199], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1563, -2.1643, -0.1939, -2.8409, -1.0640, -4.1520, -0.4680,  1.4966,
        -2.5626, -5.4915, -1.4878, -0.8969, -0.2638,  3.9014, -1.6002, -1.0311,
        -0.3852, -2.8141,  0.5383,  4.0570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7096,  -1.3417,  -0.7607,  -0.9106,  -2.5664,  -0.1987,   2.0197,
         -2.1597,  -0.9514, -13.9962,  -8.0344,  -5.0221,  -6.0977,  -1.7011,
          0.2335,   0.3096,  -1.9068,  -1.1146,  -1.3375,  -3.1289],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3978, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0177, -4.0335,  0.8460, -1.4152, -6.2146, -0.8830, -3.4006, -2.4346,
         0.7475,  3.1045, -3.3141, -1.2913, -2.6710, -1.8074, -6.3423,  0.3504,
         0.0394, -2.0910,  0.0852, -0.7705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7257, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5250, -2.7173, -1.0954, -4.3845,  0.9730,  2.7993, -1.9578,  0.5525,
        -0.9351, -0.7821, -1.3891,  3.9034, -2.1905, -1.0697, -2.5946, -5.3743,
        -0.8188,  0.7722, -3.7360, -1.6613], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4777, -0.3312, -0.7910, -2.3863,  0.5664,  2.1565, -1.7315, -0.5765,
        -3.6086, -3.8850, -5.0453,  0.9599,  0.7046, -4.1881,  0.1525, -1.5701,
        -3.3092,  0.5740,  3.2642, -3.7311], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2517, -4.2215, -5.8289,  1.6086, -4.1218, -3.6655, -4.3451, -0.9854,
        -5.4006, -2.7919,  2.1473, -2.6418, -0.3642, -0.4126, -1.9495,  0.5934,
         3.1097, -1.7541, -2.2868,  0.0116], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7610,  -3.3994,  -1.7791,   3.4202,  -3.3578,  -0.4744,  -2.9315,
         -1.5378,  -4.0647,   1.3838,   1.9793,  -2.5580,  -0.7314,  -4.1621,
          0.4480,  -1.7915,   3.6435,  -2.5959,  -0.4207, -18.9023],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1135, -3.4993, -0.1513, -6.5562,  4.7256, -3.9894, -0.8504, -2.4440,
        -1.8725, -8.2396, -0.8677, -0.3422, -3.7345, -0.4270, -2.3652, -0.4839,
        -3.8876,  3.1170, -4.5709, -0.5477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0075, -7.6563, -1.3047, -1.6030, -1.4217, -0.7944,  3.1043, -5.6357,
         0.8487, -1.6796, -4.3241, -0.8575,  3.3295, -3.7077, -0.5145, -2.8231,
        -1.5387, -4.8240,  1.0354, -0.6929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.9774,  -2.8835,  -1.4634,  -3.1785,   0.7363,  -5.6999,   4.1197,
        -13.8616,  -1.9027,  -3.7434,  -2.2705,  -2.1451,   1.0986,   2.5326,
         -2.3364,  -0.9717,  -1.2771,  -4.3822,  -1.0761,   3.5247],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3834,  1.0550,  1.7334, -1.5941, -1.0718, -1.8383, -3.1916,  0.4906,
         2.9899, -1.5113, -0.7507, -2.3120, -2.0891, -1.6560,  1.4554, -1.7966,
         0.5902, -1.0287, -2.4516,  0.4231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5228, -1.2864, -2.3403, -5.0841, -0.7099,  2.5247, -4.3632, -1.4084,
        -2.2760, -4.1468, -1.9920,  2.3529, -5.7985, -1.5014, -2.8662, -2.0692,
        -4.2374, -0.4081,  1.1654, -2.4732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0491,  -0.5535,  -1.1204,  -3.4803,  -2.2811,  -0.3576,  -5.5713,
         -0.8313,   3.9774,  -2.8835,  -1.4634,  -3.1785,   0.7363,  -5.6999,
          4.1197, -13.8616,  -1.9027,  -3.7434,  -2.2705,  -2.1451],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1399,  -6.3125,  -0.7500,   1.9986,  -6.7541,  -3.5425,  -9.5236,
         -5.4496,  -4.8554,  -6.8784,  -2.0431, -10.8836,   3.1581,  -1.8920,
         -4.6987,  -3.4083,  -2.7863,  -2.7179,  -7.5925,   0.4088],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3936,  1.4840, -2.2653, -0.9335, -2.2653, -2.7913, -5.7956,  1.2575,
        -1.7821, -5.1223, -1.2585, -4.3063, -1.5347, -5.2784,  0.1989,  1.6295,
        -2.7718,  0.0619, -0.6174, -5.0172], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4461,  -2.9265,  -0.0464,   3.4164,  -6.8470,  -3.7254, -24.0149,
         -6.5399,  -7.0793,  -6.3961,  -3.4274,  -3.4219,   0.9348,   2.7269,
         -5.7560,  -2.0411,  -2.0241,  -0.5356,  -4.9449,   3.5574],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6152,  -6.2965,   0.8403,  -5.5266,  -4.8063, -12.0968,  -4.8738,
         -9.0205,  -3.3574,  -3.7024,   1.0241,  -2.5138,  -3.0953,  -1.8915,
         -3.9973,  -6.0873,  -4.0292,  -2.9703,  -4.8134,  -3.7827],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0852,  4.4712, -2.5037, -0.0119, -2.4688, -0.4879, -6.2984,  3.0239,
        -1.3858, -2.0263, -0.8989, -1.5184, -7.1872,  0.2197, -0.8189, -2.5114,
        -0.8904, -1.8343, -0.8495, -1.5869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9676,   3.6847,  -1.7079,  -0.5619,  -3.6543,  -1.8604,  -3.4523,
          1.2786,   1.4100,  -3.1039,   0.3750,  -1.6802,  -4.5913,  -1.2700,
         -2.6690,  -8.4340,  -3.1548, -12.7586,  -0.4573,  -7.2472],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5411, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0247,  1.0483, -2.6167,  0.1984, -0.1071, -0.7607, -2.8018,  3.6786,
        -3.6971, -1.4684, -2.7405,  0.3423, -6.2434,  1.1607,  1.3650, -2.7804,
        -1.1283, -3.4520, -6.1631, -0.4877], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9298,  1.4529, -4.5508, -1.7009, -4.8562, -2.0270, -4.1579, -1.9976,
         0.2973, -3.7515, -3.7540, -0.3618, -1.0728, -3.3269, -0.2916, -1.2046,
        -1.4562, -2.0810, -1.2840, -3.3438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8914, -4.0520, -6.7206, -1.4035,  1.2029,  1.5514, -6.2404, -0.9848,
        -2.5770, -4.4677, -0.8425,  3.6539, -2.9254, -1.0350, -2.1867, -1.6408,
        -4.5188, -2.5101, -0.3257, -4.1266], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9201,  0.7965, -3.5481, -2.1980, -1.5617, -1.3265, -0.8768, -5.2875,
        -0.7294,  1.3653, -2.3716, -4.5773, -4.7315, -2.5343, -3.2003,  1.3349,
         1.8346, -2.9562, -0.7119, -1.6205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3187, -0.0144, -2.2446, -3.6673, -2.1208,  2.5389, -3.5719, -2.2011,
        -8.7417, -6.2648, -4.2915, -6.2437, -2.9677,  0.2636,  2.8169, -6.4560,
        -5.6290, -1.0977, -0.3317, -3.9803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6464,  -1.1424,  -0.6989,  -7.7481,   1.8604,   0.4246,  -3.9247,
         -1.9635,  -0.9928,  -0.6190, -10.5955,   4.3901,  -2.0118,  -3.1593,
         -4.3186,  -7.3225,  -6.2913,  -8.7619,  -5.8746,  -7.2527],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1440, -3.1352,  0.6768, -3.2455, -0.8614, -7.9168, -2.1870,  0.8600,
        -4.2545, -2.3273, -2.8475, -2.3542, -0.9839,  2.6737, -2.0059, -1.7337,
        -2.9650, -0.2952, -5.3117,  0.6210], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1252e+00, -1.3290e+01, -5.1057e+00, -5.3321e+00, -6.3624e+00,
        -1.0166e+00,  6.8651e-01,  3.7148e+00, -5.8082e+00,  1.1526e-03,
        -9.9266e-01, -2.1726e+00,  1.9148e-01,  4.4160e+00, -2.2311e+00,
        -2.4183e+00, -3.5571e+00, -3.8144e+00, -3.6394e+00, -8.0378e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8572, -2.2003, -2.5300, -1.9065, -5.1065, -1.2621, -1.1605, -3.2415,
        -1.4332, -1.6707, -2.0465, -0.6654,  3.5869, -3.3668, -5.5633, -2.5140,
        -2.9417, -5.3476, -8.0447, -0.3350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3276e+00, -1.2681e+00, -2.2552e+00, -5.3538e+00, -9.9969e-01,
         1.9913e+00, -1.9678e+00,  1.1186e-02, -1.4446e+00, -3.9489e+00,
         9.1901e-01,  1.0857e+00, -3.4834e+00, -2.6278e+00, -2.1191e+00,
         6.6998e-02, -1.4190e+01, -1.3673e+00, -9.1579e-01, -1.3241e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4860, -0.2169, -1.3965, -0.9947, -0.9576,  3.2959, -2.5561,  0.0221,
        -1.7382, -2.8092, -1.0047,  2.8184, -4.3974, -0.3122, -2.3412, -1.3226,
        -5.4250, -1.4120, -0.3283, -2.8794], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8269, -1.8828,  1.6735, -2.3012, -1.1857, -2.5627, -0.8886, -3.2178,
         4.2447, -6.3442, -0.8717, -3.0078, -1.6742, -4.8173, -0.0125, -1.8837,
        -6.4702, -0.6846, -1.6141, -1.5844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2345, -2.5341, -3.1240, -1.4762, -5.9309, -4.0185,  1.6053, -1.7544,
        -2.5383, -0.9861, -4.0462,  0.2555,  1.0528, -3.5828, -2.8362, -9.6954,
        -4.5253, -8.8498, -0.3969, -7.5624], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2302, -0.7638, -1.7684,  1.1617, -2.3285, -3.7113, -2.7444, -1.2698,
        -4.7445,  0.7065,  0.3141, -3.7154, -2.6795, -3.3279, -2.6079, -5.0575,
        -6.0327,  0.7015, -2.6968, -2.2568], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6196, -0.0877, -2.6211,  3.6672, -3.1840, -1.2275, -2.8190, -1.5895,
        -5.3444,  1.0013,  1.5374, -2.8678,  0.5367, -2.3358, -0.4504, -0.7962,
         4.5446, -3.3492, -1.3363, -1.5368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0770,  0.3182, -3.8720, -6.0566, -1.2047, -4.4500, -1.3263, -4.7574,
         1.4877, -1.5643, -3.3163, -0.4399, -0.7976, -2.2650,  0.9260,  3.4536,
        -2.1681,  0.3066, -4.3257, -0.6934], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9054, -1.7797,  0.1506,  2.7069, -4.6023, -6.2282, -4.5811, -1.7105,
        -5.8548, -1.8659,  2.0735, -4.0993, -1.2986, -0.5113, -1.2558, -4.2034,
         1.0764, -2.7478, -2.1337, -0.0918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.5891,  -2.9631,  -3.7029,  -1.7769,  -3.4613,  -3.4072,  -0.0941,
        -14.2278,  -6.1204, -13.5824,  -2.6624,  -4.7233,  -1.1228,   0.1572,
         -2.9814,  -3.5798,  -1.1908,  -1.1494,  -4.3377,  -0.3302],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6333, -1.5248, -2.3278,  4.5242, -4.6180, -1.4736, -3.3987, -3.8523,
        -2.3590,  0.2639,  3.3017, -2.4644, -0.0843, -2.4032, -1.7448, -3.8221,
        -4.3566,  1.1350, -0.8329, -0.0244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1986,  0.2237, -0.9850, -4.5679,  1.3542,  2.8253, -2.8283, -1.2591,
        -2.6348, -1.7874, -1.7411,  0.6963, -2.1607, -2.4720, -1.3455, -3.3900,
        -0.2173,  1.6066, -4.0284, -1.7287], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8954, -14.9446,   1.6117,  -4.1945,  -7.7786, -15.9011, -76.3282,
         -5.0002,  -9.6982,  -2.1104,  -4.1814,   0.8213,  -3.1664,  -2.4918,
         -3.1360,  -6.1011,  -3.1568,  -0.3853,  -6.5095,  -7.1216],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5834, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5828, -3.0385, -1.6751, -3.5501, -4.7626, -1.1175, -0.4208, -3.0573,
        -3.5202, -2.3707, -1.5650,  0.4142,  3.8040, -2.1785, -0.7548, -2.2520,
        -3.3713, -7.8063, -2.1877, -0.1483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.1870,  -3.2123,  -1.6851,  -3.1566,  -3.6785,  -9.1033,   1.5775,
         -7.5138,  -1.4398,  -2.5635,  -4.6314,   0.7216,   2.2239,  -4.7625,
         -1.4801, -16.6741,  -9.2771,  -8.2356,  -6.9088,  -3.3826],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.5730, -3.0431, -4.7487, -4.1823, -8.2849, -1.9964, -9.0146, -1.2972,
        -6.0897,  3.2202, -0.9354, -4.2733, -2.2574, -2.3144, -2.0784, -6.9500,
         3.4914, -1.4299, -0.6297, -1.4698], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4916, -6.4205, -5.2322, -2.9004, -5.9814, -1.5222, -2.8965,  1.5422,
        -4.8694, -0.6232, -3.2310, -6.0471, -1.4402,  0.7474, -5.1602, -1.5499,
        -3.6058,  0.0345, -6.2960, -7.0384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5060, -0.4775, -1.3182, -2.6842, -0.7901,  3.6014, -6.8486, -1.5939,
        -3.4081, -7.1463, -0.8023,  2.8993, -3.9610, -1.9399, -9.1783, -6.4289,
        -2.5170, -6.0942, -1.2193, -1.4623], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0029, -0.5539, -4.7150,  1.6565, -1.3702, -3.2900, -0.3954, -2.1814,
        -0.3727, -2.0604,  2.1136, -3.2464, -1.4443, -4.7042, -3.4471, -3.2510,
        -1.2409,  1.0977, -3.4746, -2.0710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1662, -1.8232, -3.0449, -2.0595, -3.4250, -0.4465, -3.8903,  3.5766,
        -4.0317, -0.6367, -1.7117, -0.5996, -4.1425,  1.2908,  1.4537, -2.6966,
        -1.7818, -4.1498, -0.0698, -7.4324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2330,  1.6746, -1.8081, -2.3704, -3.6347, -2.0006, -5.2995, -0.8146,
        -0.7587, -3.5976, -1.3921, -0.1253, -3.3880,  0.6093,  2.9450, -1.4720,
        -2.3109, -0.4978, -2.5575, -0.8096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3421, -15.6540,  -6.2009,  -9.1124,  -5.7975,  -6.2938,  -2.1712,
         -9.2167,   2.4096,  -4.9700,  -1.5732,  -3.3376,  -4.9566,  -3.3606,
          1.5219,  -2.6194,  -2.9622,  -2.7229,  -2.1469,  -5.7860],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8468, -0.1863, -3.1181,  1.9149, -6.7466, -2.3050, -3.6822, -0.9081,
        -4.6623,  0.5443,  0.9970, -2.6693, -0.0485, -1.9592, -1.8265, -0.3612,
         3.6985, -2.4386,  0.0573, -3.5301], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1108, -3.4333, -3.1901, -0.5392, -5.4932, -4.3532, -1.0180,  2.1961,
        -3.1803, -1.4153, -3.6597, -3.8426,  0.3621,  1.5677, -2.9372, -2.0742,
        -4.2340, -2.6084, -0.9085, -0.2848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5987, -2.3163, -1.4001, -0.7189,  3.0918, -5.3371, -1.3184, -3.6590,
        -1.4614, -3.7784, -0.0452,  1.1270, -4.1068, -2.2386, -1.0598, -0.7472,
        -1.1168,  3.5426, -1.7857, -4.2722], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0253, -3.8960,  1.3978, -0.7074, -2.4241,  0.2863, -1.2829, -1.2765,
        -3.0646,  2.2444, -7.2599, -1.8060, -0.9663, -4.6590,  1.5954,  3.5352,
        -5.0648, -2.4688, -2.8899, -2.8375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2610, -2.2000, -2.0289,  3.1287, -7.2868, -0.9625, -1.3963, -4.2263,
        -0.3244,  1.8991, -2.9620, -2.5513, -6.4538, -4.4471, -4.6164, -5.7291,
        -0.1590, -7.2729,  2.6540, -5.5006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1648, -3.0883, -4.5048, -0.6867,  0.9095, -3.3498, -0.7308, -1.7072,
        -1.8324,  0.8980,  3.1896, -2.7918, -6.2683, -1.8860, -2.2084, -3.6848,
        -1.8887, -0.3888, -7.3396, -6.5746], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9564, -4.7634, -4.5185, -0.7718,  1.6185, -1.4713, -1.0665, -1.9581,
        -3.6047,  0.9388,  1.0572, -6.6026, -0.4056, -3.6312, -1.9327,  0.2344,
        -0.3667, -2.7845, -0.9413, -0.8545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2951,  -0.9514,  -2.1434,  -3.2882,  -0.8772,   0.3024,  -2.3270,
         -1.6160,  -2.3839,  -2.2672,  -0.6284,   3.0905,  -7.4382,  -2.6273,
        -17.9404,  -2.9041,  -7.2402,  -0.7708,  -2.0222,   0.4051],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1452,  -2.9873, -18.6908,  -5.1650, -10.3339, -10.0176,  -9.3428,
         -0.8595, -11.1673,   4.1157,  -8.1948,  -3.3417,  -2.4055,  -4.3308,
         -1.3888,   3.2831,  -1.1574,  -2.3411,  -8.5634,  -3.9486],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4187, -0.7377, -3.9746, -0.1650, -2.6594, -3.3097, -1.7409, -1.1048,
        -1.9044,  4.2011, -8.4483, -4.7270, -3.6942, -4.2944, -6.2399, -4.2353,
        -1.5549, -6.9782, -7.5462, -5.6995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0107,  -2.3520, -12.0594,  -5.1069,  -7.5642,  -7.7751,  -8.2967,
         -8.3860,  -1.3406,  -3.8740,   0.3644,  -7.3251,  -1.3770,  -2.3524,
         -5.0248,  -4.5354,   2.8861,  -1.9179,  -1.4859,  -2.8930],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4993, -11.9040, -21.4981,  -4.6843,  -6.5154,  -6.0784,  -1.6109,
         -3.8761,  -1.0651,   2.0919,  -2.9407,  -0.6042,  -2.2379,  -6.0843,
         -1.1451,   1.4863,  -3.7593,  -0.5974, -14.8839,  -4.2086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1662, -8.2955, -2.8121, -0.9477,  0.4767, -5.5446, -4.4570, -4.3387,
        -7.7364, -1.2897,  1.3785, -4.0226, -1.3711, -1.8994, -1.6018,  0.1072,
         2.7236, -3.1639, -0.6680, -3.3620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0663,  -4.5646,  -1.8027,  -0.3311, -13.5401,  -2.6833,  -0.9268,
         -1.8306,  -0.8871,  -1.3990,   3.0452,  -2.5649,  -2.9000,  -2.7080,
         -1.1024,  -3.4628,  -0.4113,   2.5940,  -3.3003,  -0.5892],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6475,   3.1695,  -5.1565,  -0.0876, -16.2879,  -6.6852, -10.9199,
         -1.2690,  -0.7001,  -0.8803, -13.9507,  -1.2131,  -2.6591,  -6.3804,
         -3.6658,   3.2931,  -3.1972,  -0.3969, -16.3668,  -4.7339],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1630, -11.4234,  -1.9791,  -7.3801,   1.2642,  -2.3029,  -4.1879,
         -9.5834,  -3.8916,  -5.6821,  -3.1095,   2.1037,  -1.1649,   0.2233,
         -2.4655,  -1.1792,   0.1759,   2.6813,  -2.8427,  -0.2624],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4497,  -2.8468,   0.2377,   3.8068,  -1.4838,  -0.0264,  -0.8901,
         -4.4472,   0.7342,   3.6443,  -2.9273,  -0.4327,  -1.0425,  -0.2545,
         -4.8932,  -1.1029, -15.8926,  -1.5365,  -4.4383,  -6.0260],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1967,  -5.5051,  -2.8343,  -7.2790,  -4.2550,  -6.5682,  -1.3655,
          0.6206,  -2.4563,  -3.3842,  -4.0397,   0.6214, -10.1502,   0.7412,
          1.2441,  -2.0487,  -3.3596,  -2.4409,  -1.8029,  -5.2575],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2540,  -0.2732,  -3.1921,  -0.9337,   2.4959,  -2.3334,  -1.5315,
        -28.8798,  -6.5384,  -5.0645,  -6.2357,  -1.9168,  -0.9357,   3.3303,
         -3.9670,  -0.1002,  -1.1764,   0.6050,  -4.4771,   1.8219],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9524, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7389,  1.0963, -2.3678, -0.7760, -3.1122, -1.0414, -5.0049, -1.3875,
         2.4953, -2.7841, -1.4858, -1.5644, -6.0188, -0.1876,  1.1045, -4.9421,
        -1.2904, -6.7993, -8.3606, -2.8668], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3516, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8817, -1.4565, -1.4552, -0.2694,  3.3602, -2.7721, -0.9615, -3.3483,
        -2.3832, -2.7476, -7.4770, -1.6527, -2.6141, -3.4328, -0.4318, -0.7890,
        -3.5258,  0.8303,  3.6051, -2.7870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5414, -1.6869, -0.1183,  4.3847, -1.3106,  0.4440, -2.3557, -2.5428,
        -5.0708, -1.6530,  0.4419, -2.5415,  0.0707, -0.5656, -1.9743,  0.2040,
         1.6274, -1.6446, -0.7827, -3.7452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1221, -0.8431, -2.1814, -2.0688, -6.1228,  0.3730,  0.6278, -2.6094,
        -4.5118, -1.7636, -1.9924, -3.2869,  2.2710, -1.8539, -4.7338, -0.5106,
        -2.6190, -4.3988, -3.5167, -1.6755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1541,  -0.0624,  -1.3260,  -1.7421,   1.0830,   3.8335,  -2.8458,
         -2.1269, -13.0308,  -3.5176,  -6.5540,  -0.7719,  -0.3592,   2.8013,
         -4.4087,  -0.3126,  -2.6585,  -3.3107,  -4.0691,   3.1390],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0639,  -2.7320,  -2.4780,  -3.3310,  -3.7461,   1.5535,   2.0923,
         -1.6695,  -1.5436,  -1.3748,  -4.7631,  -0.7757,   1.1740,  -4.8549,
         -0.9356, -16.0407,  -2.2387,  -7.9680,  -2.2388,  -6.6966],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3547, -1.3187, -0.7823, -6.9965,  0.8258,  2.5830, -3.0709,  0.3308,
        -1.5337, -0.3515, -7.0266, -2.2173,  0.6376, -2.7703, -1.0188, -0.9248,
        -3.1882, -4.9891,  3.9034, -3.0146], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6242,  2.5245, -2.1834,  0.0206, -1.2897, -3.0949,  0.1579,  3.3293,
        -2.7661,  0.2606, -2.0588, -2.7404, -6.0275,  1.6385,  0.4231, -2.0218,
         0.5386, -2.0157, -2.1871, -2.1570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9612,  0.4309,  0.9847, -2.1209, -1.4928, -2.1644,  0.1317, -6.6180,
         2.0685, -0.3536, -2.8608, -1.2978, -2.0295, -0.6242, -0.1005,  3.6976,
        -3.0353, -1.6708, -2.4608, -5.1850], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3439, -4.0207, -0.4320, -5.9522, -1.7337,  0.6595, -3.5486, -2.5925,
        -1.6342, -0.5465, -4.9793,  4.9794, -2.6739, -0.9963, -2.1133, -0.2666,
        -4.8018,  0.9322,  1.3501, -3.4006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5772, -2.8276, -0.1767, -1.6061, -5.0906,  0.9425,  2.4024, -2.9837,
        -0.1761, -2.2427, -3.2009, -5.0655, -0.6743,  2.5595, -2.6060, -0.4490,
        -3.9447, -1.1331, -6.4521,  1.5691], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6973,  -1.0838,  -2.7575,  -0.5584, -16.2348,   1.3140,  -3.7182,
         -3.5900,  -0.8650,  -2.4536,  -3.2548,  -0.4661,   0.7026,  -4.1774,
         -0.6297,  -4.7421,  -3.0782,  -2.1719,  -1.9600,   1.9238],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5975,  -2.1380,  -3.3929,  -3.3851,  -4.7750,  -4.7547, -11.2711,
         -1.3433,  -2.9236,  -2.5602,  -5.1120,  -4.4401,  -5.3703,   1.5547,
          0.9562,  -3.7271,  -3.9134,  -1.1777,  -5.3456,  -2.1528],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2364,  -5.7812,  -4.2404,   1.1262,  -6.5203,  -3.2960, -20.0633,
         -2.6588,  -5.2355,  -2.2255,  -1.0800,   3.7569,  -4.8720,  -1.2772,
         -1.3255,  -3.9323,  -1.3518,   3.7469,  -3.0351,   0.4498],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9526, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5154,  -3.0863,  -3.8900,  -2.1473,   2.7781,  -5.7151,  -3.3174,
        -13.1663,  -3.7601,  -7.5229,  -0.4375,  -0.5286,   3.2504,  -4.1082,
         -0.6930,  -1.5570,  -2.1052,  -5.7236,   2.2859,   1.1707],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5394, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6355, -0.0812, -3.1339, -2.6482, -0.1284, -6.0883,  1.7277,  0.8388,
        -2.7152,  0.0296, -1.1848, -1.5875, -2.6630,  2.4312, -5.8461, -2.3487,
        -2.4256, -1.7614, -3.9661, -0.2645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5090, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6438,   3.1276,  -1.7819,  -0.7055, -23.0410,  -5.2658,  -8.1596,
         -3.4351,   0.7427,  -2.1040,  -5.4766,  -1.2517,  -1.9759,  -1.8201,
          1.3641,   3.3253,  -3.1783,  -2.1755, -24.5345,  -2.2910],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.7306,  -1.5280,  -2.4047,  -1.9705,  -4.3608,   1.0813,   1.0634,
         -2.3753,  -0.6634,  -0.9369,  -2.9472,  -2.2796,   3.6386,  -5.6894,
         -2.8047,  -3.1058,  -3.7557,  -4.0503,  -1.5055,   2.3026],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2693,  -1.5232,  -1.6083,   0.2326,  -0.7585,   3.7998,  -2.7489,
         -1.7052,  -4.8774,  -2.4225,  -6.4690,  -6.6577,  -7.9950,  -7.5655,
         -4.5725,  -0.8442,  -4.0695,  -4.0268,  -4.9084, -21.6772],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1691,  -3.7568,  -3.3377,  -0.0555, -10.8267,  -3.3299, -26.0056,
         -4.8329,   1.0545, -10.9206,  -4.3361,  -7.2273,  -2.1543,  -3.4834,
         -2.2966,  -0.5127,  -0.2184,  -1.1240,  -2.5975,   1.8281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.3106,  -2.4427,  -4.0829,  -2.8389,  -3.5886,   2.7722,  -4.8522,
         -2.1071,  -2.5512,  -3.4010,  -8.3764,  -2.5597,  -2.2572,  -4.7016,
         -3.4274,  -4.8457,  -1.6595,  -0.0832,  -0.5002,  -1.6546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7792,  -2.7985,  -0.7759,  -3.8479, -17.7430,  -6.6520, -17.6069,
         -3.4404, -16.0466,  -8.0623,  -5.2873,  -6.0120,  -1.3325,  -0.8220,
          3.3432,  -5.1848,  -0.2779,  -2.9228,  -7.1822,  -2.0374],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2989,  3.5094, -3.1826, -0.1112, -4.2503, -0.3796, -5.7694,  1.1882,
        -0.4607, -4.8418, -2.6755, -4.7302, -1.2059, -4.7118,  1.5674,  2.6740,
        -2.2568, -0.9326, -1.8077, -2.0837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9358,   0.1992,  -1.5084,   0.5108,  -8.5267,   0.5032,   0.2443,
         -2.0026,  -0.3278,  -0.5776,  -4.2388,  -0.0181,   2.6576,  -6.6836,
         -2.8057,  -4.7341,  -1.1635, -10.9288,   0.6921,  -1.3963],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2020, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6595,   0.9314,  -2.9234,  -0.7189,  -2.7570,  -2.9274,  -5.4886,
         -0.2967,  -3.6025, -10.1352,  -3.3117,  -2.4082,  -3.5552,  -4.3644,
         -3.4340,  -6.9836,  -9.3790,  -3.6021,  -1.2224,  -3.4720],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7901, -32.0062,  -4.4254,  -8.0654,  -6.4769,  -0.8244,  -1.0105,
          4.5627,  -4.4268,   0.2482,  -3.8964,   0.3860,  -5.0336,  -0.8579,
         -2.0757,  -2.6618,  -0.5127,  -3.1334,   0.1795,  -5.5096],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.0100,   3.9074,  -2.1419,  -4.3964,  -5.3701,  -4.5523,  -1.8999,
         -3.4420,   0.7112,  -2.1320,  -3.1722,  -1.0640,  -2.8718,  -4.4268,
         -1.5463,   2.4214,  -3.7929,  -2.2669,  -2.0079,  -2.6425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7197e+00, -1.0014e+00, -3.2793e+00, -1.6526e+00,  1.0647e-02,
         1.8435e+00, -4.0517e+00, -2.6015e+00, -4.4473e+00,  3.3546e-01,
        -7.1138e+00,  1.1134e+00,  1.1626e+00, -4.3198e+00, -1.3191e+00,
        -1.4554e+01, -2.5759e+00, -1.0343e+01, -8.8353e-01, -4.1090e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.2592, -7.5358, -4.3427, -4.4930, -5.9960, -6.3717, -4.9090, -5.8924,
        -4.5176, -3.0097,  1.4681, -3.8667, -2.6856, -4.5926, -2.7422, -6.2023,
        -3.7010,  0.8524, -2.6504, -2.8270], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5029e+01, -3.0877e+00, -1.5816e+00, -1.6768e+00, -4.6838e+00,
         1.4631e+00,  2.8338e+00, -2.3600e+00, -1.1332e-02, -1.6838e+00,
        -4.7036e-01, -3.9349e+00,  4.1540e+00, -4.6201e+00, -2.3687e+00,
        -3.5450e+00, -8.8259e-01, -3.1454e+00,  4.4107e-06,  1.3301e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9649, -3.8178, -2.5227, -2.3264, -3.8770, -4.5246, -1.7652,  1.1644,
        -1.8399, -0.4614, -0.2565, -3.2474, -1.3077,  3.3109, -4.5584, -1.8639,
        -1.3722, -0.1544, -6.7622,  2.1415], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0691, -3.6828, -2.1599, -3.6765, -2.4669, -4.1893, -4.1598, -5.4919,
         1.3925, -2.6244, -1.3450, -4.1812, -4.9506, -4.2814,  2.3914, -3.8809,
        -1.2076, -5.1220, -2.4964, -1.2267], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1318,  -7.5611,  -1.0272,   1.1036,  -2.5352,  -0.1075,  -1.7372,
         -1.7677,  -1.4203,   4.0425,  -2.9790,  -3.2949,  -3.6583,  -1.1057,
         -4.5385,  -0.5688,   1.0688,  -1.1282,  -1.1141, -18.4789],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3194,  -3.1617,   3.7173,  -7.1854,  -2.5097,  -2.1974,  -1.0284,
         -4.3572,  -0.8696,  -2.1760,  -3.2838, -15.8763,  -2.8288,  -6.0989,
         -0.8268,  -0.0805,  -2.1462,  -2.5099,  -0.7089,  -0.7261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8784,  -2.1406,  -2.4096, -13.3914,   0.1582,  -0.0389,  -2.9690,
         -0.7973,  -5.5922,  -0.8512,  -2.5597,   2.3223,  -5.7456,  -3.9768,
         -3.7245, -12.6921,  -3.1009,  -5.8255,   0.1963,  -3.2710],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4508,  -8.3845,  -7.2157,  -2.8561,  -2.1174,  -1.5146, -13.0652,
         -1.7911,  -1.1241,  -1.1830,  -1.3434,  -3.1004,  -3.5103,  -0.3264,
          3.2971,  -3.0524,  -1.8696, -11.8862,  -5.1829,  -4.0489],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5830e+00, -1.8500e+01, -3.6775e+00, -7.0923e+00, -3.2760e+00,
         5.2230e-01,  1.1177e+00, -6.9566e+00, -2.3188e+00, -2.4629e+00,
        -8.2092e+00, -6.6731e+00, -2.1135e-01, -3.0349e+00,  2.7066e-02,
        -2.9629e+00, -8.1030e-01, -3.9944e+00,  1.2402e+00,  1.6336e-02],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2347,  -5.9065,  -2.6746,  -2.9438,  -1.2170,  -4.8288,  -5.1292,
         -5.1279,   0.3541,  -6.2112,  -7.7159, -14.7252,  -5.6608,  -7.2389,
         -6.8860,  -4.9829,  -2.2820,  -3.2770,  -2.6672,  -2.6968],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0653,  -0.2835,  -3.5426,  -3.9338,  -8.2492,  -4.0824,  -8.4405,
         -3.7269,  -2.6830,  -0.3412, -16.2789,  -6.1384,  -5.0310,  -5.6731,
         -5.6140,  -4.1428, -13.4322,  -5.2854,  -9.2677,  -7.5692],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4573,  -2.3113,  -5.7503,  -2.8086, -18.3097,  -2.7491,  -9.2890,
         -0.9322,  -3.5569, -15.8405,  -5.5932,  -4.8991,  -1.7816,  -5.8440,
         -2.7178,  -2.0022,  -2.3552,  -1.3772, -23.6308,  -4.1483],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8309,  -1.7910,   1.0325,  -5.3636,  -2.1129, -23.4056,  -3.3379,
         -8.0322,  -1.9990,  -1.1280,   3.6002,  -5.7599,  -2.3678,  -3.5965,
         -0.2498,  -4.5817,   2.1550,   1.0970,  -4.6155,  -1.9184],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2603, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5453, -0.7677,  3.3097, -2.9803, -1.6786, -2.3743, -1.0602, -3.2035,
         3.4037, -4.1413,  0.2357, -0.7557, -2.0321, -1.3770,  0.7667, -4.5834,
        -1.7314, -2.0398, -2.2056, -5.1387], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0116, -2.4902, -0.6009, -3.2363,  0.1830, -2.0678, -5.4125, -1.8890,
        -2.7412, -3.3487, -6.4630,  3.5964, -0.5427, -1.4584, -2.6648, -3.6656,
        -0.9463,  3.2787, -3.7299, -3.6036], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1278,  1.3062,  1.1889, -2.1409, -0.6298, -2.2572,  0.1912, -5.5178,
        -1.5856, -2.1652, -6.9806, -6.9705, -4.9447, -2.9225, -8.8541, -2.0328,
        -3.8792, -3.7867, -3.6318, -9.5533], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3742, -1.5665, -2.1636,  1.7213,  1.2144, -6.5990,  1.0141, -4.0543,
        -2.8306, -0.4272,  1.8216, -1.1311,  0.5194, -1.0977, -1.0593, -0.8816,
         4.3534, -3.6839, -0.4548, -1.9411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.7779, -5.0415, -5.0404, -2.9362, -1.9499, -1.0428, -4.9840, -4.8154,
        -1.6201, -0.8708, -6.2636, -0.0229, -1.6962, -3.3444, -0.3523, -3.8172,
        -2.4697, -7.3947,  1.5355, -1.6890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9572, -2.3849, -1.3015, -5.1602,  1.7079, -1.3107, -3.6864, -0.2809,
        -1.5321, -1.7974,  0.3625,  4.3503, -1.6428,  0.7187, -5.0056, -3.9742,
        -7.5516, -1.6051, -0.6940, -6.4187], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4086, -4.6964,  3.9574, -2.9558, -0.1986, -3.0247, -1.2354, -3.8033,
         1.0969,  3.1440, -1.8877, -0.1612, -0.9341, -4.2805,  1.1337,  2.1498,
        -4.4177,  0.4534, -3.0777, -1.3430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6357, -3.6325,  1.2094,  2.0612, -2.3982, -5.4711, -2.9779, -0.0900,
        -6.0017,  0.1548,  0.8992, -1.3537, -2.3896, -1.7695, -3.3824,  1.1952,
         3.5426, -2.0410, -1.1242, -0.0644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1456e+00, -4.8316e-01, -2.8672e+00, -2.8373e+00, -1.3764e+00,
         3.4879e+00, -2.0488e+00, -1.8897e+00, -1.2606e+00, -3.9755e+00,
         4.3217e-03,  1.9275e+00, -3.1624e+00, -3.7507e+00, -1.7688e+01,
        -5.3709e+00, -5.5495e+00, -3.2465e-01, -1.3605e+00,  4.1632e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5400, -2.6025, -6.7603,  4.6422, -1.4653, -2.4969, -6.8393, -5.5062,
        -3.8179, -8.7466, -0.6756, -6.5595, -1.4715, -2.5736, -0.4843, -3.2261,
         2.2980, -5.3807,  0.0789, -2.0645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7908,  0.4369,  2.1480, -4.8301, -2.1016, -3.6676, -1.9063, -3.7374,
        -3.2173,  1.0253, -2.3733,  0.2892, -0.3818, -0.8174,  0.0573,  3.8604,
        -5.2813, -0.3166, -2.2431, -1.9914], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9899,  -2.1328,  -6.2206,  -1.1476,   0.3185,  -4.1828,  -1.5226,
         -1.3948,  -2.8355,   0.1135,   3.6795,  -5.2893,  -0.9896,  -3.5434,
         -1.6577, -10.2663,  -0.2193,   0.0316,  -3.3844,  -0.6250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1659, -2.6581, -1.3508, -2.3378, -3.9267, -6.4936,  2.2216, -2.4801,
        -4.1465, -3.6593,  0.0616, -8.3503,  0.1921,  1.0361, -3.5974, -0.0687,
        -2.8278, -0.6562, -3.9532,  0.8927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1502,  -2.3067,   0.6640,   3.8066,  -6.7464,  -2.0091,  -3.0623,
          0.4630,  -1.8848,   3.9548,  -1.7189,  -1.9242,  -2.1835,   0.0857,
        -11.1139,   1.3641,   0.5242,  -1.6833,  -0.7143,  -0.7705],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8069,  -2.7459,  -8.1635, -20.2387,  -3.7381,  -9.3889,  -5.2994,
         -2.4930,  -1.4504,   3.9954, -10.0042,  -0.5046,  -2.7887,   0.2797,
         -3.4340,   1.1213,  -2.2411,  -4.7970,  -2.1828,  -0.3288],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0009, -3.9515, -0.3420, -1.0588, -3.3313,  1.3677,  3.3934, -3.9889,
        -1.4624, -1.1295, -3.4160, -0.3220,  3.6419, -1.4566,  0.3672,  0.0374,
        -3.6834,  2.2873,  2.6324, -2.6758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1525e+00, -1.8188e+00,  1.4898e-01, -3.3556e+00, -2.6153e+00,
        -1.3680e+01,  1.6085e+00, -5.7334e+00, -1.9331e+00,  9.6089e-03,
        -3.3261e+00,  4.5574e-01,  1.7425e+00, -7.0662e+00, -2.2304e+00,
        -5.0319e+00, -7.1063e+00, -2.6237e+00, -8.6355e+00, -3.1251e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4313, -2.1843, -1.2978, -2.9850, -1.5141, -5.2738, -2.2187,  1.4086,
        -2.5737, -0.0664, -2.3975, -5.3042, -1.2641,  2.7509, -6.0212,  0.2210,
        -1.4467, -0.2772, -1.3131,  3.7219], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.3042,  -4.4173,  -0.6036,  -2.4197,  -3.7998,  -1.2721,   3.2760,
         -2.0598,  -5.8180,  -3.1124,  -2.3455,  -4.1658, -14.4806,  -4.2656,
         -2.7095,  -4.6976,  -0.9671,  -3.2964,  -7.7280,  -2.4655],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1949,   1.7860,  -8.9909,  -0.4800,  -7.2164,  -1.7835, -11.4760,
         -1.0093,  -0.6205,  -2.8219,  -1.2351,  -1.7027,  -3.8072,   0.7655,
          1.6079,  -7.5151,  -3.3832,  -3.4423,  -0.7851,  -4.5157],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0399, -2.0331,  0.7709, -0.2130, -1.7503, -3.3221, -1.4353, -1.7727,
        -0.1661,  4.2322, -4.3989, -1.7754, -2.1651, -5.0691, -3.3173,  1.2870,
        -1.9125, -0.5712,  0.0513, -1.5706], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6582, -0.7930,  1.4506, -3.7859, -0.7947, -3.9801, -3.1892, -0.0245,
         2.4573, -4.6212,  0.0311, -0.5633, -2.4497, -2.1282,  4.4952, -8.7453,
        -0.6609, -3.6725, -0.9250, -2.1705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.8488,  -2.6499, -10.0835,   0.3974,  -5.4121,  -0.0799,  -3.6966,
         -3.9792,  -7.1265,  -6.0006,  -5.3705,  -7.3668,  -1.4790,  -6.6879,
         -2.5881,  -3.7927,  -0.9907,  -3.6817,   1.3628,   0.1096],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6929, -6.5259, -3.6088, -0.4569, -4.4798, -3.3662, -5.0635, -6.8029,
        -3.0823, -4.8659,  0.6359, -3.0962, -3.0374, -5.3804, -4.6691, -1.9371,
        -3.9133,  1.4422, -5.4736, -5.7436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8123, -39.4767,  -8.0395,  -5.0934,  -6.4023,  -0.7772,  -5.6504,
          4.3423,  -2.9424,  -0.4679,  -1.0819,  -3.7004,  -0.7182,   3.4683,
         -2.1312,  -1.6256,  -1.2658,  -0.9842,  -8.7577,  -0.1728],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2829e-01,  3.0358e+00, -9.8457e-01, -5.0620e-01, -1.5515e+00,
        -4.6313e+00, -7.0162e-02, -7.3251e-01, -2.9765e+00, -8.8570e-01,
        -3.3648e+00, -3.3762e+00, -1.4888e+00,  8.4285e-01, -1.1135e+01,
        -1.0702e+00, -1.0897e-02, -3.6018e+00,  1.1923e+00,  4.1918e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6822,   0.1305,  -6.3128,   5.3232, -14.2727,  -2.3012,  -3.6492,
         -2.5579,  -4.8584,  -2.1621,   2.0724,  -3.6784,   0.1420,  -1.6544,
         -0.9019,  -0.3569,   3.3526,  -5.9375, -12.7344,  -2.4233],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6675, -4.9690, -0.8297,  2.4485, -3.9138, -2.1391, -1.9522, -6.6156,
        -2.8970,  0.2795, -2.4072, -2.0888, -3.5929, -4.2973, -9.0697, -1.3457,
        -0.5217, -1.2540, -0.1698, -0.7410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4572,  -1.3923,  -2.3494,  -1.9632,  -0.1549,  -2.2178,  -2.5000,
         -4.9235, -23.1712, -10.2961,  -4.2208,  -6.9318,  -0.4542,  -1.6099,
          3.2875,  -6.0154,  -0.8472,  -2.0362,  -0.8365,  -5.8657],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1978, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8451,  -7.3740,  -6.2471,  -6.0712,  -1.4535,  -4.0026,   2.5892,
         -5.0495,  -1.4058,  -0.6834,  -3.3896,  -2.4385,   1.1194,  -2.1404,
         -1.1749, -11.6934,  -6.3232,  -2.6843,  -5.8179,  -1.6667],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2839, -3.1445, -2.0816, -6.4070,  1.7213, -0.3935, -2.7452, -1.1362,
        -1.2927,  0.7664, -2.4938,  2.0379, -5.8726,  0.1847, -1.8379, -2.8481,
        -0.7312,  2.8501, -7.7005, -2.7144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9385,  -6.5537,  -0.9532,  -5.4723,   4.4933,  -7.6137,  -2.2840,
         -1.8629,  -8.7892,  -8.9423,  -0.2418,  -3.5600,  -0.9437,  -2.4798,
         -2.4452,  -2.5358,  -0.3762,  -3.7971,  -1.9188,  -3.6475],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3551, -0.3161, -4.1865,  3.8490, -4.5184, -0.7061, -4.2702, -3.1460,
        -6.8062,  2.5890, -3.1524,  0.6382, -1.8219, -2.3544, -6.3493,  1.1242,
         0.7771, -2.7500, -1.1039, -2.4237], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8141, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1580, -11.7244,  -5.8345,  -7.3595,  -2.1382,  -2.0111,   2.3443,
         -4.0367,  -1.4350,  -2.3644,  -5.4754,  -0.4623,   3.2260,  -3.0339,
         -0.2718,  -2.9774,  -5.8812,  -1.5479,   1.1121,  -3.4874],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4418,  0.4643,  1.3677, -6.5954, -2.9262, -8.8185, -7.5530, -3.8173,
        -5.6591, -3.9256, -2.5428, -6.8553, -3.8890, -3.4192, -1.3827, -3.3964,
        -0.1164,  3.0274, -9.3656,  0.6642], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4569,   3.3562,  -2.6753,  -1.7490,  -1.6914,  -1.3634,   0.3415,
          1.1941,  -1.7086,  -1.1839, -13.0248,  -8.9870,  -8.7612, -13.0453,
         -7.0110,  -0.8108,  -0.8227,   3.9787,  -6.5155,  -0.8936],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6262,  -2.2978,  -4.1995,   1.2642,   2.5257,  -5.0765,  -1.6762,
         -3.3094,  -3.5460,  -3.9429,   2.7710,  -8.4334,  -1.7948, -14.9292,
         -4.2883,  -6.6507,  -3.3849,  -4.6095,  -1.2974,   2.6176],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5581,  -6.0220,  -1.2185,  -4.1710,   2.1643,  -3.4533,  -3.0043,
         -3.8927,  -6.0282,  -1.4375,  -1.7989,  -3.3453,  -1.7819, -10.4865,
         -2.3702,  -6.1048,  -0.6467,  -1.2765,   3.1131,  -5.9507],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0420,  -3.7918,  -5.4378,  -1.4809,  -1.6956,   2.9259, -25.4895,
         -0.4582,  -4.7421,  -9.1846,  -6.2824,  -0.7554,  -5.5206,  -2.9191,
        -10.5071,  -6.6196,  -3.9749,  -5.6400,  -3.2591,  -0.5099],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2943, -0.1409, -2.6319, -1.2381,  4.1237, -3.9391, -0.2419, -1.6264,
        -1.0696, -3.6853,  1.7449,  1.0908, -2.7196, -1.3672, -1.5623, -1.6362,
         0.9320,  2.8248, -3.1804, -4.4205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6970,  3.4221, -5.3743, -0.9062, -1.5828, -1.3372, -2.9059,  3.4848,
        -4.3121, -0.0608, -1.6819, -0.7501, -7.2580, -0.0786, -2.9074, -1.7124,
        -1.4012, -0.4501, -4.1123, -0.0877], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4551, -0.5916, -4.3029,  0.9135,  3.5006, -1.8270,  0.0688, -2.3352,
        -0.7004, -3.8083, -3.4439,  1.1261, -2.2159, -2.2515, -3.2409, -1.5440,
        -1.9821,  0.9348, -1.5145, -5.7454], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0063,   1.3526,   2.1895,  -2.0716,   0.2335,  -0.5632,  -1.0434,
          0.7670,   3.9421,  -2.6134,   0.5305, -14.6860,  -2.8386,  -8.8294,
         -1.3279,  -3.2668,   3.0615,  -2.5935,  -3.6909,  -4.4966],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0475, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3090,   1.8100,  -4.7831,  -1.3239,  -0.1661,  -3.6377,  -0.1656,
          2.6740,  -2.1094,  -0.9260, -11.3412,  -5.5377,  -4.4128,  -7.4088,
         -0.4776,  -0.4560,   1.8593,  -3.7114,   0.2668,  -5.1401],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0494, -3.0120, -5.3029, -6.7828,  1.2880, -3.2802, -0.2352, -1.0448,
        -3.5865,  0.0646,  1.2900, -3.0677, -0.0861, -3.1334, -5.7170,  0.6651,
         2.4448, -5.8200, -1.4293,  0.0332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4974,  0.4548, -6.7850,  0.6329, -0.1642, -6.2370, -0.4098, -3.7548,
        -2.7540,  0.6109,  1.6504, -8.7151, -1.2486, -3.0503, -3.2406, -3.4938,
         0.1476, -6.7606, -0.1512, -0.9988], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5422,  -1.3145,  -4.6223,  -0.4565,  -3.9387,   0.8345,   0.7592,
         -2.8843,  -0.9680,  -2.3258,  -2.3738,  -9.6206,   0.5672,  -1.2056,
         -3.9341,  -4.7912,  -4.1764,  -0.8424, -21.2392,  -0.5374],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4923,  -1.6241,   0.9477,  -3.8481,  -4.1223, -14.4056,  -5.4038,
         -2.4264, -11.9282,  -4.7020,  -2.7911,  -7.1617, -14.2547,  -1.9542,
         -1.3141,  -6.7717,   0.1041,  -5.8155,  -7.3665,  -1.1058],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1328,  -2.2250,  -2.8847,  -2.5621,  -2.0860,  -4.0171,  -2.4726,
         -3.2019,  -3.9800,  -3.1751,  -3.5012,   0.3401,  -6.8743,  -3.0911,
         -3.5212,  -4.0915, -16.8602,  -4.9256, -10.1148,  -5.8410],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5861, -0.7650, -7.6962, -6.0806,  3.3517, -9.1821, -0.5165, -3.3769,
        -6.1368, -0.6970,  2.8277, -2.2764,  0.1474, -1.0981, -1.6180, -0.4218,
         3.9698, -1.7483, -0.9470, -2.7065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6689,  -1.6459,   2.4460,  -1.8141,  -0.6721,  -1.1764,  -1.7178,
         -1.5259,   2.2112,  -1.1751,  -1.3890,  -7.2027, -10.2850, -19.4390,
         -5.7803,  -4.8721,  -0.3089, -12.2536,  -1.4459,  -4.1992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7412, -0.8926, -2.5933, -1.3835, -0.2505, -3.3582, -0.2032, -1.9729,
        -0.6602, -2.9411,  4.0983, -0.5842, -0.1403, -1.6487,  0.2523, -4.6379,
         2.3529, -6.3689, -3.5842,  0.5263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2682, -1.4762, -2.0571, -3.4902,  1.2341,  2.6027, -3.8027, -1.6011,
        -8.0635, -7.2651, -3.9736, -5.4687, -2.9604, -1.4590, -1.8941, -2.3045,
        -4.6227, -3.1594, -5.0696, -2.4700], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9220, -1.7102, -2.8668, -3.9789, -0.7888,  3.6830, -3.7022, -0.0670,
        -3.1107, -5.1668, -0.7566,  3.5870, -6.6088,  0.9833, -5.2156, -2.6715,
        -4.3116, -2.4412,  2.0677, -2.9256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0736, -3.1766, -0.6122,  0.2768,  4.7701, -3.4551, -1.2726, -1.3778,
        -3.4946, -3.9284,  1.0292,  0.8561, -1.8354,  0.0751, -1.3435, -2.7538,
         0.4694,  3.8998, -1.4312,  0.0734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7039,   0.0265,  -4.1041,  -2.2336,  -0.7937,   3.6924,  -0.9504,
         -3.6721, -16.9447,  -7.7505,  -3.9524,  -6.2521,  -0.0747,  -6.3214,
          2.5556,  -2.5203,  -0.9840,  -3.1775,  -1.9155,  -3.3270],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8047,  1.2073, -4.1815, -1.4313, -4.2400, -2.5201, -4.7898, -0.5486,
         2.7494, -2.2163,  0.1322, -1.3295, -8.1774,  0.4487, -5.0169, -5.9678,
        -2.8117, -3.2441, -2.3661,  0.3610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5487,  -3.4230,  -3.8045,  -5.2761,  -3.6353,   2.7887,  -2.1309,
         -1.3988,  -0.4705,  -6.0286,   0.5431,   0.9921,  -2.2669,  -1.6010,
         -2.1829, -26.9187,  -5.5907,  -6.7670,  -0.9911,  -0.4536],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4647,  -2.2760, -15.0320,  -5.8388,  -2.9389,  -6.5951,  -3.3441,
         -1.7530,   2.2259,  -6.0381,  -3.0261,  -6.9542,  -5.0967,  -2.7276,
         -1.3019,   0.8656,  -3.4679,  -1.6724,  -1.3069,  -3.7814],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.3509, -3.0128, -2.0780, -5.3026, -0.6871, -3.7559,  0.3737,  2.1456,
        -2.4650, -7.9943, -2.7147, -2.2354, -6.6993, -4.0052,  1.8371, -7.4787,
        -0.2121, -2.9468, -5.5351, -1.3275], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5483, -6.9298, -1.6729,  2.0978, -1.2674, -1.2585, -3.3897, -3.7542,
         0.1191,  1.9122, -3.7420, -6.8517, -1.4594, -7.6849,  0.9767,  2.0103,
        -2.5079,  0.8370, -2.8652, -3.8528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0850, -2.3362, -2.2821, -4.3493, -1.5192,  2.8144, -2.3257, -1.6335,
        -0.0390, -4.7991,  1.3448,  3.9548, -2.2832, -0.9806, -2.5817, -0.0503,
        -1.3970,  3.5248, -4.2671, -0.3759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2999,  -2.7615,  -4.1761,   0.1025,   2.8645,  -3.3338,  -0.8138,
         -1.3138,  -5.7525,  -0.4229,  -1.2091,  -4.0581,  -2.8337, -12.8517,
         -5.8565,  -2.4982,  -6.3360,  -1.5753,  -2.5392,   3.1581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2870, -2.6935, -2.3768, -3.7139, -0.4347,  0.2896, -5.3119, -0.1810,
        -1.2878,  0.4123, -4.5524,  3.8048, -7.9305, -1.2631, -1.4655, -1.4598,
        -9.6779,  1.7694, -2.8533, -5.7494], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8053, -1.4616, -0.3626, -3.5690, -3.5994,  1.8561, -5.4431, -0.1647,
        -3.9058, -2.2526, -5.6465,  2.5828, -0.8914, -1.3015, -2.8022, -4.7104,
        -2.4853,  1.2214, -5.5811, -2.2301], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8439, -1.4676, -4.9190,  0.7093,  2.8275, -4.7969, -4.0972, -1.3202,
        -1.6050, -6.7882,  1.6491, -3.1581, -0.2296, -3.0573, -2.0781, -8.8015,
        -3.5001, -1.0213, -1.7416,  0.0569], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5251,   1.2350,  -4.7168,  -2.3376,  -2.7020,  -3.2080,  -5.2260,
         -0.7576,  -5.5301,  -2.6847, -10.1735,  -4.1525,  -8.5810,  -5.9801,
         -5.9675,   1.2123,  -8.7593,  -2.3596,  -3.4695,  -6.5638],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5842, -0.1403, -1.6487,  0.2523, -4.6379,  2.3529, -6.3689, -3.5842,
         0.5263, -2.7905, -0.5025, -6.3715,  3.5677, -1.5121, -0.1255, -3.3918,
        -3.0480, -2.3887, -1.7301, -0.8275], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.7435, -1.7874, -0.3903, -0.7333, -0.4542, -3.1411,  4.3355, -3.6485,
        -2.3557, -1.3659, -1.6815, -4.8831, -1.2039,  1.0064, -2.8009, -1.1988,
        -1.4817, -2.4734, -1.7941,  2.6936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8474,  -2.4776,  -1.1932,   3.8013,  -1.8393,  -4.8469, -14.6827,
         -7.3251,  -3.3239,  -6.4481,  -2.0567,   0.4096,  -6.6914,  -5.4840,
         -1.7990,  -1.0753,  -3.7872,   0.4931,   4.1598,  -1.8715],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8788,   0.2893,   3.1570, -12.0545,  -2.8301,  -5.0759,  -3.0047,
         -6.0814,  -2.8095,  -0.2332,  -3.3394,  -4.9456, -11.6918,  -8.6435,
         -3.9048,  -6.8182,  -0.8404,  -1.8447,  -3.0342,  -3.7809],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7669, -2.9486, -4.5607, -2.4753, -1.9196, -4.4599,  2.3404,  1.9641,
        -1.9323, -1.4119, -1.3037, -5.6923, -0.5444, -1.7617, -5.4612, -0.1867,
        -1.9528, -6.2795, -1.0243, -8.5837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8490,  -0.7097,  -2.9737,  -1.7504,  -5.0167,  -4.0842,  -2.1520,
        -24.2741,  -0.7505,  -2.5414,  -4.7683,  -1.6962,  -3.2725,  -0.6157,
         -4.2077,   1.4791,  -1.6783,  -2.8861,   0.1926,  -3.9114],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3751, -4.3148,  0.5061,  3.6636, -3.1396, -1.8667, -1.7391, -2.0295,
        -6.0180,  1.3299,  0.8666, -2.2073,  0.4254, -0.2012, -2.6540, -1.2759,
         1.8594, -3.4311, -1.1275, -1.6115], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2806, -0.9796, -5.4186,  1.1471, -0.5578, -5.6106,  0.0063, -0.5417,
        -1.5367,  0.5960,  3.7820, -3.8115, -0.6984, -0.4493, -2.3729, -1.5059,
         4.1307, -2.9945, -0.3607, -3.0441], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2894, -0.0594, -2.5010, -3.0081, -3.8751, -1.4892, -4.4812,  0.8287,
         0.3755, -5.2115, -0.2931, -2.1005, -6.9392, -3.6385, -8.7080, -4.3034,
         0.1233, -2.4617, -2.7137,  0.4995], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2882, -1.1280, -2.3148, -0.1258, -1.0631,  0.5140, -2.5790,  0.3325,
        -1.6978, -4.8906, -3.3986, -2.0654, -1.4497, -0.8934,  4.3963, -2.7964,
        -3.7607, -4.0478, -2.4949,  0.2720], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2286,  -4.1239,  -2.4574,   0.3350,  -5.5134,  -0.7169,  -1.7075,
        -14.9406,  -0.2207,  -3.3962,  -1.7532, -11.1543, -11.8948,  -7.6769,
         -6.7729,  -8.5943,  -5.3358,  -5.4764,  -2.8743,  -6.4349],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6823, -1.2492, -2.4447, -4.6427, -4.5771,  3.7660, -3.3518, -2.5503,
        -0.5641, -3.9081, -1.0591,  3.2391, -4.6404, -3.6933, -4.5959, -4.0573,
        -5.0931, -3.0392, -3.0607, -6.0100], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6967, -2.7429, -2.2252,  4.8787, -5.2828, -0.0699, -2.5674, -2.7715,
        -1.6956,  1.6960, -2.8818, -1.2684, -4.2529, -0.8656, -9.4782,  1.1117,
        -0.7825, -3.1934, -1.2197, -3.9105], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1203e-01, -1.0418e+01,  2.8011e-01, -7.0221e-01, -2.4289e+00,
         1.1163e-03, -2.0864e+00, -3.2956e-01, -2.9771e+00,  4.0184e+00,
        -3.0529e+00, -1.0889e+00, -3.4039e+00, -2.1290e+00, -4.3796e+00,
         4.4631e-01,  1.7292e+00, -4.6301e+00, -4.2109e-01, -1.5083e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9143, -1.2488, -5.5844, -1.9819,  0.6712, -4.3340, -1.9492, -7.7783,
        -6.3241, -3.5285, -5.6333, -4.2256, -0.4702,  2.4221, -4.3460, -4.2168,
        -1.2490, -4.7980,  0.4726,  2.4207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8923,   3.5525,  -3.3245,  -1.6310, -19.0988,  -5.6812,  -7.2703,
         -5.4935,  -1.2970,   0.3076,   1.2476,  -7.9100,  -4.4697,  -4.3096,
         -2.8259,  -6.8901,  -1.0923,   1.7976,  -2.2441,   0.0290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9931, -12.9062,  -2.1934,  -3.1577,   2.1083,  -2.7373,  -4.8620,
         -7.3415, -13.1166,  -1.5554,  -7.8356,  -0.8561,  -0.2383,   3.3566,
         -4.3233,   0.3152,  -2.5577,  -1.5353, -10.7035,  -2.7978],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3436, -2.7069, -1.2777, -1.2349, -1.0751, -6.1996,  1.8424,  0.1380,
        -3.5617, -1.5469, -0.0674, -2.8734, -0.7452,  3.2243, -3.3049, -1.6993,
        -3.9810, -0.8902, -6.4411, -1.7901], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3588, -6.6441, -0.1836, -2.2589,  0.1043, -5.4318,  1.3657, -1.8373,
        -4.2485, -0.5320, -3.6410, -0.2135, -3.4468,  1.0991,  1.9811, -2.0829,
         0.1179, -2.8481, -2.0407, -5.8383], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.5360,  -9.9318, -19.9260,  -2.9509,  -5.1101,   0.1724,  -4.1201,
        -13.7476,  -4.2821,  -5.8523,  -3.9840, -19.9237,  -2.0221,  -9.2678,
         -3.6099, -11.1653,  -3.5604,  -3.3829,  -7.8828,  -5.3525],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6035, -0.1700, -1.3063, -1.7529, -0.3009,  4.1187, -5.4179, -1.1416,
        -0.7309, -2.7400, -1.6663,  2.0486, -1.3703, -0.1238, -3.4436, -0.4701,
        -5.2570, -2.5259,  2.0003, -2.4087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.0077,  -2.7132,  -7.2040,   0.3633, -20.1864,  -2.7775,  -7.2179,
        -15.4329,   0.0950,   3.5930,  -4.0505,  -5.9984,  -3.6583,  -2.6974,
         -4.0411,  -3.3560,   2.0981,  -2.8333,  -2.1663,   0.1311],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1622, -8.3045, -0.7493,  0.1596, -4.9507, -2.2169, -3.0985, -1.0327,
        -7.9310,  0.6123,  0.2443, -1.2918, -0.0848, -0.3700, -4.4937,  1.4709,
         2.9875, -1.9083,  0.8512, -0.5686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8078, -1.1792, -4.2246,  1.4401,  0.5340, -2.3270, -0.3491, -3.6143,
        -2.5134, -4.6471, -3.5965, -0.3649, -3.2058, -2.1926, -4.0278, -1.5326,
        -5.2188,  1.2674,  1.0296, -2.0889], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0936,  1.5990, -0.6505, -2.1539,  0.4082, -2.9399, -0.8319, -6.6532,
         0.6459,  1.7401, -2.8021, -2.7500, -5.3809, -0.7273, -5.4972,  1.7456,
        -0.9523, -5.9574, -0.8957, -1.5451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2919, -11.2819,  -7.2462,  -3.7302,  -4.3149,  -6.6750,  -3.4745,
         -6.9915,  -6.3601,  -4.6504,  -4.8108,   1.3771,  -4.5981,  -2.9135,
         -4.7571,  -6.6072,  -7.1736,  -6.4415,  -7.1691,  -5.6239],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8832, -6.1984, -0.7901, -4.8634, -2.7943, -4.5171, -1.9623, -2.7725,
        -2.6410, -1.3568,  2.4022, -4.4554, -0.7139, -5.6780, -1.3861, -3.7592,
         0.3230,  1.9604, -2.4503, -2.4293], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3483, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6360,  -0.6981,  -1.3280,  -2.2086,  -4.9611,   0.5270,   1.7023,
         -1.2414,  -1.9106,   0.1777,  -3.2975,   0.2927,   2.9032,  -1.8128,
         -0.0971, -12.4012,  -2.7535,  -6.7806,  -0.2609,  -2.3268],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0505,  0.3066, -2.3322, -0.4155, -0.5891, -4.8851,  0.7223,  2.0820,
        -3.0611, -0.0538, -2.9350, -2.5237, -7.8441,  3.5344, -2.8517,  0.5578,
        -1.5218,  0.6191, -3.0549,  4.7208], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4497, -4.5632,  0.9041,  0.7794, -3.3530,  0.1334, -2.1185, -2.8273,
        -4.7173, -1.0103,  1.9487, -3.2616,  0.1091, -0.8921,  0.1368, -1.7590,
        -1.1395, -3.2950, -0.7316, -3.8028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9144,  -5.4699,  -6.1168,  -5.7541, -22.2976,  -5.4184,  -9.3462,
         -4.4789,  -3.9880,  -6.2087,  -3.8915,  -6.9534,  -3.3687,  -4.3231,
         -1.7494,  -4.5118,  -5.8010,  -5.3240,  -9.2038,  -5.3690],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5649, -16.2560,   2.1412,  -2.1753,  -8.0253,  -2.4410,  -3.6771,
         -2.5609,  -4.5084,  -0.5523,   2.3567,  -2.9756,   0.9500,  -1.2382,
         -3.7197,   1.4582,   3.4730,  -2.9872,  -0.6262,  -4.7760],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5557, -0.8485,  1.7086, -6.4543, -1.3540, -2.4942, -1.9474, -5.7827,
        -4.3736,  0.7429, -6.8400, -2.6623, -0.2322, -4.7279,  0.7636,  1.6374,
        -1.5379,  0.4940, -3.4753, -2.2979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1334,  -3.3237,   4.8525,  -3.5913,  -0.3404,  -1.6765,  -4.7167,
         -0.4930,   1.1532,  -6.2247,  -0.5167,  -3.2790,  -1.2149, -12.8338,
         -3.3039,  -2.4952,  -6.3242,  -4.6929,  -0.6654,  -3.1373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2182, -5.3915, -5.8461, -1.4456, -3.1216,  0.8456, -5.6433, -4.9926,
        -3.4858, -6.4627, -7.9949, -4.8547, -3.6114, -2.5329, -8.6014, -7.0639,
        -4.0524, -5.5779, -2.2280, -4.0239], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4483,  -5.9770,  -3.8258,  -0.2298,   0.4152, -10.4618,  -5.7073,
         -0.7863,  -0.2382,  -7.5816,  -0.8140,   0.5277,  -3.1036,  -0.4376,
         -1.7475,  -1.7289,  -0.1142,   4.4202,  -5.1252,   0.2047],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2063, -6.7898, -9.4243, -8.7896, -7.7865,  2.4864, -2.0240, -6.1148,
        -4.1738, -8.6404, -5.3589, -2.3889, -6.8863, -0.6638, -3.3430,  1.5474,
        -3.2779, -2.3629, -2.7596, -0.7338], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2345, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7776, -11.8051,  -9.2982,  -2.5495,  -6.7949,  -0.4161, -12.9812,
         -3.8505,  -3.4123,  -0.2468,  -5.3253,  -4.7982,  -0.1378,  -3.3940,
         -4.1650,  -2.6964,  -6.5095,  -2.2354,  -0.0659,  -7.1252],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.8415, -7.7424, -0.1006, -1.6638, -4.3338, -1.7127,  3.9548, -4.2069,
        -6.6216, -1.1913, -2.1203, -5.3382, -0.9404,  1.9800, -1.9661,  0.1417,
        -1.5151, -3.8923,  1.1623,  1.0490], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1826, -0.7278, -1.3852,  0.1387,  3.2976, -4.6804, -3.1772, -1.9148,
        -3.2472, -3.2500, -1.5947,  0.4372, -0.5556, -0.2449, -1.1872, -0.4886,
        -0.2169,  3.5937, -4.1605, -1.3425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0787, -0.2930, -3.5506, -0.9573, -3.8094, -0.0628,  1.6248, -1.7685,
        -4.3630, -0.2886, -5.8336,  0.5691,  3.3138, -3.1694, -1.6508, -3.7993,
        -1.1990, -5.0124,  2.6838, -0.7386], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0608,  1.8561, -1.7120, -0.6675, -0.9894, -3.7319,  0.5485,  2.8672,
        -3.3244, -2.2532, -2.1911, -3.1867, -4.5718, -5.0311,  1.4929, -1.8070,
        -1.1106, -1.4186, -2.0171, -4.5773], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5382, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9569,  1.3103, -2.9890, -2.2367, -1.5301, -0.4901, -0.2299,  4.7895,
        -2.8529, -0.9481, -2.5670, -0.2181, -6.3113,  2.0653,  1.6048, -1.8157,
        -1.6496, -2.6206, -5.0463, -0.3077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8906,   3.7088,  -3.6008,  -2.1428,  -1.9063,  -4.8115,  -3.5022,
          0.2191,  -4.5068,  -1.4039, -21.3984,  -6.4889,  -5.1488,  -5.5987,
         -1.2039,  -0.9671,   3.8817,  -2.0147,  -1.6652,  -2.3704],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1309,  -7.3065,   0.7749,   0.9619,  -3.6238,  -1.0997,  -2.9053,
         -1.2516,  -4.8353,   2.2309,   1.6226,  -3.9609,  -1.3466,  -1.6577,
         -0.4330,  -0.6861,   2.9759,  -3.8865,  -2.1203, -10.5883],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0619,  -0.6513,   2.0953,  -7.0327,  -1.8146,  -0.9542,  -0.0402,
         -4.8659,   0.9375,  -4.3390,  -2.8023,  -3.2920, -17.7626,  -7.4594,
         -7.3562,  -1.2167,  -0.6232,   2.0737,  -3.5642,  -1.7694],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.0077,  -2.7132,  -7.2040,   0.3633, -20.1864,  -2.7775,  -7.2179,
        -15.4329,   0.0950,   3.5930,  -4.0505,  -5.9984,  -3.6583,  -2.6974,
         -4.0411,  -3.3560,   2.0981,  -2.8333,  -2.1663,   0.1311],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9697, -3.3310, -4.7976, -1.3365, -1.0553, -2.0984, -3.1288,  4.4173,
        -1.9368, -2.9728, -7.4374, -9.9169, -4.4339, -8.3246, -5.9818, -3.0873,
        -3.0565,  3.3573, -6.4757, -0.6913], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4283, -23.8645,  -1.8199,  -0.5829,  -5.4792,  -1.7555,  -4.4517,
         -3.5513,  -4.0956, -10.3217,  -3.2443,  -6.7200,   0.1475,  -1.8986,
          3.0222, -10.7982,  -1.5105,  -3.9416,  -6.2112,  -3.8400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5517,   0.4040,  -2.8982,   0.2112,  -8.3267,   0.6370,   1.0030,
         -5.7732,  -2.6346,  -1.3876,  -5.8504,  -0.8484,   3.1714,  -5.7434,
         -4.3766, -22.4604,  -7.8800,  -6.7678,  -6.2878,  -0.2502],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0431,   4.2925,  -0.8922,  -5.4955,  -3.3956,  -2.0345,  -3.7190,
         -6.2538,  -0.9333,  -4.2997,  -3.4524,  -2.0301,  -1.5672,  -0.6155,
         -2.9045,   3.5985,  -1.1746,  -2.7513, -15.2841,  -3.9015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5258e-03,  8.7654e-01, -6.3265e+00, -2.1332e+00, -6.1942e-01,
        -2.9628e+00,  7.6391e-01,  3.6752e+00, -2.4331e+00, -7.7332e-01,
        -1.7203e+00, -7.7053e-01, -1.5548e+00,  3.7798e+00, -2.9532e+00,
         6.4791e-01, -2.2972e+00, -2.9620e+00, -4.1400e+00,  1.1377e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.0385, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.9859,   3.8248,  -3.1984,  -8.5726,  -1.8681,  -2.0070,  -6.5769,
         -0.6879,  -0.9955,  -4.3111,  -0.4582,  -6.1661,  -1.1113,  -5.6390,
          0.8854,  -2.6619,  -3.5777,  -0.7445, -11.7787,  -3.8410],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.2741,  -2.9766,  -1.6519,  -1.5931,  -2.3307,  -4.0426,   2.3969,
         -1.2672,  -0.8050,  -1.9108, -24.9677,  -5.9472, -20.2047,  -5.0102,
        -14.4986,  -5.4525,  -8.5635,  -6.7353,  -5.3345,  -1.8095],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1579,   2.0255,  -2.2023,  -0.4969,  -1.4734,  -0.4526,   0.1995,
          3.7487,  -2.3440,  -1.1506, -14.7971,  -6.7701,  -2.5592,  -5.4089,
         -1.7344,   0.4712, -10.7180,  -2.6873,  -2.0646,  -4.4380],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0783, -7.7695, -4.4067, -4.7127, -6.7668, -6.0804, -3.2862, -2.9059,
        -2.6535, -3.0998, -5.0083, -5.3439, -8.0215, -4.7133, -6.4497, -2.5080,
        -7.3124, -2.4680, -1.2304, -3.8584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.6337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.6542, -2.6323, -1.1557, -2.0253, -1.7567, -4.1281, -1.0829,  2.1588,
        -3.1404, -8.3419, -2.7752, -1.9149,  0.6134,  0.0581, -6.4390, -0.8843,
        -3.7269, -2.0618, -1.7207,  0.7469], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7777, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3307,  -0.7189,  -0.6124,  -5.3731,   0.1901,   2.4803,  -3.6981,
         -1.5111, -14.0187,  -6.6756,  -3.5818,  -5.7255,  -0.4940,  -5.7500,
          0.2099,   2.7060,  -1.9192,  -0.1770,  -1.3477,  -3.6182],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5483, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2513e+00,  2.0344e+00,  3.9367e+00, -1.8981e+00, -1.7701e-02,
        -2.6696e-01, -3.0204e+00, -6.7036e-01, -9.2100e-01, -2.5193e+00,
        -2.5692e+00, -2.6311e+01, -7.1504e+00, -1.1659e+01, -5.5744e+00,
        -6.1403e+00, -8.0690e+00, -3.6247e+00,  2.6090e-01,  4.7546e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4398, -2.1993, -0.5470, -3.2450, -2.6476, -5.9008,  1.9752, -3.7918,
        -0.1723, -5.4628, -1.4134, -4.2790,  0.8731, -1.1541, -5.3615, -4.5234,
        -4.4086, -2.2372, -6.8986,  0.2774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.7718,  -4.6598,  -0.7740,  -4.3261,  -3.6534,  -5.7112,  -0.6340,
          2.2294,  -3.6954,  -4.4673,  -2.3723,  -0.6395,  -0.2945,   3.3313,
         -4.9014,  -0.3497,  -1.9838,  -6.4987,  -2.5995,   2.1377],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8214,  3.2349, -3.0536, -1.2686, -4.1889,  0.6079, -5.1380,  1.1621,
         1.2652, -2.4863, -1.1011, -2.8022, -4.9174, -1.9288,  3.1376, -2.7557,
        -4.1235, -2.6893, -3.4919, -3.8503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5963,  -2.8229,   2.7611,  -2.6980,  -0.0681,  -4.3379,   1.1193,
        -15.7997,  -1.7622,  -3.4783,  -2.3185,  -2.4910,  -4.5571,  -2.9497,
         -2.6323,   0.6566,  -6.1560,   0.4049,  -1.2651,  -6.8851],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7694, -0.8612, -0.1679, -6.2662,  3.0473, -2.7688, -1.6781, -1.5734,
         0.5573, -2.3974,  3.9987, -3.0380, -0.5026, -1.8142, -2.7870, -0.5453,
         1.4152, -1.9907, -1.0370, -2.0500], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0614, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2609,  -0.7256,  -4.0184,  -2.3579,  -3.6744,   0.3999,  -0.4333,
          1.9703,  -1.8702,  -4.0639,  -5.2900,  -3.4402,  -6.4548,  -0.9299,
         -0.1842, -11.2476,  -3.5043,  -0.7953,  -1.3777,  -7.3502],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8304, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.5424,  -2.5662,  -3.2725, -15.5089,  -8.7529, -11.1549,  -7.5163,
        -19.2116,  -4.2976,  -1.0525,  -4.7073,  -4.8419,  -2.5468,  -5.6155,
        -14.3213,  -3.0882,  -2.8738,  -5.6039,  -1.1662,  -2.3606],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4621, -11.2462,  -3.8814,  -9.4072,  -1.0880, -10.0835,   3.8194,
         -3.0664,  -2.5707,  -4.0273,  -3.4306,  -3.8211,  -5.3100,   0.8999,
         -2.4046,  -0.0379,  -2.2545,  -1.0091,  -6.5211,   1.2775],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8273, -1.2271, -0.5800,  3.0480, -9.2289, -1.2960, -3.1359, -1.6692,
        -7.2393,  3.7751, -3.7508, -1.1975, -9.4813, -1.9157, -8.2246, -0.6106,
        -0.8508,  3.1547, -4.2578, -2.9863], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0777,  0.0979, -2.4124, -0.4484, -6.4436,  2.2023, -1.2777, -2.1031,
         0.1304, -3.4384, -2.4063, -4.3241,  3.6584, -1.1540, -0.5169, -0.0658,
        -2.3704,  0.4256,  3.6452, -3.2096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2208,  2.0610, -2.9916, -1.2349, -2.2869,  0.0633, -4.0074,  2.2807,
        -1.0079, -2.7501,  0.3063, -0.8199, -1.8565,  1.4411,  3.7125, -2.3275,
        -1.6983, -0.4952, -2.0009, -5.7669], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7065, -2.1897, -1.0652, -6.8259,  3.8774, -3.5770, -1.5926, -2.8897,
        -2.1725, -4.2951,  2.0348, -6.1409, -3.6459, -0.6442, -3.2541, -1.6294,
        -5.0138,  4.2733, -8.5648, -0.5468], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0563, -2.8666, -2.9710, -3.7921,  1.8663,  0.6428, -1.1858, -0.2371,
        -3.1925, -5.2763,  0.3258,  1.5459, -3.3720, -0.8501, -2.2401, -0.3237,
        -7.3976,  1.3303,  0.0198, -2.6867], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5439, -3.3965, -2.5810, -1.0764, -2.6745, -3.9348, -6.6647, -2.2159,
        -3.4880, -3.2218, -1.1143, -0.2843, -1.2422,  1.6185,  4.0470, -1.9497,
        -0.7997, -9.1697, -2.7630, -8.6368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9164,  -2.0171,  -3.6890,  -1.1275,   2.1149,  -1.7866,   0.2044,
         -1.7636, -49.9338,  -4.3822,  -5.7581,  -3.0922, -14.4127,  -6.4549,
         -9.5273,  -5.1657,  -6.1849,  -1.1615,  -1.0629,   3.4174],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9969, -2.3335, -1.5937, -4.4871,  1.8883,  1.3402, -4.5505, -1.4359,
        -2.8098, -2.0851,  1.6460,  3.4078, -2.2368, -0.4966, -1.0213, -1.3679,
        -3.6390,  1.1311, -0.2980, -2.9753], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2842,   0.1195,  -2.2740,  -1.7448,  -2.0955,  -6.2068,  -0.2769,
          2.0132,  -6.4871,  -3.4242,  -9.6388,  -5.3966,  -4.6159,  -6.7214,
         -1.8964, -11.5038,   3.3069,  -1.7297,  -4.5338,  -3.6303],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3848, -0.5299, -1.2223,  1.9418, -2.4416, -2.7999, -4.0128, -3.3903,
        -4.5639,  1.8694, -0.3589, -2.7630, -1.3864, -2.7772, -0.4317, -3.4118,
        -4.6943, -5.7066, -0.9180, -1.4166], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1305,  -4.5985, -14.8650,  -4.8438,  -6.5162,  -0.5816,  -4.2893,
          4.0186,  -9.6049,  -0.8889,  -3.6049,  -0.3639,  -3.0504,  -1.5485,
          2.5835,  -3.0474,  -0.5660,  -4.5169, -12.1853,  -2.9179],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4870,  -2.1434,  -8.9379,   0.6121,   2.2998,  -2.7979,  -3.7572,
        -25.2928,  -8.5257,  -6.0775,  -6.4093,  -1.4071,  -3.6688,   2.4860,
         -3.2671,  -9.3471,  -3.6886, -20.6199, -11.8253,  -9.5450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1971,  -2.1237,   2.3168,  -2.6269,  -4.0638, -15.0603,  -6.1295,
         -9.9254, -15.3534,  -7.4147,  -2.4097,  -2.5857,   2.1264,   0.4545,
         -2.9083,   0.3937,  -1.8947,   0.0496,  -4.8501,   4.8597],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3154,  -1.8152,  -0.7197,  -5.7065,   0.0330,   0.3247,  -2.7637,
          0.0777,  -0.7083,  -3.2348,   1.1179,   3.5770,  -3.2293,  -1.6623,
        -23.1959,  -3.5769,  -6.3282,  -0.2773,   1.0292,   4.9474],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4448,  -7.1846,  -0.9507,  -1.6784,  -2.8684,  -2.7416, -12.8407,
         -6.4802,  -5.9437,  -4.7328,  -1.2218,   0.4557,   0.9463,  -1.7647,
          0.3564,  -1.9074,  -5.1451,   1.8047,   4.2491,  -2.7275],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5585,  1.5720, -2.4359,  0.1713, -2.7514, -0.4625,  0.0261,  4.0009,
        -6.7232, -1.5288, -2.6898, -1.6590, -2.8104, -0.6853,  0.0813, -4.8660,
        -0.2201, -2.9654,  0.5352, -7.8209], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0684, -1.2984, -1.8742,  4.2967, -2.6409, -0.3540, -2.5302, -4.9338,
        -6.8657,  1.3359, -3.3025, -1.0695, -1.9541, -2.1032, -7.6436,  0.6956,
         1.0984, -3.2178, -2.0874,  0.1696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5630,  -6.4307,  -1.8928,   1.2793,  -2.9332,  -3.4528, -10.5182,
         -2.5455,  -8.1659,  -1.0107,  -1.0495,   1.8039,  -5.6591,  -4.4976,
         -1.5892,  -6.8899,  -4.2616,   0.6548,  -6.9607,  -1.3307],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2913,  -0.9859, -16.7972,  -4.0307,  -7.6883,  -0.4134,  -6.0198,
          3.1829,  -9.2858,  -4.8204,  -2.5869,  -4.1535,  -4.7581,  -1.8492,
          1.9112,  -3.6092,   0.0367,  -2.8285,  -2.1756,  -5.6541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4046, -2.8440, -0.6180,  4.0891, -4.7055, -0.4610, -1.6685, -2.1542,
         1.0522,  3.9142, -2.7182, -1.8594, -8.7314, -6.4400, -3.7220, -4.4603,
        -1.9534,  0.5242,  3.1033, -7.6265], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8778,  -0.1081,  -2.4480,   0.2583,  -3.3399,   0.4399, -15.4081,
          2.2622,   0.1483,  -8.4786,  -1.4135,  -2.3032,   0.1919,  -4.2264,
          4.1763,  -2.1216,  -1.0525,  -4.1575,  -0.9783,  -2.6849],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1266,  -2.0112,   0.6533,  -0.9787,  -2.0270,   1.1581,   2.5395,
         -1.8855,   0.1838,  -2.9805,  -7.0416, -10.2339,  -0.3346,  -1.3645,
         -2.4356,  -0.7233,  -0.1643,  -1.6941,   0.9531,   3.2029],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7620,   1.2714,  -2.1408,  -1.8491,  -1.2070,  -6.5557,   0.8735,
          3.1963,  -1.9518,  -1.8562, -13.4966,  -8.9901,  -9.2022,  -8.4874,
         -2.9701,  -2.1602,   2.4922,  -2.8344,  -1.8277,  -1.6014],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6440,  -1.5296,   0.4204,  -2.3842,   0.4257, -12.5783,  -2.7441,
         -9.7158,   0.1441,   1.3283,  -5.6656,  -4.7741, -12.7560,  -4.2439,
         -6.9734,  -1.1959,  -1.8798,   3.1845,  -5.3747,  -1.1295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6043, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.0911,  -3.1398,  -0.6109,  -1.1881,  -1.1296,  -5.0675,   1.4686,
         -0.5473,  -3.9797,  -2.2506,  -3.9946,  -0.4220, -10.4419,   2.4226,
          0.3700,  -9.2420,  -1.6268,  -3.2683,  -7.5501,  -4.0741],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3333, -2.1766, -0.8511, -6.1772, -3.0301,  0.7972, -3.0895, -0.9779,
        -0.6789,  0.1009, -2.4584,  4.6815, -2.5618, -0.8873, -7.9240, -4.1039,
        -0.0846,  1.3141, -1.0101, -2.3627], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7013, -6.1183,  2.6278, -4.0693, -4.3917, -0.3617, -1.0022, -3.7569,
        -6.2093,  0.5359, -0.9046, -5.1484, -1.2039, -2.3372, -0.6913, -4.2631,
         2.2691,  2.1743, -3.2198,  0.5120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2772, -3.3104, -0.4607, -0.6958, -4.4106,  1.0630,  2.4084, -4.0689,
        -3.2931, -2.7433, -0.8456, -9.7554, -4.4603,  0.6836, -4.3768, -1.1277,
        -2.1401, -0.4615, -4.8293, -1.9645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0715,  -1.1678,   0.6616,  -1.6042,  -0.6264,  -3.2159,  -0.5731,
         -4.0473,   0.3652,   1.3278, -14.6502,  -0.1326, -13.2784,  -7.9333,
         -7.7147,  -1.9767,  -3.4110,  -7.0186,  -6.5076,  -5.2511],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3141, -2.8442, -3.8929, -1.6095,  0.1191,  3.9268, -3.5337,  0.1901,
        -2.2491, -1.4415,  0.0965,  4.8616, -1.9712, -3.1425, -2.5666, -0.6328,
        -0.7786,  2.5522, -3.2595, -0.3211], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9696,  -2.2369,  -3.7985,   0.1001,   4.4209,  -5.1879,  -2.1130,
         -1.8172,  -4.0925,  -2.2655,   0.9373,  -3.2940,  -3.2260, -13.8706,
         -4.7014,  -7.2389,  -5.7966,  -5.7568,  -1.0706, -11.0979],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5239, -2.5765,  2.0573, -4.3337, -4.2345, -3.3481, -4.2985, -2.9538,
        -5.9099, -3.7290, -3.4603, -6.6299, -6.3699, -7.5746, -6.0326, -2.7382,
        -2.3482,  1.8804, -1.7010, -2.8197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8927, -3.0303, -1.0759, -5.5562,  0.7793,  0.7576, -3.5704, -1.4665,
        -2.2077, -0.8740, -4.1048, -0.0358,  2.6959, -1.0300, -0.7128, -0.7465,
        -3.8656, -0.1610,  3.2595, -2.4872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8366, -14.8761,  -1.5555,  -7.2336,  -3.0314,  -1.3153,   3.3439,
         -4.2913,  -3.0107,  -1.1614,  -4.6691,  -1.4054,   1.1698,  -5.0311,
         -3.2046,  -3.1099,  -2.2145,  -4.3144,  -1.9499,   1.0191],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0839, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.5989,  -3.9922,  -7.0079,  -8.5022,  -6.6471,  -3.3751,  -4.2091,
         -0.8988,   2.8650,  -5.8478,  -2.9024,  -3.4466,  -6.3587,  -3.4740,
          0.9967,  -4.7295,  -1.3917,  -3.7195,  -2.2644,  -8.3012],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8897,  1.4124, -7.4749, -0.9031, -1.0330, -2.9566,  1.2201,  4.7448,
        -2.2721, -2.5254, -3.0044, -0.2265, -3.7814,  0.3610,  0.8774, -2.5289,
        -0.6264, -1.6441, -1.1669, -0.6082], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0498,  -3.2553,  -3.3884, -11.9501,  -8.8848,  -4.8453,  -6.1186,
         -3.2693,  -2.8312,  -2.0367,  -4.6958,  -3.7448,  -3.0199,  -5.3910,
         -1.8440,   1.8822,  -7.7137,  -3.0759, -20.8501,  -8.1757],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1079, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8300, -1.7402, -3.0687, -2.2315, -4.8424, -1.0820, -0.4146, -1.9362,
        -1.1666, -2.8485, -1.0343, -2.0785,  0.6669, -3.8082, -1.2019, -1.6047,
        -0.9039, -5.2826,  0.3702,  0.0403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8374, -1.4518, -0.5297, -2.4140, -9.7957, -2.0823, -0.9599, -2.4305,
         0.9393, -0.2588, -5.1786,  1.1719,  1.1725, -5.7951, -1.3103, -7.4380,
        -3.2865, -3.8717,  1.3032,  1.2953], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7993e+00, -1.2550e+01, -4.0428e+00, -6.4536e+00,  4.2145e-03,
        -4.6689e+00,  4.2128e+00, -8.2388e+00, -3.4555e-01, -2.3204e+00,
        -1.2875e+00, -8.7242e+00, -2.0287e+00, -1.9337e+00, -4.6517e+00,
        -1.5436e+00, -1.8162e+00, -5.4467e-02, -5.3828e+00,  7.9583e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7056,  -1.4217,   0.0298,  -2.2349,  -0.9774, -13.0516,   5.3319,
         -1.7026,   0.4630,  -2.5284,   0.1943,  -4.3662,   1.0475,   0.4343,
         -4.1656,  -0.1549,  -0.7275,  -2.9770,  -0.2300,   3.7073],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.0312, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0954, -0.9560, -2.2606, -4.2091,  0.5651,  0.8599, -3.9614,  0.5940,
        -1.9001, -0.9120, -3.4967,  4.2623, -3.6280, -3.1042, -2.0906, -0.5590,
        -2.1057,  2.5795, -3.5545, -3.5941], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4481,  -2.9152,   1.3050,   3.6801,  -5.1409,  -2.7843,  -5.1327,
         -3.8152,   1.7742,   3.4686,  -3.3497,  -2.2120,  -2.0455,   0.5129,
         -3.2803,   2.2717,  -2.6261,  -3.7352,  -1.6103, -13.9776],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.4500,  -7.3519,  -0.5363,  -6.9563,  -8.4296,  -4.8256,  -4.7989,
         -3.3603,  -7.7806, -42.1781,  -7.1723, -16.3862,  -3.8296,  -0.3146,
         -7.7494,   3.8778,  -3.4126,  -1.0288,  -2.3329,  -1.1876],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1343,  4.6160, -1.4327, -3.3136, -4.6469, -4.1932, -4.1823, -5.2400,
        -2.8447, -4.3239, -1.8298, -1.9308, -2.4190, -3.6458, -0.1233, -2.0952,
        -1.4146, -1.7676, -5.3295, -0.5231], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6188, -0.2591,  3.4494, -4.0873,  0.4143, -3.2390,  0.8249, -4.9976,
         0.5425, -1.2447, -3.9147, -1.3650, -0.5256,  0.6269, -8.8203,  0.1896,
        -1.7030, -4.0935, -2.1652, -2.3309], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3332, -2.1790, -7.2665, -2.4126, -2.6848, -6.4235, -7.1712, -1.7864,
        -1.0495, -6.6347,  1.0765, -0.4921, -2.5319, -0.4576, -2.5162, -2.8273,
        -0.2996,  3.7053, -3.0631, -0.6265], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.6253, -10.2038,  -2.9077,  -1.9554,  -2.7091,  -6.1038,  -3.4390,
         -1.5255,  -7.3386,  -4.0197,  -1.7974,  -8.1337,   0.3416,   1.2789,
         -6.1163,  -3.5046, -25.0001,  -3.0727,  -7.1110,  -1.1597],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5926, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7431,   1.5531,  -3.6678,  -3.5670,  -2.7222,  -3.5710,  -2.1796,
         -3.0399, -12.0527,  -3.0634, -10.9758,  -6.0611,  -5.5198,  -5.8273,
         -7.4409,  -3.1136,  -3.1752,   0.8781,  -6.7690,  -1.7754],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9336, -2.8775,  5.3885, -5.4332, -0.4124, -2.8933,  0.2773, -4.5937,
        -0.7990,  1.4097, -4.6784, -1.1525, -2.3114, -2.7021, -3.7037,  1.8154,
        -1.2144, -2.3811,  0.4086, -1.2459], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3083, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1756,  0.4131, -0.4141, -0.1723, -0.3413,  4.9749, -4.2211, -3.5301,
        -3.4716, -3.9231, -0.2274,  3.0656, -1.3571, -1.7267, -1.3107, -2.9829,
         0.9817,  3.9958, -2.9274,  0.4714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 6.5849e-03, -4.6378e+00,  1.0027e+00,  1.4382e+00, -3.4099e+00,
         6.7475e-02, -2.3527e+00, -4.0965e+00, -1.0659e+00,  1.2434e+00,
        -6.2133e+00, -2.8688e+00, -3.6661e+01, -5.3479e+00, -8.7000e+00,
        -9.5596e+00, -5.2861e+00, -3.0366e+00, -1.2718e+00,  4.0219e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0136,  -0.6246,  -1.1135,   4.7322,  -6.8442,  -2.3431, -19.7676,
         -6.9417,  -2.2020,  -5.6367,  -2.1540,  -2.0499,  -0.1595,  -4.8649,
         -3.4859,  -4.6993,  -1.0335, -14.9642,   2.0659,  -2.6300],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9142,   0.2203,  -0.3065,  -0.9669,  -2.5167,   4.7634,  -1.6497,
         -5.7204, -14.5167,  -7.4500,  -3.0776,  -4.4859,  -1.9836,  -0.1130,
         -0.8852, -13.4779,  -3.6463,  -1.3783,  -2.3565,  -1.2238],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1843, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3950,  0.4059, -2.0178, -1.0772, -3.3213,  5.3402, -7.9052, -0.5455,
        -2.1649, -0.7038, -3.9404,  2.2383, -1.2166, -3.3167,  0.1293, -2.6059,
        -0.8188, -1.6695, -0.9849, -2.9404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7926e+00, -1.1047e+00, -3.3494e+00, -5.7861e-03, -6.1095e+00,
        -3.2322e+00,  1.3252e+00, -2.4984e+00, -9.0705e-01, -1.3500e+00,
        -5.5111e+00, -6.2250e-01,  3.8589e+00, -4.2628e+00, -9.4948e-02,
        -2.9050e+00, -3.4681e+00,  1.4747e+00,  1.7055e+00, -3.3372e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4588,   1.4294,   3.4564,  -3.3683,   0.6181, -19.3815,  -7.3169,
         -4.0492,  -5.1581,  -1.4717,  -1.5585,  -3.6437,  -2.8502,  -1.9324,
         -1.7864,  -7.2199,  -0.8529,   1.2546,  -2.1681,  -2.3964],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2389, -6.9191, -2.1238, -9.0407, -6.4253, -5.6039, -6.0232, -2.8665,
        -3.0221,  1.6403,  3.0720, -4.0141,  0.0276, -3.0447, -3.5412, -1.6306,
         3.5042, -3.9071, -1.4723, -2.0283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2179, -12.7340,   0.1119,  -5.0987,  -4.9416,  -6.7588, -94.5463,
         -8.3363, -11.3232,  -6.7011,  -2.9083,  -4.2513,   3.5530,  -8.8573,
         -2.3232,  -3.3056,  -2.6570,  -0.9553,   0.1655,  -3.7813],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.9933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3942,   4.1971,  -2.0813,  -8.5924,  -2.8164,  -1.1412,  -4.8480,
         -0.2722,   1.6304,  -4.6211,  -3.6771, -11.2595,  -2.3701,  -7.0523,
         -0.3849,   0.3895,   1.4446,  -4.3660,  -0.1288,  -0.5081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8331,  -3.2229,   1.2656,   4.3749,  -3.2800,  -1.8844, -27.9154,
         -4.8817,  -8.0261,  -2.1031,  -3.2283,   3.5656,  -6.3802,   0.4442,
         -1.7995,   0.8365,  -4.8066,   1.7365,  -1.5484,  -2.2426],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8840, -2.7520, -1.2671, -0.3338, -8.1583,  0.5987, -2.6596, -2.4570,
         0.4611, -1.5372,  0.2446, -2.1103,  3.2723, -3.0664, -0.5280, -2.2560,
        -2.2712, -3.4951,  0.9065,  1.6599], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7120,  2.1979, -3.7240,  1.1083, -1.5373, -1.7045, -4.3347,  2.4438,
        -1.3294, -1.6897, -3.1374, -3.2698,  1.7927, -0.5785, -4.2540, -2.5405,
        -2.2262, -9.7767, -0.1481,  2.4785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8353,  -1.9823,  -0.5127, -10.7373,  -8.2838,  -3.1842,  -6.9106,
         -0.9967,  -2.1917,  -7.4868,  -6.5655,  -1.3825,  -3.8718,  -7.1550,
         -3.9269, -22.6600,  -4.1158,  -0.0555,  -9.0247,  -6.3691],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9371,  0.4821,  3.1546, -5.4722,  0.9370, -3.0948, -3.1393, -4.4738,
        -1.5405, -0.0311, -5.1588, -1.4304, -0.1086, -2.8702,  0.9334,  4.3166,
        -2.4276, -1.0142, -3.6667, -1.4306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2627,  -2.1705,  -1.2507,  -4.0657,  -7.0898,   1.8152,  -0.5195,
         -0.1920,  -6.7178,  -0.3778,  -3.3492,   4.6829,  -1.7910,  -2.3919,
        -17.5699,  -5.1695,  -2.5041,  -4.8858,  -0.9084,  -0.5051],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9360, -1.0250, -4.4532,  0.8101, -1.2396, -7.3109, -0.6206, -4.5706,
        -0.9320, -3.4260,  1.9144,  2.2437, -2.7564,  0.4692, -2.4275, -3.1084,
         1.2380,  2.7566, -9.2655, -3.1385], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9365,  -4.0018,  -4.7639,  -3.3484,   0.5927,   2.8478,  -1.8273,
         -0.8157,  -1.9156,  -3.2751,   1.3497, -18.2066,  -7.0501,  -1.3721,
         -3.4453,  -0.6349,  -3.6119,  -7.8930,  -0.3104,  -2.2316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6039, -7.1105, -4.9921, -2.6968, -5.0703, -1.2009, -0.8072,  4.1665,
        -6.8707,  0.4150, -1.2630, -0.4341, -5.2967,  1.4099, -3.1706, -4.0074,
        -3.0287, -0.6753,  0.0587, -4.2184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4054,  -0.9090,  -0.3154,  -2.3796,   4.7365,  -2.8750,  -1.1930,
         -4.8164,  -0.1980,  -3.0788,  -0.0514,   3.0479, -10.5750,  -1.2006,
        -13.3574,  -3.3756,  -8.6791,  -0.8377,  -9.2095,   3.2666],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3849,   1.8318, -11.8245,  -1.7798, -18.1674,  -8.0797,  -4.2671,
         -6.1978,  -3.5117,  -3.4435,  -3.1521,  -7.0124,   0.0488,   1.4152,
         -6.8315,  -2.8791,  -1.2048,  -5.6705,   0.3923,   1.5399],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-36.3452,  -7.8931,  -5.9253,  -6.7236,  -1.0192,  -1.8542,  -1.4673,
         -4.0824,  -0.6838,  -1.7606,  -4.1235,  -1.4635,   2.3199,  -2.4146,
         -4.0392, -12.9013,  -7.4136,  -8.4393,  -5.8540,  -7.5989],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7657,  -6.6782,  -4.6674,  -4.6161,  -7.4231,  -0.3500, -19.1651,
          0.8281,  -3.8407,  -6.4452,  -5.0998,  -5.0028,  -6.7542,  -5.0564,
          1.8955,  -5.4911,  -0.3599,  -2.9755,  -7.1438,  -1.6232],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6062, -4.6195, -4.1766, -1.4208, -0.6526, -2.2327, -0.5079, -5.9079,
        -1.2279, -7.5664, -3.2104, -7.2616, -5.3228, -1.8012, -3.5851, -1.0523,
        -1.4121, -1.5399,  1.6457, -2.7570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7800, -2.5520, -3.6017, -1.4261,  2.9153, -3.2571, -0.1433, -0.9668,
        -0.6041, -4.2791,  4.7310, -2.9466, -1.4330, -3.0617, -6.2992, -2.6707,
         1.1285, -1.6614, -0.7632, -4.0272], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4919,  -2.5746,   2.1008,   3.0028,  -3.1877,   0.2485,  -3.1870,
        -12.7031,  -4.7302,  -0.1915,  -5.8134,  -3.8863,  -3.3906,  -1.8305,
         -4.6556,   0.8079,   3.7385,  -1.9733,  -2.4037,  -4.3579],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.8176,  -3.5063, -11.4872,   3.9471,  -6.7690,  -1.7376,  -2.2901,
         -1.7591,  -4.2270,   2.6252,   2.4580,  -6.1926,   0.1168,  -3.5465,
         -1.6648, -11.7360,   1.9072,  -1.1171,  -4.3294,  -0.4590],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1513, -1.7659, -1.0870,  0.5461,  3.0992, -4.4087, -2.2162, -3.4173,
        -0.4107, -2.8129, -4.5514,  0.7469, -2.3535,  0.0303, -1.1861, -5.2354,
        -0.8506,  0.5325, -2.9605,  0.6073], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2247, -3.2445, -9.9789,  0.3231,  2.2529, -2.5595, -1.6151, -3.1972,
        -1.1903, -7.1734,  0.3480, -0.4069, -2.3941,  0.1991, -2.2156, -0.8334,
        -4.7267,  2.2399, -3.7938, -4.9351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3163,  -2.9874,  -4.6120,  -3.3864,  -2.0867, -26.5013,  -3.2142,
          0.3025,  -2.1220,  -1.6238,  -2.8711,  -2.0903,  -6.3725,   3.9086,
         -0.7331,  -1.7562,  -0.4668,  -0.3086,  -2.7242,   1.9316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8047,  -3.2402,  -3.1121,  -1.0671,   1.0294,  -4.3250,  -3.7929,
         -2.6071,  -3.1457,  -5.0913,  -1.6301,  -3.9487,  -5.3639, -11.0377,
         -3.4602, -17.8086,  -4.2332,  -8.7602,  -0.8968,  -1.1661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1230, -0.2357, -3.1433, -2.5498,  3.7313, -3.2190, -1.6515, -1.6334,
        -1.0284, -3.4155,  3.4670, -2.4838, -0.1567, -2.1676, -0.8306, -5.2322,
         3.0330, -5.0242, -0.6457, -2.4122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6185,   3.1031,  -4.3149,  -1.4447, -10.2136,  -4.3958,  -2.9826,
         -6.4327,  -2.0863,  -1.5284,   3.4720, -16.0547,  -2.5323,  -4.0758,
         -7.3307,  -2.5852,   3.7302,  -2.7090,  -4.6076, -21.2247],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3667, -1.4879, -1.6418,  0.4808,  3.6580, -3.4863, -2.3316, -3.1427,
        -1.4925, -3.0960, -1.5444,  0.5013, -3.2514,  0.9944, -1.1209, -2.2120,
         0.1422,  2.2420, -1.6281, -0.9299], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5824, -1.6187, -0.2785,  0.1547,  3.7376, -1.3489, -1.2051, -3.5158,
        -3.9300, -7.5528, -3.9052, -0.7970, -1.6580, -0.5266,  0.3474, -1.8772,
        -2.3972,  4.6445, -2.6668, -2.8687], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4187,  2.4334, -2.3306,  0.3691, -0.6466, -3.7689, -0.2934,  2.4372,
        -2.6554,  0.3066, -3.3518, -2.5192,  2.1947,  3.2525, -1.7629,  0.0846,
         0.0479, -0.8550, -0.7209,  5.4165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6768e-01, -4.4902e-01, -3.5257e+00,  1.7627e-01,  1.6938e+00,
        -3.8601e+00,  3.0871e-01, -5.1109e+00, -1.7729e+01, -5.4476e+00,
        -8.1398e+00, -5.8889e-01, -5.7010e+00,  5.5035e-01, -4.2468e+00,
        -2.7905e+00, -2.0559e+00,  1.3661e-02, -4.8957e+00,  1.4330e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1320, -3.7299, -1.5653, -1.9143, -1.3456,  0.7857,  4.6648, -1.7003,
         0.5443, -2.4291, -2.6247, -4.9839, -1.3367,  0.1270, -4.0360, -0.5129,
        -1.1925, -8.0768,  0.6592,  1.8051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7971, -1.8885, -1.8249, -5.5031,  0.6339,  2.9833, -3.2718, -0.9457,
        -2.2085, -0.5750, -4.8218,  2.9931, -4.1960, -2.2815, -1.1402, -8.3392,
        -0.2907, -1.8305, -3.3412, -3.9652], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1805, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4641, -1.0228, -3.6132, -0.1042,  1.0958, -2.3776, -0.7581, -2.9077,
        -0.4746, -3.0924,  2.1108,  0.8349, -2.8179,  0.1340, -1.0786, -1.4063,
        -1.1456,  5.2912, -1.8501, -1.1365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4953, -3.2619, -0.7185, -1.4942, -0.5708, -3.7735,  4.7148, -4.4200,
        -0.8868, -2.5362, -2.0093, -4.5682,  2.8770, -0.8302, -0.3558, -0.6850,
        -2.9647,  0.0743,  4.4530, -3.1709], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9374, -4.1199, -0.9399,  2.1477, -4.7080, -1.7475, -3.3075, -0.8610,
        -2.8763,  1.6896,  1.7463, -2.8736, -0.8036, -2.6179, -3.5709,  0.2620,
         2.4551, -8.6167, -4.7340, -1.6120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8022,   2.8677, -18.5715,  -0.8611,  -1.6168,  -1.7494,  -5.0606,
          3.0237,  -1.9594,  -3.4233,  -7.2090,  -6.6369,  -2.9429,  -8.0083,
         -3.4441,  -2.4214,   3.3230,  -6.3482,  -0.9882,  -1.2154],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.9239, -8.9050, -0.8562, -3.2457, -6.3023, -1.0168,  1.9000, -8.5930,
        -2.2112, -2.6329, -3.8569, -0.5594, -0.8640, -9.5548, -0.9764, -0.9852,
        -3.3829,  1.9251,  3.6813, -2.5920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8819,  1.7787, -4.6634, -1.4949, -6.9649, -0.8353, -3.1811, -3.8287,
         0.2464, -4.4922, -5.0709, -3.1541, -4.7418, -4.4817, -0.4431,  1.7046,
        -1.4449, -0.7257, -1.7778, -3.5996], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4012, -2.6743, -2.1029, -7.5134,  4.4733, -2.3102,  0.1708, -1.1582,
        -0.0996, -4.1618,  1.8220, -3.7614, -2.6269, -1.5898, -1.5745, -1.9715,
        -4.1763,  4.8042, -4.5225, -0.1707], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3943, -7.2496, -2.6541, -2.5578,  3.2493, -3.4715, -2.2827, -3.7367,
         0.0125, -3.7524,  1.2970,  3.5551, -2.9844, -0.3789, -4.3328, -2.3033,
        -5.2421,  0.5293,  1.1355, -4.8746], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1346, -1.0261, -5.3108, -1.7115, -1.5224, -3.2252, -0.6452, -0.2271,
        -0.3834, -1.3345,  5.1986, -1.3500, -0.0541, -1.3746, -1.3329, -7.6536,
         1.4453,  0.7204, -1.8478, -1.1686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9389, -5.5469,  0.1348, -0.9934, -3.1973, -0.6113, -2.6342, -0.5885,
         1.1638,  2.4995, -3.0504, -2.6660, -3.2668, -1.2899, -3.2855, -1.2036,
         1.4613, -2.2250, -0.3878, -1.2975], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8103, -6.1985, -2.1349, -0.1071, -5.5011, -6.6555, -3.1889, -4.7044,
        -5.4048, -3.1543, -1.7930, -6.1835, -8.1818, -1.0437, -2.2347,  0.1562,
         4.7113, -5.6694, -1.0474, -2.0433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8421,  1.5768,  1.2690, -2.4262,  0.4641, -0.8843, -0.1287, -0.7446,
         4.2252, -1.5466, -2.8582, -0.6913, -4.2961, -0.7182,  4.1473, -0.9339,
        -0.2111, -2.8123,  1.3126, -7.6492], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8874, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3157, -2.1658,  0.7513, -1.5555, -0.8464,  0.7416,  3.9647, -1.7209,
         0.1694, -3.1467,  0.1888, -6.9709,  1.0067, -0.0462, -3.1817, -1.4607,
        -3.6263, -3.8327, -6.1337, -0.9517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4781,  1.7576,  3.9573, -1.6967, -0.8145, -0.3490, -5.7025,  1.0294,
         4.2132, -4.4953,  0.4187, -1.8192, -2.5253, -4.6210,  3.9586, -3.3771,
        -0.5708, -4.0474, -0.1389, -4.2424], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0772, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9426, -0.4625, -3.9553, -1.3933, -2.6461, -1.0955, -8.8431, -0.2582,
         0.2078, -7.2873, -1.6507, -1.0758, -3.8970, -0.8626,  0.8324, -4.5505,
        -0.1505, -4.1613, -0.9695, -5.9981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6116,  -2.9031,  -2.2481,   2.7347, -16.4187,  -4.2450,  -1.7981,
         -1.7746,  -3.4915,   3.2544,  -1.8083,   0.4029,  -2.4447,  -2.5880,
         -3.3886,  -1.1964,   1.6222,  -2.5998,  -0.8748,  -1.7377],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4557, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7450,  -1.9202,  -6.8693,  -0.6809,  -3.9745,  -4.5352,  -1.8233,
         -4.1344,  -4.1762, -12.0904,  -4.0716,  -5.2330,  -3.5407,  -1.2742,
        -33.7332,  -5.6718, -15.2209,  -2.2842, -17.0005,  -8.7298],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9450,  -5.8589,   1.3988,   1.7891,  -3.4530,  -1.1839,  -1.2378,
         -6.1973,  -4.9332,   0.9038,  -4.2453,  -4.2828, -11.1685,  -4.3577,
         -4.3757,  -2.6869,  -6.3861,   1.6024,  -2.5966,  -9.2819],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5882, -5.9820, -1.7980, -2.2663, -3.9168, -1.5247,  1.8466, -2.8426,
        -2.2203, -3.2936, -2.5289, -8.6906, -2.2392, -1.0890, -3.4664, -1.3360,
        -0.9144, -1.5084,  0.4642,  3.0750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7437, -1.3697, -3.2506, -4.4797,  0.4138,  3.0327, -3.6492, -1.1726,
        -2.6627, -1.4144, -1.4893,  2.7702, -3.4227, -2.7045, -4.6189, -3.2071,
        -4.9113,  0.0400,  2.0340, -4.0574], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9225,  -0.4545,  -7.9485,  -0.0223,  -2.4691,  -1.9072,   0.1073,
         -2.5014,  -1.2143,  -3.9172,   1.5289,  -0.2890,  -2.5125,  -0.8948,
        -16.8026,  -3.3496,  -5.7292,  -1.2152,   0.3346,   2.5262],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0335,  -5.3368,  -1.6243,  -6.8061,  -0.1014,   2.7477,  -6.3321,
         -3.2895, -14.8102,  -5.4467, -11.0788,  -6.8554,  -3.3004,  -3.7560,
          1.4890,   4.5044,  -2.1086,   0.1147,  -6.3004, -12.9661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1658, -0.2068, -8.0427,  1.6057, -1.4176, -2.2991, -2.0116, -1.5177,
        -0.7558, -0.9528,  3.5809, -1.0281, -0.9949, -3.3515, -2.3439, -4.3474,
        -0.3367, -3.3847, -5.0677, -1.2875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3675,   0.3117,  -3.9540,  -2.5176,  -2.1418,   4.0752,  -1.8830,
         -2.0854, -18.7839,  -2.2783,  -8.6360,  -2.3643,  -4.0068,   3.1591,
         -3.7262,  -9.2326,  -3.4250,  -5.3543,  -1.6786,  -3.0815],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8352, -1.2564,  0.8572,  3.8526, -3.9061,  0.7921, -1.7741, -3.4195,
        -4.0823,  0.7845,  0.4254, -2.7375,  0.5103, -0.5646, -5.2336,  0.5004,
         1.0667, -3.4156, -0.1112, -9.6879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5281,  -2.5026,   0.2196,  -6.6638,  -1.4644,  -1.1610,   2.1919,
         -2.4521,  -6.0819,  -1.0427,  -1.5760,  -5.5166,   1.8621,   2.0844,
         -5.3042,  -2.0619,  -0.1178, -25.0773, -13.7955,  -2.4690],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6200,  -2.6631,  -4.6463,  -0.5716,  -1.9752, -10.3721,  -0.2521,
         -0.6980,  -3.2756,   0.8141, -13.3296,  -5.0102,  -5.9808,  -4.8028,
         -0.9946,  -5.7958,   1.8698,  -3.7748,  -1.9324,  -0.8333],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4234, -0.5500, -4.5710, -0.6278, -0.9402, -2.2766, -0.2827, -2.2853,
        -1.6388,  0.2415,  3.6829, -4.4666,  0.2648, -1.1525, -2.8958, -0.6869,
         3.5466, -1.9469, -1.5321, -3.4113], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4476, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2978, -4.5688, -2.2240, -2.3857,  0.2432,  3.5566, -3.5606, -1.7976,
        -5.7299, -6.3458, -3.5458, -4.8631, -4.8659, -1.0160,  1.8891, -5.7593,
        -0.9829, -4.0377, -0.5229, -4.4685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6344,  -2.0664,   1.4719, -10.1109,  -4.0269,  -0.7068,  -3.7082,
         -3.8554,   0.2538,   1.1841,  -5.0672,  -2.8426, -12.7528,  -5.3795,
         -2.7271,  -4.5932,  -1.2307,   0.5851,   3.9155,  -7.0971],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3474, -3.4152,  2.9057, -1.8085, -2.3312, -0.6623, -1.2054, -3.9637,
        -0.1427,  2.0817, -1.5025,  0.5426, -0.7338,  0.1949, -1.9252,  2.7841,
        -5.1089, -1.2826, -4.6090,  0.2658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9891,  -1.1811,  -2.6224,  -0.5034,  -2.8600,  -1.1160,  -2.7711,
         -3.5193,  -3.3276,   0.5412, -12.0154,   1.5597,   0.9994,  -4.1045,
         -1.3654,  -1.5073,  -1.0473,   0.5415,   3.7463,  -1.2156],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8379, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1452,   1.1944,  -2.7088,  -0.2150,  -0.6806,  -2.7464,   0.9758,
          1.7144,  -2.7825,  -0.6332,  -3.7160,  -1.7523,  -8.7039,  -1.3228,
         -0.3392,  -4.1287,  -2.4419,  -1.3549, -13.7730,   1.4408],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3453,  0.1995, -4.0189, -1.9747,  2.0436, -3.6441, -9.0518, -8.2944,
        -3.6207, -3.0074,  0.6912, -0.6058, -2.8237, -3.7731, -1.2690, -2.7209,
         1.1313,  2.6279, -1.7198, -0.0903], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.8375, -17.0857,  -1.4539, -17.8245,  -9.2895,  -0.6432,  -3.4647,
         -1.5518,  -6.1827,  -1.1165,  -3.4048,   0.2785,   0.5901,  -1.7141,
         -0.9774,  -1.5697,  -5.8318,   1.1320,   1.4599,  -3.4255],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2615,  -7.5491,  -8.9242,  -2.7304, -16.0909,   1.5014,   0.8975,
         -3.6440,   0.2635,  -4.7798,  -3.2903,  -1.6026,   0.5594,  -5.1675,
         -4.8893,  -3.3769,  -1.7441,  -5.6745,  -1.9136,   2.0345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3483, -0.2329, -3.6250, -3.9121, -6.4559, -1.8538, -1.5107, -5.7081,
        -3.7401, -1.0982, -3.7678, -1.0394,  2.1014, -4.4151, -0.7725, -4.0940,
        -4.0112, -3.1131,  1.5909, -6.0382], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6722,  0.6275, -2.1529, -2.2209, -0.6431, -3.0681, -5.6815, -1.0226,
         1.6542, -2.3342, -0.4063, -4.2378, -1.6458, -2.7268,  0.5842,  1.2860,
        -2.1125, -0.2693, -3.4344,  0.0288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1968, -1.9439,  0.1333, -0.4418, -2.4380,  0.7362,  3.5841, -2.6590,
         0.9514, -2.4590, -0.8580,  1.1055,  4.3609, -6.4018, -1.4350, -0.8662,
        -0.1768, -4.9457,  3.4848, -9.2924], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8682, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5471, -2.0644,  1.7314,  4.4319, -3.2345, -4.1988, -3.0910, -1.9325,
        -3.7318, -0.5133,  2.4482, -2.5828, -1.2353, -4.0666, -2.5387, -0.1094,
         3.0023, -3.0480, -2.0199, -4.2832], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2601,  -1.2530,  -4.0658,  -2.6718, -14.8565,  -3.4076,  -6.4478,
         -5.8638,  -3.7186,  -3.8861,  -1.1583,   3.3396,  -4.2255,  -2.2896,
         -2.0076,   0.1601,  -7.6670,   2.6106,  -5.5979,  -4.0810],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.2904,   3.7032,  -4.9381,   0.5864,  -2.5726,  -1.3056,  -9.1582,
         -1.3596,  -0.4858,  -3.9703,   0.2515,  -1.8997,  -0.4170,   0.3768,
          4.7336,  -1.5805,  -1.6344,  -9.3756, -16.0722,  -5.5763],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4827,  -1.3511,  -0.8022,   4.5784,  -3.0953,  -3.3155,  -9.2218,
         -7.6909,  -2.8475,  -4.8648,  -1.4558,   0.0632, -10.6401,  -7.4660,
         -1.1317,  -2.1676,  -3.7980,  -0.2599, -22.3625, -11.7793],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.2343,  -6.5022,  -8.8010,  -6.1585,  -5.3221,  -1.0193,   0.7738,
         -2.1139,  -2.0693,  -2.0440,  -3.1374,   0.3787,   3.7742,  -2.0417,
         -0.2872,  -1.3080,  -1.9786,  -3.6784,   3.1709,  -2.8395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4451,  -1.6719,   0.5866,  -4.4437,  -0.4516,   0.4966,  -5.4772,
         -3.3975, -14.0810,  -6.2115,  -4.7068,  -6.3440,  -1.9842,  -3.2671,
         -0.1080,  -4.0848,  -3.0311,  -2.8072,  -4.6566,  -3.2349],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3337, -0.9824, -2.4253, -1.1274, -3.5706, -0.3034,  2.7050, -3.5352,
        -0.1259, -1.1217, -4.6264,  1.5314,  2.5938, -5.2507, -1.7142, -2.0795,
        -2.0060, -0.0502,  4.1648, -3.6072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1932, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5370,  2.1760, -4.1769, -1.4651, -1.1186,  0.5684, -0.7683,  4.6290,
        -1.6728, -5.4864, -2.6911,  0.2691, -5.0213,  1.7318,  1.4781, -2.3989,
         0.0342, -2.1394, -6.8508,  0.6421], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0862, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9838,  4.4984, -1.4910, -3.3393, -3.4206, -0.4671, -2.6961, -0.0552,
         2.2900, -2.4039, -1.1983, -1.9281,  0.2975, -7.3678, -8.1291,  2.1584,
        -2.4748,  1.0290, -2.1359, -1.1353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6992,  -2.9229,  -4.1729,   0.6338,   3.7793,  -2.2601,  -1.9515,
        -13.6422,  -4.8315, -11.3579,  -8.4301,  -4.5307,  -1.0754,   1.2659,
          3.9041,  -5.2737,  -0.5801,  -1.3505,  -0.7916,  -4.0476],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6280, -2.8336, -8.1521, -1.4185, -1.7773,  2.3545, -7.3007, -3.0559,
        -0.9487, -1.6615, -6.2033,  1.1598,  0.6629, -3.9349,  0.3746, -4.0824,
        -6.3361,  0.8530,  1.4828, -3.0592], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4300e+01, -6.7605e+00, -2.5321e+00, -2.5297e+00, -8.3424e+00,
        -4.4590e+00, -3.6242e+00, -6.1268e+00, -4.2940e+00, -7.2855e-01,
        -4.3644e+00, -9.4516e-01, -1.6694e+00, -1.9934e+00, -9.9673e-02,
         1.4166e+00, -5.4263e+00, -9.8526e-04, -2.5972e+00, -1.7694e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6797e+00, -2.6531e+00, -3.8091e+00, -2.9050e+00, -3.2244e+00,
        -1.2099e+01,  2.3428e+00, -5.2870e+00, -6.1125e-01, -1.0250e+00,
        -7.2742e-01, -3.5131e+00,  4.0931e+00, -1.8108e+00, -2.1519e+00,
        -3.7918e+00, -2.7511e+00, -1.8637e+00,  2.3504e+00, -5.1249e-03],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0977, -5.9787, -0.9229, -2.3678, -8.5652,  0.9823,  2.4357, -5.8504,
        -1.9200, -6.2800, -6.7103, -5.8961, -4.8153, -1.1167,  0.7493,  1.3722,
        -4.4766, -1.8430, -0.9638, -5.8205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.0529,  -2.3978,   0.3260,  -2.2673,   0.3389,  -3.0840,   3.6966,
         -7.1087,  -1.7246,  -2.3491,  -1.6553,  -4.2365,   3.3985,  -2.0103,
         -3.9137,  -3.7459,  -2.6051, -10.5249,  -5.7834,  -6.4271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6719, -14.8527,  -3.5038,  -5.4466,  -0.1781,  -6.2502,  -1.2093,
         -3.8206,  -0.7741,  -4.9200,  -0.7618,  -4.9122,   0.9003,  -1.7365,
         -2.2165,  -1.5120,  -3.4856,  -5.3998,   1.0780,   3.6850],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5098, -4.0595, -1.3882, -0.6649, -4.1938, -0.5415,  2.4667, -2.8460,
        -1.1316, -3.7474, -0.4223, -5.3777, -0.9920,  1.5133, -2.9716, -0.2082,
        -2.1930, -3.1041,  0.0200,  3.8270], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4654, -4.5058, -2.3049, -2.5942, -2.5049,  0.4561,  3.8352, -3.7210,
        -0.8427, -2.8194, -1.3571,  0.9599, -0.0653, -3.5661, -2.7727, -1.9468,
        -4.0119,  0.6983,  4.1430, -4.6600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7887, -3.8255, -7.7912, -0.2678, -7.4066,  0.4129,  1.7405, -1.7883,
        -8.1034, -4.8074, -4.1334, -4.0169, -1.9186, -1.0203, -6.4945, -1.9858,
        -0.9647, -3.3737,  1.1045,  3.5729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9281,  -2.0957,  -6.2495,  -9.1974, -25.0516,  -8.5755, -16.5188,
         -8.1632,  -7.2514,  -8.2398,  -0.6799, -18.2568,  -4.8294, -12.4333,
         -1.3971,  -2.6495,  -1.9072,  -1.3833,   0.4951,   1.4582],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0457, -2.2880, -0.9493, -4.0496,  1.0876,  3.3407, -1.3190, -0.7681,
        -1.7609, -4.3615, -1.0631,  1.9454, -3.5217, -1.6903, -4.6654, -1.8544,
        -8.9507, -1.9601, -2.2474, -3.9981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5843, -3.6084, -1.2466, -4.4325, -0.6044,  2.0202, -1.3851, -0.6418,
        -3.2858, -2.9323, -4.7443,  0.9672,  0.4459, -4.3669, -1.0791, -0.2342,
        -2.3986,  0.0507,  2.4289, -1.3577], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.4992, -5.0885, -2.8973, -4.4277, -5.6301, -3.2825, -0.9439,  2.2059,
        -6.0296, -1.7841, -2.2302, -2.1468, -2.0886,  3.8916, -2.1176, -0.2568,
        -2.3801, -0.6190, -3.1283,  4.9884], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2866e+00,  9.3582e-01,  4.3538e+00, -2.8544e+00, -8.0363e-01,
        -3.0552e+00, -2.1522e-01, -4.7035e+00,  1.6843e+00,  2.1862e+00,
        -3.4825e+00, -1.8734e+00, -1.5098e+00, -3.1984e+00, -6.6233e+00,
        -7.0823e-01,  1.4220e+00, -2.1815e+00, -5.7139e-03, -2.0136e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0061,  -8.9672,  -5.6227,  -4.6691,  -1.9290,  -0.7007,   2.0955,
         -7.1487,  -2.0656,  -2.2497,  -5.9948,  -1.7871,   4.0956,  -1.7037,
         -1.5342, -14.6908,  -5.8402,  -3.4989,  -6.2375,  -0.7011],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5761,  0.8682,  2.9906, -3.3195, -1.4767, -1.2415, -5.5192, -0.5747,
         2.1471, -3.9012, -0.3823, -2.8213, -2.6574,  0.1109,  3.1618, -1.6521,
        -1.3225, -3.2463, -0.7934, -5.8193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4466, -1.4527, -2.5766,  4.6300, -3.0447, -0.9441, -3.8072, -0.2501,
        -4.8290,  2.2862, -2.4191, -2.7445, -0.4255, -1.0207,  0.4227, -6.9962,
        -0.1535,  1.1608, -2.7403,  0.7496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9026,  2.0730, -1.4586, -3.5462, -3.2506, -2.3389, -4.0301, -2.3806,
        -0.2810, -3.3735, -1.0126, -8.5236, -1.1384, -0.0351,  4.7662, -4.4282,
        -6.0807, -3.6030, -1.8972, -3.3426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3352,  -2.7260,  -1.9726,  -4.2438,   0.8045,   2.1617,  -2.2038,
         -0.5496,  -2.8216,  -4.3493,  -0.9702,   3.8119,  -2.5492,  -1.8937,
        -11.1601,  -6.2411,  -5.7911,  -5.3646,  -3.5025,  -1.6618],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6049,  -1.0133,  -0.0994,  -0.4659,   4.9920,  -2.8973,  -0.9450,
         -1.3392,  -4.9316,   0.6681,   3.4269,  -2.2669,  -1.9613, -15.0792,
         -7.4497,  -4.4367, -10.6374,  -6.8862,  -0.2899, -39.1329],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7240, -11.9101,  -5.8647,  -7.8212,  -5.4707,  -4.6268,  -0.9605,
         -6.3920,   3.1809,  -3.3255,  -4.5674,  -1.3995,  -3.2987,  -2.4529,
         -8.0017,   3.9967,  -2.0181,  -0.8777,  -2.1122,  -2.3253],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6103,  -0.9268,  -1.3273,  -4.8574,   1.1199,  -0.3369,  -3.1882,
         -1.2601,  -2.6367,  -0.8141, -10.5646,   4.6975,  -1.6416,  -0.3789,
         -2.2487,   0.3860,  -2.3715,   2.7327,  -3.9296,  -0.2936],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.9561, -4.3681, -0.2141, -3.4862, -3.2391, -6.9333, -0.3355,  1.3531,
        -3.0156,  0.8080, -2.2970, -4.7620,  1.0944,  2.6113, -2.1668, -0.0173,
        -0.9311, -4.5535,  1.2662,  3.9448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7868, -2.5699, -0.5994,  3.2695, -0.9740,  0.3904,  0.3386, -2.6583,
         0.8792,  4.1795, -4.7730, -0.0262, -3.1578, -3.3941, -0.1567,  2.6571,
        -2.8788, -0.6156, -1.9198, -2.0109], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8469,  -2.8532,  -0.3762,  -0.6651,  -1.9642,   3.4296,  -2.2756,
          0.2616, -10.2954,  -3.7274,  -6.8070,  -1.3303,   0.3996,   3.6012,
         -3.0472,  -2.4654,  -0.9942,  -3.2305,   1.0200,   4.3692],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0960, -3.8545, -5.6108, -0.6109, -1.2081,  3.4537, -5.6158, -0.7481,
        -2.6035, -3.7399, -0.5684,  3.3211, -3.6372, -0.8334, -2.8062, -1.1219,
        -6.8197, -0.2079,  1.7764, -2.6899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0929, -4.5399, -4.0300, -2.6273,  1.5221, -5.6804, -2.2746, -1.2250,
        -2.6670,  0.1756,  4.2438, -2.5161, -0.3637, -4.0257, -3.1849, -1.8172,
         1.4043, -2.4647, -0.6768, -1.9352], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5795, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2146, -2.1628, -2.6353, -5.1405, -6.5649, -1.2955, -7.1469, -4.5596,
        -3.7622, -0.9033, -2.2610,  1.8278,  3.2189, -7.0464, -1.4314, -0.9976,
         0.2544, -5.4477,  4.1936, -3.4199], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5804, -0.9564, -4.5727,  1.4275, -3.4159, -2.1016, -1.6301, -1.6938,
        -0.9361, -0.2971,  4.3456, -1.0857,  0.1334, -1.3417, -1.5726,  0.3266,
         5.2239, -4.7003, -0.5847, -1.7448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3246,  -7.1619,  -1.1314,  -2.0552,  -4.6802,  -3.1107, -13.7833,
         -6.5132,  -6.5075,  -6.4166,  -1.2583,  -1.2267,  -0.1736,  -3.2959,
         -5.7168,  -1.8582,  -4.0666,  -5.0842,  -0.9504,  -0.8206],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9827,  3.0353, -3.8745,  0.3040, -3.9023,  0.3330, -4.1928,  0.4028,
         2.1154, -1.6100, -1.5987, -1.3982, -4.4011,  1.1568,  4.2421, -3.0232,
        -0.3260, -3.3364, -5.6945,  0.1224], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2396,  -0.4175,  -5.9289,   0.5483,   1.9451,  -2.8617,  -0.4518,
         -2.3568,  -2.0951, -10.3230,   1.3769,  -0.7747,  -2.5333,   0.2604,
         -0.8629,  -3.6125,   1.4822,   1.9582,  -2.1680,  -1.1416],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4329,  -1.5869,  -0.1453,  -1.9000,  -1.1084,  -3.7223,   0.6204,
         -1.0292,  -4.1359,   0.6007,  -3.5248,  -1.5866,  -4.1588,  -2.6912,
        -10.5024,  -7.5303,  -5.5516,  -6.2728,  -7.1404,  -8.2874],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0464, -2.4974,  0.4809, -1.1241, -3.6163,  1.4553,  2.1516, -3.2373,
        -1.1681, -4.3450, -0.9637, -5.0201,  2.1234,  1.9774, -1.7691, -1.2108,
        -1.1331, -2.5813, -1.1115,  3.6894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0430,  1.7721, -5.1421,  0.0086, -2.1278, -2.8043, -1.5698,  4.9368,
        -3.4637, -1.1397, -2.0577, -2.9232, -3.4556,  0.5541,  3.0123, -1.8751,
        -0.2353, -3.0724, -2.3106, -5.6701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5930,  -1.6749,   0.7335,  -0.8050,  -1.2585,   1.5151,   3.8485,
         -3.2822,  -0.9136,  -4.0142,  -2.0432, -11.7308,   5.0841, -11.9964,
         -0.4739,  -1.9824,  -8.4500,  -1.5762,   0.0799,  -7.2219],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7537, -2.4269, -4.7143, -1.4851, -3.1603, -0.6441, -4.2260,  1.3693,
        -1.7120, -2.8521,  0.0340, -3.8563, -1.2628, -5.0665, -1.0259,  2.7080,
        -1.8844, -0.2829, -0.5390, -3.6470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.4290, -3.1615, -0.3478, -3.0064, -4.5701, -8.4321,  2.2041, -6.1149,
        -2.3762, -0.1515, -5.2029,  1.6277,  1.0636, -2.1491, -0.7372, -4.2509,
        -0.8611, -3.2200,  3.3131, -1.3437], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6490,  -3.5354, -12.1882,  -8.0531,  -4.2353,  -2.6078,  -7.1184,
         -0.3896,   2.1644,  -4.1309,  -0.8983,  -3.8853,  -0.5094,  -2.2047,
         -0.3851,   3.9237,  -1.3481,  -1.0949,  -2.1456,  -2.1983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8379,   0.8509,  -5.1949,  -1.6764,  -2.0004,  -3.0916,  -2.3324,
          4.8673,  -2.7646,  -3.0842, -21.3910,  -6.5071,  -3.2954,  -5.1538,
         -1.1590,  -0.2054,  -4.3912,  -5.1493,  -3.9797,  -2.6388],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1834, -1.9706, -2.5886, -4.8836,  0.0955,  4.1730, -2.2481,  0.3213,
        -1.7392, -2.5694, -8.6439, -1.0383,  0.1997, -3.0704, -0.8244, -1.6949,
        -5.8665, -5.2978,  2.4440, -1.6645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9525, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5036,  -8.7848,  -5.6169,  -6.2435,  -1.4443,  -3.5632,   4.9134,
         -4.4681,  -1.0032,  -1.0369,  -3.0691,  -2.7361,   0.8141,  -7.4699,
         -1.5895, -10.1778,  -7.7804,  -2.9941,  -6.0767,  -1.5689],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6838, -3.5339, -4.2648,  0.6845,  3.5692, -2.1398,  0.6409, -2.7852,
        -1.4155,  1.5980,  4.1789, -3.1650, -0.9494, -3.5293, -4.7492, -9.9253,
        -4.3627, -8.4567, -2.3518, -3.7215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1922,  -2.1757,  -3.3894,  -4.7896,  -2.0907,   3.3178,  -6.8935,
         -2.3711, -23.2376,  -6.2901,  -8.6619,  -5.8489,  -7.0297,  -0.5486,
         -1.3804,   2.6426,  -9.7860,  -2.0516,  -2.7634,  -6.2591],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0900, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6587, -0.8995, -9.4301,  1.7465,  1.3589, -3.4466, -1.0753, -2.3488,
         0.2020, -5.5911,  1.5747,  0.7710, -2.7887, -1.9407, -1.3675, -5.0207,
        -0.3016, -5.6830, -4.3777, -0.5268], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0678,  -0.5520,  -4.2479,   0.9529,  -0.2969,  -4.4237,  -1.9319,
        -12.5903,  -2.8538, -14.9547,  -0.4541,  -0.7302,   3.2845, -11.3001,
         -3.0279, -16.2301, -11.7335, -21.8988,  -1.7303, -11.9205],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8854, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4326,  -7.7854,   2.6289,  -0.3987,  -8.9737, -10.5266, -13.4261,
        -11.3158,  -4.7203, -21.1779,   1.5449,  -2.9760,  -7.6415,  -7.1134,
         -9.4526,  -4.1692,  -7.2802,  -1.0277,  -1.8724,   4.5203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0091, -0.6612, -1.9340,  0.5657, -4.0065,  0.5963, -1.1698, -2.9711,
        -1.4271, -4.0510, -0.4031, -6.3734, -1.5775,  1.5677, -4.2681, -0.4152,
        -2.3934, -3.8624, -0.3078,  1.9312], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4428,  -0.1000,  -2.9539,   3.0623,  -2.0918,  -3.0050, -12.3861,
         -6.8887,  -2.7966,  -4.9958,  -1.5444,   0.7104,   1.4834,  -2.8369,
          0.1175,  -0.7140,  -3.4222,   1.5386,   3.4289,  -1.2955],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3505, -4.0626,  1.0028,  2.4508, -1.9583,  0.0988, -3.6620, -7.8135,
        -1.8683,  3.2220, -2.1957, -0.4221, -2.6688, -0.8560, -9.1626, -0.9175,
        -0.0437, -2.2720,  0.7296, -1.8730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1222e-01, -2.5816e+00, -3.8166e-01, -5.2746e+00, -1.5217e+00,
         1.7358e+00, -2.3213e+00, -6.4176e-04, -2.6208e+00,  2.2984e-01,
        -6.1632e-01,  3.0111e+00, -2.7684e+00, -1.9905e-01, -3.6408e+00,
        -1.8235e+00, -7.3031e+00, -9.5101e-01, -6.2094e-01, -4.1373e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2001, -6.4808, -4.5597, -5.0002, -2.4493, -6.4618,  2.5810, -3.6557,
        -2.7987, -3.5741, -8.4809, -7.8076,  0.6222, -4.2055, -2.7598, -7.0885,
        -6.6168, -4.0020, -6.9846, -2.9834], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7474,  -1.5319,  -3.0520,  -3.2594,  -5.5131,  -0.6233,   2.3013,
         -3.8102,  -2.2883,  -7.0854,  -5.6227,  -3.6789,  -4.7615,  -2.3753,
         -3.8702,   4.4508, -13.5748,  -3.0486,  -1.6844,  -5.6208],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5962, -2.3608, -1.7767, -0.1812, -6.2207, -0.3389,  2.4859, -2.7638,
        -0.8935, -1.4974, -5.7768,  0.1269,  4.1221, -3.5397, -0.7566, -2.9959,
        -1.4206,  0.7999,  4.9444, -2.4440], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5987,  0.5015, -2.7467, -0.0680, -1.9596, -1.0084, -2.8482,  3.6041,
        -4.1499, -0.1461, -2.1006, -4.9217, -2.4291,  3.5477, -2.6964,  1.3086,
        -1.2033, -0.2506, -0.3775,  4.2714], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5053,  -3.5968,  -0.3764,  -1.4889,  -2.6955,  -0.6432,   2.7674,
         -4.2321,  -6.6103, -12.2479,  -4.6685,  -6.9126,  -0.6073,  -1.5277,
         -1.5462, -11.7645,  -0.9154,  -5.4204,  -0.4586,  -2.6047],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.5948, -2.3900, -1.3705, -7.8707, -7.0615, -4.7084, -5.9917, -4.3237,
        -6.7448,  0.2399, -0.8193, -3.7703,  0.5385, -2.9275, -3.8755, -1.5597,
         3.0667, -4.5592, -3.1124, -3.0023], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9361,  0.5566, -1.8706, -2.0224, -1.4029, -3.2454, -5.6147,  2.3455,
         1.7013, -1.8832, -0.2338, -1.0595, -0.5108, -5.7035,  2.6069,  0.4646,
        -2.8591, -1.8745, -0.6450, -0.7114], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5507, -0.5290, -3.9005,  1.6556,  3.3544, -1.2397, -0.4242, -0.6336,
        -4.0844, -0.1148,  2.2620, -3.1680, -2.5621, -2.3835, -0.9585, -6.3973,
         0.8833,  0.1430, -1.3198,  0.3465], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0842, -3.2086, -4.7190, -1.5262,  2.7058, -1.2473, -0.0684, -1.0782,
        -4.2903,  1.8568,  3.5029, -2.9833,  0.2556, -1.6764, -3.7422,  0.2070,
         3.0240, -2.6014,  1.2470, -1.2492], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0362, -3.0107, -3.5112,  4.0949, -2.0752,  0.5962, -1.8581, -0.7373,
        -1.4474,  4.8768, -4.3301, -5.0797, -3.1278, -1.4788, -2.4727,  0.5348,
         2.0502, -1.4995,  0.1298, -2.0945], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5573, -0.4909, -0.5039, -2.1897, -2.0754,  3.3458, -4.8574, -1.5832,
        -2.5343, -1.6252, -5.2879, -4.0450,  1.7299, -2.9609, -4.0754, -1.3087,
        -0.3451, -6.3730,  1.3652,  2.6064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9770, -2.1938, -5.1260,  2.0487,  1.3008, -2.5411, -0.6637, -4.4377,
        -2.0260, -5.3845,  1.1249, -1.9034, -3.2369,  0.3472,  0.0270, -1.7362,
         0.5285,  4.0714, -2.9149,  0.3588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9885,  -3.4062, -18.0121,  -3.3518,  -6.1875,  -1.7498,  -5.9759,
         -1.8481,  -0.3549,  -8.3907,  -0.4943,  -4.4464,  -5.7213,  -2.7389,
          0.1667,  -4.9540,  -4.1011, -27.3274,  -4.7966,  -9.5184],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7660,  2.1279, -2.8930, -4.7233,  0.1025, -1.5195, -3.2383, -1.6360,
         4.5129, -1.5867,  0.5428, -2.9484, -0.1146, -5.0278, -3.0294,  2.3763,
        -2.0205, -2.7624, -2.7713, -1.6207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1086, -3.4088, -1.1779, -3.4826, -2.4927, -4.2000, -1.0252, -1.9961,
        -0.5119, -1.1838, -3.8980,  1.1548,  2.3248, -4.8738, -2.6808, -0.5425,
        -3.3131, -1.8246,  3.3545, -2.3233], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7170,  -0.6072,  -2.4456,  -0.9023,  -2.5865,  -4.3343,  -0.6478,
         -7.0826,   0.0868,  -3.8977,  -1.7423,  -2.2836, -18.6695,  -4.4733,
         -5.5117,  -8.9982,  -9.7017,  -8.9168,  -0.6539,  -1.1312],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0327,  -4.2809,  -5.4976,  -1.5219,  -4.0325,  -4.3100, -10.7783,
         -4.7014,  -9.4461,  -3.7500,  -2.6168,  -0.0830,  -4.0000,  -2.0071,
         -3.4013,  -3.5165, -13.5926,  -1.2256,  -5.5165,  -0.6124],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4962, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7745,  -5.2993,  -1.1173,  -1.3004,  -6.4477,  -4.4394, -24.7989,
         -7.4477,  -7.1293,  -6.0536,  -2.0078,  -0.9297,   2.7512,  -3.7876,
         -2.3607,  -2.5238,  -4.5955,  -3.9661,  -0.2210,  -3.1074],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5278, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3611e+00, -1.9837e+00, -7.2758e-01, -2.1431e+00, -1.1369e+01,
         2.7770e-01, -1.0421e+00, -2.4218e+00,  4.1723e-03, -6.4725e-01,
        -3.7351e+00,  6.7431e-01,  4.2049e+00, -2.9384e+00, -1.8555e+00,
        -3.9614e-01,  5.2154e-02, -4.1946e+00, -4.2864e-01, -8.9324e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7459,  -4.0962,  -3.1240,  -2.3625, -20.8206,  -1.8609, -10.5967,
         -3.5952,  -2.6726,  -2.6897,  -3.9082,   0.3809,  -0.1751,  -2.2278,
         -2.5870,  -3.9262,  -3.8337,  -3.5121,  -0.1117,   2.2310],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0146, -2.4071, -0.3642, -1.1022, -1.6696,  1.8019,  2.7898, -1.7446,
        -2.7151, -1.8614, -0.8931, -4.3903,  2.1877,  1.8811, -4.3857, -1.0845,
        -0.2835, -4.1524,  0.6362,  2.6724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6451,  3.6955, -4.7692, -0.6320, -2.7451, -1.7509, -4.8709, -2.5678,
         0.3562, -2.6621, -1.3517, -4.4023,  0.3111, -7.1762,  2.1267,  2.1697,
        -3.2587,  0.2206, -2.1454,  0.6647], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5592, -2.0712,  0.1776, -1.2073, -1.4030,  0.9233,  4.1797, -3.6302,
        -4.7707, -3.5834, -1.4741, -3.5181, -3.7255,  1.9579, -5.7929, -0.1168,
        -1.2471, -0.8906, -8.6673,  3.3253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4988, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4757,  2.1634, -3.0316, -0.1225, -1.0497, -2.7209,  1.2833,  2.9034,
        -5.3724, -0.3069, -2.5134, -0.3666, -4.9932,  1.0964,  1.1348, -1.2374,
        -2.2338, -0.5373, -3.6385, -0.3067], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6125,  -1.0450,  -2.0473,  -0.8527,  -6.9728, -11.4346,   1.5646,
         -1.2365,   0.4649,  -1.7192,  -2.5214,  -0.4754,   4.5908,  -3.9045,
         -0.9595,  -1.2738,  -0.3207,  -6.0715,   0.4225,   0.0861],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4309, -10.3123,  -4.8066,  -5.4365,  -6.2756,  -1.5229,  -7.8031,
          4.2671,  -6.4780,  -3.2809,  -1.8604,  -1.6401,  -2.3390,   3.5314,
         -5.6188,  -0.4575, -16.1910,  -2.9626,  -7.9856,   0.0211],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8620, -0.3798, -1.1502, -1.2872, -1.8872,  3.1795, -2.3704,  0.0356,
        -1.7159, -1.8368,  1.1760,  4.1939, -2.5754, -2.4337, -3.0365, -4.2962,
        -0.0748,  3.5238, -2.5375,  0.2401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8674,   2.6713,  -1.9472,  -2.4129,  -1.4726,  -2.2249,   1.6503,
          2.8244,  -1.6941,   0.7982,  -2.3157,  -3.4007, -10.6229,  -2.5606,
         -0.5445,  -5.7905,  -3.8492,  -3.0392, -22.3883,  -3.2117],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9699, -3.1205,  1.3270, -2.2909,  0.6974, -8.9370,  2.5554,  0.6620,
        -4.4554, -0.5992, -1.3979, -0.8665, -4.3604,  3.5442, -7.2036, -5.4543,
        -1.6157, -2.3168, -2.9145, -1.0791], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.9552,  -6.3574,  -2.5986,  -2.1929,  -6.4364,  -0.5841,   2.7770,
         -4.6878,  -5.8966, -17.2489,  -6.8283,  -3.5331,  -5.0684,  -1.9720,
         -0.2051,   2.5782,  -3.7441,  -0.3281,  -2.2010,  -6.5230],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1710, -4.5641, -1.5989, -9.8544, -4.5324, -9.1531, -9.4863, -8.1881,
        -4.0194, -8.8930,  0.8373, -3.7806, -4.2541, -4.6891, -7.7980, -5.3340,
        -5.0013, -6.8469, -3.8104, -1.3261], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5504,  1.5715, -3.0641, -3.2222,  0.5459, -4.2305, -0.0631, -7.8838,
        -0.9720,  1.3075, -2.2664, -1.1483, -2.5232, -6.3575, -1.2907, -0.1631,
        -6.8264, -3.5093, -3.1437, -0.6987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6554, -1.0664, -6.3138, -1.1619, -2.8334, -2.6124, -1.9662,  3.9426,
        -4.8206, -2.9092, -2.0429, -6.7457, -3.0158,  0.9482, -3.0210, -2.0659,
        -6.4690, -3.5520, -4.8057, -8.3098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4876,  0.2442, -0.8564, -0.2980, -8.8237,  2.0314, -2.6859, -6.2292,
        -0.7147, -2.5092, -5.4232, -0.4395, -1.2834, -4.3101, -4.6146, -1.9165,
        -1.1001,  0.5994,  3.8671, -2.7106], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5025, -2.2823,  0.0720, -3.6021, -0.5779, -4.4939,  2.1294, -1.8747,
        -7.0634, -0.3211, -3.4467, -5.2897, -3.5036,  0.7802, -1.9743, -1.9481,
        -4.3347, -0.6112, -3.3656,  2.7853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1212, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0777e+00, -6.3541e+00, -1.7255e+00, -6.9301e+00,  7.7093e-04,
        -1.5234e+01,  3.6875e+00, -6.4432e+00, -1.0726e+00, -2.0652e+00,
         3.6394e-01, -5.5188e+00,  1.8522e+00,  1.3375e+00, -5.8960e+00,
        -2.1504e+00, -1.5812e+01, -4.4364e+00, -8.5848e+00, -3.4230e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4146, -2.5333,  4.2478, -8.8263,  0.0089, -3.4249, -2.1966, -1.1563,
         3.0276, -3.1406,  0.1468,  0.0783, -1.5248,  1.0079,  4.6090, -3.9812,
        -0.2092, -0.5540, -2.4566, -3.9527], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7182,  -1.4880,   2.5089,  -1.9518,  -1.3505, -12.2508,  -6.9199,
         -7.0136,  -5.8180,  -0.8402,  -0.4024,   4.5861,  -2.7335,  -0.1416,
         -2.2553,  -1.1062,  -0.8582,   1.3314,  -3.0438,  -3.1749],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8036,   3.7719, -20.6771,  -2.1069,  -1.0312,  -4.7721,  -0.8149,
        -14.5489,  -9.1130,  -7.1394, -12.4085,  -1.5287,  -8.9829,  -1.4571,
         -4.4575,   1.9990,  -3.4553,   0.5570,  -1.3733,  -4.7934],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2390,  -0.5055,  -3.4127,  -1.6960, -13.1946,  -3.9585, -24.4865,
         -2.1439, -11.9069,   4.1415,  -1.9688,  -4.5544,  -6.9526,  -9.7985,
         -4.1165,  -5.9501,  -1.9532,   0.3371,  -4.3691,  -2.8825],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8706, -0.0675, -4.3829, -0.1476, -3.0853, -0.9544,  1.0779, -3.6503,
        -1.0694, -2.1274, -3.1762, -3.0548,  1.4132, -2.4756, -0.5011, -1.0397,
        -1.2971,  1.7253,  4.0145, -4.0557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4862, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9083,  -0.2502, -10.0510, -10.3944,  -0.1156,  -4.1346,   0.1852,
         -3.0651,  -1.4787,  -3.4765,   1.9895,  -3.7917,  -1.9068,  -0.5990,
         -2.2953,  -0.5472,  -1.5448,   1.9012,  -2.1247,  -5.9489],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5278, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9126, -0.7057, -3.9916, -1.0138,  3.3125, -3.1703,  0.0917, -1.2256,
        -2.0990, -0.1734,  3.4987, -2.5624, -0.6367, -1.4688, -2.1152,  0.2123,
         4.5575, -3.1722, -0.9641, -7.1218], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6340,  3.3732, -2.7097, -0.9167, -0.8346, -0.5635, -0.9518,  2.3419,
        -4.4303, -0.5970, -2.1230, -0.3979, -4.2910,  5.0021, -3.6812, -2.0766,
        -1.2448, -6.5751, -0.8766,  1.4936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.7548, -3.8819,  0.6429, -2.3139, -2.6021, -4.7013,  0.5715,  1.5853,
        -4.8469, -3.4498, -1.0858, -0.4752,  1.3562,  4.5287, -1.8551, -1.4080,
        -8.3187, -6.2936, -4.8918, -6.7931], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.0993, -14.0473, -11.6779,  -3.7704, -27.9426,  -6.4307,  -4.5550,
         -2.5479,  -3.7105,  -4.2389,  -4.2211,  -4.5083,  -5.5975,  -8.6239,
        -10.8470, -10.1695,  -7.6105,  -9.3972,  -9.6446,  -3.8312],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.4736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1715, -3.1696, -4.7271, -4.7619, -3.7795,  2.8769, -3.2198, -2.8174,
        -4.3427, -4.8381, -0.4529,  2.6255, -5.5177, -1.8283, -1.5816, -3.8432,
         1.3702,  2.7379, -2.8558,  0.4896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4039,  -2.5397,  -2.8503, -13.1602,  -4.0944,  -0.3917,  -3.7133,
         -2.7017,  -4.2021,   0.9841,  -5.3830,   1.1197,   2.3177,  -1.8296,
         -2.5189,  -3.6497,  -0.7301,  -5.9473,   1.3619,   2.5163],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7868, -0.9807, -0.9925, -0.6193, -4.0210,  4.8936, -1.1856, -2.1278,
        -3.8245, -3.0785, -6.1792, -0.4171, -1.8548, -4.2032, -1.7012, -2.2217,
        -1.5177,  1.5807,  1.3969, -4.4324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1556, -6.0911, -3.2201,  0.3758, -4.3476, -2.7471, -8.2027, -7.3232,
        -3.7474, -6.1164, -1.1759, -0.4069,  1.9236, -1.4062, -2.3757, -2.9127,
        -3.2917, -4.1678,  1.7641, -5.2151], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0498, -10.2213,  -4.9418,  -8.3212,  -4.9668,  -4.1623,  -6.6432,
         -3.1168,  -7.8600,  -3.7058,  -6.9432,  -4.7132,  -6.3731,  -2.2242,
         -6.4208, -28.2953,  -4.0663, -11.6143,  -6.2774,  -9.8456],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6654, -3.9123, -5.5353, -0.2725,  2.5686, -4.1144, -0.6961, -2.3082,
        -0.5839, -2.1552,  4.4781, -1.6035, -1.9565, -3.6090, -2.4553, -2.0036,
         2.2299, -1.4920, -4.9515, -4.0747], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6871,   0.3820,  -1.5928,  -1.6232,   0.5479,   2.9925,  -4.4789,
         -0.7008, -12.9099,  -3.7648,  -9.9384,   0.0846,  -1.3035,   3.3894,
         -7.6569,  -1.4708,  -1.9462,  -2.7747,  -1.1778,   2.4395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0915,  -7.5403,  -0.3219,  -6.8141,  -2.4818,  -4.2084,  -0.8846,
         -2.4065,  -0.5568,   0.9903,  -7.5668,  -2.2217,  -3.7759,  -4.5191,
         -1.2938,   1.6149,  -2.7450,  -6.3466, -34.1600,  -8.7322],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8238,  2.0371,  1.4231, -1.6377,  0.0817, -4.7590, -0.9504, -0.6209,
         4.6907, -6.5073, -1.3342, -2.0799, -0.8709, -5.3530,  1.5086, -0.2044,
        -3.1752, -0.9576, -0.8437, -4.4370], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5908, -7.1618, -4.4734,  1.7711, -4.1496, -1.4759, -3.3643, -4.8952,
         0.6583, -2.6518, -2.1998, -4.0001, -1.6794, -3.9753,  0.8434,  4.8578,
        -2.1632,  0.0830, -3.8411, -1.9158], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5402,  -2.2085,  -3.3881,  -1.6489,  -5.7499,  -7.2958, -10.6055,
         -5.0380,  -0.5172,  -2.1522,   3.3076,  -3.8197,  -0.7257,  -0.7701,
         -2.9034,  -0.1059,   4.2051,  -4.0219,  -1.7957,  -2.7214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-20.9527,  -4.1918,  -1.5423,  -1.7888,  -6.4620,  -5.2428,   0.8093,
         -2.6659,  -0.4167,  -2.6433,  -2.8542,  -0.3807,   4.9143,  -2.4044,
          0.3230,  -2.4136,  -2.2559,  -0.8452,   2.5913,  -8.5877],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5902, -3.8287, -0.4306,  2.2636, -3.7507, -0.2244, -3.8873, -1.3982,
        -3.8588,  2.0694,  2.5444, -2.1041, -1.1555, -2.4970, -9.8511, -0.4343,
         2.4245, -3.1448, -0.4421, -2.4233], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3811, -0.5622, -0.9380, -0.0828, -9.0117,  3.5750, -5.3512,  0.3129,
        -1.4608, -3.6349, -3.4372,  0.6797,  2.7548, -4.6769, -2.0331, -3.0234,
         0.4658, -3.6515,  2.2194, -6.4513], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6091,  -1.0483,  -2.5658,  -2.6976,  -3.9807,  -0.9413,   2.8539,
         -3.3507,   0.5476,  -1.8902,  -2.3873,   0.3221,   2.5855,  -3.0638,
         -3.9114, -10.6109,  -6.5694,  -4.6403,  -4.5971,  -1.6675],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1263, -0.5244, -5.9364, -4.3355, -4.6413, -5.2736, -5.5396,  0.3891,
        -6.1401, -3.0700, -6.2393, -9.5583, -6.5803, -5.3168, -6.6506, -1.5813,
        -1.4625,  1.0707, -7.6634, -4.0375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4706, -1.4100, -0.3502, -6.0387,  2.1041,  0.0763, -2.5298, -1.3251,
        -0.6049, -4.9398,  0.0632,  0.8022, -2.9704, -4.2493, -8.7991, -5.6458,
        -3.7507, -4.2731, -2.0689, -0.9718], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2220,  1.0412,  1.7703, -2.1676, -0.3695, -1.3471, -0.5157, -1.1250,
         4.1929, -2.4781, -0.4057, -2.8906, -5.5946,  0.9552,  1.3620, -6.9595,
        -3.3542, -3.1060, -3.4511, -3.0906], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3605, -1.1854, -0.1747, -3.8008,  3.8699, -2.4298, -1.6075, -2.0220,
        -2.4249,  1.3191,  4.3815, -2.3153, -3.3081, -3.6269, -4.3501, -6.1501,
         0.0848,  1.9612, -1.2228,  0.0967], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3097,  1.3848, -4.3762, -0.6701, -1.4568, -2.1659,  0.7221,  4.3259,
        -5.6378, -0.4027, -1.4840, -1.0216, -4.2788,  2.0838, -1.1208, -4.0668,
         0.2892, -0.7019, -0.0577, -2.8458], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8670,  0.9683,  3.6574, -2.6989, -0.3216, -3.2787, -3.8048, -1.7544,
         2.8003, -2.6714, -0.3374, -0.4420, -3.1629,  1.8443,  2.1584, -3.8458,
        -6.0892, -1.1665, -3.8439, -7.6221], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5010,  -4.0018,  -3.9705,  -2.1020,   1.1064,  -9.7052,  -3.8933,
         -3.5638,  -3.3876,  -9.3272,  -0.0818,   1.4546,  -1.9936,  -6.9108,
        -23.7684,  -4.5864,  -8.9268,  -1.2482,  -2.5052,  -2.1905],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.5679, -3.0648, -0.2224, -3.0846, -4.5658, -8.2680,  2.3383, -5.9570,
        -2.3514, -0.2375, -5.1653,  1.8332,  1.0994, -2.0670, -0.7529, -4.3151,
        -0.9038, -3.0383,  3.5263, -1.3263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5634,  -3.4788, -11.8266,  -8.2082,  -4.2085,  -2.6876,  -6.9958,
         -0.2131,   2.2967,  -4.2470,  -0.8326,  -3.9083,  -0.5240,  -2.1827,
         -0.3571,   3.9768,  -1.2793,  -1.0436,  -2.1395,  -2.1443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1810,  -0.7790,  -2.8051,  -1.6666,  -0.8708,  -5.1731,  -2.8984,
        -17.5029,  -3.4030,  -6.1591,  -0.8850, -13.7035,   1.0606,   2.2170,
         -9.3799,  -2.5128,  -0.6618,  -0.5893,  -4.3332,   2.7679],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7625, -1.6421, -3.9942,  1.0816,  0.5506, -3.6944, -4.0440, -6.3474,
        -2.2806, -2.0207,  3.9984, -0.8991,  0.1139, -3.2595, -2.9927, -0.9225,
         3.4232, -1.0921, -0.6352, -0.2091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1925,  -3.9626,  -0.6973,  -2.4965,  -2.9224,  -4.5941,   1.5637,
         -4.3277,  -1.0647,  -4.5444,  -1.5179, -11.0960,   2.3500,   0.1592,
         -6.7240,  -2.3925,  -3.7541,  -0.8338,  -3.4436,   1.2497],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.9636,  -2.5517,  -8.8419,  -1.0219,  -0.0829,   3.8787, -11.1299,
         -1.4548,  -3.3545,  -0.0297,  -5.2446,   1.8561,   1.8292,  -2.5209,
         -0.1935,  -2.8794,  -0.9549,  -3.4653,   2.2974,   0.1110],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1364,  -4.5091,   0.9035,  -0.7624,  -2.5410,   0.7511,  -2.6623,
         -1.4666,   0.7061,   0.2550,  -3.3212,  -1.7813, -10.5915,  -6.6928,
        -25.9145,   1.1639, -16.1386,   1.1582,  -8.1264,  -7.0673],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0238,  -3.0778, -10.2571,  -3.7643,  -4.4678,  -7.2461,  -5.3205,
         -8.6152,  -4.1682,  -1.0417,  -2.1328,   0.3905,   3.9075,  -1.9972,
          0.5942,  -2.0129,  -0.9749,  -1.8782,   4.2482,  -2.7728],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9679,  3.6324, -2.4095, -1.6281, -3.0761, -5.0269, -4.7848,  1.2013,
        -5.9429, -0.8781, -1.7998, -2.3931, -0.1080,  3.9359, -1.6370, -0.7900,
        -0.4612, -0.6428,  1.3149,  3.7723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5270, -1.4835, -3.0624,  3.5043, -3.7454, -1.3209, -4.3081, -1.3342,
        -3.5592,  1.6668,  1.4925, -1.4176, -0.2910, -1.5582, -5.5850,  0.8505,
         2.2985, -7.3697, -1.8154, -0.9531], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9560, -2.2382,  1.9757, -2.2777, -2.5784, -0.9934, -3.5595, -4.3865,
         0.1260,  2.4474, -4.6308, -2.0844, -3.6098, -9.9485,  1.1892,  2.9906,
        -8.6191, -1.6060, -2.9599, -0.4513], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6106, -12.7752,  -2.6614, -11.4174,  -3.7599, -23.0806,  -1.8359,
        -10.0007,   0.1620,  -8.5175,  -4.3893, -24.3322,  -4.8201,  -7.3421,
         -1.4679,  -6.1847,   2.0296,   1.8659,  -4.6847,  -0.5283],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7804, -1.5882,  5.5501, -1.5199, -1.8461, -3.3276, -0.7696, -7.3955,
        -5.1231, -1.1847, -1.9114, -0.8478, -1.5965, -3.2300,  0.9153,  1.1935,
        -3.7258, -2.3766, -3.7930, -0.6244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6991, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4630,  3.3710, -3.1475, -5.5167, -4.0325, -2.9606, -2.7504,  0.0543,
         2.5100, -2.1792,  0.3025,  0.0207, -4.5879,  1.8870,  4.1466, -2.1463,
        -0.4741, -1.7122, -2.3127,  1.0723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8005,  0.8640, -0.7469, -2.9161,  0.3060, -1.3635, -2.8642, -4.2554,
         2.1524,  1.4735, -1.1679,  0.0783, -2.6851, -2.5015,  1.9036,  2.7352,
        -1.9716,  0.3551, -0.9164,  0.5968], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.5167, -3.2367, -0.3455, -2.9606, -2.6597, -4.8331, -0.1982,  2.9312,
        -2.2191, -0.4229, -2.4020, -4.4767, -0.3876,  1.8112, -9.8353, -1.0748,
        -2.5800, -2.3138,  0.4803,  4.5503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8912,  -2.7139,  -8.4080,  -6.4127,  -6.7990,  -4.8847,  -1.8508,
        -13.1134,  -4.5874,  -4.8700,  -7.1676,  -0.1727, -18.9860,   5.4136,
         -3.8756,  -3.4787,  -2.4619,  -3.7657,  -7.2063,  -0.0709],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3325,  2.0862,  4.2481, -3.4374,  0.3982, -3.0622, -0.9934, -3.7549,
        -1.7901,  3.0385, -3.5521, -1.9416, -2.2745, -1.5940, -1.1781,  3.0431,
        -2.0385,  0.2659, -1.4725, -1.6169], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9363, -3.6643, -1.3077, -5.0069,  0.8368,  0.8144, -1.6240, -1.7265,
        -0.8789, -3.0388,  2.1936,  1.2820, -4.1051,  0.1115, -4.4563, -2.6358,
        -5.0908,  0.5747, -3.8524, -5.3017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5198,  -2.7165,  -1.9834,  -3.8750,   0.6698,   2.3059,  -2.4306,
         -1.4378,  -3.1729,  -7.4679,   0.4388,   3.2436,  -2.9363,  -2.2751,
         -9.2950,  -6.9720,  -3.0567, -11.1474,  -2.5156,  -1.8134],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2400, -1.1209,  2.2286, -2.8416, -1.9297, -2.8668, -1.4221, -4.0493,
         2.1097,  0.4528, -4.4708, -0.5670, -1.7430, -1.9003, -1.2590,  0.6966,
        -5.0604,  0.3394, -3.6256, -1.3693], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6655, -1.2455, -5.3006,  4.6236, -5.0952, -1.3957, -9.7979, -4.4698,
        -6.9104, -3.0152, -8.5375, -0.7525,  3.0692, -4.7083, -0.7631, -3.8892,
         0.2362, -4.6328,  0.4489,  1.5792], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4007, -2.5349, -0.0563,  2.9687, -2.5433, -0.1386, -0.5905, -0.3908,
         1.9587,  3.9992, -5.3582, -4.4409, -2.8401, -1.2258, -6.6306,  1.4150,
         0.0490, -1.6041, -1.5834, -0.5554], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5273, -2.0322, -0.3823, -5.2708, -1.1358,  0.9388, -2.2258,  0.4635,
        -1.9575, -1.2281,  1.3195,  4.3829, -2.8841, -1.8503, -2.8635, -1.7437,
        -4.7745,  0.0580,  0.6076, -3.5215], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1586,  2.1663, -2.0953, -0.0932, -0.8913, -0.4620, -1.0081,  5.0694,
        -5.0957,  0.5663, -1.2425, -1.9601, -0.7301,  4.0484, -1.4306, -0.0285,
        -1.3764, -1.1074,  0.8781,  3.5341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0550, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7197,  -3.3907,  -4.6650,  -4.8140,  -5.2713,  -5.0707,  -6.4249,
         -6.7391,  -7.0168, -12.3118,  -1.7155,  -3.6221,  -4.9477, -10.0333,
         -5.3674,  -7.7727,  -8.3894,  -9.4134,   0.9469,  -2.4351],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9410,  -4.8675,  -5.7809,  -1.6015,  -0.0977,  -3.3001,  -2.3389,
         -8.6787,  -4.1542, -10.8555,  -2.6354,  -1.3962,  -2.9543,  -5.5968,
         -4.5475,  -4.3328, -14.6939,  -7.2416,  -6.7566,  -4.6500],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5842,  3.8041, -3.1147, -0.4755, -2.4569, -1.0925, -6.4490,  1.9473,
         2.2309, -2.9751, -1.0914, -0.6462, -5.9660,  1.5418,  4.1326, -2.8158,
        -1.5836, -4.3227, -0.9715, -3.5122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6306, -12.1453,  -6.5051,  -0.7807, -14.7673,   3.2984,  -1.1036,
         -4.4077,  -9.5057,  -7.1427,  -7.7182,   1.1144,  -9.0954,  -5.9074,
        -30.1197,  -4.5503,  -6.1783,  -6.2470,  -5.1392,  -4.9580],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8033, -1.0244, -6.6325, -3.8539, -1.1888, -7.7133, -1.2655, -1.3263,
        -1.1580, -2.1943,  4.1995, -8.3073,  0.4698, -2.3541, -5.0019,  1.1307,
         4.0726, -1.9865, -0.3139, -0.6185], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2691,  2.1256, -1.2061, -1.2902, -1.6169, -0.7489, -0.3879,  5.3649,
        -2.4182, -2.2100, -6.4954, -5.7587, -2.6959, -6.3219, -1.1587, -1.2324,
         0.5220, -3.8586, -1.5514, -1.3355], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2655,  -5.3709,  -2.9131,  -6.1268,  -4.0831,  -6.4939,  -1.0422,
          0.9997,  -2.1908,  -2.8586,  -3.6137,   0.1585, -11.6203,   0.9185,
          1.4351,  -1.9002,  -3.1548,  -1.9700,  -1.3967,  -5.1030],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7156,  -0.8691,  -9.0966,   0.0892,   1.0093,  -1.5409,  -0.2934,
         -2.2388,  -0.5286,   0.1202,   4.6040,  -3.6252,  -6.3165,  -5.4329,
        -16.8907,  -2.1746,  -8.0181,  -0.4329,  -0.7620,   3.4045],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.6281,  -7.3528,  -5.9675,  -5.6346,  -0.5010,  -0.5665,   3.9132,
         -3.3167,  -1.1199,  -4.3541,  -0.4545,  -3.8538,   1.5130,   3.4522,
         -2.6828,  -0.1366,  -1.7957,  -5.0763,   0.4888,   3.5630],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.6706,  -2.2446,  -2.7358,  -1.9282,  -5.2932,  -1.0996,  -6.1190,
         -9.6278,   0.3227,  -2.8504,  -6.7674,   0.4881,   1.8920,  -4.4249,
         -1.0033,  -2.1666,  -1.9293,  -4.0104,   3.4212,  -3.2853],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1047,   1.6570,  -2.3676,  -0.7627,  -3.6729,  -1.2016, -15.1128,
         -3.6384,  -0.9121,  -2.0596,   0.4777,  -0.9694,  -3.3955,   1.7015,
          2.6695,  -3.5630,   0.1609,  -4.3227,  -1.1792,  -5.5378],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8033, -2.6401, -1.3083, -0.4393, -8.6360,  0.8582, -2.3692, -2.4452,
         0.5333, -1.6116,  0.1508, -1.8835,  3.2524, -2.7064, -0.4691, -2.3585,
        -2.3488, -3.4218,  1.3712,  1.8571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7200,  2.5393, -3.5907,  1.2803, -1.7122, -1.6665, -4.1883,  2.5953,
        -1.2288, -1.5434, -2.8595, -3.2957,  1.9507, -1.0256, -4.1291, -2.6265,
        -2.4998, -9.4715,  0.4915,  2.9437], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5845, -0.7877, -2.9917, -0.9133,  3.2277, -0.8140, -6.3553, -3.1867,
        -1.2550, -5.2914, -0.9906,  2.8272, -2.8223,  0.3141, -1.2715, -2.2093,
        -3.1147, -1.2182,  2.2222, -2.2195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5363, -1.6105,  0.7862,  5.3720, -1.5992, -5.7643, -7.3356, -5.6231,
        -3.1827, -4.5247, -4.2554,  0.4985,  4.4745, -8.9431, -1.9503, -1.8134,
        -4.2213, -0.4329,  3.0042, -4.9056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6389, -1.1497, -1.8620, -0.1130, -4.6801,  1.8111,  1.4499, -2.3366,
        -1.3495, -1.5687, -0.5099, -1.7634,  4.9678, -2.5837, -0.7897, -3.3786,
         0.3328, -3.8850, -4.5441, -2.5339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.9132, -2.7407, -5.3393, -3.2939,  0.4516, -8.3131,  2.3802,  2.3157,
        -3.9522, -2.1499, -3.3634, -2.0625,  0.5320,  2.6538, -3.0103,  0.7508,
        -2.7019, -1.4913, -3.9705,  2.7363], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9551, -2.2637,  2.4610, -3.9844, -0.7845, -1.8890, -1.8655, -2.9946,
         0.0668, -3.7445, -2.3904, -9.1386, -8.1328, -3.5325, -4.7589, -1.6121,
        -3.8829,  2.8645, -5.2250, -3.7970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3197,  -2.0952,  -6.1467,  -0.7120,   1.0330,  -4.5224,   0.7932,
         -2.1294,  -2.1895,  -0.4261,   3.2248,  -6.3011,  -0.0290,  -2.8804,
         -6.4652,   0.8573,   3.4369,  -5.0702,  -3.3419, -10.0073],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.7146,  -1.7582,  -1.5791,  -2.0590,  -6.6625,  -1.4840,   0.7642,
         -3.8295,  -1.9210, -10.6225,  -5.2428,  -4.0681,  -6.3614,  -2.9476,
         -5.8439,  -0.1056,   4.6718,  -3.4793,  -0.7813,  -1.9086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8731, -1.7688, -3.1649,  0.5109,  2.7574, -2.8925, -1.1658, -2.9998,
        -0.0109, -4.8052, -1.7865,  2.2531, -1.7125, -0.1352, -0.1036, -0.6780,
        -5.1207,  2.3264,  0.4937, -2.6145], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3906,  0.0827, -1.2509, -1.3932,  1.2662,  3.7438, -1.5466, -2.5663,
        -1.7127, -3.6770,  0.6510,  2.8831, -2.5923,  0.4358, -0.5070, -4.6769,
         1.6894,  1.4835, -3.5410,  1.1739], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1761, -1.8819,  0.4561, -1.3999, -1.8847, -8.7798,  4.8225, -1.7092,
         0.0524, -0.7717, -1.0048, -6.5216,  4.7013, -3.2081, -1.3757, -1.1328,
         0.0574, -8.2504,  5.8527, -4.0969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4779, -1.6920,  0.0195, -2.4363, -5.1912,  0.2191,  3.1961, -4.8050,
        -0.3797, -4.1828, -5.9019,  1.5411,  2.5420, -6.6005,  0.9012, -3.1317,
        -1.6425, -6.8374,  1.8653,  0.3785], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0103,  -3.6970,  -0.2270,  -4.3932,  -2.8399,  -1.9129,  -2.6688,
          2.3478,  -4.3182,  -0.9627,  -3.3245,  -3.0377,   0.4042,   3.1638,
         -1.9580,  -0.6705,  -2.9621,  -4.2486,  -4.8997, -14.2875],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  -3.4781,   -7.4206,   -0.6173,   -6.2303,   -5.5505,  -64.0911,
         -32.4252,   -4.2105,  -13.5811, -145.1342,   -8.4791,   -4.8943,
          -3.0040,   -2.0440,   -1.0470,    1.6800,   -7.0488,   -8.2288,
         -40.1381,   -7.4965], device='cuda:0', grad_fn=<SumBackward1>) tensor(-18.1720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2824, -3.7350,  2.3772, -2.9374, -4.2414,  0.1140, -2.2000, -3.1010,
         0.7185,  3.1976, -0.8668, -2.4895, -0.8445, -1.0312, -9.6571,  1.4005,
        -1.8069, -3.3218,  0.1145, -1.4928], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4704, -5.0816, -0.1478, -1.2336, -6.4042, -0.3361, -2.0320, -3.0014,
        -0.4818, -3.4435, -0.8248, -3.8883, -0.6192, -1.7706, -4.4487, -1.5456,
        -1.1110, -1.7779, -0.0657,  4.5096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  -1.2308, -124.0206,   -5.4040,   -2.6735,   -3.5417,   -2.0927,
          -2.2784,    1.5229,    4.0307,   -2.4683,   -1.1338,   -7.0487,
          -8.3204,   -2.8398,   -5.7410,   -3.6079,    0.2027,   -0.7861,
          -3.4681,   -2.9018], device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3694,  2.0452, -2.7122, -0.9901, -2.0495, -3.7278,  0.7805,  3.5150,
        -1.1337, -4.2022, -1.1655,  0.4196, -1.8748,  4.3175, -4.4684, -1.5926,
        -0.5972, -6.8879,  0.4530,  4.5199], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0442, -2.8740, -2.0397, -1.1977, -4.4405, -0.3674,  1.3520, -3.9818,
        -0.6760, -0.9623, -4.6520,  0.5013,  1.4194, -2.3364, -0.3356, -1.3741,
        -3.5949,  0.4025,  2.9860, -3.0336], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4237,  -6.0261, -35.3804,  -7.5814,  -3.1144,  -6.9314,  -8.2108,
         -1.9215,  -3.0905,  -4.4400,  -3.5929,  -2.7524,  -3.3087,  -5.3263,
         -5.5133,  -5.3083,  -0.2484,  -0.4235,  -7.5559,  -2.6448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5740,  -2.5190,   3.7098,  -2.7274,   0.1008,  -4.2710,   1.0843,
        -17.8108,  -1.8960,  -3.5188,  -2.2758,  -2.2414,  -4.7592,  -2.8819,
         -2.2909,   0.9886,  -6.4110,   0.6061,  -1.3302,  -6.1684],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3022, -5.1576, -5.6761, -1.6841, -2.7234,  0.5058, -5.5037, -5.0272,
        -3.7898, -6.5850, -8.0463, -5.6206, -3.5914, -2.5157, -8.4312, -7.1624,
        -4.0091, -5.3317, -2.3047, -3.6527], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2349, -5.6851, -3.8832,  0.3798,  1.2518, -9.4818, -5.9507, -0.9151,
        -0.2952, -7.8462, -0.7182,  0.3865, -2.9696, -0.0948, -1.9177, -1.5317,
         0.3800,  4.7983, -4.8084,  0.5212], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7071,  0.4836, -1.4875, -2.3074,  0.6618,  2.7432, -1.7053, -0.1553,
        -0.9970, -0.6880, -1.4327,  3.6112, -2.0000, -0.3952, -2.4907, -0.3055,
        -1.4731,  5.1560, -3.1343, -2.4835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9031, -1.3380, -2.7503,  0.1135,  4.3798, -2.4927,  0.2933, -2.5434,
        -1.6353, -6.2559, -1.6958,  0.6208, -2.3300, -1.2069, -0.9999, -4.1202,
         1.1713,  2.2840, -4.4394,  0.5332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1060, -5.4834, -3.1036, -4.8187, -1.2253, -4.7989,  1.2456,  0.3109,
        -8.2238, -2.3006, -3.9773, -0.3257, -4.2147,  1.0747,  1.7802, -2.6149,
        -2.6125, -3.9461, -5.2732, -5.3863], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5703, -1.7366, -1.8083, -6.0422,  4.4963, -3.1940, -0.1417, -3.3882,
        -1.9113, -7.8278, -1.9826,  1.4832, -1.3114, -1.5133, -0.4564, -9.6683,
         1.0956,  3.5459, -3.8654, -3.7779], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1541,  2.3595, -6.9026, -0.2948, -2.3829, -1.1175, -9.7227, -0.2404,
        -3.4060, -7.8335, -0.5843, -1.9217, -5.4787,  0.2956,  3.3879, -5.9707,
        -2.1740, -4.8364, -1.0340, -3.7768], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2892, -0.6848, -0.5543,  0.7180, -2.2086, -2.6193, -3.9093, -3.5819,
        -4.6365,  1.6808, -0.1069, -2.6935, -1.0290, -2.9169, -0.5495, -3.1322,
        -3.7853, -5.3203, -0.8987, -1.3615], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8094, -1.7015, -2.5174,  0.5827,  0.2140, -2.9599, -2.8512, -1.9668,
        -0.4634, -4.3551, -1.0172,  3.1586, -1.9989, -1.7213, -0.9683, -2.0055,
         0.6624,  4.0075, -1.5389, -2.5068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3139, -0.0930,  0.1324, -1.2008, -1.3510, -0.3236, -5.6108,  0.1276,
        -1.8371, -5.6908, -3.2265, -9.3031, -5.3458, -6.0663, -5.6125, -1.4004,
        -8.1665, -2.2641,  1.0051, -4.8725], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0566, -3.0693, -0.8135, -1.5360, -0.2312, -0.6006,  5.0860, -1.5598,
        -2.6701, -2.0447, -5.0596,  0.5237,  0.8554, -3.1439,  0.6409, -0.7429,
        -0.0727,  0.9628,  4.2480, -1.1166], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0775,   1.1932,   0.8036,  -2.7996,   0.0212,  -4.2145,  -1.9588,
         -6.1720,   0.9423,  -2.3876,  -3.6664,  -4.8650, -12.5577,  -3.3642,
         -5.0895,  -0.5132,   1.5890,  -2.6834,  -6.2628,  -1.5835],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8823, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7717,  -5.7204,  -4.7256,  -5.7698,  -3.3953,  -6.8389,  -7.9478,
         -4.2476,  -1.1989,  -3.3132,  -2.7467,  -1.7719,  -5.1276,   1.5757,
          3.3226,  -2.2377,  -3.1225, -13.4621,  -3.5406,  -3.5459],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1576, -5.6761, -1.6841, -2.7234,  0.5058, -5.5037, -5.0272, -3.7898,
        -6.5850, -8.0463, -5.6206, -3.5914, -2.5157, -8.4312, -7.1624, -4.0091,
        -5.3317, -2.3047, -3.6527, -1.7058], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9742,  1.4121,  4.5187, -2.4013, -2.5025, -4.4700, -0.6669, -5.4067,
         1.2846, -0.3501, -2.9514, -1.3994, -5.0805, -0.4228, -4.6190,  0.8322,
         2.4774, -3.6919, -2.5939, -3.2697], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5075,  -2.4377,  -3.6783,  -6.9385,  -6.9562,  -7.0013,  -6.8588,
         -2.5494,   0.9851,   0.2167,  -3.7054,  -2.5785,  -2.6257,  -5.3917,
         -0.1315,   1.5197,  -4.6984,  -2.5701, -17.9307,  -5.0289],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7680e+00, -3.8552e+00, -4.1657e+00, -4.5662e+00,  3.7473e-03,
        -5.8479e+00, -2.6875e+00, -1.2658e+01, -7.2334e+00, -4.2241e+00,
        -6.5519e+00, -2.7196e+00, -5.3748e+00,  3.2555e+00, -5.3398e+00,
        -1.7267e+00, -3.9883e+00, -2.2592e+00, -1.7212e+00,  1.4082e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2808,  -1.7495,  -1.7842,   0.9159,   2.8339,  -3.3160,   0.1614,
         -2.2254,  -3.7693,   1.2450,   2.5852,  -7.8955,  -1.7361,  -2.5181,
         -1.0970,  -3.8012, -56.8049,  -1.8580,   0.4352,  -1.4931],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5705,  3.9663, -3.9216, -1.4607, -2.9858, -3.4488, -2.9561, -0.5741,
         1.6959, -4.7545, -0.4569, -3.2894, -1.2755, -4.6293,  0.1817,  1.9642,
        -3.9854, -0.8658, -1.6390, -4.8683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5533,   3.1113,  -7.6841,  -4.6027, -11.1456,  -5.8163,  -2.1703,
         -5.2796,  -0.8788,  -0.8739,   1.4652,  -5.1858,  -0.9840,  -3.1420,
         -0.1757,  -6.0380,  -2.5417,   2.3071,  -2.9487,  -1.4764],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8968,   1.8916,   4.2893,  -2.4249,  -1.1080,  -3.9330,  -2.2470,
         -3.1287,   1.8136,  -0.7279,  -1.9036,  -0.0740,  -4.9288,   0.2210,
        -13.0241,  -2.8353,  -2.8153,  -6.3135,  -9.9392,  -4.1933],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6402, -1.5608, -6.0396, -2.2981,  0.6827, -3.0960,  0.9255, -2.7615,
        -0.8658, -6.6759,  2.2209, -0.7539, -2.8238,  0.9410, -2.2523, -3.4139,
        -2.5001,  2.7781, -3.9824, -2.4305], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6122, -4.3496, -1.6231, -3.9600,  1.5779, -0.2357, -1.9891,  0.9824,
        -2.8500,  0.3895, -1.6717,  2.4766, -3.6129, -3.9138, -2.7586, -0.8828,
        -8.1147,  0.5284,  0.8899, -2.0319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1584,  1.5866, -2.0953, -1.4879, -3.8076, -3.9418, -0.7547,  0.8190,
        -2.4657, -2.6770, -2.3906, -2.4955,  0.7234,  2.2356, -3.6053, -0.2432,
        -3.8190, -3.9078, -2.8491,  3.1893], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9088,  -7.7448,  -9.4762, -11.1808,  -4.3979,  -6.2236,  -5.1646,
         -4.6473,   1.1696,  -4.7442,  -3.4624, -10.5541,  -7.6034,  -5.1522,
         -6.3416,  -2.8474,  -3.6220,   2.0520,  -5.3919,  -1.0833],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6683, -3.8893,  0.5797, -2.4478, -2.8583, -2.6942, -1.3999,  1.2368,
         4.8220, -2.9282, -1.2989, -8.1517, -6.4233, -4.3587, -4.8205, -4.4428,
         0.7230,  2.4338, -4.5950, -1.5958], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2514,   0.0577, -13.4665,  -2.8694,  -9.1284,  -0.4984,  -2.6741,
          0.3045,  -4.7004,  -0.8874,  -3.4033,  -2.6591,  -6.0633,  -1.2738,
          0.5450,  -5.3279,  -0.3106,  -1.9260,  -2.8923,   1.3955],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0515, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7426, -1.2335, -2.8943, -3.5197, -3.3182,  3.6592, -1.5334,  0.2944,
        -2.5675, -5.5842,  0.1617,  3.1096, -3.0453,  0.2655, -3.6479, -3.0019,
        -5.2301, -0.3428, -0.2661, -4.0854], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9261, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1545,  -1.7898,  -8.3545,  -7.0259,  -3.2872,  -5.8128,  -0.7539,
         -0.0838,  -0.3906, -13.2482,  -2.4612,  -2.1680,  -2.9262,  -3.5128,
          0.9667, -13.4705,  -6.0832,  -7.1476, -12.3202,  -9.6660],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.0787,  -3.1416,  -0.0430,  -3.0585,  -3.5689,   1.8273,   4.2582,
         -2.5914,   0.1953, -16.6134,  -4.4436,  -2.9424,  -5.9682,  -0.6077,
          0.2539,   2.3932,  -6.0961,   0.3536,  -1.4191,  -0.4072],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2035, -1.2276, -2.4269, -0.6188,  2.2879, -3.6315, -0.4860, -5.2777,
        -1.7199, -6.8000, -1.2676, -0.3439, -3.1245, -0.2176, -0.8074, -3.9585,
         1.2042,  2.5644, -3.7317, -1.5516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2790, -4.3679,  0.5815,  2.8384, -2.5774,  0.1875, -1.6132, -4.0652,
         2.0035,  2.9885, -2.2980,  0.4806, -2.6127, -2.0345, -8.0601,  0.1253,
         0.9272, -3.3501, -0.4349, -1.4601], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0810, -2.6917, -1.6376, -8.9881,  0.5197,  0.1066, -2.4338, -0.8676,
        -1.8930, -4.6283, -0.2265,  1.0059, -2.7732, -1.7375, -0.6479, -1.4295,
         0.9430,  4.5638, -4.8037, -1.3030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4572,  2.9637, -4.5681, -2.9654, -2.9880, -1.0870, -6.1906, -0.1860,
         2.4666, -1.9387, -0.7856, -2.4487, -1.2677,  1.3693,  3.2675, -1.4911,
         0.2955, -0.9883, -1.6291,  0.6104], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7595, -2.4530, -0.7921, -0.4927, -1.3139, -5.3926,  2.6871,  0.6706,
        -2.8386,  0.9154, -2.2591,  0.0620, -0.7188,  5.0728, -1.4234, -1.7827,
        -1.2165, -1.3544,  0.6041, -4.5424], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4761,  -0.5361,  -3.6598,   4.0225,  -1.9942,  -1.7308,  -2.0684,
        -17.4983,  -9.1092, -20.4382,  -3.1315, -12.4642, -28.7700,  -4.5286,
        -11.4412,  -6.1982,  -5.7326,  -3.4946,   0.3221,   3.6184],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7073,  -4.1346,  -4.4946,  -3.4713,  -3.2556,  -4.7183,  -5.0211,
         -5.5391,  -3.9428, -14.0975,  -4.8674,  -6.2182,  -2.3700,  -6.3071,
         -0.6154,  -3.9873,  -4.3232,  -2.0275,  -5.7835,  -9.7221],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1426,  -1.0884,  -2.4008,  -5.4963,  -1.0276,   2.7957,  -6.5226,
         -3.4795, -15.0418,  -6.6888,  -6.3396,  -5.7783,  -5.1259,  -0.8374,
         -2.3526,   3.2227,  -5.4640,   0.6739,  -1.4070,  -0.9284],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4549,  -1.4352,  -3.6552,  -0.1580,   2.5833,  -2.8868,  -3.4105,
         -0.3437,  -1.4155,   1.5430,   2.9831,  -4.4000,  -4.7908, -16.4259,
         -6.8923,  -2.9669,  -4.0106,  -1.3186,   0.0868,  -1.3470],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0265, -3.1150, -7.0691, -3.5618, -0.4240, -3.4493, -1.6062,  2.7046,
        -1.8681, -1.2151, -2.4887, -1.3927, -2.8792,  2.8072, -1.3724,  0.0157,
        -3.5196, -3.4143, -5.9341, -0.7185], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6786,  -3.2738,  -3.3327,  -2.0747,   2.3000,  -1.5069,  -3.5793,
         -0.5300,  -0.8891,  -1.0888,   4.8214,  -2.4856,  -0.9941,  -3.6168,
         -5.3451,   0.1198,   2.9910,  -9.1508,  -9.8627, -31.3583],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9235,  0.4526,  2.6986, -1.9692,  0.1481, -3.1904, -0.1914, -5.9604,
         3.0272, -4.6113, -1.8401,  0.4183, -3.5509, -0.1039, -3.6819,  4.0264,
        -1.8865, -2.1462, -3.6659, -0.3723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1797,  -4.9704,  -4.0334,   3.4455,  -6.2008,   0.0158,  -4.5596,
         -1.5411, -14.2727, -14.2389,  -0.9301,  -3.8732,  -1.6097,  -2.6620,
         -0.2288,  -4.4392,  -1.7424,  -2.9426,  -3.7932,  -0.2423],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3905,  3.6680, -3.5180, -2.2300, -3.2094,  0.4507, -4.6248,  1.0173,
         2.3865, -3.1119, -2.1875, -4.0478,  0.4213, -6.5905,  2.1116, -1.4605,
        -2.0835, -1.1226, -1.9551, -3.7416], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1945, -4.2578, -0.9630, -0.7539, -2.2907,  1.0651,  3.6027, -1.2387,
        -0.7855, -1.0935, -6.3713,  0.6658,  0.3478, -2.8889, -0.6247, -2.9501,
        -1.0991,  1.4117,  4.0350, -1.5355], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9044,  2.4434, -3.8076, -1.7280, -1.0906, -3.1650,  0.4426,  3.1156,
        -5.7509,  0.1785, -2.9140, -3.2308,  0.3229,  4.3696, -5.2888, -0.1632,
        -1.6672, -0.8839, -4.1005,  0.0940], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4743, -1.3098, -0.4534, -1.2685, -0.0160,  3.4141, -4.6566, -0.5343,
        -3.2094, -2.6221, -8.9140,  1.4327,  1.2549, -3.0207, -1.6853, -1.0380,
        -0.4594,  0.4381,  2.0217, -2.4562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5227, -2.9060, -0.2695, -3.5466, -7.2292,  0.2423,  4.0294, -2.0711,
        -1.4352, -3.8650, -2.5281, -0.1576,  2.6912, -1.4265, -0.8391, -2.0168,
        -0.9661, -6.3247,  1.2759,  1.7622], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8191,  0.4517, -2.1375, -0.4173, -2.5810,  0.5358, -3.0274, -4.9019,
        -5.8083,  0.4264, -3.5363, -1.0823, -2.7745,  1.6876,  1.7865, -3.3796,
         0.0962, -1.3019, -0.1127, -7.7002], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1023,  -2.3759,  -3.5770,  -1.2329,   4.3273,  -2.3818,  -2.0698,
         -3.5429,  -2.1668,  -2.8590,   2.4462,  -4.2840,  -3.0323,  -1.9638,
        -13.9968,  -5.5255, -24.9047,  -4.8613,  -2.5728,  -1.6868],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.4679, -3.0008, -1.0480, -1.5594, -0.4001, -4.1992,  0.5700, -2.2461,
        -1.6333, -4.5753, -4.0481,  0.1121, -5.2483, -0.0297,  2.1984, -5.0294,
        -2.0991, -1.0920, -1.2202, -6.1020], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8091, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8008,   3.7581,  -3.0436,  -0.1385,  -2.3045,  -1.4073,  -5.8308,
          2.6058,   1.0709,  -1.8028,  -0.3517,  -1.2030,  -5.6446,  -0.9382,
          1.1257,  -3.4565,  -3.3069, -23.0486,  -5.2992,  -6.6858],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7403,  -3.8351,  -8.0528,  -8.6389,  -7.6093,  -7.4391, -20.0453,
         -1.7759,  -7.4872,  -0.6532,  -9.0705,  -4.0132,  -2.5384,  -3.0295,
         -3.6374,   0.2351,   1.4282,  -3.7137,  -0.0512,  -3.5056],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9782, -0.1260,  1.1060, -5.4168, -1.5030, -3.0656, -5.3683,  0.5086,
         2.2985, -3.4833, -2.9720, -7.1404, -2.5881, -5.3267,  2.4673,  0.8282,
        -3.3400, -0.6245, -1.6596, -4.8846], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5599,  -3.7304,  -3.1749,   0.1008,   1.5763,  -4.5729,  -0.9265,
         -0.2260,  -4.3695,   0.9790,   3.1877,  -2.2395,  -3.4752, -12.5166,
         -5.4626,  -3.8810,  -6.7679,  -0.8101,  -0.8610,   2.9744],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7573e-02, -1.6815e+00, -1.5530e+00, -4.2797e+00,  4.7007e+00,
        -2.1721e+00, -7.3994e+00, -2.7874e+01, -7.5205e+00, -6.7724e+00,
        -1.8061e+01, -6.1473e+00, -3.9246e+00, -1.5645e+00, -1.4090e+00,
        -3.7980e+00, -2.1519e+00, -6.3732e+00, -4.1652e-01, -2.9131e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2370, -1.5632, -4.2955, -0.7459, -1.5789, -2.3998, -3.0364,  4.2472,
        -1.4582, -0.5241, -1.4602, -0.2342, -0.2082,  4.2680, -2.5543,  0.1593,
        -1.6569, -1.4349, -5.2914,  1.1077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3203,  3.5490, -3.9650, -2.4786, -1.8693, -6.5773, -0.1463,  2.6808,
        -3.2986, -0.6392, -1.7182, -1.9068, -5.6845,  0.7475,  0.5285, -9.3736,
        -1.8490, -3.6951, -6.7665, -0.0892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5719, -2.9949,  2.0921,  1.7902, -3.8539, -6.3333, -1.1488, -3.7353,
        -7.4272,  0.0735, -0.9713, -2.5748,  0.1887, -0.8549, -3.0059,  1.2747,
         3.3100, -1.8192,  0.1792, -2.0235], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6074,  0.6773, -4.1866, -4.6179, -1.6801, -7.5013, -5.2609,  4.3943,
        -9.1889, -1.0459, -1.6794, -0.0721, -4.1164,  2.6265, -3.3504, -2.7895,
        -0.5555, -1.9460, -0.1410, -6.1724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.3792, -3.3736, -0.9414, -2.9279, -0.2359, -4.3433, -3.3734,  2.3883,
        -0.2211, -0.0872, -3.2323, -0.1840, -5.9416,  2.2136, -0.2735, -3.1399,
         0.5541, -1.8144, -3.4247, -3.6547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2977, -0.8880, -3.4897, -2.4460, -5.2752, -2.1579,  2.0773, -2.0874,
         0.2177, -1.6367, -1.7126,  1.9111,  2.6883, -4.8634, -1.7873, -4.0609,
        -0.8101, -1.5936,  1.3621, -7.6878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9416,  2.4872, -0.2574, -3.6468, -0.7511, -1.7889, -4.4308, -0.2502,
         3.3754, -3.0482, -0.3589, -3.6769, -5.5041,  0.5064,  3.3215, -4.8156,
        -0.4102, -0.2645, -1.7009,  2.6699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1243, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7436, -2.0406, -0.0752, -5.0351,  0.6220,  0.2775, -3.6705,  0.7664,
        -2.3723, -2.8390,  0.7835,  4.9661, -3.9022, -0.3625, -3.8022, -2.5091,
        -2.5305,  1.9501, -4.5862, -6.5121], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4894,  -6.3083,  -1.4810,   0.1203,  -4.0805,  -2.8231, -14.6897,
         -5.7578,  -4.4547,  -5.9511,  -0.2387,  -2.6736,   3.4197,  -2.9556,
         -1.9595,  -2.5608,  -6.2346,  -2.0113,   1.1798,  -7.6223],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3788, -8.5390,  0.2699, -0.3432, -1.8948, -0.2204, -0.3903, -4.8932,
         2.7858,  3.8930, -2.2737, -1.6797, -2.5584, -1.4447, -7.7079,  3.7005,
        -0.5819,  0.0372, -0.8754, -4.0403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.3776,  -9.4508,  -7.5129,  -9.2644,  -2.7068,  -8.4894,  -1.5607,
         -2.5341,  -0.5752, -10.5749,  -3.1915,  -2.8395,  -4.0389,   0.5804,
          3.1630,  -2.7977,  -0.3407,  -3.4847,  -1.3127,  -5.3301],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3333, -6.1399, -5.1149, -6.7650, -5.2192, -6.4046, -3.5187, -2.9811,
         3.1632, -3.0538, -4.7139, -4.8220, -5.4423, -5.3849, -0.6287, -3.2005,
        -2.4224, -1.8913, -7.1830, -8.5156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0078, -1.2813, -4.2291,  1.8333,  2.8693, -1.5659, -2.8355, -4.7689,
        -1.4656, -4.0738, -1.6998, -0.7284, -4.8878, -0.4575, -2.5431,  0.3893,
        -2.6764, -2.8260, -4.5907, -0.9103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6174, -2.5953, -2.2887, -1.9713, -1.1304, -5.0064, -5.7473, -4.9242,
        -2.2268, -6.3555, -4.8192, -1.8917,  3.1811, -1.8649,  0.3497, -1.4425,
        -3.6518, -0.6779,  2.3597, -5.6405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4970, -7.3832, -3.7400, -1.2128, -9.7752,  0.7805, -4.4363, -0.9991,
        -2.6624, -7.5808, -0.4072,  1.9456, -5.5174, -2.9138, -7.3401, -8.2337,
        -6.2516, -6.1661, -4.5902, -7.0164], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5583, -1.7562, -1.6321, -4.7176,  1.5991,  4.4413, -2.5920, -0.3909,
        -4.3108, -4.3694, -4.2676,  3.2047, -1.3211, -0.7920, -2.6491, -5.1854,
         0.7505,  3.8616, -3.2378,  0.5212], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5749, -4.6476, -4.0934, -5.2202, -7.8258, -5.8978, -5.9639, -8.8629,
        -3.9954, -2.2427, -4.7470, -4.3473, -3.2360, -4.4615, -0.8424, -3.6488,
        -7.6524, -8.0847, -8.7682, -5.0596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5555,  -1.2642,  -0.2113,  -1.0192,  -2.5827,   1.3139,   2.9523,
         -4.0482,  -1.0176,  -2.0013,  -1.2027,  -4.4254,   3.3910, -11.7109,
         -1.7000,  -1.5726,  -1.5644,  -3.5933,   1.1250,   1.7588],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3854,  1.0059,  3.4914, -2.9762, -2.0357, -3.2728, -3.8126, -0.9491,
         0.3637, -6.7669, -1.0656, -1.3004, -5.4188,  1.7756,  2.6941, -3.1743,
        -2.4212, -2.4787, -1.3124, -4.1286], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5601e+00, -3.3730e+00,  1.5933e-01, -5.7995e-04, -3.2825e+00,
        -3.1864e-01, -1.4207e+01, -5.0539e+00, -9.4616e+00, -4.9355e+00,
        -5.2724e+00, -2.3938e+00,  6.7071e-01,  4.6195e+00, -2.1481e+00,
        -2.0241e+00, -1.3979e+00, -3.8272e+00,  6.5379e-01,  3.4799e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.4290,  -3.7754,  -0.9167,  -1.6197,  -6.5416,   0.4583,   0.3711,
         -3.3992,  -1.4638, -31.2862,  -2.4330,  -6.5163,  -1.4137,  -1.5870,
          2.8610, -11.7648,  -4.2181,  -2.1768,  -4.5525,  -6.3843],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8610,  -5.1234,   1.1582,  -1.2240, -10.0506,  -3.3138,  -6.5316,
         -2.8074,  -6.3901,  -0.6778,  -0.4682,  -4.4788,  -1.0289,  -3.7863,
         -3.7814,   1.6061,   3.7421,  -3.9539,   0.3132,  -1.9043],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3044,  -2.6055,  -5.4942,  -0.8543,   3.0811,  -1.1872,  -1.7946,
        -15.5187,  -5.7302,  -3.6637,  -4.7799,  -1.1655,  -0.6956,   5.1357,
         -9.4653,  -1.9969,  -4.9644,  -6.2648,  -6.2495,  -2.4719],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.6466,   3.0442,  -7.1319,  -1.0961,  -2.5724,  -0.1953,  -4.7435,
          1.3698,  -0.8576,  -2.7583,   0.0566,  -1.3065,  -1.0795,  -3.2254,
          3.8500,  -4.4130,  -0.6255,  -2.8386,   0.6520,  -5.0602],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4496, -1.5032,  2.3512, -4.9970, -1.8657, -1.7787, -3.9458,  0.2677,
         3.5759, -2.2397, -2.7772, -2.2911, -2.3406, -3.4539,  0.8166,  3.2123,
        -2.3037, -0.6644, -3.1397, -1.5920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6991, -9.3465,  2.3737, -2.5918, -4.5679, -1.8572, -3.2156, -0.8615,
        -4.0449,  2.4588,  3.2350, -2.2300,  1.3973, -2.0582,  0.2505, -0.5965,
         4.8088, -2.0529, -0.0574, -2.3570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2768,  0.0623, -6.8244,  1.6050, -1.1705, -3.1520, -0.0372, -2.0726,
        -2.0630, -0.8882,  0.5002, -2.0104, -0.6245, -2.4212, -3.5290,  1.7457,
         2.5651, -2.3807, -0.0697, -0.2407], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8794,  3.7398, -3.4776, -0.7197, -3.3014, -0.4152, -4.0077,  2.2538,
         2.9298, -1.5869, -0.1673, -2.3004, -3.5210, -4.4101,  2.6573,  1.1310,
        -2.5705,  0.6088, -0.4361, -1.9277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2858,  -1.5404,   3.6688,  -2.3308,  -5.8474,  -4.8383,  -1.1847,
         -3.3040,  -7.9463,  -0.5948,  -0.1438,  -6.8642,  -1.7464,  -1.5294,
         -5.2627,  -0.2480,   2.6685,  -2.4539,  -1.1311, -13.3965],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7156, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6867, -4.5818, -1.1462, -4.8081, -2.9520, -6.8335, -6.9379, -5.0080,
        -5.2032, -2.1899, -1.2323,  3.2433, -2.2767, -2.7704, -4.1895, -3.7581,
        -2.1524,  0.8470, -7.9877, -2.5402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9642e-01, -1.9801e+00, -3.4415e+00,  1.7098e+00,  8.5157e-01,
        -5.6733e+00,  2.3308e-03, -3.7286e+00, -1.7018e+00,  6.7573e-01,
        -5.3835e-02, -2.4913e+00, -3.0083e-01, -9.3767e-01, -1.7210e+00,
         8.6664e-01,  3.8161e+00, -3.4185e+00, -4.1600e-01, -1.4984e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.0069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0640,  -4.4686, -12.9731,  -7.1661,  -3.6679,  -4.9630,  -0.4225,
         -0.4805,   4.3681,  -6.1098,  -0.8424,  -2.4880,  -1.9976,  -1.7153,
          5.1723,  -3.4591,  -2.5037, -11.1460,  -2.8602, -20.3769],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7634, -5.2456, -0.2178,  2.9012, -4.5737, -2.5427, -0.8617, -1.1482,
        -2.6317,  4.3159, -1.5775, -2.4193, -3.9713, -2.3704, -8.4812, -2.0305,
        -4.7478, -3.4408, -1.5349, -3.2246], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3374, -5.7153, -5.4897, -5.9582, -6.3126, -4.5513, -3.9404, -3.9897,
        -3.4480, -3.6854, -5.7325, -3.0955, -4.7231, -2.7546, -6.5333, -3.3787,
        -4.8781, -8.4463, -8.3745, -4.0776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.0711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5595, -3.9228, -3.3615, -1.2483, -1.1474,  0.6576,  2.9639, -3.4900,
         0.5917, -1.4557, -1.5864, -1.1235,  3.3810, -3.7736,  0.4291, -3.7251,
        -1.5543, -0.7491,  2.9498, -1.8022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1935, -0.4101, -2.2271, -1.6406, -3.9786,  2.2589,  0.2966, -2.2301,
         0.3206, -1.9899,  0.8014, -0.4845,  5.3581, -1.3659, -0.2985, -2.7127,
         1.0933, -4.8681,  2.0381, -3.1780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8705, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1026,  -0.8292,  -4.6146,   0.1232,  -2.7981,  -7.1959,  -3.2521,
         -4.3311,  -1.4268, -10.5158,  -7.2691,  -4.9167,  -5.5707,  -1.2622,
         -1.7838,  -3.6791,  -1.2540,   2.6202,  -7.8313,  -2.6182],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4151, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3333,   4.9976,  -8.2231,  -1.6846,  -1.9283,  -1.0447,  -0.8559,
          4.1126,  -4.0405,  -1.9537, -11.2475,  -4.8424,  -3.9327,  -5.1428,
         -0.8520,   0.1121,   5.1004,  -4.8543,  -0.1416,  -2.7628],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7029,  -1.6207,  -2.9388,   0.5816,   2.8661,  -4.1886,   1.2932,
         -3.4110,  -3.0608,  -0.2279,   1.3147,  -4.7205,  -3.7890,  -1.3896,
         -4.0703,   0.8810, -24.6056,  -3.4191,  -1.6820,  -9.5862],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7189,  -3.1739,  -5.7025,  -1.1751,  -2.3954,  -4.0494, -11.3891,
         -3.3323,  -2.7994,  -5.5594,   0.8714,   0.0306,  -2.9407,  -1.0628,
         -1.3009,  -2.3043,   0.6259,   2.8274,  -3.3434,   0.2539],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1472,  -8.0378,  -0.6283,   0.7543,  -1.6096,   0.5077,   0.1415,
         -0.8562,  -2.9933,   4.4241,  -1.2419,  -1.1171, -15.0998,  -3.1399,
         -6.9470,  -1.7921,  -3.4348,   4.3051,  -2.5988,  -2.3651],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5293, -15.4559,  -3.3356,  -2.5935,  -5.7768,  -0.9652,  -2.2041,
         -4.1540, -14.9694,  -1.4032,  -9.9458,  -2.3739, -23.2887, -13.1267,
         -8.8815,  -4.9226,  -0.8671,  -2.5258,   4.8605,  -3.7070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0583, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6820e-02,  1.4883e-01, -9.3674e+00, -4.1718e+00, -3.9991e+01,
        -8.0287e+00, -1.5669e+01, -7.4281e+00,  1.8831e-02, -1.7634e+01,
         5.0284e+00, -3.1075e+00, -1.2617e+00, -2.2056e+00, -2.4369e+00,
        -4.6267e+00,  3.0412e+00, -7.4180e+00, -3.5516e+00, -1.0892e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3260,  -0.6674,  -3.3873,   4.1031,  -2.7176,  -3.1373,  -2.0746,
         -6.1716,   0.0983,   1.6699,  -3.2108,  -2.8699, -11.7762,  -4.6306,
         -4.5581,  -6.2197,  -3.1434,  -4.3520,   0.6240,   2.9993],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4554,  -0.4861,  -5.3839,  -0.1968,  -2.8400,  -4.2556,   0.7903,
          3.0974, -12.5922,  -2.8314,  -5.2218,  -2.7360,  -3.6349,  -6.3754,
         -8.2167,  -0.1620, -14.5530,   5.5918,  -7.0693,  -1.9086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1223, -3.9207, -0.2387,  1.6527,  5.0189, -1.3565, -5.1822, -3.8191,
        -0.9374, -2.3872, -1.1575,  1.1976, -2.6473, -1.0363, -1.7167, -2.7262,
         1.4142,  1.7662, -5.4196, -0.2604], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0111, -0.9977, -3.4042, -0.2698, -3.1642,  3.5417, -1.5209, -0.1950,
        -3.0503, -4.2823, -1.8890,  1.6522, -1.9076, -0.8866, -1.3678, -1.0033,
         1.4348,  3.4920, -5.1050, -1.7212], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3456, -2.7771, -2.8837, -6.7318,  1.7645,  1.2745, -7.7511, -2.3134,
        -2.6964, -0.4401, -4.8876, -0.1686, -5.7849, -5.8737, -0.9338, -3.1357,
        -2.8934, -4.0907, -3.4573, -4.7328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4930, -5.8838, -0.6599, -0.6079, -3.6788, -2.6629, -1.7420, -3.3556,
        -4.8108, -0.7330, -0.3831, -4.4898, -0.2312, -0.4136, -5.4823,  0.2918,
        -1.2111, -3.4257, -1.3653, -9.8593], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5181, -4.8011, -0.3929, -2.2087, -2.7745, -1.0767, -3.5749, -3.0512,
         0.7071,  2.9209, -3.9523, -2.5101, -3.4369, -3.3329, -0.0981,  3.3492,
        -5.0638, -0.9132, -1.2701, -2.5247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8495, -3.9944, -1.0203, -7.4068, -7.7378, -3.7113, -6.2174, -4.9897,
        -1.4546, -1.2796, -3.4583, -4.1697, -9.9784, -6.2223, -4.3739, -6.6215,
        -3.1990, -2.8242,  2.9447, -3.3098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6637,  2.3255,  1.5995, -2.4721, -1.4361, -2.0418, -2.6279,  0.7131,
         3.9452, -2.9081, -0.0639, -1.8228, -0.2573, -4.3683,  2.2044, -9.4646,
        -0.3692, -2.9017, -0.8851, -3.9165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7940,  -6.8881,  -6.4512,  -1.2095,  -6.9727,  -3.9929, -11.8684,
         -6.1286,  -3.9860,  -5.7501,   1.8281,   1.1721,  -4.5376,  -2.9912,
         -3.9556,  -4.9468,  -2.9250,   2.3881,  -2.3480,  -2.9936],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2968, -1.3650, -2.4272, -2.3149, -1.9069, -3.2374, -1.3311,  1.7925,
        -6.7667, -3.3040, -2.4298, -1.4374, -6.7966, -0.8447,  0.2938, -4.0398,
        -0.7893, -2.7732, -0.6265, -1.1149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1064,  0.5921, -3.3509, -0.8357, -4.2463,  0.6872,  2.5597, -2.0556,
         0.3136, -2.0406, -3.5800,  0.3741,  3.3760, -5.1689,  0.0410, -1.8366,
        -3.0093, -3.9105, -3.7826,  2.9269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3842, -0.7014, -1.2331, -2.9387,  0.6605,  1.8120, -4.2119, -1.0201,
        -1.3288, -0.5189, -1.0819,  4.2991, -2.8518,  0.3963, -1.6397,  0.3342,
        -6.6915,  1.9498, -4.5693,  0.6382], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1541, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.6133,  -4.8266,  -4.3367,  -4.0451,  -2.3125, -15.6431,  -6.5222,
         -7.4476,  -1.2557,   1.1555,   3.8441,  -8.1365,   0.0963,  -1.7475,
         -3.6540,   0.4748,  -2.5978,  -1.9259,  -3.3188,  -3.0103],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9244,   0.1489,  -0.0618,  -3.7172,  -1.9135, -41.2685,  -2.8447,
         -7.4870,  -1.7118,  -0.6379,   4.3090,  -3.2280,  -0.6354,  -1.7659,
         -0.4449,  -4.6000,   2.2872,  -0.7757,  -6.3803,  -1.8724],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0674,  -3.3038,  -5.9484,  -2.1246,  -3.4498,  -7.0201,  -3.3418,
         -2.5214,  -1.8512, -11.7494,   1.9729,   1.1519,  -2.5162,  -0.4018,
         -0.7501,  -1.0555,  -5.6538,  -2.4432,   1.0240,  -2.7755],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7913, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 6.7843e-02,  1.4014e+00, -2.3539e+00, -6.3488e+00, -5.4200e+01,
        -6.5209e+00, -6.5532e+00, -7.4086e+00, -4.1354e+00, -4.0796e+00,
        -4.8733e+00,  3.9467e+00, -6.6783e+00, -3.0827e+00, -1.8659e+00,
        -5.1948e-02, -4.0030e+00,  1.9793e+00,  2.4938e+00, -4.1236e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3195, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8019, -1.2558, -1.0085,  0.2082,  4.3718, -2.0017, -2.9225, -1.6860,
        -1.8060, -3.8081, -1.5901,  1.5236, -2.1780, -1.6001, -0.7890, -1.7401,
         1.5798,  4.6358, -1.9348, -2.0984], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1675, -7.4813, -5.5330, -0.2445, -3.9973, -5.4263, -4.6584,  0.1167,
        -9.0093, -2.7928, -7.3818, -2.4149,  1.0660,  1.4934, -2.4392,  1.2211,
        -1.6570, -1.1143, -9.4360, -1.4438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7293,  -0.9976,  -0.5498,  -0.7785,   5.2108,  -3.0745,  -8.8095,
        -11.1299,  -5.1593,  -2.4830,  -5.7808,  -4.5907,  -0.3975,   2.2270,
        -11.4348,  -7.0751,  -2.6856,  -1.0203,   1.4594,   5.7599],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6020, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1413, -1.4907, -2.7341, -2.3932,  0.3930,  2.9410, -5.9985, -2.1904,
        -1.6402, -3.3020, -6.4344,  0.3359,  1.7282, -2.7731, -2.9890, -0.7805,
        -3.5038,  1.2622,  3.9067, -2.7152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8592, -2.5355, -0.4503, -3.2606, -0.0530, -2.3780, -5.4000, -1.2859,
        -2.8074, -3.2908, -6.6631,  4.3511, -0.0078, -1.2843, -2.5083, -3.1655,
        -0.0880,  3.6711, -3.5766, -3.4946], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7997,   1.8724,   1.0944,  -1.9492,  -0.3810,  -2.1045,  -0.1708,
         -5.4229,  -1.4218,  -3.0530,  -7.1445,  -6.9349,  -5.0853,  -3.0280,
         -7.5139,  -2.6613,  -3.9297,  -3.6120,  -3.9026, -10.0351],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5887,   2.1768,  -4.4194,  -2.4818, -12.5172,  -6.6820,  -6.2251,
         -1.6925,  -0.8706,   4.8803,  -1.5122,  -2.9155,  -4.4402,  -2.7907,
         -5.7748,  -0.7593,  -1.5298,  -4.6112,  -1.6775,  -0.5610],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1533,  -3.5719,  -6.2276,  -1.0519,  -2.2209,  -4.3880,  -3.2382,
        -12.2391,  -8.2300,  -5.3542,  -5.7277,  -0.5483,  -7.9355,   5.1494,
         -2.3025,  -3.1766,  -5.9112,  -4.3049,  -5.6498,   0.1272],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3400, -7.2600, -1.6242, -5.5739,  2.2690, -4.1452, -0.4027, -2.1981,
        -4.3602, -2.8749,  2.7757, -4.0351, -0.9416, -8.9935, -5.4126, -3.1585,
        -5.9349, -2.7396, -0.0266,  4.6079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7812, -3.3228, -0.6885, -3.4898,  2.0760,  1.2056, -6.0378, -2.1148,
        -0.6790, -3.2875,  1.0991,  3.9134, -3.0598, -1.2643, -2.3905, -0.6578,
        -7.7223, -0.7339,  0.8179, -2.4683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5293, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5047,  0.8834,  2.1020, -2.0017, -0.4468, -1.7855, -1.3947,  1.1008,
         2.2322, -3.5847,  0.4223, -2.8235, -2.1627, -4.0188, -0.5526,  1.4511,
        -1.0712, -2.1626, -1.1228, -3.4103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5278, -1.3045, -1.1884, -0.7910,  1.6036,  4.5358, -1.9709, -0.5480,
        -9.5835, -4.4957, -4.9115, -5.2348, -1.0859, -1.5527,  4.9343, -3.5037,
        -0.7652, -2.3076, -1.8031,  0.3872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0763, -2.9354, -3.7529, -1.2478,  3.2310, -2.7512, -1.8392, -0.4164,
        -2.8465, -0.3460,  3.5871, -2.8668, -2.9363, -3.6277, -1.2980, -4.8665,
         0.8144,  0.6912, -1.6407, -1.5924], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0109, -7.9918, -4.4003, -4.6868, -3.0667, -1.7768, -2.5187,  1.4930,
        -7.3685, -0.5810, -2.1900, -5.9246,  1.9162,  1.3031, -4.6471, -2.0314,
        -1.2387, -0.1793, -2.7507,  5.2584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4302, -2.3157, -1.1304,  0.2538,  3.0038, -3.4826, -1.1030, -4.0282,
        -0.2230, -2.8058,  0.8113, -1.1264, -2.8223, -1.2741, -1.2144, -2.0071,
        -3.4692, -0.4815,  2.0772, -0.9524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.2419,  -2.5351,  -0.2477,  -3.7846,   0.4518,  -8.3125,   2.3945,
         -2.8371,  -3.7629,  -0.8836, -15.5950,  -5.5326,  -5.9436,  -0.5816,
         -0.3528,   5.1205, -21.0267,  -2.2211, -18.8549,  -4.9106],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2587, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9198,  -5.5572,  -1.3226,  -0.3313,  -1.8227,   1.7468,   3.7892,
         -2.0350,  -1.2399,  -0.3593,  -1.0570, -10.2619,   2.7501,  -0.0661,
         -2.7687,   0.1907,  -0.8327,  -1.4006,   0.1999,   4.5161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5360, -2.2928, -4.7729, -4.0733, -5.4097, -0.3236, -4.1923,  0.3331,
         3.9175, -3.0664, -1.2905, -3.3725, -0.7061, -3.0001,  0.5282,  1.0015,
        -5.5079, -0.4638, -1.8385, -3.1726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9619, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7100, -2.6623,  5.0892, -2.7093, -1.2277, -1.6622, -3.1602,  0.4739,
         1.7948, -0.8249, -1.8264, -4.5143, -2.4388, -4.0356, -0.8137, -0.1397,
        -3.6663, -3.6631, -0.9999, -7.5990], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8256, -0.3493,  1.4907, -2.1811, -0.7245, -0.8276, -1.5411,  2.3193,
         3.7431, -3.9739, -0.8934, -1.9373, -1.3579, -4.7937,  1.4501,  0.4443,
        -2.0548,  1.0751, -0.9353, -0.0961], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7054,  -1.3183,  -3.8828,  -3.2227,  -1.9120,   4.4585,  -4.1329,
         -0.4863,  -1.5862,   0.0191,  -4.4303,  -6.6816,   0.1355, -10.2869,
         -1.7035,  -0.5519,  -1.7817,  -3.6730,  -2.3805,  -3.9926],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9544,  -3.1574,  -0.4936,  -6.0763,  -4.0380, -67.2580, -13.0704,
        -12.0846,  -5.9564,  -0.2460,  -5.3924,   5.5463,  -4.5878,  -1.2142,
         -1.3330,  -0.9017,  -4.1243,   1.8108,   3.1681,  -2.0886],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9011, -1.9627, -2.4821,  0.3116, -0.8121, -5.9336, -0.6316, -4.2682,
        -2.7545, -0.4674,  3.8124, -4.8231, -0.6932, -1.1492, -4.0033, -0.2934,
         2.7305, -4.7426,  0.4066, -1.9727], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4430, -1.0974, -1.5107, -0.2035,  4.3813, -6.0036, -0.2045, -2.3560,
        -1.3499, -0.4033,  3.1466, -4.5368, -2.6713, -4.1176, -1.6804, -5.4030,
        -0.2058, -0.5484, -4.7317,  0.1767], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5951,  -1.7342,  -1.2957,  -3.3591,   1.6726,   1.5651,  -1.1039,
          0.4782,  -4.7135,  -1.5034,  -7.0082,   1.1588,  -0.3275, -17.1574,
         -2.6373,  -1.6546,  -5.2018,  -0.8336,  -4.9603,  -9.8162],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7572, -5.9636, -6.4874, -0.5109, -2.6962,  4.3646, -2.1042, -0.1949,
        -1.7654, -5.0497,  1.1058,  3.4541, -1.7345, -4.3784, -3.5556, -2.0868,
        -7.7065, -1.6275, -3.5448, -2.6810], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3168, -7.3081, -6.4416, -8.7928, -8.0074, -0.3559, -5.4968,  5.3423,
        -6.6355, -3.3104, -0.9749, -3.3054, -1.9468,  0.8551, -3.4417, -2.6450,
        -8.9471, -5.7861, -3.0303, -6.5479], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.5818,  -3.0899,  -1.9286, -17.3241,  -2.7301,  -7.5239,  -2.7424,
          1.8999,   3.6699, -21.8130,  -0.4862,  -1.1927,  -6.2753,  -0.0236,
          1.2904,  -4.1761,  -1.1747, -10.4764,  -6.0305,  -4.1453],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0345, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5464, -1.3851,  0.6387, -8.4772, -3.6290, -1.8773, -5.4179, -0.6310,
        -2.8076, -1.5803, -3.8747, -7.4517, -6.7068, -3.6367, -5.2161, -1.7612,
        -5.1499,  0.9728, -6.3151, -2.5282], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1094, -0.8459, -2.0041, -1.3272, -7.6891, -0.9511, -0.7141, -2.8701,
        -1.0241, -3.6443, -0.5665, -1.8081,  5.1056, -4.8325, -2.2412, -1.5822,
        -7.0430, -2.6760, -0.5207, -0.8815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5927,   4.4404,  -2.8234,  -0.8357,  -1.8395,  -1.0869,  -4.2636,
          1.5163,   1.4243,  -1.8518,   0.4948,  -1.3679,   1.0673,  -0.8713,
          3.2092,  -6.0234,  -3.5191, -14.0325,  -3.5344,  -4.4581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0479, -1.1994, -4.7648, -1.3118, -2.5402, -1.3941, -3.5342,  1.6269,
        -3.8140, -3.4384, -1.0335, -3.5407, -2.3822, -4.1571, -0.9979, -1.6950,
        -0.3305, -1.1988, -3.7415,  1.4798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8460, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7877,  -5.5872,   1.4221,  -1.8264,  -5.3146,  -1.4570,  -3.7084,
         -2.7258,  -4.9195,   2.7728,  -0.3691,  -1.1238,  -0.6511,  -4.7263,
          0.5494, -10.2677,  -2.4075,  -1.1601,  -2.2209,  -3.1224],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9839, -3.8302,  0.5942, -1.2950, -2.3308,  0.8120,  3.7589, -2.0443,
        -1.0679, -0.9349, -1.0922, -4.6955,  0.4529,  1.1366, -2.4542,  0.2332,
        -1.1110, -2.6245,  1.7001,  4.2200], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.6514,  -0.9085,  -2.4829,  -0.1896,  -3.6493,  -2.0779,  -4.1311,
          4.5142,  -3.2978,  -3.1067,  -3.1230,   0.1706,  -5.4249,  -3.2922,
          1.2248, -12.7495,  -0.7333,  -1.5419,  -2.0890,  -2.5262],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9573,  -6.8328,  -4.1297,  -4.6287,  -2.9467,  -4.9568,   0.6376,
         -6.1462,  -0.5561,  -0.8257,  -8.2576,  -0.0446,   2.9922,  -2.0657,
         -1.4056, -29.2822,  -5.9842,  -6.4879,  -7.4366,  -4.9580],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8751,  0.8842, -4.8014, -0.8735, -2.4314, -1.3498, -8.9220,  0.5315,
        -0.7685, -3.2697, -1.2655, -2.3561, -1.5021, -7.7868,  0.0222,  0.6136,
        -3.6122,  0.0894, -1.7212, -1.1422], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2584,  -1.7924,   2.2415,   3.5691,  -2.7757,  -0.2806,  -9.5967,
         -2.4082,  -6.8739,  -2.1743,   0.2142,   5.3945, -12.8650,  -2.8716,
         -3.4350, -10.6343,  -7.9719,   0.6653,  -2.9459,   1.4375],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6090, -2.0399,  1.2778,  3.7125, -1.5480, -0.0940, -2.0536,  0.6045,
        -7.2845,  2.6987,  1.9950, -2.4700, -0.2926, -2.5788, -0.0587, -5.2557,
         1.6971,  3.1835, -2.9646,  0.7175], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5178,   4.5445, -13.3176,  -1.5400,  -1.9146,  -5.6509,  -0.9859,
          3.8734,  -2.3435,  -1.6739,  -8.8444,  -4.3706,  -4.3152,  -4.4207,
         -2.9589,  -0.7177,   3.6028,  -8.0684,  -0.7150,  -1.3131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6917,  -1.9952,   3.8805,  -2.3037,  -1.4011,  -1.1567,  -0.1486,
         -8.4909,   2.5192,  -0.6491,  -2.6718,  -1.6042,  -1.2105,  -3.2750,
          1.1560,   3.5995,  -2.1697,  -3.5900, -10.0195,  -6.3258],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.3793, -6.2579, -0.8419, -2.2870, -2.1064, -4.2941,  1.3045,  2.0617,
        -6.0649, -0.7947, -2.2154, -3.4367,  1.5463,  3.9433, -3.5501, -0.9432,
        -1.3685, -1.8861, -4.3684, -0.7498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4567,  3.1628, -1.6880, -0.1488, -2.1473, -1.6777,  0.5237,  4.3400,
        -4.7956, -0.5112, -1.1492, -1.5236,  1.2813,  2.3299, -2.0308, -3.7443,
        -1.9611, -2.9167,  1.7332,  4.0971], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6954, -0.5924, -3.7246, -1.1192, -3.4923, -3.2359, -0.5629, -1.4767,
        -3.8238, -6.8284, -1.4466, -4.9774,  2.0769, -9.5863, -3.5928,  0.0291,
        -1.0964, -1.2907, -0.4429,  0.8670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5595, -0.6671, -1.8671, -1.4287, -4.6485,  1.0955,  1.9110, -3.5271,
         1.0576, -3.2011, -2.5730,  1.9359,  2.9688, -2.2043,  0.0901, -2.1136,
        -1.5335, -2.4888,  3.5146, -1.3660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0379,  -2.1583,  -1.8695,  -4.2753,   2.3897,   1.6193,  -3.3212,
          0.5340,  -0.7292,  -0.4060,   1.3030,   3.8301,  -3.5630,   1.0996,
        -15.2264,  -7.2261,  -2.8890,  -5.3064,  -2.1351,   1.5719],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2027,  0.6605,  2.4811, -2.0398, -0.5226, -0.7233, -1.0045,  0.7305,
         4.6137, -2.2524, -0.1760, -2.4632, -2.3436, -6.7779, -1.1660,  0.2139,
        -3.2174,  0.6633, -0.1990, -3.4362], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9516,  -1.3536,  -0.1504,  -6.7569,  -3.3350, -10.6318, -40.9474,
         -6.6258,  -4.7367,  -4.9649,  -0.1513,  -5.2234,   2.6128,  -4.9752,
         -3.0779,  -2.8384,  -0.3239,  -5.7636,   2.1072,  -0.8130],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.3039,  -4.3359,  -5.5905,  -2.4059,  -2.0060, -21.9415, -13.5683,
         -3.8021,  -6.2491, -34.3397,  -6.0761,  -6.9310, -21.6207, -23.4850,
        -37.3188,  -3.3315,  -3.8598, -12.1595,  -9.7840,  -7.2449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-11.8677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7677, -2.5684, -0.5805,  1.9563, -4.0109,  0.6308, -2.6578, -0.7543,
         0.1663,  4.2190, -4.3747, -1.7628, -3.1848, -5.6334, -3.3798,  1.8845,
         1.8198, -1.0122,  0.3554, -1.7866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9571, -0.8149, -6.0740,  2.1563,  2.2865, -2.6875, -1.3327, -1.1663,
        -3.6796,  1.3298,  2.4243, -2.5302, -1.3834, -3.5718, -3.7425, -1.0445,
         0.4637, -2.6140, -0.8129, -4.1188], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9601,  -0.9605, -10.4878,  -1.3221,  -5.7559,  -6.0335,  -0.6538,
        -11.2464,  -3.6768,  -3.0397,  -1.7820,  -2.8134,  -2.2937,   2.1015,
         -3.1157,  -0.8818,  -3.1163,  -1.2097,  -3.6138,   1.8709],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1173,   0.5247,   0.2357,  -5.8063,  -2.9153,  -1.3016,  -4.6403,
          1.1463,   4.0391,  -1.9475,  -0.5735,  -1.0858,  -1.6297,   1.7681,
          4.3982,  -1.1910,  -1.8893,  -4.5061, -17.0580,  -5.7486],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1968,  -4.9366,  -5.1941,  -2.2954,   0.4029,  -5.6466,  -0.6636,
         -1.3989,  -0.6067,  -3.5940,   3.7074,  -0.8985,  -6.0505,  -2.3559,
        -20.3770,  -4.7752,  -4.4192,  -6.1638,  -3.0162,  -0.9670],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8349,  0.0481, -2.6402,  2.7135, -5.1852, -2.3526, -3.5692, -0.9744,
        -4.2941,  0.8848,  1.3491, -2.9775,  0.4467, -2.1700, -1.5154,  0.6834,
         4.1530, -2.2414,  0.5134, -3.4935], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5860e-03, -1.4957e+01, -2.6472e+00, -5.2936e+00, -1.6492e+00,
         1.5369e+00,  3.8826e+00, -5.0000e+00, -8.4090e-01, -1.4663e+00,
        -2.6602e+00, -1.9201e+00,  2.6633e+00, -2.9985e+00, -1.5265e+00,
        -8.3010e+00, -6.2230e+00, -3.1001e+00, -3.8306e+00, -1.3573e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0401, -3.4954,  0.8577,  3.6772, -1.4783,  0.4102, -1.2645, -3.0102,
         0.2320,  3.0734, -3.4159, -0.1299, -3.0688, -4.1084,  0.5572,  1.4381,
        -3.2698, -1.2178, -0.6794, -2.2716], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2832, -1.8279, -4.3082, -0.2668, -2.4300,  1.6996, -1.3076, -0.3655,
        -2.1955, -5.2525, -0.0389,  0.5949, -2.8101,  0.4054, -3.1574, -2.7244,
        -3.3942,  0.1947, -0.9390, -3.1324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7474,  -3.6040,  -0.7613,  -0.4779,   0.1763,  -4.7815,   1.5122,
          1.4221,  -3.4300,  -1.4233, -19.6990,  -2.4509,  -8.4448,  -0.2817,
         -5.0562,   4.2453,  -5.4405,  -1.5776,  -8.3244,  -3.7740],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9262, -6.8770,  1.3883, -2.2748,  0.5456, -0.9610, -1.4773,  1.7120,
         2.8733, -2.0210, -0.6161, -1.1933, -3.1227, -5.2952, -0.0254,  1.9530,
        -1.9568,  0.2292, -2.2406, -3.9703], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.9351, -6.6987, -2.9034, -5.9003, -1.5937, -4.8908,  3.5248, -6.5221,
        -3.8161, -1.9947, -3.1962, -3.6481, -0.6315,  2.7589, -2.8770, -0.1379,
        -1.7164, -0.9223,  2.0040,  3.9558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3802, -5.6530, -2.0997, -3.4774, -0.1431, -2.3672, -1.9414, -2.9598,
        -5.4266, -4.1225, -0.3090, -2.9708, -1.9841, -5.7733, -8.2083, -3.1591,
        -6.0753, -1.2818, -0.2083,  2.5047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5019,   0.4807,   2.2285,  -2.8485,  -4.9018,  -9.4146,  -8.8580,
         -5.2059,  -6.8598, -11.4220, -11.9460,  -6.3601,  -1.8259,  -4.2904,
         -5.4122, -17.3308,  -6.6448, -12.0067,  -4.7770,  -8.7108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6304, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2419, -10.9670,  -1.8286,  -7.2871,   0.6188,  -1.9516,  -4.0264,
         -8.7246,  -3.7930,  -5.6715,  -2.9113,   2.5701,  -0.9662,   0.7265,
         -2.4798,  -1.1728,   1.0077,   3.6587,  -2.5415,   0.2482],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1155,  2.2140,  2.8239, -2.3451, -0.9802, -1.3974, -2.9623,  1.2531,
         1.6531, -3.2369, -0.1721, -1.8130, -1.6243, -3.6945, -0.4060,  0.4789,
        -9.6717, -1.5776, -1.3026, -4.4192], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9655,  3.2246, -2.1968,  0.8813, -2.3274, -1.2737, -0.2801,  4.2534,
        -1.7201,  0.3158, -1.5405, -0.3509, -1.6363,  4.7961, -0.5328, -0.9098,
        -1.6138, -0.1593, -4.3679,  0.5989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0707,  -3.5280,  -2.6489,  -6.9672,  -3.2436,  -2.4633,  -6.1843,
          0.3618,   3.6800,  -7.3176,  -2.9739, -21.0755,  -8.5962,  -9.7011,
         -6.2076,   0.1066,  -6.1177,   5.5316, -20.5340,  -2.1075],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4282, -8.3720, -2.7315, -3.1143, -4.8318, -0.7535, -6.0158, -1.8377,
         0.3758,  2.7439, -1.9854,  0.9552, -3.3302, -1.3431, -3.0894, -0.3061,
        -3.2365, -3.4506, -0.7509, -6.2435], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4373, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1992,   1.8184,  -1.1809,  -6.7809,  -2.1623,  -3.2721, -10.5886,
         -3.2480,   0.0613,  -3.9927,  -6.9768, -14.0820, -10.1473,  -2.7725,
         -6.3061,  -1.4234,  -3.5801,   0.9859,   0.6573,  -3.7603],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2475, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-23.1103,  -0.0838,  -2.7088,  -4.6248,  -4.8110,  -3.8804,  -2.3649,
         -4.7820,  -0.3664,  -0.5917,  -2.2304,  -1.8240,  -1.3233,  -3.8399,
          1.1568,  -0.5740,  -4.3472,   0.6378,  -2.7123,  -1.4574],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1612,  -0.6957,  -4.1751,   2.8796,  -2.3954,  -4.1946,  -3.4646,
        -14.0004,  -8.7962,  -2.1549,  -5.2206,  -0.9424,  -0.3660,   1.0790,
         -4.9145,  -0.6533,  -1.1987,  -0.5531,   1.9195,   4.2145],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2397, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6681, -22.2250,  -5.7204,  -6.2368,  -7.8798,  -2.3558,  -4.4005,
          2.5587, -14.4401,  -3.7368,  -2.3652, -18.1843,  -8.4609, -33.2123,
         -4.1176,  -2.2985,  -6.2159,  -2.6260, -20.8471,  -8.7120],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.6572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1087, -5.0577, -1.5777, -0.5164, -2.8139,  0.2568, -3.1072, -2.9004,
        -4.7196, -2.5703,  2.5513, -2.5987, -2.3477, -4.5863,  0.8728, -4.2527,
        -1.0325,  3.8723, -1.4134, -0.7705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7708, -1.0197, -1.1818, -0.1447, -1.1280,  4.1659, -1.4262, -2.6469,
        -1.4255,  0.0799, -7.2983,  2.2290,  1.6689, -3.6962, -0.8736, -3.5700,
        -1.1652, -2.1742, -5.1465,  2.2646], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4686, -2.9152, -2.4045, -2.9842, -1.9267,  2.1535, -4.9905, -0.0459,
        -3.2436, -1.8177, -5.2364,  0.3749,  1.0937, -2.8829, -0.1591, -0.5164,
        -1.8273, -0.3363,  4.2454, -2.7964], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2688, -7.0702, -1.6928,  0.5133, -4.8446, -3.1444, -4.8235, -4.5597,
        -0.6455,  2.2195, -1.0365,  0.4674, -1.0502, -0.5273,  1.8403,  4.7203,
        -2.8663, -0.5969, -1.9096, -3.8545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5503, -1.0868, -1.9472, -0.4604,  4.0079, -2.1719,  0.2712, -3.5427,
        -0.9027, -6.5673,  1.8643,  1.0664, -2.5761, -0.5136, -2.3356, -0.0653,
         1.2160,  4.3723, -2.0756, -0.7651], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9755,  3.0511,  0.0289, -3.0923,  0.8750, -1.5646, -0.7788,  0.3617,
         3.5512, -6.0669, -0.7291, -2.3580, -1.2215,  0.0854,  3.4959, -4.7530,
        -4.1132, -3.5670, -5.8837, -3.8113], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5774,  -0.2414,   0.5855,  -2.9453,  -3.4080, -10.8307,  -5.3759,
         -2.9234,  -6.1006,  -0.9213,  -1.0960,   3.0426,  -6.9090,  -1.9814,
         -3.2230,  -2.9579,  -5.7459,  -5.1697,  -0.3892,  -3.9861],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7456,  0.0082, -2.7539, -3.4368, -2.6996,  2.5976, -3.5252,  0.8279,
        -3.6683, -1.8851, -3.2791,  2.7805, -2.0862, -2.2203, -2.4529, -2.9147,
        -1.4433, -1.1193, -4.4795, -1.5538], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1339, -1.7055,  4.0017, -4.0228, -0.4816, -2.4621,  0.2272, -4.2072,
        -0.7764,  1.7561, -4.1606,  0.3136, -0.5022,  0.8018, -5.7858,  5.0451,
        -2.1019,  0.8925, -3.8515, -1.7078], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4549e+00, -1.2907e+00, -1.2444e+00, -4.3494e+00, -1.7930e+00,
        -3.8888e+00, -3.7158e+00, -3.9405e+00, -3.8752e+00, -4.8841e+00,
        -3.9627e+00, -1.8864e+00, -1.9444e+00, -9.7086e-01,  1.1507e+00,
         2.8691e+00, -8.0834e+00, -3.6735e-03, -3.2594e+00, -4.5488e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2178,  -1.4944,  -5.9336,  -2.1036,  -1.7381,  -6.0302,  -2.6589,
         -0.4601,  -5.1357,  -0.1003,   2.2905,  -1.9162,  -0.8581,  -0.3549,
         -1.4096,   1.8875,   4.2466,  -1.5114,  -2.3514, -21.3706],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3456, -4.4887, -3.3340, -2.5933, -2.5615, -7.5035,  0.7597,  0.2540,
        -2.5628,  1.2912, -2.8143, -2.9766,  0.9085,  2.9130, -4.2304, -1.6194,
        -4.2605, -2.9386,  2.0042,  3.3065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.3376,  -5.4982,  -5.3565,  -2.8749, -11.9986,  -5.6021,  -5.0432,
         -2.9392, -18.4075,  -7.8795,  -7.3593,  -7.5074,  -1.3458, -37.4489,
         -7.7997,  -2.8457, -14.0639,  -3.4628,  -4.4131,  -1.0725],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.5628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0080,  -8.3551, -10.8632,  -1.4590, -12.3117,  -7.6612, -25.8762,
         -5.6443,  -7.7352, -13.0779,  -7.5826,  -4.3516,  -6.5790,  -3.7464,
         -0.9450,   3.0868,  -4.3815,  -3.5997, -13.4453,  -5.1109],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1027, -1.6431, -1.4577, -3.8017, -2.5821,  3.0354, -1.3484, -2.5522,
        -0.9251, -3.6169,  2.7176,  3.4656, -2.6039,  1.2152, -4.6534, -2.8162,
        -4.4649, -3.8861, -4.9894, -2.7104], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3025, -0.4095,  0.3528, -4.0918, -0.6200, -4.6350, -4.9203, -1.3274,
        -0.4767, -2.2589, -1.6101, -5.4689, -4.6145, -1.2362,  1.5045, -3.8193,
         1.0695, -1.8131, -0.7609, -3.7348], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3664,  -2.8339, -17.9757,  -3.5227,  -6.1277,  -0.9382, -13.9365,
          1.2532,   2.5130,  -9.3477,  -2.3377,  -0.6404,  -0.4785,  -4.2278,
          2.8409,  -5.4989,  -3.2105,  -1.1037,  -5.1044,  -4.7981],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2336, -0.6417, -4.9918, -2.9856,  2.2090,  3.2778, -3.2240, -1.8114,
        -2.1291, -2.1019, -5.3857,  1.6346,  0.5608, -3.0974, -0.9507, -1.0518,
        -1.1259, -1.6788,  5.2360, -4.3430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3831, -10.2080,  -8.4562,  -1.6896, -28.2115,  -8.1844,  -2.5999,
        -11.8341,  -9.9401, -13.5091,  -1.1632, -19.2840,   0.8328,  -8.3158,
         -3.9263,  -2.4989,  -1.9255,  -7.2950,  -4.4757,  -0.5897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0164,  -7.7111, -12.2449,  -3.3865,  -4.2340,  -8.3545,  -9.9898,
         -2.7999,  -9.7196, -14.8797,  -4.4294,  -1.5232,  -2.2336,  -1.9920,
         -0.7615,  -2.8005,   3.3661,  -2.4646,  -1.3310,  -2.9259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.4933, -6.0230, -0.6206, -1.0876, -1.6867,  0.9072,  4.3538, -3.3665,
        -0.0401, -1.8318, -2.0921, -3.8728,  2.1073, -0.2302, -2.8826, -0.4876,
        -3.2592, -0.5477,  0.3153,  4.7587], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0391,  -0.8296,  -2.4911,  -1.8204,  -5.8442,  -0.7220,   0.2503,
         -4.6689,  -0.1466,  -2.0365,  -0.6954,  -0.1644,   3.5230,  -4.4822,
         -0.3722,  -3.7271,  -1.5323, -11.4550,   3.7293,  -5.5050],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3809,  -3.3246, -18.4671,  -8.8092,  -4.9447, -10.2582,  -4.5610,
         -2.8837,  -7.2521,  -5.0057,   1.9386,  -1.4326,  -1.1436, -18.5039,
         -2.6047,  -7.4015,  -2.1108,  -1.4258,   4.1245, -12.7567],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.3815,  -1.9101,  -2.1116,  -0.5744,  -8.2688,   4.6956,  -9.7593,
         -3.6663, -31.2414,  -2.3142,  -6.1774,  -1.7358,   0.7319,   4.3261,
         -4.4555,   0.0585,  -1.7151,  -1.5199,  -3.0304,   2.2501],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8900, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2069, -1.0081,  2.1011, -2.9286,  0.2994, -2.1675,  0.2210, -2.2199,
         4.3356, -1.7420, -1.3240, -1.6965, -0.7898, -3.8845, -3.5297,  2.0847,
        -3.7451, -0.8883, -1.8368, -0.6133], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6872, -2.8826, -1.7209,  2.2858,  3.3709, -3.0117,  0.2172, -1.5747,
        -1.3551, -2.9024,  2.4678, -3.2927, -0.2364, -1.0015, -0.4913, -8.2757,
         2.4362, -0.2387, -2.2005, -0.9462], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0020, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1570,  -1.0169,   3.5439,  -4.3794,  -1.8087,  -9.9762,  -8.0397,
        -11.2661,  -5.8895,  -0.9550,  -0.3906,   1.9101,  -3.5938,  -0.6246,
         -1.6725,  -6.1167,   0.7539,  -0.0465,  -4.3636,  -2.1603],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8125, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4253,  -0.1106,   3.3045,  -4.2140,  -1.5964,  -3.7246,  -3.2680,
         -5.5647,  -7.0651,   0.7783,  -4.5514,  -3.7878,   0.4477, -19.3845,
         -2.0738,  -8.9309,  -4.5075,   0.7464, -14.9245,  -8.2396],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3571,  1.4579, -5.5172, -0.9351, -2.2901, -0.2434, -6.0212,  0.0932,
        -2.8712, -3.8933, -3.3395, -4.5232, -4.2856, -6.2376, -2.8844, -1.2037,
        -2.7738, -2.0056, -2.1410, -0.8496], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.4673,  -0.7809,  -6.1838,  -1.2301,  -2.7082,  -5.2325,  -2.9283,
         -0.2276,  -2.4665,  -3.1153, -14.3853,  -3.5346,  -7.0889,  -1.7657,
         -4.4161,   1.9894,   3.9527,  -6.8457,  -0.9751,  -4.2710],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 9.8002e-01, -5.5749e+00, -1.5102e-02, -2.9075e+00, -2.8065e+00,
        -2.9001e+00,  3.7851e+00, -3.1137e+00, -1.2347e+00, -2.6928e+00,
        -3.3072e-01, -4.2052e+00,  1.8000e+00,  1.6982e+00, -4.2394e+00,
        -3.5116e+00, -1.6397e+01, -6.2064e+00, -3.4099e+00, -4.5342e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7908, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0798, -2.2724, -2.8866, -0.6674,  4.7126, -3.1845, -0.3806, -4.2116,
        -1.6287, -6.5987,  1.1085,  1.3278, -3.7335, -1.9770, -0.7262, -1.2252,
         1.0190,  3.7222, -5.5640, -0.7670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.5798e-01,  1.9791e+00, -6.7608e+00, -2.4006e+00, -2.3571e+01,
        -9.0834e+00, -6.7395e+00, -8.8873e+00, -4.6401e+00, -5.6408e+00,
        -1.9075e+00, -1.9883e+01, -5.1327e+00,  9.3603e-01, -6.3810e+00,
         4.5031e-01, -3.3619e+00, -9.7835e-04, -3.8439e+00,  3.7694e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.5919,  -5.5577,  -1.0470,  -4.4013,  -7.3525,   2.0684,  -3.0653,
         -1.6928,  -2.7236,  -8.6528,  -0.4816,  -0.1789,  -3.1128,   1.2478,
        -13.1230,  -9.5228, -16.9399,  -4.7338,  -7.6172,  -0.9056],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5620,   2.1124,   2.7206,  -1.4544,  -2.3578,  -3.0393,  -2.0025,
         -3.5237,  -1.6718,   0.0525,  -2.6637,   0.0915,  -0.0293,  -2.8539,
          0.1754,   2.8568,  -3.1255,  -2.8033, -19.8295,  -3.3859],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3747, -2.3537, -2.1969, -1.0466, -9.2551, -1.4115, -0.0938, -2.3115,
        -0.9708, -1.8112, -2.5504,  0.7360,  0.9935, -2.7695,  0.4600, -1.5854,
        -2.0067, -2.9951,  2.1508,  1.7010], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5346, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4237,  -2.9830,  -3.7486,  -3.2868,  -6.8990,  -8.3787,  -3.6384,
         -6.8225,  -3.9250,  -3.6758,  -1.1705,  -8.1486,  -3.5652,  -3.1175,
         -2.6503,  -1.4834,   3.5367,  -3.7658,  -2.1078, -11.0823],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7972,  -0.4064,  -2.8228,  -6.0496,   0.1955,   1.9855,  -1.9912,
         -0.5745, -11.1684,  -9.9245,  -4.8758,  -6.8137,  -2.4063,  -4.9787,
          1.1224,  -1.6693,  -5.0378,  -2.9029,  -2.6681,  -0.7096],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7122, -5.2343,  1.4317, -0.1326, -3.0214, -1.3341, -4.9457, -0.3666,
        -4.5840,  1.0024,  1.8585, -3.7401, -2.5408, -3.2231, -3.3518, -1.4176,
         2.8302, -2.5874, -0.1353, -1.6766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5378, -6.0655,  0.2281, -2.0577,  0.2519, -5.1958,  1.6904, -1.9827,
        -4.2469, -0.1595, -3.7586, -0.2255, -3.1861,  1.7998,  2.5760, -1.8333,
         0.2009, -2.5805, -1.9309, -5.7621], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2114,  -1.1196,   0.1540,   1.3567, -24.9007,   0.1005, -28.3504,
        -10.3596,  -1.7111,  -2.8747,  -3.2387,  -6.6380,  -6.2207,  -2.7028,
         -4.7014,  -2.6318,   1.2149,   0.7372,  -5.1906,  -1.5441],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2436, -4.4129, -2.6119, -1.5042,  2.7517, -4.1857, -1.3417, -1.5385,
        -0.0285, -4.9815,  1.2954, -0.8156, -3.2760, -0.6081, -2.3632, -0.8679,
        -6.3020, -0.0455, -2.8236, -3.8276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9865, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7016,  -3.1673,   0.1326,  -0.4082,  -2.2333,   0.9395,   1.8166,
         -1.1397,  -0.6752,  -4.4241,  -3.1251,  -5.8464,  -0.3254,  -0.4920,
        -12.0259,  -2.3853,  -1.4184,  -1.6102,   2.5691,   1.2055],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2058, -0.8844, -1.8358, -4.6893,  1.9690,  1.5072, -2.4648,  0.2404,
        -2.5124, -2.6779,  1.1673,  2.6479, -2.4879, -3.0247, -0.5511, -1.0560,
         0.1222,  4.4331, -3.3454,  0.5253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.3464, -17.1648,  -2.2336,  -2.5456,  -8.0135,  -1.7134,   3.8186,
         -2.7110,  -0.9211, -12.2178,  -5.8529,  -4.5156,  -6.1622,  -2.4378,
         -3.4911,   0.1929,   2.8500,  -6.8753,  -3.0331,  -1.3651],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3676, -19.0366,  -4.7922,  -2.6162,   3.7096,  -3.5090,  -8.3420,
        -17.4630, -11.7045,  -1.7034,  -6.8742,  -0.7661,   1.9523,  -3.8322,
         -9.1842,  -4.3415,  -3.7151,  -0.6934,  -6.9420,  -0.0614],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1641, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4658,  -1.4400,  -7.7896,  -2.9150, -14.5235,  -1.6591, -10.9978,
          3.7099,  -6.2720,  -3.9885, -12.8668, -10.1772,  -2.9825,  -8.1336,
         -2.3052,  -4.3020,  -0.0840,  -3.8223,   0.6528,  -3.1070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7235, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7290,  1.1454, -4.0085, -0.6069, -4.2792, -0.8667,  0.6510,  4.6714,
        -1.3161,  0.3426, -5.4278, -1.7255, -3.2772,  4.0925, -4.9698,  0.0365,
        -2.6624, -0.5261, -4.4606,  1.8952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1423, -0.8565, -4.4329,  1.5030,  2.6549, -2.6707,  0.3838, -2.9240,
        -2.1412, -6.5966,  3.1185, -2.2725,  0.8965, -1.6222,  0.6020, -2.4628,
         4.9168, -6.3560, -1.5253, -2.9771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0542, -2.6142, -4.8333,  3.0652,  0.8615, -2.1720, -1.5773, -4.2964,
        -1.2366, -0.4367,  5.1555, -1.5022,  0.5312, -2.0321, -4.1178, -4.0445,
        -1.7695,  1.9811, -1.8187, -0.9907], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2818,   2.4474,  -2.8566,  -4.9246,   0.1831,  -1.3230,  -3.8868,
          0.5723,   3.1003,  -3.0501,  -1.7567, -11.0152,  -8.0346,  -5.4100,
         -5.7539,  -2.5967,  -0.3747,   3.8697,  -7.7427,  -1.2794],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8557, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0435, -2.1530,  1.0526, -1.5624, -0.5192,  1.2696,  4.0300, -1.4814,
         0.4662, -2.9068,  0.1237, -7.0762,  1.5227,  0.6703, -3.0654, -1.1551,
        -3.6889, -3.6570, -5.9269, -0.5559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9772,  0.9557,  0.1926, -1.9017, -0.1899, -3.1869, -4.8287, -2.7036,
        -1.8933, -3.9288, -4.6536, -9.0911, -3.8431, -6.3683,  0.3183, -1.7855,
        -7.2132, -1.6806, -2.9329, -5.7498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4687, -0.5714, -4.3154,  4.8411, -1.9386,  0.2074, -2.6649, -0.1804,
        -3.6490,  0.5513,  2.8399, -1.8495,  1.0063, -2.0132, -1.5632, -0.5237,
         5.5549, -3.3448, -0.0497, -2.5829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.5479, -3.7062,  1.0482, -1.4966, -0.5229, -0.1057,  4.6780, -0.8351,
         0.3595, -1.6685, -1.3834, -4.0939, -0.0720,  1.7712, -2.4985,  0.8631,
        -1.1066, -4.7631,  1.9011,  3.4919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2161, -5.1333, -5.9898, -3.0942, -0.9501, -4.9307, -1.7405, -1.8728,
        -5.9656, -0.8177,  2.8298, -1.7754, -1.9661, -4.0645, -1.6891, -3.1758,
         0.5773, -1.6551, -3.3289, -2.2949], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8328, -2.8248, -6.3912, -2.3745, -1.7778, -1.2877, -1.7121,  3.9248,
        -1.0698, -2.1416, -4.7422, -3.6916, -4.9247,  0.1017, -5.9244, -2.5262,
        -4.0594, -0.5921, -7.9241,  2.1899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1679,  5.9704, -2.4728, -0.0736, -4.0601, -5.1128, -1.6186,  0.8855,
        -4.0490, -2.4994, -3.8993, -1.3235, -9.0683,  0.7287,  0.6754, -1.7444,
         1.1255, -2.5793, -0.4683, -7.8881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4199, -4.7556, -1.0597, -4.7767, -5.6571,  0.4172, -5.6705, -3.6157,
        -1.0098, -2.5923, -2.1442, -4.0512,  0.4577, -1.8043, -4.1086, -1.2493,
        -2.7695, -0.1289, -4.3473,  0.1885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8783, -6.3606, -3.8707, -5.5575, -0.7289, -5.0614, -0.2998, -4.0550,
        -2.6572, -3.7778, -5.8127, -1.1372,  0.6726, -3.6443, -2.4014, -9.1139,
        -5.7007, -3.4953, -5.5070, -1.5558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6243, -3.2856, -1.5618, -2.2919, -0.6672, -5.4806,  4.3598, -9.2345,
        -3.6027, -3.3025, -2.7458, -2.8207,  1.6039,  3.2973, -4.1968, -0.2409,
        -2.1877, -1.6641, -0.9556,  3.2441], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.4487, -5.3291,  0.2449, -3.3767, -1.2421, -3.0492,  1.1677,  1.1318,
        -3.9325,  0.9705, -2.2656, -1.9632, -2.7931,  2.4803, -2.5169, -1.0306,
        -4.3497, -4.2663, -7.6509,  1.1021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2828,   0.1392,  -2.4526,   0.4030,  -3.1006,   0.6162, -17.4584,
          2.4903,   0.5516,  -8.7004,  -1.3912,  -2.3407,   0.1618,  -3.6979,
          3.7353,  -1.6851,  -0.7839,  -4.2578,  -0.9825,  -2.6461],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2415, -1.6121, -4.9671, -2.1862, -4.4788, -5.1103, -3.4596, -2.1058,
        -8.0334, -4.1227, -3.0394, -1.1993, -5.2889,  1.8620,  2.6946, -2.6368,
        -1.2847, -2.7466, -1.4045, -6.4811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4115,  1.8780, -4.2678, -0.2166, -1.0693, -0.4160, -0.4219,  2.9967,
        -3.7601,  0.1162, -2.1543, -4.3191, -4.3764,  3.5669, -2.4423, -1.0785,
        -1.1955, -1.8708, -5.3329,  0.6613], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.1926,  -2.5272,  -1.3199,  -6.1583,  -5.9562,  -4.0183,  -3.9029,
         -4.2740,  -1.1623,  -3.1292,  -0.9454,  -4.5142,   2.5243,  -0.1791,
         -2.9090,  -0.6395,  -1.1337,  -3.2308,  -4.9904,   3.6281],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3591,  -0.0596, -16.2122,  -2.8151,  -1.6496,  -9.0284,  -0.4112,
         -3.0288,  -1.3005,  -3.4269,   4.2810,  -8.4640,   0.6324,  -2.6193,
          0.0889,  -6.1359,   0.7311,   0.7530,  -2.4383,   0.4225],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6020, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.0579,  -5.6571,  -3.7643,  -4.6109,  -2.2462,   0.8963,   2.0340,
         -2.8124,  -0.9554,  -2.8437,  -5.7800,   0.3257,  -0.9109,  -2.7625,
         -2.0728, -11.7932,  -5.1946,  -7.3882,  -7.9956,  -9.1731],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2658, -2.7947, -0.4655, -2.7328,  5.0511, -3.3456,  0.2846, -0.8280,
        -0.8089, -5.8143,  0.3572,  0.3034, -3.2939,  0.2666, -0.5900, -1.6998,
        -0.5132,  4.5825, -5.1977, -1.0492], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9777, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9237, -0.9881, -4.5768,  2.5499,  2.2559, -1.7078,  1.1678, -1.7417,
        -2.9814, -3.7460,  2.7829,  0.0255, -3.0854, -0.1557, -1.0440, -0.7801,
         1.7085, -9.5965, -5.9709, -0.9484], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0411, -12.8309,   2.2174,  -3.4422,  -1.2041,  -3.0692,  -1.5504,
         -2.3347,   2.4965,  -6.2318,  -0.0409,  -2.1178,  -4.2389,  -2.7622,
          0.8875,  -6.4967,  -0.7900,  -0.6059,  -3.4572,   2.1834],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4355,  -4.7919,   1.0827,  -2.1155,  -2.6845,  -0.3789,  -9.5207,
         -6.5151, -13.4335,  -4.9565,  -2.6194,  -5.7461,   1.5887,   4.0933,
         -1.8053,   0.1552,  -2.6229,  -1.2186,  -5.8129,   1.9070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8415, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6060, -0.6800, -1.3572, -3.2603, -0.0537, -2.1864, -3.3434,  2.5687,
         2.4107, -7.9821, -1.0784, -2.3545, -3.1648,  0.1690,  2.4550, -6.8629,
         0.4247, -4.7451, -2.6547,  1.8284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9236, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.5988,  -3.2860,  -8.7634,  -7.3339,  -5.5453,  -3.3164,  -9.1542,
          1.9366,  -7.2650,  -2.5705, -23.1276,  -5.1825, -12.0106,  -1.1378,
        -31.4534,  -9.6714,  -4.9359,  -5.5378,  -2.8859,  -4.7192],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.1181, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9439, -5.7884, -1.7694, -3.3652, -1.1336, -3.1061,  1.9723,  3.0105,
        -3.3495,  0.0281, -2.8427, -2.1412, -5.0692,  4.2663, -3.7068, -4.1156,
        -2.7014, -0.5222, -4.5795,  2.5290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6253, -3.4595, -4.8894, -0.3037, -0.4696, -4.3893, -3.3884, -4.2759,
        -4.1808, -9.0644,  1.2676,  1.6340, -2.3696,  0.0564, -1.5311, -2.7081,
        -5.7676,  3.8559, -5.6017, -0.4406], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8776, -1.4100, -2.8737,  0.7899, -1.9614,  5.1371, -2.2216, -0.8846,
        -1.7523, -0.5501, -6.4168,  1.5835,  1.3542, -2.5284,  0.0155, -0.6767,
        -1.7991, -3.2041,  4.5189, -4.8184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0715,  -1.8974,  -9.7312, -12.8666,  -7.8614,  -9.7720,  -0.1976,
         -7.9926,  -2.4971, -33.4630,  -6.3662,  -7.1542,  -3.7543,  -1.6629,
         -2.4373,  -1.5630,   3.7736,  -1.6338,   0.3988,  -1.8167],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  5.5809,  -9.3878,  -0.5796,  -2.4458,  -0.9226,  -3.2902,   2.6363,
        -10.3954,  -0.0862,  -1.7601,  -2.1286,  -3.2764,  -0.1404,  -0.8650,
         -2.5504,   0.1316,  -0.7161,  -0.8471,   2.4162,   3.9687],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7697,  3.8014, -4.1935, -1.8994, -3.2759, -1.7753, -2.8626,  1.6634,
         1.8843, -2.1847, -1.5782, -0.1744, -1.5924,  0.8051, -0.9345, -5.0759,
        -1.2472, -4.5810, -1.0539, -4.7128], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0910e+01, -6.0806e+00, -7.2238e+00, -3.1439e+00, -7.0373e+01,
        -5.6096e-02, -3.5956e+00, -4.9740e+00, -2.8059e+00, -2.7734e+00,
        -1.1645e+01, -2.4993e+00, -5.5945e-01, -2.4857e+00, -1.8388e+00,
        -1.0649e+00, -3.0155e+00,  2.3660e+00,  4.8356e+00, -2.1110e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9951, -0.6401, -5.5880, -1.0649, -1.0986, -0.8175, -3.5210,  2.8365,
        -3.0915, -5.3701, -0.8868, -3.5632, -2.4342, -0.5519,  3.9019, -2.0893,
        -1.0246, -3.2680, -0.9560, -3.8247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9829,  -5.1561,   0.6294,   2.1975,  -3.5079,  -2.9279,  -7.4852,
         -2.5032,  -5.1956,   2.5897,   0.8794,  -3.4228,  -0.4311,  -1.6090,
         -4.6995,   0.4198,   2.5278,  -4.7522,  -3.9339, -43.9599],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0909, -1.4781,  0.5886, -2.7506, -1.0270, -3.6053, -1.7180, -8.3847,
        -2.1815,  0.8493, -2.6918, -2.4240, -1.5347, -2.1477,  1.1555,  3.3996,
        -2.3672, -1.3231, -3.7374, -1.7488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3226,  2.9931, -3.1282,  0.0040, -2.9533, -1.2684,  0.4911,  3.8240,
        -2.0124, -0.0911, -2.7942, -1.1922, -2.6894,  2.3851,  1.3866, -2.3136,
         0.6789, -2.5651, -1.9766,  2.1610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8524, -2.3825,  0.6233, -2.2992, -0.0775, -5.6183,  1.4254,  0.6173,
        -2.6511,  0.2669, -0.8078, -1.0962,  1.9519,  3.2785, -1.4122,  0.6356,
        -1.2297, -0.1395, -0.2039,  4.1107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1448, -2.4318,  0.5568, -3.5858, -2.2339, -3.8545, -1.2375, -5.7339,
        -3.3408,  0.5530, -3.8825,  0.8412, -1.8514, -4.8504,  0.5384,  1.2868,
        -6.3704, -1.6680, -3.7666, -1.0306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2603, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7403,   0.8381,   2.0063, -11.8197,   0.1764,  -3.1060,  -2.3507,
         -0.3832,   3.7806,  -4.6235,   0.8994,  -4.2557,   0.5151,  -3.4820,
          1.5288,   2.0804,  -5.0222,  -1.6936,  -1.5892,  -5.4240],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6332, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2553, -3.9969, -2.0264, -3.8000, -0.3044, -1.4564,  3.1755, -4.4372,
        -1.5390, -3.4413, -1.6551, -4.6335, -0.2199,  2.7839, -1.9510, -1.2048,
        -1.3120, -0.8078, -6.4402,  2.6894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6777,  -4.1384,   0.2705,   1.9870,  -9.3381,  -0.5598,  -2.6660,
         -1.3141,  -2.9831,   2.6901, -11.7230,  -2.0921,  -3.5369,  -1.5490,
         -2.1330,   2.3118,   1.8303,  -3.5355,   0.8284,  -1.2385],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2807,   0.8867,   4.1602,  -2.2720,  -0.7922, -17.2184,  -5.9608,
         -3.2638,  -4.7514,  -1.2912,   0.2936,  -2.5290,  -4.2611,  -0.3133,
         -2.1875, -10.3750,   0.4659,  -0.4522,  -2.9533,   1.1677],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4997,  -3.6471,  -2.1418,  -3.1437,  -1.1369,  -5.7431,   0.9878,
         -2.1906,  -4.7461,  -5.3064,  -2.9012,  -2.9895,  -9.5038,   1.3729,
         -0.5003,  -2.4604,  -0.9724,  -4.4633, -11.0360,   3.0448],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9759,  -1.2561,  -3.1582,  -4.5206,  -0.3906,   3.6426,  -3.5930,
         -1.8179, -19.8751,  -3.6825,  -5.8200,  -3.1091,   1.0573,   3.6507,
         -4.4196,  -0.6801,  -2.4028,  -4.5735,   2.1631,   2.1299],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3023, -1.8121, -1.2606, -5.0730,  1.2619,  1.3579, -2.8900, -0.7042,
        -1.6740, -1.6362, -6.4421,  2.3811,  0.8300, -2.8183, -2.9672, -0.8677,
        -1.6043, -1.6228,  0.7722, -2.0116], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0155,   0.7886,  -1.5471,  -5.0057,  -0.4316,   1.0802,  -6.2497,
         -6.6595, -20.5099,  -3.2707,  -5.6316,  -7.4407,  -7.8053,  -4.1999,
         -2.9512,  -6.1330,  -5.8857,  -6.1476,  -3.5826,  -9.8253],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.9535,  -2.0291,  -5.5537,  -3.6241,  -2.3561,  -3.4376,   2.0582,
          1.2257,  -1.2985,  -0.3271,  -1.0367,  -2.0881,   0.8855,   2.1907,
         -1.9155,  -0.6176, -13.0823,  -7.8182,  -4.2306,  -5.5239],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3781,  -3.0398,  -2.2583,  -3.1896,  -1.2906,  -1.4727,   4.1730,
         -2.4583,   0.8556, -10.0026,  -4.7767,  -4.7190,  -5.7759,  -1.1421,
         -0.1030,   3.8201,  -5.7959,  -0.2818,  -1.9672,   0.4962],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6534, -3.1914, -1.8363, -2.2574,  1.5240,  2.9613, -1.9037, -1.8858,
        -1.6347, -0.3061, -8.4129,  0.9722,  0.0805, -1.8699,  0.2935, -0.3935,
        -3.0876,  1.7484,  3.3934, -2.0498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5115,  -1.1961,  -2.2264,  -0.2954,  -4.1737,   4.9913,  -5.2797,
         -1.5324,  -1.6955,  -3.9632,  -0.0554,   2.5020,  -4.2205,  -9.5693,
        -24.1130,  -5.2186, -19.1729,  -6.8886,  -3.6869,  -0.2705],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.9106,  -4.0225,  -1.0072,  -0.9487,  -1.8261,   2.2171,   4.0580,
         -1.8544,  -3.1949,  -3.1891,  -4.7963,  -1.9242,  -0.0720,  -7.3432,
         -1.1065,   0.4201,  -5.0275,   0.6740, -10.2478,  -5.0232],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5660e+00,  2.3832e+00,  1.1684e+00, -2.6660e+00,  9.9861e-01,
        -6.2278e+00, -1.7575e-02, -5.4128e+00,  2.4331e+00,  2.5494e+00,
        -3.0497e+00,  5.5848e-01, -1.7614e+00, -2.3868e+00,  9.4789e-01,
         2.9707e+00, -4.6683e+00, -3.7004e+00, -2.0566e+01, -3.3116e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5644, -4.2038, -2.7700, -4.1333, -2.1293, -0.1762,  2.5719, -1.9077,
        -0.2705, -0.8596, -1.0044,  2.6172,  2.5884, -2.8669, -0.2930, -2.6832,
        -2.3525, -5.0992, -0.7058, -0.8789], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3133,  -3.8408,  -0.6419, -13.7261,  -2.3490,  -6.2192,  -1.9637,
         -7.3478,   1.9860,   1.7671,  -3.0823,  -0.8505,  -2.3453,  -3.5253,
         -0.8741,   1.4503,  -1.8921,  -3.5930,  -1.3925,   0.5197],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0712, -10.6903,  -5.5176,  -3.4957,  -4.5534,  -1.8685,   1.4105,
         -8.8342,  -3.3626,  -2.7167,  -0.3706,  -0.5648,   1.0645,   4.5913,
         -1.6682,   0.9936,  -2.7875,  -4.3336,   0.9191,   1.8889],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.8245,  -0.1656,  -9.3899,   3.2082,  -3.1285,  -4.3661,  -4.8285,
        -11.2378,  -7.3187,  -5.0767,  -3.3717,  -1.2425,   2.4486,  -3.0410,
         -0.6892,  -4.5046,   0.0902,  -6.0382,   1.3338,   3.1237],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5720, -1.1142, -2.8613, -2.2607, -7.5978,  2.8757, -0.3773, -2.4799,
        -1.2562, -0.2592, -2.8785,  2.3469,  3.2546, -3.3674, -1.4739, -2.1627,
        -1.5868, -6.6260, -4.6367,  0.5051], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0602,  1.6100,  3.1662, -2.5721, -2.1995, -2.0821, -3.5753,  0.4123,
         2.8651, -6.0121, -1.9706, -3.6372, -3.0993, -4.0793, -1.6727, -5.7357,
        -5.1960,  0.1315, -8.1143,  0.7317], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0300, -5.8975, -2.4040, -5.3389, -1.4367, -6.5533, -0.8314, -3.5941,
        -2.3926, -7.3677,  1.7602,  0.6189, -4.6379, -1.3668, -0.5623, -3.5211,
        -3.0914,  1.9397, -4.7085,  0.2307], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4560, -0.9647, -3.2379, -4.0746,  1.1762, -0.1635, -4.2335, -2.3974,
        -0.9157, -6.3277, -0.0129,  2.2530, -2.2306, -1.0291, -5.1412, -3.5845,
        -1.5205,  1.5574, -2.8114, -1.5571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3627, -3.9317,  0.9810,  2.8950, -0.6154,  0.3009, -2.4583, -1.0858,
         0.7261,  4.0940, -2.7548,  0.7807, -4.0961, -3.3558, -6.1642,  0.2907,
        -0.8047, -1.6137, -1.5339, -1.6003], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0497, -0.3482, -4.3184, -0.2764,  2.8236, -2.6738, -0.7342, -3.5959,
        -2.6850, -3.5891,  1.2621,  2.6469, -2.6671, -2.3474, -1.0360,  0.0581,
         1.2010,  5.3215, -1.9812, -1.0922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6090,  -3.2368,  -2.5473, -13.7304,  -4.9428,  -1.9909,  -2.0823,
         -2.3046,  -0.8503,  -3.3091,   0.3767,   0.7222,  -2.9354,   1.1539,
         -1.7599,  -1.4938,  -7.6106,   0.9727,   1.5215,  -3.3813],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4519, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0505,  0.4941,  3.8113, -1.8707,  0.1647, -2.7783,  0.1427, -2.0861,
         4.4203, -1.2281,  0.8195, -3.0271, -0.4313, -2.5414, -0.9066, -1.6390,
        -3.4261, -0.8551, -0.9961, -6.4492], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3275,   1.5418,  -2.4720,   1.3088,  -2.4339,  -0.5544,  -5.1781,
         -3.7862,   1.6370,  -6.9957,  -0.9953,  -2.5692,   0.6593,  -4.9042,
          2.3902,  -3.6202,  -5.2148, -10.2043, -18.4978, -14.2328],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5719,  -0.8824,  -0.7661,  -5.9130,  -1.5061,   3.4480,  -2.4302,
         -1.7778,  -7.1747,  -9.8002,  -6.4704, -14.7649,  -5.5569,  -1.4097,
         -7.0444,   4.7254,  -5.3826,  -1.1014,  -2.7780,  -4.7707],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9216,  -2.8769,  -6.2024,  -1.0243,   0.7494,   1.6118,  -2.7032,
         -1.5865, -10.0378,  -9.5265,  -4.5949,  -5.3998,  -0.3726,  -0.5845,
          5.4697,  -7.7724,  -0.4740,  -1.2186,  -3.3736,   0.7479],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4812,  4.4838, -4.0386, -1.1393, -1.8503, -3.1737, -0.6800,  3.1966,
        -3.3809,  1.1771, -3.0906, -0.6490, -5.7256, -0.9956,  1.0219, -6.0409,
        -1.5773, -1.8365, -2.8713, -6.5346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2196, -5.5651,  1.5999,  1.1899, -4.1780, -0.0628, -2.7457, -0.5343,
        -3.0241,  0.0413,  3.0338, -3.7503, -0.9982, -3.3406, -1.4915, -3.0843,
         2.3364,  2.1777, -2.5489,  0.8246], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5888, -22.7453,  -1.9898,  -0.3200,  -5.3794,  -0.8388,  -4.2812,
         -3.0922,  -4.1975, -10.1635,  -3.2948,  -6.4468,   0.2167,  -0.9100,
          4.7254, -10.3671,  -1.4136,  -3.8996,  -5.9953,  -2.9104],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0536,   0.8833,  -2.8073,   0.2748,  -9.0001,   1.0977,   1.4209,
         -5.8950,  -2.4042,  -1.5573,  -5.5487,   0.0654,   3.4556,  -5.1310,
         -4.4913, -20.8227,  -7.7394,  -5.7811,  -5.8384,  -0.2786],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2008,  0.5117, -1.0959, -3.8271, -3.0461, -4.9447, -2.3038, -4.4790,
        -5.7430, -1.8156, -2.8999,  1.1919, -1.4286, -1.1603, -0.2845,  3.6172,
        -3.2562,  0.5713, -1.9134, -0.1685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5189,  2.3527, -1.8045, -0.6611, -1.9436, -3.1225,  2.2353,  3.0573,
        -2.4808, -0.6663, -2.4965, -0.4490, -4.5849,  3.3369, -3.1136, -1.3347,
        -2.5976, -1.8878, -9.0360,  0.5610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1483e+00, -7.0888e+00,  6.0307e-01, -8.1414e-01, -3.1153e+00,
        -9.2195e-01, -4.2372e-01, -2.0887e+00,  1.5581e+00,  1.1761e+00,
        -3.8842e+00,  9.6150e-01, -1.9762e+00, -1.4618e+00, -7.1028e+00,
         6.6528e-03,  7.7216e-01, -2.0407e+00, -7.0275e-01, -1.2460e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4908, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6621, -3.5763, -4.3992,  3.4231, -9.4799, -0.8982, -1.0662, -5.4514,
         1.3980,  2.5970, -5.9545, -0.0508, -2.0160, -3.4851,  1.2808, -1.8133,
        -2.0005, -1.7629, -2.3529, -6.6771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2974, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1373,  0.1650,  3.0616, -4.8594, -0.5649, -5.0747, -4.2926, -0.3137,
         3.2864, -1.7612, -2.9380, -2.3169, -1.8295, -1.9834,  0.2452,  1.9381,
        -2.3916,  0.2939, -0.7265, -1.4594], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2829, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2186,  -9.1417,  -3.0296,  -1.5081,  -4.8885,  -2.7472,  -3.5170,
         -0.9050, -14.7359,  -6.8038,  -0.2487,  -1.6313,   0.0174,  -1.6177,
         -2.1201,  -7.5992,   2.0145,   1.1057,  -2.5335,  -1.4035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1022,   0.1047,  -4.0579,   2.7958,  -4.0941,   0.2231,  -1.8311,
         -0.4254,   0.8321,   3.8861,  -2.7031,  -0.2210,  -4.6771,  -4.3052,
         -0.1521,   1.2882, -11.8346,  -4.2085,  -2.8517,  -2.0993],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0172,  4.0793, -2.9695, -2.3668, -1.4709, -1.8759, -0.7939,  2.2608,
        -2.9783, -3.2595, -2.5588, -2.6115, -4.5244,  0.8668,  2.7952, -2.7205,
        -0.3962, -1.6307, -3.4058, -2.4349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4114,  -1.2611, -12.8270,  -1.8969,  -1.1936,  -3.3237,  -3.7844,
          1.0265,  -8.1963,  -3.2607,  -2.3947,  -2.7352,  -7.2440,  -1.1978,
         -0.9801,  -3.7408,  -1.2476,  -0.9176,  -1.2606,   1.7661],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6888,  -4.9237,  -4.9942,  -3.9095,  -1.6800,   0.4954,  -2.2657,
         -4.1436,  -0.9119,  -1.3482,   1.5593,   4.5803,  -7.2787,  -1.3585,
         -3.2544,  -0.2178, -13.2786,   0.5648,   1.1544,  -5.4294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6110,  -6.4476,  -9.5031,  -6.0248,  -7.9879,  -3.2959,  -3.8867,
         -7.0847,  -6.2010,  -4.9682,  -6.3863,  -7.4348,  -2.5830,  -3.4903,
          0.8067,  -4.1899,  -4.5447,  -4.8758,  -7.5078, -10.2943],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5215,  -7.6908,   2.0671,   0.8035,  -2.7617,   1.0636,  -0.9429,
         -4.5720,   1.1201,   3.5580,  -4.1894,  -2.8521, -17.9814,  -6.8266,
         -3.4231,  -4.5759,  -1.1420,  -1.5142,   1.7693,  -5.0785],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7845, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.9775,  -5.9148,  -0.3589,  -1.3626,  -2.7659,   1.0874,   3.0245,
         -2.2701,   0.9914,  -2.1881,  -0.3726,  -4.9486, -11.1420,  -0.1475,
         -4.3612,  -0.8143,  -3.3508,  -1.2556,  -5.0949,   0.5049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0585, -3.2796, -0.5559, -3.2798,  0.9924,  1.2563, -2.8928, -0.5261,
        -1.2026, -2.3430, -4.0541,  0.6946,  1.5705, -3.0553, -0.8818, -0.8838,
        -2.7427,  1.6751,  3.2808, -3.5030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1861e+00,  1.3599e+00, -2.8732e+00, -5.7131e-01, -1.4812e+00,
        -1.2331e+00,  1.3726e+00,  3.2038e+00, -3.1714e+00,  1.0979e+00,
        -2.2484e+00,  1.2750e-02, -5.7120e+00,  5.3520e-01, -6.2358e-02,
        -3.5483e+00,  1.5332e-01, -3.6170e+00, -3.9885e-01, -1.7681e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3335e-01, -3.0705e+00,  9.0690e-01,  2.3325e+00, -3.7901e+00,
         7.6689e-01, -5.1303e+00, -1.7741e+01, -4.7324e+00, -7.8481e+00,
        -5.2143e-01, -4.7480e+00,  8.6223e-01, -4.5127e+00, -2.4552e+00,
        -2.1515e+00, -1.0185e-02, -4.7114e+00,  1.4871e+00,  4.5630e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7672, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3878,  -4.7016,  -1.4302,  -1.2268,  -2.0074,  -2.4640,   4.5129,
         -1.7139,  -2.4749,  -7.2804,  -9.7277,  -4.5663, -10.0927,  -5.4546,
         -2.8337,  -2.1396,   3.7605,  -6.8025,  -0.2568,  -3.4397],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0444, -2.0010, -3.6562, -2.3199, -0.8339,  3.0424, -5.2067,  0.0153,
        -3.8958, -0.4440, -2.1050,  3.1676, -3.5662, -4.2179, -2.5564, -2.4876,
        -2.8226,  1.6415,  3.0437, -2.2067], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0803, -3.7720,  0.8216, -4.6518, -0.8176, -3.5039,  1.9507,  2.9114,
        -2.6026, -1.2553, -1.0162, -6.3456,  0.6290,  1.1283, -3.4561, -1.1923,
        -3.5731, -0.1469, -0.7559,  4.2472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7532, -0.0228, -5.4680, -0.0337, -1.4107, -0.5067, -5.3557,  2.0114,
         1.2632, -1.8175,  1.2263, -1.7314, -1.4515, -5.7768,  1.1964,  2.0977,
        -2.5802,  0.7649, -1.8636, -0.0987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.2285,  -6.5697,  -0.8791,  -2.4224,  -1.4826,  -1.4794,  -0.5098,
          2.0645,   3.8431,  -2.6155,   0.2084,  -5.8666,  -7.2464,  -3.2335,
         -6.6574,  -4.0609,   0.3589,   3.9207, -20.6217,  -3.6513],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4845,  -1.9420, -10.1178,   3.5152,  -3.7591,  -4.2399,  -0.1850,
         -2.6596,  -4.7417,  -0.5769,   2.3791,  -3.6648,   0.4565,  -0.9732,
         -0.6491,   1.2665,   1.2032,  -4.0662,  -1.6565,  -2.1730],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7741,  -4.1789,  -0.1933,   3.2253,  -2.9659,  -2.6097, -16.3143,
         -2.8413,  -4.4897,  -1.8817,   0.8496,  -5.2759,  -3.7747,  -3.9184,
         -3.1229,  -2.0696,  -1.5622,   2.7460,   2.7692,  -3.2466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6712,   3.1318,  -2.1962,   0.1141,  -4.2125,  -4.8786, -14.1561,
         -5.5779,  -6.7564,  -5.1065,  -1.1177, -13.5719,  -1.3587,  -7.1691,
         -5.0612,  -4.6714,  -8.9713,  -3.1627,  -8.1981,  -0.9063],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1396,   1.6026,  -3.0784,   1.0349,  -1.6542,  -3.0108,   0.7635,
          0.8148,  -3.0768,  -0.2958,  -2.0824,  -7.7848,  -1.9704,  -2.7809,
         -6.2470,  -3.6188, -14.6543,  -8.0950,  -5.3834,  -3.4790],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1428, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.5449,  -3.3835,  -7.3669,  -0.6229,   0.2298,   4.7277,  -3.3275,
         -1.2395,  -1.1212,  -1.7171,  -4.4114,   0.8456,   2.9970,  -2.4416,
          0.0362,  -0.9466,  -1.1650,   1.2118,   2.4337,  -3.9596],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1410, -12.6854,  -4.0464,  -3.8381,  -5.4500,  -2.0881,  -0.2390,
          2.5459,  -5.2559,  -2.8093,  -1.6487,  -4.3744,  -1.7480,   2.8764,
         -4.0084,  -1.6342, -33.0266,  -5.4410,  -3.7555,  -6.6502],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4754,  -3.7199,  -3.0005, -11.0590,  -7.4275,  -5.1163,  -5.8940,
         -1.2604, -10.4800,  -6.3888,  -0.4824,  -4.7116,   0.8625,  -2.1361,
         -4.0594,   1.5980,   0.4195,  -3.7269,  -0.0743,  -4.2752],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6309, -0.8756, -3.2342, -1.1853, -3.2594,  1.6206,  3.7397, -2.8470,
        -0.7407, -2.5222, -0.6870, -4.9643,  0.3255,  1.5303, -2.6420, -1.9331,
        -1.4850, -0.8153, -8.9535,  1.5807], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9131,  -2.4132,  -4.9223, -34.0941,  -1.1459,  -1.0850,  -5.2672,
         -2.6688,   2.9952,  -3.9868,  -1.9710, -13.1779,  -5.4339,  -4.5194,
         -6.1350,  -0.5770,  -5.8686,   2.0989,   3.1013,  -3.8782],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5431, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9240,  -3.7796,  -1.6890,  -0.3514,  -2.4762,  -6.1775,  -0.5467,
         -2.5188,  -1.3500,  -4.3459,  -1.3094,   1.6638,  -8.2872,  -0.4480,
         -1.5281, -11.9903,  -3.5389,  -2.1415,  -3.5803,  -5.2993],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1677,   1.2279,   2.2936,  -1.5113,  -2.1209,  -0.2716,  -9.3090,
          1.3850,   0.4944,  -3.5053,  -0.2728, -16.6338,  -5.0607,  -3.1662,
         -8.6846,  -5.1443,  -7.0014,  -1.4612,  -2.0139,   3.5440],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2444,  -2.6187,  -2.5104,  -1.5979,   3.3120,  -6.3595,  -1.1917,
         -1.0037,  -4.7850,   1.8657,   3.5908,  -4.4397,  -5.1101,  -0.8525,
         -2.4982, -13.4727,   0.4495,  -1.5565,  -5.5409,  -0.8389],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6983, -0.7319, -2.7283,  5.7891, -0.4685, -2.3815, -3.6625, -2.3632,
        -3.9724, -4.3991, -2.5662, -5.8699, -1.8844, -1.6226, -2.4664, -7.0682,
        -0.7473, -0.8019, -1.8594, -0.7881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2645, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7066,  -3.6179,  -4.5961,  -2.4754,  -1.2302,   0.3614,  -2.3563,
         -2.8376, -13.3244,  -4.2363,  -7.5415,  -2.5623,  -9.1750, -12.8574,
         -4.3261,  -4.9724,  -0.4779, -13.3305,   0.4399,   2.7098],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3125,  -2.8659, -14.1355,  -4.1080,  -5.6451,  -7.1946, -15.2199,
        -12.4584, -10.8492,  -0.5766, -25.4323,  -3.3847,  -2.4048, -10.5390,
         -4.7655,  -4.9614,  -1.8733,  -7.2925,   3.3622,  -2.9717],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3497,  0.1288, -5.4872,  1.9924,  1.9279, -2.9567, -1.5999, -1.4988,
        -4.6861,  0.4628, -4.9220, -4.3455, -0.1566, -3.9721, -0.2818, -3.9496,
         4.2127, -2.5800,  0.4290, -1.4784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5555, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.1531,  -2.3410,  -2.0562,  -0.8278,  -2.0031,   1.8256,   2.8421,
         -2.4517,  -2.6701,  -3.0172,  -1.5488,  -3.4816, -24.9644,  -2.4694,
         -0.8931,  -2.3626,  -1.1243,  -5.3928,  -2.9392,   1.3481],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0089, -6.2956, -2.0175, -3.6353, -7.5027, -1.3011, -3.1643, -4.4303,
        -0.3547, -1.2812, -1.9182, -5.8488,  3.6997, -3.8610, -1.5806, -3.3054,
        -4.8782, -1.6538,  1.5604, -5.8401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1963,  -5.4287,  -5.7978, -19.9938,  -5.8306,  -2.6049,  -1.8021,
          4.2197,  -5.4591,  -3.9681,  -2.7862,  -0.7293,  -4.0448,  -0.9880,
          3.5371,  -2.2666,  -2.5186,  -2.7775,  -3.4671,  -6.0501],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5813,  2.1546, -4.8078, -0.9229, -2.0262, -1.9713, -0.2197,  3.4728,
        -8.9670, -1.0756, -8.1548, -7.1519, -3.3754, -6.0247, -1.2183,  0.2539,
        -1.1174, -6.5537, -1.1976, -1.1038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0258,  -0.3000, -25.5940,  -5.6125,  -3.8767,  -4.9983,  -0.3943,
          0.4980,   0.0942,  -4.0627,  -0.4328,  -1.4554,  -2.4845,   1.3670,
          2.8262,  -3.6044,  -0.8555,  -7.2455, -10.6319,  -8.4444],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0080,  -4.1012,  -3.0053,  -6.1450,   1.0021,  -3.8929,  -2.5538,
         -3.8071,  -3.3367, -11.8288,   1.0007,   1.0698,  -7.9057,  -0.8500,
         -1.6120,  -2.1407,  -4.5701,   1.1941,   3.2428,  -2.7108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7624, -2.6605, -0.9751, -1.9448, -0.8119, -7.7507,  5.6104, -2.1977,
        -1.7790, -2.3909, -5.4067,  1.1809,  2.9046, -1.4061, -1.8851, -0.8823,
        -2.2950, -4.6471,  0.1958, -1.6608], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8484, -5.5940, -4.7362, -6.4936, -8.2347, -5.5811, -5.4968, -4.6711,
        -4.8769, -6.5085, -4.3975, -4.4848, -6.9046, -5.6289, -7.0535, -9.9810,
        -8.6240, -3.6634, -4.4684, -5.7340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.1850,  -2.2287,   0.5550,  -1.3602,  -1.0453,   1.2519,   4.5946,
         -2.2511,  -1.3869,  -2.2614,   0.0382,  -3.0585,   4.1272,  -1.8834,
         -1.5707, -16.6567,  -6.9312,  -3.5583,  -4.9263,  -3.2188],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2781,  -0.8473,  -7.9771,   0.4207,  -4.2172,   0.1932,  -3.0908,
         -1.2912,  -7.1652,   2.7249,   1.7219,  -3.0565,  -0.7976,  -3.0853,
         -1.8199,   0.5672,   2.2282,  -1.9110,  -1.3599, -14.4556],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6314, -3.7840,  0.5369, -1.6264, -1.5564, -2.0704,  4.4574, -6.5521,
        -3.1603, -3.7753, -0.9992, -3.5438,  2.6031,  2.4491, -2.8651,  0.4230,
        -2.6058, -0.2729, -5.6928, -3.2245], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1087, -1.2847, -0.2779, -1.9948,  4.5181, -7.7425, -1.4980, -1.6498,
        -3.0799,  0.6506,  3.4394, -3.2253, -4.6813, -3.8677, -2.2529, -4.3794,
        -4.4822, -2.3345, -7.5766, -4.2899], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9522, -6.8458, -2.5460, -1.6163,  3.6090, -3.9997, -1.1784, -3.6348,
        -3.7988, -2.3656,  3.2255, -3.0473, -1.7305, -2.8699, -1.5748, -0.0815,
         3.1417, -2.7846,  0.8834, -2.4127], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0606e+00, -2.7984e-01, -2.7099e+00,  3.0082e+00, -5.1551e+00,
        -4.2470e+00, -2.9894e+00, -1.7665e+00, -3.5253e+00,  2.7238e+00,
         2.4265e+00, -3.4155e+00, -2.5121e-01, -7.9771e+00, -5.5809e+00,
        -3.0500e+00, -5.1556e+00, -1.0615e+00,  1.4633e-02, -2.3926e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1292, -11.2201,  -1.7414,  -7.0253,   0.0721,   0.6447,  -9.4511,
         -1.7628,  -4.3829,  -6.0049,   0.1089,   0.1223,  -3.0980,  -4.9831,
        -14.2881,  -5.7387,  -4.9300,  -6.0885,  -2.2384,   0.7199],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2207, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.6403,  -3.4592,  -3.0562, -10.0416,  -2.8949,  -5.7082,  -3.1401,
         -3.5286,  -9.3996,   0.1368,  -5.7773,  -2.7470,  -2.5691,  -0.5999,
         -3.4573,  -1.1450,   3.4496,  -1.4588,  -1.8847, -15.3055],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3973, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9700,  0.4378, -3.4338, -0.4855, -9.9932, -1.6993, -0.6117, -2.5825,
         0.9265, -3.4080, -0.3701, -3.6281,  4.8754, -2.1112, -1.6256, -2.3391,
        -5.2491, -0.4969,  3.7275, -4.6955], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7866, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-34.5235, -10.0426, -18.2158,  -6.1443,  -7.6579,  -4.0101,  -4.1026,
          1.4531,  -1.7591,  -2.1921,  -0.8838,  -1.0501,  -1.8784,   0.4693,
          4.4345,  -3.4682,  -0.0459,  -3.0833,  -3.4723,  -9.6404],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7920, -5.5728, -1.8583,  2.8639, -3.6893, -0.5592, -1.9023, -0.8973,
        -4.6975,  1.3813,  2.6573, -2.6240,  0.9833, -2.3162, -3.0494,  0.4502,
         4.5292, -2.2310,  0.2346, -0.7085], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5798,  -2.7273,  -1.8123,  -1.4957,  -1.1425,  -2.6263,   5.6971,
         -2.6961,  -2.7323,  -5.7653,  -9.8801,  -2.1438,  -6.1587,  -2.7868,
         -0.0728,   3.6609, -10.9232,  -0.0902,  -2.7576,  -0.5684],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9796,  -5.2714,  -4.8607,  -1.7017,  -1.9255,   2.5437,  -4.2865,
          0.4455,  -2.4566,  -0.9245,  -4.6030,   4.7906,  -2.9342,  -1.1275,
        -11.5972,  -5.8946,  -3.1608,  -5.4911,  -0.4339,  -3.5039],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6408,  3.2227, -2.3456,  0.4091, -2.4353, -0.4377, -7.1895,  5.1234,
        -1.8601,  0.9292, -2.7016,  0.1222, -5.9262,  1.8293,  1.1821, -2.0284,
        -2.8721, -2.3397, -1.4204, -4.8205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.7879, -12.5641,  -2.0357,  -6.6059,  -1.1478,  -1.1247,  -2.4165,
         -0.0971,   1.8564,  -2.4744,   0.6519,  -3.1215,  -4.8478,  -3.0230,
         -0.2552,   1.9735,  -1.4498,   0.5365,  -2.0689,   0.4491],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4078,  -4.1684,   2.8904,  -1.1873,  -5.2810,  -3.6729,   0.7100,
        -28.0955,   0.5794,   1.7706,  -2.8079,   0.3439,  -1.9673,   0.1703,
         -3.0847,   3.4340,  -1.2300,  -3.2279,  -2.7912,  -1.5211],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5772, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1155,  -0.9583,  -5.7989,   0.4057,  -1.6850,  -3.2536,  -2.9827,
        -14.6436,  -3.7318,  -7.7464,  -1.7003,  -2.9063,  -0.4628,   2.3561,
         -2.2845,   0.5975,  -2.1726,  -3.3172,  -2.5412,   4.1279],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4431e-01, -7.2879e+00, -7.3520e-01,  8.3637e-01, -3.4300e+00,
        -9.7601e-01, -1.9572e+00, -5.2341e+00,  6.6087e-01,  2.0869e+00,
        -2.0889e+00, -7.4043e-01, -1.4370e+01, -7.5878e+00, -6.6539e+00,
        -1.0628e+01, -5.0088e+00, -4.2096e+00, -5.0694e-04, -1.3983e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2182,  -2.7878,  -0.2073, -21.4496,  -2.8639,   2.0043,  -2.7211,
         -1.2866,  -4.1454,   0.6211,  -3.8082,  -4.8235,  -0.0485,  -5.2470,
         -5.9108,  -0.6518,  -1.5487,  -2.2620,   1.6560,   3.6040],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7493, -2.4202, -0.8496, -1.3296, -2.9703, -0.4094,  2.7527, -1.7204,
        -1.6777, -2.7900, -1.7343, -4.5553, -0.4627, -0.6308, -3.1454, -1.1064,
        -1.2221, -1.3134,  0.1174,  4.4182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2847, -23.0820,  -3.3659,   0.7997,  -5.5827,  -2.0388,  -2.3227,
         -1.7713, -16.0290,  -1.2074,   0.4069,  -2.4987,   0.0387,  -0.8112,
         -0.3277,  -0.6895,   3.5853,  -2.9877,   0.9852,  -2.4471],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1121,  -0.4975,  -3.4573, -10.9746,  -5.9256,  -5.9788,  -0.3613,
         -0.4179,  -0.6799,  -7.0666,  -1.5260,  -2.9989,   0.8535,  -4.6399,
          2.0954,   1.7577,  -3.5923,   0.0430,  -1.3359,  -1.3787],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4421, -5.3176, -5.7127, -5.8451, -3.7524, -3.3044, -6.6129, -3.8179,
        -7.5456, -5.4880, -6.8011, -0.0735, -3.4467, -3.2605, -4.3699, -7.0047,
        -6.0302, -4.4707, -5.6260, -2.9869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.5830, -1.7273,  0.8228, -3.7723, -2.8052, -4.1489,  2.4514, -1.8714,
         0.2048, -1.3102, -2.0361,  0.9590,  2.5890, -2.7446, -1.4658, -2.0621,
        -0.6541, -2.0371,  4.3422, -3.1987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1270,   2.6452,   3.8390,  -4.7474,  -2.1679,  -2.9879,  -2.7629,
         -3.9112,   1.1317,  -5.1346,  -1.1909,  -1.5831,  -0.7229,   1.1549,
          3.3101, -14.7414,  -2.6193,  -1.2666,  -4.8055,  -2.6086],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7934, -6.0107, -0.2772, -3.5499, -4.4801, -2.4451, -7.9187, -0.9912,
        -0.5641, -6.1426, -2.4884, -2.7805, -1.3314, -4.8864,  2.7968, -5.8831,
        -4.2503, -0.6010, -2.9266, -1.6627], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7690, -5.8802, -0.0451, -2.8977, -2.7052, -7.5593,  4.8169, -7.5450,
         0.0620, -3.2385, -1.3697, -4.1568,  1.7795,  1.8925, -3.8147, -0.2883,
        -0.3541, -1.8665,  1.4549,  4.7348], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5195, -10.4149,  -8.4090,  -5.3882, -16.7846,  -9.0684,  -7.1036,
         -1.2994,  -0.7949,   2.8259,  -4.9741,  -1.0529,  -1.1417,  -1.1394,
         -6.5777,  -0.5563,  -1.1092,  -4.1422,   0.2888,  -2.0785],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1934, -3.9084, -0.5688, -3.1480,  3.1489,  0.0263, -3.8474, -1.4582,
        -1.2891, -6.5061, -5.1727, -8.5049, -3.4756, -0.4220, -2.0763, -3.1864,
        -4.7362,  0.1163, -5.6135, -4.0653], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5866, -0.6363, -3.6297, -0.3023, -3.7484,  1.0808,  1.4754, -4.7690,
        -1.2589, -2.1761, -0.7537, -2.4624,  5.7105, -9.1604, -0.0292, -3.0586,
        -0.9451, -2.2364,  2.4898,  2.1304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9747,  -1.5777,  -0.7663,  -1.0059,   0.1434,  -4.6690,   0.1998,
         -4.1885,  -5.2974,  -5.5423,  -5.3257,  -9.0879,  -5.7588, -21.5797,
         -4.8605, -14.0190,  -4.6334,  -4.4022, -12.4830,  -2.5824],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0781, -3.2765,  1.1436, -3.1602, -4.0820, -5.3937, -2.2493,  2.0708,
        -5.9422, -8.2285, -7.8457, -8.9611, -3.5525, -6.2210,  0.5237, -6.5138,
         0.0299, -4.4100, -3.0214, -0.5565], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3935,  0.3009, -2.6765,  5.0377, -2.9574, -0.6931, -3.7244, -0.7844,
        -4.0029,  1.5000,  2.8067, -1.5470,  0.0424, -3.2945, -0.2964, -6.0676,
         3.5175, -2.1299,  0.4186, -1.3122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8300, -2.8448, -2.5738, -5.7245, -4.1929, -3.4296,  2.2213, -1.1508,
         1.0575, -2.1332,  0.2214, -5.2880,  2.1315,  1.6083, -3.1334, -0.2292,
        -1.8341, -2.4468,  1.2666,  2.0273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3107,  -5.5237,  -4.3230, -37.3659,  -8.5368,  -7.3555,  -5.3454,
         -0.3101,  -3.4183,   4.5034,  -6.4607,   0.2292,  -1.8523,  -1.4380,
         -4.5045,   2.2948,   1.5065,  -3.0115,  -2.0474,  -2.8695],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0425e-03, -2.6465e+00, -2.6688e+00, -4.9143e+00,  1.5416e+00,
        -1.3708e+00, -3.2597e+00,  1.1224e+00, -1.2294e+00, -7.1996e-01,
        -9.8214e-01,  3.3636e+00, -3.6096e+00, -2.1144e-01, -3.4851e+00,
        -4.0430e+00,  4.6214e-01,  6.1247e-01, -2.1398e+00, -1.0177e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0431,  0.9400, -2.5898, -0.8201, -4.4056, -1.5481,  1.8024, -5.3206,
        -0.3667, -5.0789, -0.8579, -1.7707,  4.7386, -6.9295, -0.8335, -2.3976,
        -6.9454,  0.0637,  3.4430, -4.4534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3479, -17.6382,  -2.8550,  -7.1477,  -1.1649,  -5.3359,   4.4076,
        -16.1357,  -1.7973,  -3.0683, -13.1719,  -5.1607,   0.1346,  -6.9519,
         -3.9865, -15.8821,  -7.0933,  -5.0612,  -4.8590,  -2.9654],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3383, -4.4769, -3.0995, -6.0767, -9.3336, -4.4073, -0.1224, -4.0340,
        -4.0256, -1.4573, -3.2886, -6.3773, -2.2711, -0.3493, -4.6729, -4.4355,
        -3.0707, -5.1550, -0.6499,  1.6197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4721,  -2.9396,  -4.5128,  -4.3211,  -0.6520,   1.0880,  -3.1267,
         -2.0998,  -2.8246,  -2.2644,  -5.5426,   2.3932,   1.4814,  -3.1327,
         -1.4466, -10.6162,  -7.1308,  -1.0315,  -7.8530,  -1.2488],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2430,  -2.0416,  -1.1239,  -1.4393,  -7.1533,  -3.2157,  -3.0498,
         -5.4226,  -3.3823,  -3.0441,  -1.4882, -16.6875,  -3.4494,  -3.0487,
         -6.5996,  -6.9335,  -7.7148,  -3.2918,  -8.0060,   0.9981],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.6735, -14.8008, -10.4563,  -9.1583,  -7.7016,  -5.9420,  -6.3172,
         -5.4478, -11.4337, -14.0735, -11.8375, -11.8141, -13.3865, -11.0439,
        -10.4837, -10.1947, -11.0067, -15.8370, -49.5605,  -7.9285],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-12.5549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.4002,  -2.7559,  -1.8659, -11.0025,  -6.1544,  -6.1739, -12.7622,
         -5.3458,   0.2872, -11.2859,  -4.3042,  -3.2924,  -0.6294,  -1.9083,
         -4.0170,  -1.7115,   2.7422,  -3.4822,  -2.3803,  -2.2132],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5429,  -3.7249,  -3.2568, -17.2139,  -5.9511,  -3.0207,  -4.4842,
         -0.3086,  -4.3272,   3.1540,  -4.4990,   0.6860,  -3.1941,  -2.8246,
         -0.4344,   1.9390,  -3.6744,  -2.2141,  -4.2881,  -2.1186],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2062,  -5.2136,  -2.8347,  -8.6681,  -5.9084,  -2.9619,  -4.5176,
         -0.9266,  -0.7341, -15.2481,  -3.4186,   1.1360,  -0.8272,   0.8333,
         -2.9599,   2.4320,  -4.7427,  -0.5264,  -2.6373,  -2.5780],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4475,  0.4085, -1.4842, -0.1756, -1.7888,  4.6732, -4.4896, -0.7915,
        -2.7333, -0.9680, -5.3239, -0.2000, -1.1074, -2.4677, -1.8667, -4.0956,
        -0.3724, -4.7223,  1.8495,  2.2510], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6418, -0.6052,  1.0763, -4.9356, -3.6444, -5.3294, -4.4147, -4.1707,
        -6.8656, -3.2387, -2.1978, -3.3220, -1.9201,  2.3302,  4.8132, -2.5125,
         0.1106, -2.9579,  0.1370, -6.5083], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3071,  -4.1641,   1.8888,   1.6930,  -2.7097,  -0.0753,  -3.8721,
         -2.1295,  -3.8684,   5.2659,  -2.9057,  -2.2054,  -3.3000,  -0.4432,
         -6.0576,  -3.7645,  -3.6113, -19.3281,  -1.1153,  -2.5615],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5888, -3.1558, -0.3370, -3.8650, -0.1932,  3.1287, -2.7064,  0.0081,
        -1.5100, -1.0219, -5.8997,  5.2856, -1.3910,  0.8737, -4.3373, -4.4680,
        -1.5226, -0.5761, -2.4225, -0.2470], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6466,  4.5069, -3.2236, -0.6936, -3.0901, -0.9604, -9.5307, -2.5325,
         0.3938, -2.6954, -0.4475, -2.5094, -1.3037, -5.9624,  1.6187,  1.3036,
        -2.8466, -1.3329, -2.3844,  0.0696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8982,  1.3456,  2.7835, -3.9975, -1.3790, -1.7283, -1.3960, -2.2226,
         2.7378, -3.2424,  0.6508, -1.3937, -2.6121, -0.3860,  2.9481, -3.7309,
         0.3841, -2.4063, -2.9217,  1.7892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3994, -1.7068, -0.6602, -3.5574, -5.0260,  0.4557,  1.8880, -2.3227,
         0.4689, -3.2775, -1.3819, -0.0720,  3.0152, -5.4341,  0.3124, -2.7703,
        -2.2587,  2.3981,  2.2205, -3.0651], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2065, -0.1889, -2.7503, -3.2346,  2.6646,  3.2326, -4.9700, -0.6226,
        -2.1354, -1.0230, -5.6871,  1.4403,  0.5982, -1.5997,  0.0158, -1.8111,
        -4.7121,  0.5945,  3.5186, -1.8794], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6806, -5.6818, -8.2771, -3.7605, -5.1768,  0.9694,  3.2428, -2.2020,
        -0.5375, -3.5583, -1.6673, -2.6677,  1.5640, -0.0435, -1.8150,  0.5807,
        -1.7468, -2.3636,  1.5390,  3.1776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0552, -4.1875, -6.3492, -1.6576, -4.0699, -4.1647, -6.1635,  0.9626,
         1.8690, -2.4202, -0.5078, -2.6404, -2.9756, -7.9630,  1.6272,  2.7144,
        -2.2653, -1.0774, -3.3681, -0.9542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9922, -1.3309, -4.4253,  4.0437, -3.1652, -1.2914, -1.8482, -3.0783,
        -0.8246,  1.5876, -7.2194, -0.4809, -1.9409, -2.0473,  1.4457,  3.7249,
        -3.2223, -1.2246, -1.3400, -0.2823], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7454,  -1.9861,  -2.6903,  -1.7214,  -5.0738,   0.0426,  -6.5881,
        -10.0625,   0.7487,  -2.7691,  -6.3576,   0.6691,   2.3927,  -4.3373,
         -0.9301,  -2.1593,  -1.8754,  -3.8355,   3.2495,  -3.2450],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4741,  -3.2656,   2.3405,  -1.5595,  -3.0387,  -3.9376, -13.2359,
         -5.0498,  -4.0118,  -2.3983,  -0.1122,   2.9607, -15.0215,  -0.6204,
         -2.5782,  -7.1825,  -5.4546,   0.7324,  -9.3093,  -3.6738],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6559e+00, -4.3834e+00, -3.8306e-01, -3.1565e+00,  2.1702e+00,
         1.8329e+00, -2.9763e+00, -5.0985e-01, -4.8480e+00, -5.4194e+00,
        -5.6445e+00,  1.2418e+00, -2.3003e-01, -4.2813e+00, -3.3327e-03,
        -2.8428e+00, -3.0747e+00,  9.8709e-01,  5.9993e-01, -5.2592e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0431, -15.2873, -10.4167,  -8.1618, -12.0726,  -4.6429,  -3.8113,
         -1.3318,  -0.2923,   3.6945,  -2.4391,  -3.5621,  -2.9943,  -2.2740,
        -10.7194,   0.8307,  -0.3378,  -3.1780,  -1.5265,  -3.2101],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0456,  2.6055, -1.8641,  0.7403, -2.5013, -5.0400, -6.6122,  1.8388,
         1.5165, -2.2710,  0.0397, -1.4182, -2.1987, -4.6212, -0.1641, -1.9386,
        -0.7505, -1.6753, -2.2266, -5.0529], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9393, -2.8009,  3.8175, -4.7586, -1.1664, -1.2378, -3.1084, -4.5484,
        -0.3761,  1.5504, -3.8940, -1.9441, -1.0787, -0.6912,  1.5760,  3.8531,
        -1.5844,  1.1446, -2.1690, -0.4644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4354, -2.6383, -3.7445, -4.0300, -0.5579, -7.8226, -8.5952, -0.6032,
        -3.2279,  1.5842,  4.0545, -4.2372, -2.3980, -1.8402, -1.0984, -6.7553,
        -0.3105, -0.3961, -3.3332,  1.1600], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4016,   3.2983,  -3.5120,   1.1835, -14.7456,  -7.1415,  -3.0451,
         -5.1572,  -2.0659,   1.6966,  -0.9060,  -2.5627,  -0.1201,  -0.9783,
         -0.7052,  -3.7864,   2.1197,   0.8403,  -7.5840,  -1.3329],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1900,  -3.6896,  -2.1208,  -2.2376,  -6.4282,  -1.5268,   0.4817,
         -1.7934,  -3.2819,  -2.1847,  -1.3257, -12.9670,   3.0987,  -6.5774,
         -7.3342,  -0.8905,  -3.1360,  -2.9214,  -0.8318,   3.3657],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.2948, -28.4101,  -1.0678, -16.3377, -13.2285,  -6.2737, -14.4600,
         -7.5156,  -8.1425,  -3.1888,  -2.6501,  -3.3841,  -1.2418,  -3.8450,
         -3.5220,  -3.3265,  -2.6134,  -3.4870,   2.1095,   3.0184],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5584,  -1.4032,  -2.8058,   4.7790,  -0.6656,  -3.3503, -10.7590,
         -8.1797,  -3.1982,  -5.8268,  -2.8473,  -0.1934,   4.1060, -10.9787,
         -1.4511,  -2.6010,  -9.2023,  -4.3704,  -4.5435,  -4.6845],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1889,  -3.6622, -12.4728,  -5.3518,  -5.8395,  -6.8889,   1.6605,
         -6.5561, -10.9369,  -1.4146,  -1.7005,  -3.6281,  -8.5669,   2.3859,
         -3.6184,  -2.2106,  -1.3444,  -3.9116,   1.7255,  -1.7752],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5354,  -2.1773,  -0.5761,  -4.9066,  -1.3634,  -0.4232,  -5.6868,
          1.5530,   2.9883,  -3.3192,   0.1656, -15.8559,  -2.9873,  -7.9132,
         -1.3422,  -9.2973,   4.8807, -10.9137,  -5.3435,  -3.4027],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2318,  -2.6485,  -0.8409,  -0.7742,  -2.3049,   1.9581,   3.8878,
         -1.6667,   0.9637,  -2.3138, -16.9354,  -1.3377,   0.5001,  -3.9213,
         -2.0012,  -5.8052,  -2.6453,   0.2177,   2.0349,  -2.6678],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9947e-01,  1.8420e+00, -5.1203e+00, -3.1639e+00, -1.4503e+01,
        -6.9675e+00, -6.0765e+00, -5.3255e+00, -3.3914e-01, -5.4971e-01,
         1.2734e-01, -5.8465e+00, -1.5184e-01, -5.9216e-02, -3.7249e+00,
         1.4732e+00,  7.8852e-03, -2.3391e+00,  2.3085e-01, -2.5198e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8648, -2.8178, -6.3599, -3.6704, -5.9174, -9.0885, -1.9542, -2.2114,
         1.3995, -5.8014, -0.8649, -3.1281, -3.2531, -2.5964,  3.0776, -4.1993,
         0.3014, -2.3668, -1.2709, -9.8230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2205, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4634,  -2.7406,  -3.3332,   0.8302,   3.6397,  -2.1294,   1.1138,
         -2.7934,  -1.7472, -10.2770,  -3.3532,  -0.7830,  -6.3031,  -2.6683,
         -0.9193,  -9.0709,  -0.0776,   3.0277,  -4.8646, -11.2645],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 6.5846e-03, -5.0139e+00,  5.4932e+00, -1.0572e+00, -4.0602e+00,
        -1.6756e+00, -2.4929e+00, -5.6059e+00, -2.3084e+00,  2.0590e+00,
        -5.9747e+00, -1.8657e+00, -8.6323e-01, -7.2800e+00,  1.7551e+00,
         1.7468e+00, -2.2290e+00, -4.4313e-01, -3.2168e+00, -5.0255e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0614,   0.6227,  -1.9566,  -0.9036,   1.7642,   4.1054,  -2.3073,
         -4.5646, -10.9765,  -4.9822,  -2.7007,  -5.4705,  -0.1526, -10.2275,
         -1.8865,  -5.1708,   0.9661,  -2.5052,   0.4146,  -3.8957],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3979,  -2.9571,  -6.9546,  -3.4051,   0.1011,  -0.5481,  -3.9659,
         -3.0272,  -2.6949, -13.3263,  -5.4160,  -0.0628,  -4.0779,  -1.9384,
        -17.1760,  -2.9223,  -7.3463,  -1.7970,  -0.7607,   3.9598],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7408,  -2.4837,  -1.4110, -10.7786,  -5.1542,  -5.8743,  -5.1210,
         -2.0847,  -4.2861,   3.1391,  -8.9353,  -3.6779,  -1.8131,  -5.9869,
          0.7375,   3.0306,  -2.6422,  -0.7856,  -3.5679,  -3.8226],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4667, -4.5386, -1.7267,  3.1771, -3.3937, -2.1393, -3.3379, -1.9986,
        -4.3871, -0.8102, -6.5652, -1.6417, -3.4601, -1.2399, -5.6761, -1.2003,
         2.9982, -2.1003, -0.3445, -1.7858], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8423, -1.9401, -1.6384, -1.2683,  4.6130, -0.6780, -1.3935, -1.2018,
        -1.5719, -2.2933,  3.1277, -6.1320, -2.7647, -3.8252, -2.2425, -6.0253,
         2.3023,  2.8159, -2.0084, -1.2573], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.7103,  -5.0801,  -1.6890,  -4.9295,  -3.1932,  -6.5406,  -4.8011,
         -2.7064,  -6.2482,  -1.5883,   1.3844,   3.9736,  -5.1292,  -2.8061,
         -2.6948,  -0.9018,  -8.3761,  -2.2866,  -1.3909,  -3.6382],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1828, -1.5843, -1.2755, -3.8420, -0.9059,  1.6692, -2.0783, -1.1147,
        -7.5729, -6.9252, -3.6315, -4.7372, -1.6627,  2.2625, -0.9552, -5.1110,
         0.6829, -4.0161, -5.4220,  0.2554], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7205,  -1.4873,  -0.3988,  -1.7308,  -0.4124,   1.0795,   4.9833,
         -1.8902,  -0.5201, -11.3009,  -3.1188,  -7.2727,  -1.6150,   0.6508,
          3.8338,  -8.1006,  -1.7177,  -2.9743,  -4.0045,  -4.1133],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5154,  -2.1347,  -1.2857,  -4.6126,   2.3152,   1.1275,  -2.4363,
         -1.5500,  -3.7634,  -2.7502,   0.6941,   3.7813,  -6.1737,  -3.5408,
        -22.7846,  -6.4991,  -6.1708,  -5.9307,  -3.1517,  -2.9159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1861, -15.5673,   0.3224, -25.8015,   0.2486,   0.5661,  -5.5390,
         -1.0332,  -1.9132,  -0.1190,  -4.5668,   2.1886,   2.4838,  -2.5608,
         -1.2036,  -1.6425,  -0.1446,  -1.6153,   5.3375,  -2.5757],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0121,  0.7910,  4.6611, -1.6812, -2.3095, -2.8775, -0.1482, -6.1633,
         1.3755,  1.4033, -2.7770,  0.2314, -1.2024, -3.2007,  0.3601,  2.0879,
        -2.0601,  0.7595, -2.9082, -1.6909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.4650,  -5.0687,  -6.4102,  -6.1259,  -4.9525,  -6.4904,  -3.2785,
         -5.2664,  -2.2005,  -4.9306,  -4.7657,  -5.7792, -12.8059,  -8.1917,
         -3.4215,  -4.6615,  -3.2096,  -4.9201,  -5.8676,  -1.6054],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8533,  0.5505, -2.9990, -1.3851, -0.3219, -2.8962, -1.3093,  3.7042,
        -1.6461,  0.6020, -2.3113,  0.4968, -5.5983,  0.9368,  0.9629, -2.6837,
        -0.8198, -2.1465, -0.6696,  0.1303], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4226,  -1.7700,  -0.6930, -31.9865,  -0.1658,   0.9818,  -2.3969,
         -3.8999, -11.7272,  -7.4658,  -3.7180,  -6.3090,  -3.3363,  -0.5194,
          3.7234,  -5.1388,  -0.6557,  -2.2075,  -2.6877,  -3.3186],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2075, -1.7972, -5.2716,  1.0056,  0.8046, -1.9610,  0.7950, -1.2105,
        -1.3463,  2.2292,  4.6199, -3.1369, -0.0772, -3.0662, -4.4297, -7.8811,
         2.4654, -6.0722, -2.3626, -0.2178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1574,  -2.4499,  -0.1341,  -1.7529,  -3.1468,   1.4614,   2.1954,
         -2.0306,  -2.9087,  -3.2767,  -1.7302,  -1.0154,   1.4061,  -4.0058,
         -3.4950, -19.7553,  -5.4953,  -6.5992,  -5.2164,  -5.1665],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1320,   1.9638,  -4.0810,  -1.6392,  -1.6416,  -3.1045,  -0.4051,
          2.2579,  -3.9024,   0.1245, -12.6828,  -4.4574,  -1.8144,  -3.0121,
         -6.6924,  -1.4759,  -2.8531,  -1.6458,   1.1781,   3.8449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8932,   0.3484,  -9.0562,   0.8645,   2.1390,  -5.6306,  -2.1222,
         -1.5947,  -5.4233,   0.2252,   3.3424,  -5.1596,  -4.3068, -19.5348,
         -7.5921,  -5.5694,  -5.6938,  -0.2216,  -6.4192,   1.5628],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.5926,   2.6129,  -3.3989,  -6.4379,  -1.9980,  -3.6418,  -3.0779,
         -6.3438,   2.8229,  -4.0697,  -0.6567,  -2.3822,  -1.2564,  -0.6648,
          4.3463,  -2.5071,   0.2951,  -2.0978,  -2.4397,   0.9472],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3270, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5497,  -2.0105,  -4.3560,   0.2844,   0.9013,  -2.7311,  -2.0142,
         -8.1284,  -6.7638,  -3.4601,  -3.6781,  -1.2065,  -0.9687,  -0.7967,
        -17.4478,   0.2162,  -1.1048,  -5.7096,  -1.3972,   2.7309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.8425,  -1.7177,  -4.0288,  -7.2039,  -0.8933,   1.2887,  -2.4460,
         -0.5562,  -8.2654,  -6.3578,  -3.4091,  -3.9195,  -0.3997,  -5.7932,
          3.5573,  -4.8053,  -1.2741,  -2.9286,  -2.1698,  -7.3212],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8243, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0648,   1.3956,   1.4077,  -2.0481,  -0.4074,  -1.0861,   0.0913,
          1.6155,   4.6649,  -1.5457,  -6.2941,  -4.0012, -14.8739,  -4.4537,
        -23.1036,  -6.7255,  -1.3480,  -6.1204,  -3.0470, -22.6207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6283, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4970e-02, -2.1610e+00, -6.2585e-01,  1.1211e+00,  5.1023e+00,
        -1.6956e+00, -2.5042e+00, -3.9421e+00, -3.4857e+00, -3.2283e+00,
        -8.4172e+00,  1.0404e+00, -8.3941e-04, -3.2217e+00, -8.5930e-01,
        -2.8166e+00, -9.9184e-01, -3.5180e+00,  1.0684e+00,  1.4150e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0044,  3.5035, -7.5421,  0.8528, -2.8286,  0.0696, -6.6729,  0.2433,
        -3.4044, -0.1965, -2.5855, -1.4399, -3.9312,  0.2811,  2.2148, -2.0350,
         0.7337, -1.9344, -2.9460,  0.5801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8495, -2.2161, -3.5997,  1.0071,  3.8994, -3.8916,  0.2179, -3.7928,
        -0.3474, -4.8236,  0.7088,  1.9767, -2.7526, -0.3647, -1.7399,  0.0428,
        -4.8747,  1.7751,  0.9002, -3.4672], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6948, -28.8738, -10.0564,  -2.5482,  -4.0391,  -3.4417,  -7.7359,
         -5.5262,  -5.5457,  -5.1349,  -2.9192,  -1.2624,   0.3300,  -4.9654,
         -1.8428,  -4.8656, -11.4111,  -0.0350,  -2.2390,  -4.0552],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6813, -1.5013, -0.4226, -3.5015,  1.7869,  3.3347, -2.1938,  0.0930,
        -1.1416, -0.6378, -3.7998,  1.8506,  1.7425, -2.1375, -0.8450, -3.8281,
        -0.4328, -2.6766,  4.7119, -4.9108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6159, -1.7172, -0.0110,  0.5028,  3.5185, -1.5208, -0.9619, -3.3048,
        -3.6541, -7.7282, -3.4765, -0.3729, -1.7545, -0.2933,  0.2075, -1.5681,
        -2.1040,  4.4963, -2.6367, -2.7570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5083,  -0.4305,  -1.2875,  -5.8779,   0.6249,   1.1995,  -6.6412,
         -5.2618, -10.4774,  -3.4531,  -6.7460,  -1.1776, -12.4397,   4.4617,
         -5.1016,  -4.6148,  -2.5083,  -5.0610,  -3.3700,  -2.8553],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6363, -4.9019, -5.8876, -2.7747,  1.5148,  3.1605, -4.3099, -0.4726,
        -2.1056,  0.5268, -4.6389,  0.4968,  1.9202, -3.8030, -0.5230, -2.8229,
        -2.5531, -6.5434,  1.0311,  1.3210], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5428, -7.1223, -2.6804, -1.5042, -3.2458,  2.3174,  3.1761, -3.1465,
        -0.0876, -3.1343, -2.7810,  0.1146, -1.4186, -2.2792, -3.1883, -0.5118,
        -2.6164,  2.6195,  3.5827, -2.3999], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0882, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8673,  -2.8217,  -0.2876,   2.9273,  -2.7873,  -2.3770, -11.5198,
         -9.4251,  -3.0996,  -6.4815,  -2.6321, -11.8752,   4.5596, -10.1740,
         -1.6921,  -1.7203,  -0.7642,  -3.3174,   4.1012,  -0.8033],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0687,   1.4732,  -3.3210,  -1.2346,  -2.3086,  -0.2536,   1.4124,
          4.5475,  -1.3493,  -0.9523, -20.6135,  -2.1060,  -7.4365,  -0.9886,
         -2.1789,   3.8464,  -4.1144,  -0.1902,  -3.2244,  -1.4320],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8492, -0.9681, -6.8643,  0.6521,  2.0013, -1.8712, -0.5580, -0.8099,
        -2.6319,  0.8723,  3.9431, -4.7617,  0.1758, -2.0984, -4.1080,  1.4356,
         3.0444, -3.4517,  0.1195, -1.0184], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9373, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1017,   1.5343,  -2.0440,  -0.5763,  -4.7670,  -0.3326,  -6.2532,
          1.4922,   0.2362,  -2.6293,  -1.1565,  -1.2698,  -1.7312, -18.1217,
         -0.4530,  -0.6383,  -7.8910,  -2.1378,  -3.3717,  -5.7478],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5127e+00, -1.1575e+00, -5.9578e+00, -1.8708e+00,  9.5179e-01,
        -2.9549e+00, -8.8849e-01, -1.5823e+00, -9.2183e-01, -6.9167e+00,
         2.1130e+00,  1.3038e+00, -2.4349e+00, -2.4080e-05, -3.4193e+00,
        -2.3722e+00,  1.3175e+00,  2.7962e+00, -1.7011e+00, -4.0485e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3307, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0355,  1.4304, -2.1021, -1.2703, -3.1001, -1.8497, -1.2696,  2.5003,
        -4.6367, -1.0340, -1.6911, -2.5432, -0.1621,  2.5257, -2.9242, -1.2115,
        -2.3728, -0.3377, -1.4521, -0.4780], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-32.3695,  -9.4801, -14.3958, -11.5367, -10.3129, -22.2970,  -6.7279,
        -14.0297,  -4.3737,  -4.5805,  -4.6402,  -9.9757,  -7.2042,  -7.4597,
         -4.5954,  -3.8219,  -5.0993, -12.1179,  -7.3837,  -1.8020],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.7102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2754,  -1.6120,  -1.3745,  -6.2121,  -2.7384,  -3.2336,  -2.5763,
         -1.9772,  -2.0242,  -4.4870,   0.1567,  -5.8908,  -0.1486,  -1.6658,
         -4.6794,   1.1929,   1.2119,  -1.7711,  -2.4413, -13.2597],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7799, -1.0556, -6.3792,  1.4367,  2.5839, -4.7489, -3.7962, -5.2641,
        -0.6313, -2.9871, -1.7287,  2.3778, -1.8959, -2.8800, -3.4276, -3.1847,
        -4.0325,  2.2198,  1.7751, -2.0648], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5388,  3.1247, -1.6064,  0.6036, -1.3616, -1.9064, -1.8183,  3.8638,
        -3.6087, -0.1482, -1.7169, -2.6184,  0.8550,  2.6274, -2.6674, -1.0611,
        -3.1113,  0.0494, -4.5805, -0.1674], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5708, -3.1277, -5.6231, -3.1703,  0.1371, -2.5201, -1.2433, -0.1102,
        -1.8857,  2.1840,  1.6720, -3.2426, -1.2211, -1.2560, -3.1519, -5.4144,
         0.2824,  0.7711, -4.6429, -0.1249], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9056,  2.6664,  2.9435, -2.9620, -0.9046, -2.5796, -0.1007, -8.8961,
         2.1035, -0.0611, -4.3630, -1.5708, -2.2313, -0.5510, -2.2191,  3.9092,
        -1.7651,  0.0402, -2.5374, -1.3008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3131,  -7.2005,  -3.0312,  -4.6051,  -4.1550,  -5.1518,  -6.6389,
         -4.1776,  -7.4940,  -9.5130,  -3.8643,  -3.3417,  -3.8400,  -3.4296,
         -5.3214,  -7.2814,  -2.7024,  -6.4838, -11.4724,  -2.9266],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8484, -2.6469, -4.1408, -0.5226,  1.5517, -3.2827, -0.9704, -5.9703,
        -7.6036, -6.4939, -4.1319, -6.1685, -2.9387, -2.7910,  0.9529, -6.5519,
        -3.3953, -1.5971, -2.0925,  1.4395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8452,  -0.6804,  -3.5693,  -3.4026,  -4.0122,  -1.6370,   2.5614,
          1.9495,  -1.8312,   1.3661,  -1.6288,  -1.3853,   1.7053,   4.5704,
         -1.1869,  -3.6080,  -3.5294,  -0.0874, -22.7596,   1.4219],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5104, -1.8364, -3.8322, -0.6511,  3.4016, -6.1694,  0.1809, -2.2112,
        -4.5116, -1.4478, -3.8900, -3.3426, -0.7148, -4.0096, -2.5216, -7.6101,
         0.5385,  0.6582, -6.0198, -0.8863], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4961,  -1.6381,  -1.1819, -18.0962,  -3.2760,  -7.3139,  -0.5885,
         -0.6352,   2.7213,  -7.9373,  -4.9875,  -3.5953,  -1.2608,  -0.1261,
          1.9881,  -7.3896,  -0.2422,  -3.1068,  -1.1017,  -3.8093],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0819,  -1.6395,   0.4588,  -3.4959,   3.0233,  -4.3533,  -0.7284,
         -4.9132,  -1.1320,  -4.9687,  -1.4927,  -5.9538,  -3.2225,  -2.9131,
        -11.9755,  -4.4731,  -3.8580,  -2.8355,   2.2238,   1.3937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.9268, -2.3363,  1.2563, -2.0170, -0.6910, -4.6198,  4.7482, -1.5834,
         0.3501, -2.3231, -0.4454, -6.7572, -0.5119, -0.5415, -2.0489, -1.1350,
        -2.2720, -5.2161,  2.2292,  0.8186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4053,  1.2977,  2.1003, -3.2777, -1.5134, -4.2092, -1.5808, -4.0478,
         1.8303,  1.5424, -2.6981,  0.9050, -2.8806, -3.0597,  2.5623,  3.1630,
        -5.8162,  0.5260, -3.2009, -2.5910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9292,  -0.7959,  -1.6052,   0.1932,  -3.0939,  -0.5245,  -1.5412,
         -2.4010,   2.2821,   2.0463,  -6.7370,   0.6441,  -8.8659, -10.2958,
        -19.5330,  -7.0196,  -1.9407, -11.0076,   0.2235,  -1.9896],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0811, -3.1320, -0.3333, -3.4662, -0.8677, -3.7401, -0.2810, -0.4147,
        -4.2672, -1.4623, -1.1549, -1.2348, -0.0931,  4.5863, -3.3204, -1.4785,
        -3.0370,  0.9678, -9.2504,  1.8602], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.9559,  -5.2177,  -5.8565,  -6.6012,  -2.4281,  -2.3567,   2.8068,
         -5.1208,  -1.0836,  -1.7368,  -2.0329, -12.3176,  -0.2747,  -3.8926,
         -5.3178,   0.1061,  -2.2556,  -1.6912,   1.3217,   4.1056],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0900, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.6736,  -1.1235,  -1.3884,  -8.3669,  -5.9332,  -5.0204, -13.9946,
         -6.1737,  -0.2482,  -2.4021,   2.7132,  -4.8778,  -1.9928,  -1.9440,
          0.2522,  -3.4407,   4.4695,  -2.4932,  -3.0461,  -3.7918],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6754, -4.1120,  2.9218,  2.8205, -2.8105, -1.1539, -1.3397, -1.9578,
        -5.2345,  1.7123,  0.5346, -2.8889,  1.1248, -1.2881, -1.0412,  1.5838,
         4.2625, -4.3562,  0.1157, -2.1825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.0322, -15.9035,  -1.0812,  -2.3845,  -4.2695,  -1.2597,   1.2093,
         -3.1277,  -2.1271, -12.3785,  -5.7359,  -7.3316,  -5.6344,  -6.2720,
         -2.5881,  -1.0294,   1.0090,  -2.5904,  -2.3369,  -5.2342],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0755, -8.1315,  3.6797, -2.0977, -1.4006, -2.7568, -3.5509, -4.0035,
         0.7229,  2.2725, -3.5983, -0.3911, -1.4499, -0.3301, -4.0166,  0.2993,
         2.6744, -4.4701, -0.5790, -3.9311], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6067, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0725,  -0.2486,   1.2239,   4.2712,  -2.1152,  -0.8577,  -1.6006,
         -2.4621,   0.4157, -14.1340,  -2.7918,  -1.5296,  -1.6846,  -1.2595,
         -4.3483,   2.6897,   1.2785,  -3.6857,  -2.6482, -13.7278],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2656,  0.2182, -1.1616,  0.4788, -4.0460,  4.8120, -6.5764, -1.1517,
        -2.6491, -0.9958, -4.7220,  2.0943,  2.4564, -4.3370,  0.1847, -1.1491,
        -2.4920,  0.3071,  1.9129, -3.4452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2264, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0047, -3.3193,  0.6223, -1.6756, -3.5356,  0.8044,  3.0565, -2.7161,
        -0.6584, -0.8388, -3.9547, -1.4658,  3.0176, -6.5879, -1.6123, -1.8657,
        -1.8813,  0.7746,  2.9762, -5.1330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6769, -2.9541,  0.0315, -2.7111, -1.0340, -0.2332,  3.6835, -2.0978,
        -0.0176, -3.5425, -1.4975, -3.4882,  1.9697,  3.5270, -2.6934, -0.9845,
        -0.2821, -1.7455, -0.2431,  1.1419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2322, -7.9908, -2.9443, -0.3509, -3.4740, -0.1623, -2.6895, -0.6868,
         0.9944,  2.1296, -2.5864, -2.6931, -2.4931,  0.1681, -5.6306,  0.9656,
         2.3122, -2.5104, -1.3279, -2.4206], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4709, -3.7369, -2.1169, -1.1152, -4.6705,  1.5893,  2.3607, -2.2182,
        -0.3924, -3.0072, -2.2603, -4.0740,  1.7989,  2.4065, -2.5368,  1.1543,
        -2.0423, -1.5915,  0.8985,  2.4294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3000, -2.5177, -0.9456, -6.1737,  0.6353, -0.1863, -1.6082,  0.6824,
        -3.3230, -1.6600, -3.9343,  1.0832,  3.0005, -3.4449, -2.4217, -3.7302,
        -1.1363, -2.6104,  0.9202,  2.8347], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3323,  -0.8274,  -3.0848,   4.9030,  -2.3855,  -2.5832, -10.9547,
         -7.0962,  -3.1753,  -9.2440,  -2.7939,  -4.1915,   3.6355,  -6.7656,
         -0.2474,  -1.8057,  -2.4760,  -2.6439,   3.5794,  -3.7272],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1412, -5.9809, -2.9209, -0.9841, -2.2434, -1.3922, -2.1050,  0.9730,
        -0.8861,  3.3654, -5.1521, -6.1755, -3.9640, -4.2543, -1.5626,  3.1104,
        -3.4639,  0.2412, -0.3640, -3.1273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6651, -0.6831,  3.1922, -3.6132, -3.0347, -1.8530, -2.0774, -4.9865,
         2.5313,  1.6662, -2.4745,  0.7606, -2.7154, -3.1818,  1.5866,  2.9774,
        -3.2045, -3.5932, -3.5872, -1.2018], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5767,  -1.0581, -17.7014,  -5.2782,  -7.4289,  -4.4914,  -3.0711,
          3.0080,  -3.7719,  -9.7440,  -0.9375,  -4.1634,  -2.5826,  -3.3343,
          3.1947,  -1.8434,  -8.5224, -12.2685, -17.1330,  -1.8656],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5712,  1.0314,  3.9820, -6.3497, -1.7214, -3.5829, -0.1741, -3.2456,
         1.2512,  2.9437, -2.6717, -0.1448, -3.2491, -2.4778, -3.7201,  1.6867,
         1.7862, -4.9909, -1.5488, -1.1112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3309, -2.9704, -5.3826, -9.1730, -3.8462, -6.7179, -3.7980, -4.3749,
         1.0709, -3.6672, -3.8152, -2.1081, -3.4660, -7.6562, -3.7462, -0.7155,
        -4.3279, -2.1519, -3.6472, -3.1156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3262,  -0.2488,   0.6322,   2.0635,  -2.8094,  -4.0588,  -0.7570,
         -4.3273, -11.0765,  -0.3007,   0.7448,  -2.1024,  -1.8162,  -2.0249,
         -3.8037,   2.5556,   3.8152,  -2.9838,  -0.7097,  -2.3102],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0183e-01, -2.9286e+00, -8.7696e-01, -1.4362e+00,  3.5862e+00,
        -3.9800e+00, -2.6525e+00, -2.2347e+01, -5.6008e+00, -7.4016e+00,
         1.0906e-02, -6.9877e-01, -2.5288e+00, -4.7725e+00, -1.8167e+00,
        -9.7124e+00, -3.5142e+00, -3.4780e+00,  4.2936e-01,  1.5879e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6506, -3.1268,  0.0173, -2.1639, -1.7319, -6.4027,  0.8118,  1.4020,
        -2.0304,  0.8050, -4.2676,  0.2000, -1.6260,  4.2771, -5.4344, -0.0805,
        -2.7220,  0.7255, -4.6701,  1.9970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1775,  -3.2001, -13.0352,   3.6860,  -8.6624, -12.0557,  -4.7212,
         -9.8701,  -4.5504,  -8.3355,  -1.1141, -31.3595, -10.9309,  -2.6283,
         -4.2793, -12.5962,  -8.0064,  -8.0283, -10.5361,  -5.5255],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8344, -3.2297,  1.1696,  2.6576, -4.3980, -2.1300, -4.3543, -3.0279,
        -4.9727, -2.4584, -1.0397, -4.0701, -2.4192, -1.3483, -0.8887, -9.8906,
        -1.1720, -0.5037, -2.9676,  0.3575], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8690, -2.8691, -1.1510,  1.7816, -5.9086, -2.4486, -0.0402, -2.4397,
         1.4643,  3.9525, -2.4044, -0.5401, -1.6982, -2.9831, -2.2971,  1.6868,
        -4.0736, -1.2527, -1.6777, -1.3649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.3601,  -5.7050,   0.3035,  -5.3190,   2.4355,  -8.3730,  -0.3330,
         -1.4187,  -1.1038,  -4.7157,   2.9352,  -0.7874,  -4.7383,  -2.7327,
        -10.0302,  -3.3763,  -5.0970,  -0.8836,  -3.0234,   3.5137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.8839, -4.8007, -0.7396, -9.5810, -3.6779, -3.4602, -5.6746,  1.7153,
        -5.3059, -0.5481, -1.1727,  0.1361, -5.3717,  4.6663, -3.9238, -4.0618,
        -1.7593, -0.8951, -4.6928,  0.6618], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1801, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1851, -3.7960, -0.5434, -2.5577, -3.7310, -1.5768,  0.3599,  1.6240,
        -4.7218, -1.6920, -2.3634, -1.1325, -3.3893,  2.4539,  3.5658, -2.5759,
        -0.6951, -0.6664, -3.7176, -5.9586], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8610, -5.2102, -6.8396, -2.5865, -4.1805, -4.0721, -8.7822, -5.6618,
        -8.6203, -4.2919, -4.5340, -7.6140, -3.8842, -5.8731, -1.5001, -6.1764,
         0.7712, -6.5696, -3.7618, -5.6836], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4926, -2.6987, -2.1587, -0.3539,  4.3515, -1.8909, -0.8332, -6.4782,
        -6.1093, -3.1541, -5.4434, -0.8315, -2.0730,  2.7798, -3.9351, -0.0757,
        -1.9496, -2.9512, -3.9962,  2.4120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5137, -1.8330,  0.6526, -3.5075,  0.1120, -0.4270,  4.5850, -2.6712,
        -6.2386, -8.6734, -8.2031, -3.0644, -4.8920, -0.4354, -1.2876,  1.0393,
        -5.6810, -2.7763, -0.7913, -4.2898], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7751, -0.0979, -2.9811, -1.8690, -3.1090, -2.0362, -4.9320,  0.3224,
         0.4965, -2.7273, -3.0203, -3.7406, -2.5970, -6.2310,  1.5929, -0.8869,
        -4.7983, -0.6472, -0.9896, -0.2743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.8345,   2.8185,   0.7434,  -5.2708,  -0.9131,  -1.3880,   0.3974,
         -5.1852,   3.2108,  -1.2667,  -3.3525,  -4.8428, -31.9915,  -3.0536,
         -5.7625,  -1.5057,   2.1809,  -5.9978,  -3.4942,   0.1660],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5553,  -4.6490,  -2.7741,   1.7932,  -5.9728,  -3.0321,  -3.2807,
         -7.7604,  -5.5725,  -4.9756,  -4.0751,  -3.4020, -10.3828,  -8.4353,
         -4.6589,  -4.8131,  -2.3389,  -2.9562,   1.9281,  -4.7127],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5045, -7.5516,  0.1837,  2.2352, -2.5756,  0.0748, -2.6358, -3.0900,
         0.4958,  3.6187, -2.2292, -1.7992, -1.1621,  0.0821,  0.5384, -3.2227,
        -5.6215, -0.8750, -5.0204, -2.2861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0100, -4.1177,  0.2092,  3.0996, -5.0069, -0.4141, -1.9710, -1.2484,
        -4.7024,  0.2771,  2.1361, -3.1307,  0.2120, -3.9574,  0.2376, -4.9432,
         2.9526,  2.4484, -4.0585, -2.2726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8964, -0.1702, -1.4037, -0.9930, -4.6245,  2.6344, -1.3887, -3.4542,
        -1.1572, -3.0367, -1.7643,  0.6406,  1.6445, -2.0389,  0.6937, -3.6614,
        -1.2846, -3.0498, -1.0950,  1.3296], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4839, -0.3998, -2.4853,  4.6566, -3.6751, -4.0083, -4.6800, -1.8706,
        -3.3562,  0.5123,  2.5665, -0.6205,  0.0138, -1.7090, -0.7753,  1.5855,
         3.6063, -5.4451, -5.9168, -3.7545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4903, -7.2896, -5.1589, -4.4447, -2.1558, -3.0793,  1.3155,  4.1140,
        -2.2805, -0.5069, -3.4764, -1.0868, -5.2044, -1.2946, -4.8149, -0.4826,
        -6.1495, -5.0011, -4.2020, -4.3375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1013, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2051, -1.3550, -1.6981, -3.7070,  1.7618,  3.5791, -2.7383, -0.1203,
        -4.2955, -4.0783, -3.8268,  3.1508, -0.8438, -0.7921, -2.6258, -4.6252,
         1.0791,  3.7527, -3.2934,  0.7384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2302,  -0.4738,   1.4596,  -4.8847,  -3.1751,  -7.4024,  -5.9908,
         -4.4371,  -4.4174,  -1.9372,  -3.7688,   3.4749,  -4.5466,  -0.4670,
         -2.2173,  -2.1819, -11.2575,   0.6527,   0.5928,  -3.6158],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9912, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5398, -3.0586, -4.9988, -3.5513, -2.1477, -5.3871, -1.1395,  0.3861,
        -3.1874,  0.2815, -2.5906, -2.5293, -3.5199,  0.6424,  1.0057, -2.1526,
         0.4388, -1.9858, -4.7568,  1.9872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6862, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5318, -3.5442, -2.9245, -3.6393, -3.2273, -6.2522, -3.3419, -4.7596,
        -2.7786, -5.7641, -2.7465, -8.1259, -7.6876, -6.0058, -5.8643, -7.0083,
        -5.1640, -3.5938, -2.2961, -3.6663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.8461, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8723,   2.7692,  -2.1606,  -0.8196,  -8.7493,  -5.6619,  -3.8604,
         -4.4241,  -2.1530,   0.6187,  -0.5953,  -3.2213,  -0.7508,  -2.8282,
         -5.5386,   0.5978,  -0.6362,  -2.8694,  -1.8833, -11.2475],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2110,  -2.6299,   0.6029,   3.0990,  -1.9565,  -1.0882,  -3.0973,
         -4.3177,  -4.2834,  -3.3883,   0.1693,  -3.2397,   0.0562,  -3.2267,
         -2.3560,  -3.2175,   3.1588,  -1.3539,  -3.1035, -13.2433],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6486,  -4.3082,  -3.8811,  -5.1012,   1.0418,  -1.1179,  -3.2266,
         -1.9212,  -5.3284,  -5.9687,  -4.0765, -26.8609,  -4.5201,  -7.1600,
         -7.4412,  -3.6303,  -5.8240,  -2.8422,  -0.9026,   2.0902],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8274,  2.1570, -2.6509,  0.2686, -2.2377, -1.3785, -1.1630,  3.7291,
        -3.0104, -0.5069, -1.3169, -3.7990, -0.9193,  2.7202, -2.3298,  0.4659,
         0.0167, -2.7621,  1.7964,  4.4501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6704,  -3.1811,  -3.2924,  -7.2705,  -3.6011,  -7.7363,  -7.2387,
         -5.7463,  -5.4236,  -6.5938, -18.6316,  -1.6880,  -6.0931, -13.9781,
         -5.0271, -14.3781,  -3.4454,  -6.2962,  -0.6101,   1.1245],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9122,  -5.8215,  -6.6101,  -0.5271,  -5.1142,  -3.3760,  -5.9226,
         -1.7614,  -2.5835,  -6.4956,  -3.4737,   2.6054,  -3.5789,  -2.9062,
        -20.9362,  -7.8076,  -3.3454,  -4.3043,  -3.8573,  -7.8588],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8555,  2.8820, -5.3704, -0.1900, -1.7008, -1.9630, -1.4269,  3.6904,
        -2.8521,  0.0973, -4.8641, -2.6811, -7.3633, -3.7709,  1.7449, -1.2427,
        -0.5776, -0.6314, -1.4342, -2.0853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8922, -1.2576, -6.1635, -0.9433,  0.9660, -1.9035, -0.1170, -0.7691,
        -1.7598,  1.1479,  3.8760, -3.5634, -0.0736, -2.5602, -3.1648, -6.7590,
        -0.1611,  1.7892, -1.6816, -0.7397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3365, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0211, -6.2558, -2.0070, -3.6892, -7.2125, -1.5265, -3.1134, -4.4359,
        -0.3208, -1.3459, -1.9605, -5.9018,  2.9104, -3.8183, -1.5398, -3.3066,
        -4.7864, -1.5417,  1.2511, -5.7851], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.1021,  -5.4108,  -5.8365, -19.6495,  -5.7722,  -2.5618,  -1.8879,
          3.8222,  -5.4688,  -3.9120,  -2.8591,  -0.6734,  -4.0371,  -0.0762,
          3.5708,  -2.3335,  -2.5268,  -2.7948,  -3.4495,  -5.9974],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3579,  -8.4818,  -1.5263,  -0.5137,  -3.8192,   0.7900,  -1.8125,
         -0.3503,   0.6756,   5.0207,  -1.6733,  -1.3045,  -8.9829, -18.8733,
         -4.4483,  -6.6097,  -9.0321,  -0.8012,  -1.7399,   2.5482],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7817, -1.5629, -3.1464, -2.2209, -3.9316,  1.7448,  3.3042, -2.7471,
        -0.2331, -1.5534, -1.3288,  0.3929,  4.7640, -2.2684,  0.4775, -2.7866,
        -0.8224, -3.2792,  1.9402,  2.0947], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8972, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9406, -0.1912,  1.5357,  4.4996, -2.1544, -1.4057, -3.0280, -1.0616,
        -3.7782, -0.2989,  0.3612, -1.6984, -3.5747, -3.8578, -0.1371, -7.3095,
         1.8019,  1.3014, -2.4516,  0.9210], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2587,  1.9491, -1.6807,  0.9307, -2.4702, -2.9384,  2.5064,  2.5559,
        -4.3772,  0.5577, -1.7926, -3.5188,  1.6074,  2.8378, -1.6527, -0.3801,
        -0.6027, -1.1190, -2.4414,  5.1503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5602, -3.1648, -6.7590, -0.1611,  1.7892, -1.6816, -0.7397, -1.4289,
        -3.0385,  2.5415,  3.0871, -3.2619,  0.4640, -8.3029, -1.0073,  1.1447,
         1.8329, -4.8277,  0.1635, -2.3223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8664,  -7.2613,  -2.6262, -13.3446,   0.9797,  -0.4877,  -3.5160,
         -0.7349,  -2.3306,   1.3663,  -4.8454,   1.8638,  -7.7324,  -0.2251,
         -3.0876,  -1.7703,  -4.0095,   0.4963,   1.7843,  -1.5420],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5445, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6397, -2.0990, -2.8508,  0.2759, -4.0565, -2.8614, -2.9700, -4.2693,
        -3.9348,  0.7975, -6.7859, -1.6875, -8.9685, -7.9556, -3.7694, -6.3131,
        -1.8596, -1.8286, -0.6682, -4.4288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.0874, -8.0250, -2.8071, -0.9145, -1.8206, -5.1392,  1.9001, -6.4656,
        -3.9155, -4.4589, -0.7373, -5.1242, -1.0585,  2.3481, -2.5814,  0.4793,
        -1.0188, -3.3774,  0.5766,  4.0652], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2105, -0.3341, -0.8123,  0.2329, -3.7725, -0.6322, -1.4980, -1.7434,
        -4.0880,  1.8531,  3.1968, -2.6764, -1.0660, -2.5123, -2.7693, -1.0088,
         0.6945, -4.6353, -1.5067, -4.1108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2963,  -3.0138,  -8.3387,  -1.0042,  -4.5410,  -1.4826,  -4.0913,
         -2.8361,  -6.2974,  -1.9860,  -4.1077,  -2.1048,  -2.3492,  -4.0092,
        -10.0488,   0.6250,  -3.0130,  -1.1783,  -2.1599,  -2.8101],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3578,  -4.4045,  -0.2300,  -1.8483,  -5.7769,   0.8548,   2.2043,
         -4.4328,  -3.3519, -12.1890,  -8.2191,  -7.9421,  -5.8019, -26.8540,
         -2.8488,  -2.3829,   0.7922,   2.8962,  -4.1759,   0.5174],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4290,  -1.9527, -13.0871,  -4.2368,  -9.9775,  -0.3133,  -3.6010,
          4.7885, -11.0677,  -1.7190,  -8.9639,  -8.7364,  -1.5943,  -5.6385,
         -2.6322,  -1.3038, -15.3697,  -8.9158,  -8.6081,  -5.6131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5886, -1.0228, -1.2563, -6.0730, -8.7270, -0.5926, -1.7701, -2.9583,
         0.5723,  2.2423, -3.8022, -0.9108, -2.6112, -3.0766, -4.1440,  3.6856,
        -1.5892, -1.1683, -1.3403, -4.8630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6240, -1.3258, -7.9581,  0.7848,  1.0552, -2.4266, -1.3827, -0.7474,
        -0.6818,  0.5322,  2.8182, -3.1741, -0.2702, -3.3977, -2.2745, -3.9479,
         0.0336,  1.8047, -3.2371, -0.4318], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7140, -0.6953, -2.1835, -5.0582,  1.8384,  1.9087, -7.8345, -2.9289,
        -2.9941, -2.0782,  2.0243, -4.5711, -4.2023, -4.1538, -6.5506, -6.3458,
        -3.3340, -4.5140, -0.3993, -0.4032], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8095, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3553, -9.8229, -6.3364, -4.8597, -1.5117, -2.6354,  2.1611,  3.4809,
        -2.6091, -0.7831, -3.6307, -2.0890, -3.6535,  1.7057,  1.5526, -2.3832,
        -0.8607, -1.9060, -4.7321,  1.1364], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3681,   0.8913,  -6.2981,  -1.5130,  -1.8917,  -8.0722,  -5.4430,
         -0.5479,  -5.4870,  -3.5070, -25.0995,  -9.5763,  -5.4080,  -4.9036,
         -1.0106,  -2.9135,   4.5163,  -5.1902,  -2.7454,  -2.3525],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0108,  1.6280,  0.0424, -4.1965,  0.6164, -0.8779,  0.5273, -1.0345,
         4.3442, -3.9394, -1.1118, -0.7521, -3.8504,  2.1852,  2.9489, -2.2234,
         0.8522, -1.0023, -0.6025,  0.4456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4613, -3.1599, -6.4459, -4.2787, -5.7113, -7.3552, -4.4420, -2.9009,
         0.0407, -3.4124, -4.1705, -4.7466, -2.7833, -5.4728,  0.0547, -3.5189,
        -3.0112, -4.9382, -5.9289, -5.4306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.1037, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2070e+00, -4.6711e+00, -7.5195e+00, -5.1274e+00, -6.8242e+00,
        -6.7794e-03,  9.8434e-01, -3.7410e+00, -1.7924e+00, -3.6488e+00,
        -1.7190e-01, -6.6772e+00,  1.5467e+00,  2.1525e+00, -3.2522e+00,
        -2.9074e+00, -3.0259e+00,  3.1314e-01, -6.9122e+00,  2.0491e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8385, -1.5089, -2.4135, -2.3675,  2.5294, -1.7044, -2.1759, -0.1097,
        -1.3631, -1.9999, -1.0208,  1.4605, -3.1620, -2.5925, -4.5146, -1.2183,
        -5.4767,  1.4584,  2.4233, -3.3682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7778, -0.4835, -1.7782, -2.1099, -3.8391,  2.3683,  2.5342, -2.2062,
         0.1981, -3.6629, -1.3679, -4.2725,  1.4450,  2.8161, -5.4128, -0.0614,
        -1.9844, -4.1775,  0.4289,  2.7617], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2803,  -6.0863,   1.9558,   3.3297,  -2.0368,  -1.0244,  -6.4072,
         -3.7699, -31.4337,  -1.1727, -12.4367,   3.1941,  -0.9308,  -6.8791,
         -7.9423, -14.1596,  -2.4363,  -5.9174,  -1.7636,  -2.5783],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9579, -7.7980, -6.8517, -6.1186, -0.5236, -2.5426,  2.3320, -2.1519,
        -1.7813, -3.4151, -0.8948, -9.1917, -1.0889,  1.4843, -3.3852, -2.3007,
        -4.1960, -1.6618,  0.2744,  4.0412], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6364, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7427,  0.7721, -1.5743, -2.5949, -8.0373,  3.5355, -5.0486, -1.0232,
        -1.3292, -4.0577,  1.4813, -2.5822, -3.2951,  0.6569, -4.1958, -0.5698,
         1.6411,  2.1211, -1.6505, -1.1397], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3076,  -4.5874,   0.1580,  -3.3490,  -0.6671,  -4.6094,   1.9791,
          1.4352,  -4.6032,   0.4873,  -3.1318,  -0.1427, -16.1921,   1.4488,
          0.4965,  -6.2354,  -1.0232,  -2.4891,  -7.2319,  -9.3643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7012,  4.9383, -1.8041, -3.2084, -3.5708, -0.5237, -2.6163,  0.3609,
         2.3040, -2.4494, -1.2729, -1.9306,  0.2263, -7.0576, -6.4706,  2.3296,
        -2.4185,  1.3990, -2.3162, -1.1186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2774, -0.3517, -0.7972,  2.0815, -4.7180,  0.1652, -2.3770, -3.8152,
        -0.0318,  3.0745, -4.6172, -4.1472, -3.2824, -0.4774, -8.2324, -2.6202,
        -0.7601, -1.0491, -1.7795, -0.7687], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5977,  -6.7838,   1.4152,  -6.2640,  -2.2041,  -2.1280,  -2.9570,
          1.0892,   2.8710,  -1.8583,  -4.9682, -12.5313,  -3.9200,  -5.6766,
         -3.7359,   1.2838,  -2.0868,  -5.6378,  -4.3800,  -2.9447],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5308,  -3.7794,   0.9676,  -4.5970,  -3.8963,  -2.8686, -14.8154,
         -7.0126, -11.0770,  -4.4001,  -3.7635,  -6.7686,  -7.4321,  -4.1843,
         -5.4556,  -3.0644,  -8.6189,   1.7152,  -5.0217,  -2.7436],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3261, -0.9873, -3.0394, -2.6458, -4.7687,  2.2474,  1.8495, -3.4201,
        -0.0809, -1.5017, -3.3620,  1.8696,  2.0295, -2.3351, -0.5420, -2.7536,
        -1.7704, -3.7992,  2.6935, -5.6059], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0329, -1.4157, -4.4631, -0.6596,  1.9142, -2.1095, -0.3143, -9.9389,
        -5.2810, -3.2447, -5.3155, -2.6834, -0.8543, -0.9637, -4.3580, -1.5509,
        -2.5291, -2.2413,  2.0074,  1.9562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6315,  -2.2974, -13.6861,  -2.9620,  -6.2097,  -0.3214,  -2.1380,
          4.8250, -19.5120,  -2.3629,  -3.6465,  -3.6053,  -2.6763,   0.7115,
         -1.4543,  -2.0270,  -9.2311,  -7.5956,  -3.8311,  -5.0053],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4834, -0.4905, -4.0788,  0.7843, -3.6176, -5.7236, -0.9382, -3.4470,
        -0.8458, -3.4192,  2.0314,  2.2803, -4.5137, -0.5160, -2.4414, -4.7770,
        -2.9404, -0.9291, -5.3745, -5.1969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3819, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6058, -14.0875,  -3.6708, -16.2872,  -0.3642,  -2.5767,   0.3818,
         -0.9462,  -6.1882,  -3.9344,  -4.9634,  -6.9968,  -4.8727,   1.4119,
         -6.0773,  -2.4680,  -3.2262,  -2.0854,  -4.8136,   1.3040],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9951, -0.7198, -3.7940,  0.9364,  3.1981, -2.9894,  1.1784, -2.2084,
        -1.3997, -6.5553,  2.3998,  0.4722, -3.3468, -0.6490, -1.4886, -0.7519,
        -4.0565,  0.9097, -5.7754, -3.3369], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4486, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7996,  -3.2701,  -3.9486,  -0.8305,   0.7094,  -5.6159,  -3.9191,
        -15.2911,  -6.5134, -10.1720, -12.2976,  -5.0305,  -1.2276,  -2.0211,
          3.0475,  -7.5191,   0.0374,  -2.9630,  -0.4279,  -3.4768],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0135,  -3.3094,  -2.2163,  -6.7095,  -3.4133,  -2.4491,  -5.9183,
          0.6783,   3.5121,  -7.0700,  -2.9813, -19.1767,  -8.3788,  -9.3478,
         -5.9954,   0.5011,  -5.9567,   4.1172, -20.6262,  -1.6813],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3687,  -2.6600,  -5.2034,   0.6663,  -0.5582,  -3.3795,  -0.5087,
          1.9336,  -1.7387,  -1.0457,  -2.8801, -16.0805, -11.5211,  -1.5895,
         -0.1987,  -3.8725,  -0.4525,  -1.2277,  -1.2372,  -2.1832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1709,  -6.3165,  -1.3776,  -3.6598,  -3.1246,   1.6334,   2.4998,
         -1.7416,  -1.0788,  -1.3335,  -0.9304, -11.3349,   2.3415,   0.1134,
         -5.3599,  -0.5841,  -2.8352,  -0.6503,  -4.2976,   2.0887],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.0722, -12.9335,  -1.4121,  -1.8771,  -5.2222,  -0.0798,   2.2284,
         -1.3445,  -0.6589,  -7.0055,  -4.4769,  -3.9493,  -6.7417,  -3.5784,
         -7.1222,  -1.6780,  -5.4679,  -3.1860, -28.2333,  -4.9083],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1088e+01, -1.0596e+01, -6.7794e+00, -4.9646e+00, -1.0417e+00,
        -1.0948e+01, -4.1156e+00, -5.1821e+00, -1.4607e+00, -7.0454e-02,
        -1.1212e+01, -8.4734e-01, -2.5749e+00, -3.3548e+00, -4.4640e+00,
         5.0496e-03,  8.6080e-01, -2.3220e+00,  1.0534e+00, -2.6109e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1165,  -6.6201, -10.8331,  -5.5204,  -4.8973,  -8.1580,  -5.8823,
         -7.1727,  -0.6661, -14.4164,   3.9204,  -6.2104,  -2.3249,  -2.6450,
         -0.3322,  -4.1770,   3.3891,  -4.6757,  -0.5563,  -3.8155],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1462,   2.1099,  -1.0425,  -3.4340,  -2.2237,  -1.0619,  -5.6717,
          0.3429,  -1.8824,  -3.1191,  -2.7858, -13.6816,  -3.7808,  -7.6376,
         -1.7238,  -2.7953,  -0.8656,   1.8133,  -2.3125,   0.6520],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1030,   3.4779,  -2.5778,  -0.9445,  -1.8277,  -2.2686,   1.9451,
          3.4309,  -1.5610,  -0.2255,  -2.0114,   0.6653,  -1.9937, -45.5214,
         -5.4122, -12.2352,  -3.3399,  -5.2189,  -6.2361,  -0.5190],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5642,  -3.1172,  -1.3740,  -4.7273,   2.1734,   2.7887,  -4.9510,
         -0.7286,  -2.1015,   0.1892,  -6.4298,   2.1840, -14.6646,  -1.6652,
         -1.7729,  -3.4127,  -2.8606,  -0.0856,   1.9435,  -2.4732],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7127,  -2.5306,  -9.1467,  -3.9429,  -6.9055,  -6.3075, -18.6507,
         -6.1789,  -5.1183,  -7.9055,  -8.1119, -10.1244,  -7.0476,  -0.3192,
         -4.8290,   6.2208,  -4.5480,  -0.5678,  -4.1507,  -2.6746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7158,  -1.6296,   1.0336,  -5.7203,  -1.7567, -23.7338,  -4.7117,
         -8.6260,  -6.8475,  -0.7174,   2.0564,   2.4920,  -4.1696,  -0.3021,
         -3.1268,  -1.2933,  -5.4718,   1.9924,   2.5618,  -3.3081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7450,  -4.4906,  -0.3975,  -5.3554, -13.7001, -12.8955,  -7.3107,
         -2.6817,  -5.5867,   4.0468,  -5.2809,  -2.4596,  -3.8648,  -6.7271,
         -2.0964,   1.7755,  -3.0815,  -1.0075, -15.1498,  -5.1897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3036, -2.5874, -0.7894, -4.6032,  2.1665,  0.7830, -1.9351, -0.1227,
        -3.4877, -0.4928,  0.8805,  5.1489, -3.3465, -0.5449, -2.9067,  0.3171,
        -3.4606,  2.5653,  1.5113, -6.0145], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8611, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.5741,  -4.6549,  -6.1244,  -7.8983,  -4.1356,  -2.4947,  -1.8406,
          2.5131,  -4.6470,  -2.3874,  -2.3735,  -2.6644,  -4.0052,   1.2909,
          2.0983,  -2.6225,  -1.5238,  -3.3345,   0.4130,  -3.4943],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1023e+00,  7.6018e-01, -1.5715e+00,  5.1875e+00, -1.6578e+00,
         5.4569e-03, -3.9289e+00, -6.3529e+00, -2.7431e+00, -3.7293e+00,
        -7.9751e-01, -5.7052e+00, -1.3980e+00, -2.2021e+00, -7.6973e+00,
         1.2399e+00,  2.8358e+00, -2.4762e+00, -7.7482e-01, -4.1359e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0618, -0.0884, -1.1057, -1.1879, -0.5000,  4.5240, -2.3813,  0.2364,
        -3.7429, -4.1903, -1.1753,  1.7276, -2.6037, -2.8458, -1.1592, -3.1642,
         2.9866,  1.2793, -3.7793, -1.4766], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.8649,  -4.3704,  -5.9452,  -8.0122,  -1.6715,  -1.6553, -20.2982,
         -9.2341,  -8.4733,  -2.8350,  -4.8591, -15.9199,  -1.4696,  -6.6353,
         -1.8736,   0.9637,   6.1938,  -5.8661,   0.2202,  -1.4362],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5194,  -6.7180,  -1.5176, -28.1165,   0.4538,  -3.0458,  -6.3479,
         -1.5356,  -4.3285,  -0.9672,  -4.5954,   1.3102,   1.8013,  -3.4519,
         -1.7758,  -2.9015,  -3.0320,  -1.4837,   3.9612,  -4.9028],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6857, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9344, -0.6245, -3.6085, -0.5957, -4.4700,  2.4204, -3.6088, -4.5093,
         1.1251, -2.2531, -0.8159, -0.5195,  3.9674, -1.5956,  0.6947, -2.7327,
        -0.0510, -5.1333,  1.6653,  2.3722], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6713,  1.1072, -3.3844,  0.1704, -1.2714, -0.8825, -3.8077,  3.8985,
        -4.3408, -0.0941, -3.7977, -1.1781, -4.9133, -0.6655,  1.5867, -8.8694,
        -0.7418, -3.1572, -2.5870, -2.7480], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9686,   0.6511,  -8.2648,  -6.9599, -11.8601,  -7.5981,  -1.4774,
         -1.1118,   2.7031,  -4.2134,  -0.6862,  -3.9312,  -6.6994,  -3.0923,
         -0.7280,  -7.6876,  -9.3101, -53.0331,  -8.7724,  -5.1184],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0579, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2981,   0.0701,  -3.6900,  -4.0880,  -0.2599,   2.0904,  -2.7647,
         -0.3904,  -0.0817,  -4.9296,   1.6009,   1.6245,  -2.3636,  -3.8384,
        -12.6675,  -6.9245,  -4.5028,  -4.1780,  -0.8808, -16.6082],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3040, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3688,  2.2659, -3.6956, -1.4519, -2.1348, -1.5186,  2.4152,  3.2066,
        -3.9929,  0.6282, -1.2320, -1.0486, -5.1969,  0.4045, -8.3287, -4.4811,
        -3.3304, -2.8012, -3.4414, -0.2778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.2307, -2.6548, -0.4648, -2.0539, -5.7458, -0.5590,  3.5707, -1.4316,
        -1.2504, -0.9537, -0.9944, -8.9942, -0.5054, -0.3455, -2.4946, -0.2169,
        -0.8239, -1.4110,  1.7220,  2.3525], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5465,   1.8299,   1.4070,  -1.9007,  -0.2203,  -3.3653,   0.6984,
         -7.3494,   0.8650,   0.8023,  -5.1452,  -0.6126,  -3.8926,  -4.3296,
         -1.7174,   0.4470,  -8.2033,  -5.1079, -12.7483,  -3.3305],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8210, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4198,  -9.9183,   0.4571, -27.3782,  -3.1445, -10.0287,  -2.9704,
         -2.0508, -10.7075,  -6.6584,  -1.1451,  -3.7470,  -4.4061, -15.8368,
         -5.1967,  -7.2845,  -4.5156,  -3.7627,  -3.8408,  -0.9882],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1477,  -6.9457,   0.0874,  -2.8442,   0.1555,  -4.7852,   2.3144,
          2.7600,  -3.3747,  -0.3602,  -2.2715,  -1.3669,   0.9396,   3.5900,
         -3.7445,   1.0324, -15.5888,  -2.1810,  -8.7777,  -0.3776],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1943, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6328,  1.1643, -3.2756, -1.8170, -1.3543,  0.0266,  0.1585,  4.9216,
        -2.5260, -0.4799, -2.8259, -0.2669, -6.3412,  2.5380,  2.2345, -1.8085,
        -1.1957, -3.0977, -4.5473,  0.2950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6768,   3.1556,  -3.5388,  -1.7492,  -1.7859,  -5.0657,  -2.6610,
          0.4017,  -5.0197,  -0.8613, -17.6580,  -6.2561,  -4.3009,  -4.8020,
         -0.9048,  -0.4957,   4.4160,  -2.0990,  -1.0021,  -2.5459],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6312, -2.0467, -1.7537, -2.2994,  3.6654, -5.5686,  0.9835, -1.2531,
        -0.7710, -7.7356,  1.7278,  0.8522, -3.5053, -1.5231, -6.7382, -7.0744,
        -2.9381, -0.3229, -3.3200, -4.5161], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7725, -2.7916, -1.9693, -1.6484, -2.0162,  0.9259,  3.2559, -2.4179,
        -1.3031, -3.9116, -1.7047, -2.8638,  2.1153,  1.6386, -2.9593, -0.6686,
        -1.7911, -2.3030,  1.2646,  4.5059], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6935, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5111,  -4.3892,  -2.7322,  -2.6226,  -3.6505,   1.9508,   2.9079,
         -4.7137,   0.6741,  -5.4248,  -9.7450,  -1.8967, -14.6444,  -8.9032,
        -35.8828,  -5.6909,  -2.6467,  -4.8575, -10.8718,  -3.1406],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3670,  -0.9934,   1.8371,  -2.3753,  -1.2917, -10.6893,  -6.4183,
         -5.1485, -15.7449,  -9.1357,  -5.5882,  -1.2807,  -0.4440, -10.8499,
         -3.3063,  -1.7390,  -0.8367,  -1.7390,   1.4568,   4.1037],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1658,  0.4140, -0.7973,  4.3513, -1.7208, -0.4257, -1.3850, -0.2247,
         0.5335,  5.4226, -3.1572, -1.3885, -2.5319, -0.2553, -3.9192,  2.4826,
         0.3628, -2.3736, -1.0017, -1.8399], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3042e+00, -2.9609e+00, -3.2009e+00, -4.4293e+00, -1.9596e+00,
         3.1613e+00, -2.7603e+00, -2.3981e-03, -3.8829e+00, -3.8948e+00,
        -6.2327e+00, -2.0251e+00, -1.8967e+00, -5.7388e+00, -3.4601e+00,
        -1.3283e+00, -3.4025e+00, -7.9602e-01,  2.0257e+00, -4.3406e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6021, -0.4026, -1.8774, -0.6511,  2.6084,  3.6162, -2.0271,  1.0181,
        -1.9591, -2.0743, -4.0944,  2.6732,  1.0726, -3.1280,  0.3502, -2.4481,
        -0.7712, -0.6331,  2.5035, -2.8966], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6861, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3688, -2.4275, -2.4846,  0.9611,  3.9760, -3.9229, -0.0167, -4.7649,
        -0.6762,  0.2796,  4.7416, -4.1645, -2.3992, -3.0429, -4.5004, -1.7433,
         1.9855, -1.6241, -1.2073, -1.8796], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2139, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3723, -1.8464, -3.2574, -0.4037, -4.2652,  1.7861,  3.6568, -4.2171,
        -1.8915, -2.4094, -0.8307, -0.4303,  2.2067, -3.4616, -1.0149, -0.6192,
        -0.8854, -4.7384,  1.9489,  2.5134], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9616, -4.5136, -5.9695, -7.9569, -7.4021, -6.0113, -4.6417, -6.8055,
        -4.8562, -4.9525, -7.4138, -4.0991, -5.3246, -3.8400, -4.0740, -5.1253,
        -6.6071, -8.1470, -5.7214, -8.9265], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.9175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5684, -2.8734,  0.6086,  3.1378, -1.2688,  0.3278, -0.9727, -2.0730,
        -1.2287,  2.7076, -2.0601, -0.4634, -3.7454, -2.2140, -5.6659, -2.0113,
        -2.1954, -6.8480,  0.4673, -0.9970], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1612,  1.4781,  0.7601, -5.5786, -0.7446, -3.2845, -2.5176, -9.3838,
         1.3541,  1.2612, -3.4628, -0.4614, -1.0647, -2.2677, -0.9207,  3.1303,
        -5.9422,  0.3297, -2.7171, -4.8829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8007,   2.1603,   1.5121,  -2.5594,   0.6802,  -3.8197,  -2.4132,
         -3.9642,   1.7331,   0.4513,  -2.6530,   0.2949,  -1.2718,  -2.5365,
          2.7367,   3.0669,  -2.5722,  -1.1281, -17.0249,  -2.8294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.5210e+00, -6.0095e+00,  2.9511e-01, -2.1785e+00, -3.1764e+00,
        -7.3528e-01,  1.8668e+00, -1.9736e+00, -1.1077e+00, -1.1104e+01,
        -6.8892e+00, -5.3002e+00, -5.2045e+00, -4.8433e-01, -5.0086e-01,
         4.9396e+00, -2.6464e+00,  2.4290e-03, -2.3127e+00, -8.0064e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9399, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9361,  -6.5633,  -3.3601,  -3.8940,  -2.6493,  -0.6712,   2.8635,
         -7.2349,  -0.2170,  -1.9355,  -3.4068,   0.4036,   2.6693,  -3.4881,
         -1.7512,  -5.4163,  -5.6756,  -2.3241,   1.7181,  -8.1952],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1840,  -8.0805,  -5.3953,  -3.3323,  -5.1363,  -1.5658,  -3.0452,
          4.2753,  -4.1819,  -0.9007,  -2.7668,  -6.2132,  -1.0626,   3.0368,
         -4.7669,  -3.0057, -35.1705,  -6.8366,  -4.4502,  -5.9126],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0370, -10.2455,  -2.8023,  -5.8119,  -4.9646,  -5.1803,   1.6939,
          2.9178,  -4.1038,  -1.2546,  -4.1857,  -0.3334,  -3.0464,   2.1312,
          2.1285,  -3.8045,  -1.4477,  -3.0953,  -1.1227,  -0.1495],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9672, -4.3762, -1.7735, -1.9350, -6.6765,  1.1336,  2.3953, -3.6815,
        -0.9875, -3.1179, -1.2452, -5.9566,  0.6536,  1.3738, -2.6846,  0.8903,
        -1.8805, -3.4843,  1.1474,  2.6866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8185,  -6.6626,  -2.5786, -21.3695,   3.8645,  -8.7410,  -4.4968,
         -3.1213,  -3.2229,  -2.9296,  -0.3116,  -7.2745,  -6.4651,  -2.4765,
         -2.2738,   0.2051,   1.3074,  -2.3597,  -0.6420,  -3.4167],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8139, -2.2241, -2.0465, -5.3167, -0.9409,  2.0570, -3.9007, -1.3720,
        -3.7154, -5.1017,  0.8303,  0.7769, -3.2273, -0.1324, -3.5475, -3.2047,
        -0.4652,  1.3359, -2.5004, -2.4553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9092,  3.8865, -2.5332,  0.3592, -2.8166, -1.1509, -3.7174,  2.4663,
         1.8081, -2.5451, -1.7138, -2.5021, -5.1590, -1.5762, -0.4331, -4.2784,
        -1.6012, -9.1291, -9.2764, -2.9493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2326,  -0.1581,  -1.8065,  -2.5634,   0.4384,   3.0854, -18.0308,
         -0.8497,  -1.6766,  -6.0529,  -0.3463,   2.4050,  -2.6528,  -0.7200,
         -8.4061,  -6.7281,  -3.6372,  -4.9729,  -0.5659,  -2.0124],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2197,  -4.9323,  -2.4718,  -6.1985,  -3.8408,  -5.7604,  -0.5355,
          1.1381,  -2.3469,  -2.9515,  -3.5440,   0.6494, -11.2996,   1.0132,
          1.4472,  -2.0625,  -3.1941,  -1.8506,  -1.6131,  -4.8401],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0472e+00, -8.4844e-01, -2.7139e+00,  2.9727e-01,  2.5281e+00,
        -2.3135e+00, -1.0306e+00, -2.4315e+01, -6.4888e+00, -3.7038e+00,
        -5.2980e+00, -8.2834e-01,  7.6977e-03,  4.7977e+00, -4.0566e+00,
         8.8278e-01, -1.2265e+00,  9.7890e-01, -4.0165e+00,  3.5642e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6538e+01, -4.3300e+00, -1.1945e+01, -2.7291e-01, -1.7030e+00,
         4.3358e+00, -1.5748e+01, -2.4072e+00, -1.1817e+00, -4.6010e+00,
        -2.5525e+00,  2.7375e+00, -3.8405e+00, -1.4167e+00, -8.8924e+00,
        -6.1866e+00, -4.3166e+00, -4.2802e+00, -2.8241e-01, -1.8549e-02],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3487,  -0.0865,  -3.3401,  -1.3245,  -2.2690,  -1.0825,   1.6855,
         -0.9899,  -0.4988,  -4.1931,  -7.4161,  -3.2165,   1.5053,  -4.9587,
         -4.9521, -12.5306, -42.1857,  -3.7351,  -6.1634,  -4.6035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0519e+00, -4.3606e-01, -3.6698e-01, -7.2848e+00, -2.1619e+00,
        -2.0804e+00, -1.4452e+00,  1.5300e-01,  4.0105e+00, -2.0065e+00,
        -1.0597e+00, -1.5704e+00, -1.3469e+00, -4.3985e-01,  1.2400e+00,
        -3.6908e+00, -4.2865e-01, -4.3438e+00, -3.6669e-03, -6.5826e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.8272e-02, -1.6071e+00,  1.4477e-02, -4.4648e+00, -3.7824e-01,
         6.2869e-01, -3.6755e+00,  4.3067e-03, -1.6164e+00, -3.9491e+00,
        -2.5669e-02,  2.5457e+00, -1.9254e+00,  6.3591e-01, -4.8277e+00,
        -4.0422e-01, -7.2562e+00, -3.6659e+00,  1.3644e+00, -1.4869e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0499,  2.2949,  3.8253, -1.9215,  0.5587, -2.2172, -3.8290,  0.3836,
         2.5433, -4.3768, -0.8715, -3.2590, -2.5281, -3.5110, -4.1069,  1.2516,
        -4.2160,  1.0988, -1.5794, -0.1613], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7885,  2.2262, -5.1336, -2.3886, -2.7702, -2.3366, -3.9737,  0.0263,
         0.8896, -1.9680, -0.5272, -3.6943, -4.4984, -2.4142,  3.3846, -2.1099,
        -0.6099, -0.3771, -4.5986,  0.9424], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.6765,  -4.9756,  -1.7667,  -2.1696,  -3.4798,  -1.2352,   1.2047,
         -6.8885,  -1.9108, -22.6742,  -6.3920,  -4.6503,  -6.0784,  -1.0219,
         -3.8012,   0.3525,  -4.5709,  -4.6896,  -2.7268,  -2.8576],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9337, -1.9566,  4.5827, -4.3614, -0.2208, -3.2715, -0.7379, -4.0730,
        -1.0013,  2.8548, -3.6281, -2.1310, -2.6487,  0.0848, -5.7173,  2.5053,
         1.7417, -3.9325, -0.5617, -3.4416], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7272, -3.9422,  0.7226,  3.2483, -4.1990, -2.2385, -2.3120, -2.9170,
        -5.4040,  2.2774,  1.2215, -2.5894, -0.6819, -2.1293, -3.8878, -0.1648,
         1.8664, -2.6605, -1.6779, -4.0108], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6103, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0649,  -3.1795,  -1.3278,  -4.5665,  -0.7295,  -4.8015,  -2.7428,
          1.0310,  -2.8215,  -0.8205, -11.4426,   2.5385,  -5.4292,  -1.1024,
         -1.7883,  -4.2220,   0.0256,   1.7074,  -3.7539,  -1.9066],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3199, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2216, -1.9644, -2.2412, -9.3510, -1.7619, -1.5237, -2.6372,  0.3065,
        -1.1303, -2.0103,  0.6915,  3.2081, -3.3475, -1.5376, -3.8751, -1.7114,
        -6.2723,  0.4483, -0.5398, -2.2083], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6817, -0.3137, -4.9929, -4.5217, -4.3540, -4.5156, -4.6774, -1.2947,
        -4.7359, -3.0552, -6.6611, -6.8655, -5.0063, -5.0714, -2.0847, -1.7817,
         3.0032, -2.4118, -2.7026, -4.2332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7479, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3891, -3.6358, -3.7085, -2.2457, -9.4539, -5.2639, -6.0948, -4.7179,
        -2.4316, -3.2895,  5.6158, -4.0812, -0.7748, -3.7105, -4.3058, -3.1835,
         0.5491, -3.0410, -1.5275, -3.8404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8698,  0.3663, -2.7071, -0.9838, -2.7875,  0.2849,  2.0472, -4.2295,
        -0.1707, -1.3642, -3.2385, -1.0021,  3.7825, -3.6467,  1.3116, -2.3363,
        -0.8720, -3.2205,  4.6253, -1.6443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2587, -1.9039, -2.8335, -0.9845, -6.1207,  1.8713,  2.3531, -3.2839,
        -0.5705, -2.1104, -0.2923, -3.9386,  2.0968,  2.0847, -2.8112,  1.1170,
        -2.3817, -7.3621, -4.2202, -1.0456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4885,  -2.6481,  -0.4488,  -0.6387, -18.2965,  -5.8180, -15.7022,
         -3.5106, -14.2102,  -9.2700,  -5.8232,  -6.4081,  -5.1108,  -1.1121,
        -13.9930, -13.4986,  -5.9161, -17.3510,  -2.6590,  -3.4953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2781,  -2.0297,   0.5351,  -4.2027,   0.1211, -12.3427,   1.0298,
         -3.8715,  -8.2185,  -0.2169,  -2.8399,  -2.6615,  -4.2072,   0.7752,
          0.2695,  -3.3580,  -0.7665,  -3.3213,  -1.6276,  -4.0375],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4346, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4149, -2.6690, -3.1777, -2.3127, -2.9289, -3.9152,  1.9089, -1.2672,
         0.4597, -2.0410, -1.9265,  1.2926,  3.2674, -3.0168, -1.8657, -3.4164,
        -1.0905, -5.3943,  0.5156,  0.2646], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3449, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6431,  -5.3908,  -0.5784,  -0.1468,   0.6161,  -3.5280,   0.5112,
         -1.9306,  -2.7454,   0.5940,   1.6832,  -2.7966,  -2.4471, -10.4680,
         -6.9918,  -5.6092,  -5.5540,  -4.0044,  -1.9812,  -1.8184],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6727, -2.5268, -0.7943,  0.0329, -4.0063, -2.2618, -2.9349, -6.0468,
        -4.2660, -2.5050, -5.6534, -1.8477, -7.0533, -7.1380, -4.0114, -5.9355,
        -1.3158, -3.8000,  2.9170, -3.5672], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9604,  -1.5738,  -2.8871,  -0.9631,  -1.7804, -10.8663,  -9.4371,
        -12.3023, -31.7549,  -4.6229,  -8.2218,  -1.6360, -10.8286,   3.8886,
         -5.8585,  -3.2744,  -2.9616,  -4.0941,  -4.9030,   1.5444],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9247, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9519,  -0.7984,  -3.2734,  -7.4427,  -4.1658,  -0.5539,  -4.1328,
         -3.8205, -12.4695,  -5.5248,  -5.3300,  -5.4185,  -1.7291,  -1.1111,
         -0.8234,  -3.8413,  -1.0653,  -2.7329,  -3.0652,  -0.1563],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5997, -3.1941, -6.4908,  0.1096,  1.2003, -1.6529, -0.7499, -1.4516,
        -2.9694,  2.1638,  3.1159, -3.2296,  0.4040, -8.3672, -0.9403,  1.0147,
         2.3246, -4.9987,  0.2167, -2.3138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7754,  -7.2081,  -2.5566, -12.7788,   0.6842,  -0.6627,  -3.6453,
         -0.7011,  -2.3561,   1.3666,  -4.9743,   3.3458,  -6.8548,  -0.2244,
         -3.0693,  -1.7112,  -3.9265,   0.3154,   1.5499,  -1.5736],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4090,  -4.9146,   0.2459,   2.0636,  -3.1461,  -1.7331,  -5.7416,
         -5.6067,  -4.2096,   0.5753,  -6.0326,  -5.4065, -14.6958,  -8.2348,
         -5.1389,  -6.5400,  -0.4713,  -3.6248,   2.9525,  -4.2900],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8179, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7745,  1.3414, -7.7460, -2.7649, -2.9348, -1.9550,  1.8258, -5.6710,
        -3.8560, -4.2079, -6.5186, -6.3250, -3.2709, -4.3482, -0.3500, -0.4994,
        -6.7139, -3.2302, -3.3985, -3.2485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7588,  -1.8353,  -0.4161,  -2.8056,  -1.0023,  -2.6089,  -2.0418,
          1.2484,   2.8565,  -2.7117,  -0.7906,  -4.1507,  -0.0786,  -5.8900,
         -0.2196,   0.5937,  -2.0748,   0.7315,  -1.4512,  -1.0521],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7597, -1.7537, -1.1251, -3.9767, -3.9218, -5.4084, -8.4457, -2.3022,
        -7.2162,  3.4520, -2.7450, -2.2204, -2.7033, -4.5200,  0.7833,  3.9869,
        -2.5522, -5.4679, -2.8512, -5.0813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9530, -2.1312,  0.0265,  4.4336, -4.1143,  0.5328, -2.7112, -1.5443,
        -4.7188, -1.7767,  2.7095, -4.5026,  1.0354, -2.4570,  0.9170, -4.5796,
         1.1489, -6.4527, -3.3227, -5.3979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  5.6111,  -6.6110,   0.3648,  -2.4385,  -3.5527,  -3.2341,   1.8409,
         -2.2552,  -0.9817, -14.3776,  -2.7842,  -7.9037,  -0.1218,   0.6641,
          1.7283,  -2.8923,   0.7994,  -3.1293,  -2.3739,   0.7424],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7009,  -2.3096,  -1.1959, -10.3757,  -5.2468,  -5.3907,  -4.9045,
         -2.0707,  -4.3872,   2.2991,  -8.0341,  -3.5903,  -1.6912,  -5.8144,
          0.6057,   2.7064,  -2.8008,  -0.7614,  -3.7081,  -3.8092],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7518,  -4.4849,  -3.5895,   0.7587,   1.9203,  -6.1126,  -3.1737,
         -3.3169,  -2.0932,  -6.4271,  -1.7515,   0.4122,  -3.1189,  -1.6734,
         -1.7975,  -0.9276,  -2.5341,   3.5535, -12.8245,  -3.0983],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1228,  3.5427, -2.8040, -0.9297, -2.3901,  0.0579, -3.7401,  2.8242,
         0.0880, -2.7064,  0.8936, -1.1013, -1.3353,  1.6812,  3.7100, -2.3486,
        -1.4896, -0.8472, -1.9804, -5.3175], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9718, -10.1685, -14.2976,  -4.7074,  -8.1307,  -6.8723,  -5.0292,
          0.5341,  -6.2973,   5.3411,  -7.8821,   0.0509,  -1.8328,  -1.8109,
         -1.6020,   4.9305,  -2.5251,  -0.6282,  -5.3063,  -3.0165],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6111, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1263, -3.3118, -0.0676, -0.7955, -1.1097,  0.4766,  4.0940, -2.7368,
        -0.2523, -2.1785, -2.6140, -3.6568,  0.5772,  1.6368, -3.0212, -0.5582,
        -4.1140, -2.9816,  0.5248,  2.7040], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7629, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9606, -1.2851, -2.2616,  1.2616,  3.9253, -3.4940, -1.4986, -3.5162,
        -1.9684, -2.5595,  2.9110, -2.2517, -2.8440,  0.6568, -1.7278, -3.5940,
        -1.8103,  2.8180, -3.0491,  0.0776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1080, -11.3375,  -3.7647,  -6.2352,  -0.1521,   1.5874,   3.6014,
        -13.0242,  -0.4506,  -0.5105,  -3.1713,   0.0414,   3.6503,  -4.9820,
         -2.5290,  -5.1334, -14.1610,  -6.3583, -18.7414,  -4.9761],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.8128,  -0.9656,  -2.6237,  -3.2716,  -1.0835,   3.2148,  -3.6461,
         -0.8273,  -5.3131,  -3.4048,  -5.2768,   1.5006,  -0.5624,  -2.8476,
         -0.5649,  -1.8258,  -1.5352,  -5.1185,  -0.6972,  -2.8128],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1394,  -4.6812,  -3.2116,  -2.8695,   0.0251,   3.5055,  -3.6600,
          0.1690,  -1.6527,   0.0711,  -4.9218,   3.8935,  -3.4840,  -1.8983,
        -15.1265,  -4.9078,  -4.0503,  -5.1383,  -2.2072,  -2.9862],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4842,  -1.0017,  -2.8633,  -2.4734,  -7.7267,   0.4562,   1.3031,
         -2.3223,  -1.6522,  -1.1804,  -0.9059,  -4.2627,   5.3846,  -2.2116,
         -1.2922, -12.3111,  -2.3825,  -6.9780,  -0.6061,   0.4400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5353, -11.7197,   1.2944,   3.4508,  -2.9584,  -1.1903, -17.5084,
         -7.6263, -10.9243,  -7.1409,  -5.4461,  -1.0910,  -3.2694,   0.7214,
         -6.2846,  -5.8184,  -1.8126,  -2.8045,  -0.9660,  -5.9884],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8633,  2.9732,  1.6129, -3.5402, -0.2271, -1.7895, -1.1885,  0.8740,
         3.5499, -1.9037, -6.1874, -1.7469, -1.7045, -3.9405,  1.6617,  2.5106,
        -2.6899, -1.0263, -3.4890, -0.8274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2388, -3.2615, -0.1675, -2.8391, -1.3467, -3.9464, -3.0281, -0.0374,
        -4.5676, -3.4712, -0.0806, -5.1587,  0.6886,  1.1880, -3.1109, -9.7093,
        -3.2753, -1.9483, -0.0196,  2.4836], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0185, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4121, -0.2262, -2.5341,  3.8020, -4.9642, -3.7691, -3.4247, -0.2292,
        -4.4903,  0.8526,  2.5331, -4.0680, -1.2804, -5.0424, -3.2240,  1.2970,
         3.8054, -2.7045, -0.1729, -1.7478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6989, -5.9305, -0.9935, -5.2888,  3.1090, -7.2759, -2.0814, -1.6894,
        -8.6870, -7.8026, -1.7685, -3.2676, -0.5576, -2.8499, -2.4275, -2.5341,
        -0.4660, -3.8543, -1.4366, -2.8778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7646, -0.1930, -3.9408,  4.6954, -3.2534, -0.2662, -4.4886, -3.0497,
        -5.6404,  3.0000, -2.9157,  1.2353, -1.6189, -2.4025, -5.7378,  1.9247,
         1.4845, -2.6590, -0.7335, -2.7614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8822, -2.9706, -0.6506, -2.1735, -1.5185, -7.3398, -1.0469, -0.9665,
        -2.5885, -1.1904, -3.5791, -0.3257, -2.0248,  5.0483, -4.2946, -2.3360,
        -1.6772, -6.4686, -1.3309,  0.1257], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7840, -1.1163, -3.3934, -3.9660,  0.9606,  3.5009, -1.8952,  0.7210,
        -1.3690, -1.1779, -2.7915, -0.8645, -4.2826, -2.3841, -3.8441, -3.4690,
        -4.5569, -2.5441,  0.0155, -8.3429], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4607, -4.8309, -7.4859, -6.8919, -4.1511, -3.8956, -2.6120, -6.2797,
        -8.4622, -4.5321, -5.4141, -3.0669, -2.7558, -1.4408, -5.4569, -1.8589,
        -5.4674, -6.4425, -2.0425, -0.3653], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.3957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5176,  -2.2350,  -2.2986,   3.1127,  -1.9697,  -3.8669,  -2.1936,
         -2.2104,  -5.8901,  -1.5848,   0.1685,  -1.9664,  -3.0464,  -2.0477,
         -1.3791, -11.6993,   2.3130,  -6.7773,  -7.3908,  -0.6608],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3504, -2.1267, -1.8103, -4.8571,  1.1342,  1.7706, -2.5381, -0.1590,
        -4.3560, -3.9894, -0.5414,  2.6528, -0.9554, -1.4200, -0.6452, -3.9509,
         0.6494,  4.1184, -2.9782,  0.4607], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3296, -5.2261, -0.3494,  0.1182,  4.8474, -2.6391, -1.3671, -3.4634,
        -2.8363, -4.4370,  1.0294,  3.1527, -1.0289, -0.1033, -4.0055, -9.8332,
        -2.3346, -4.9391, -0.3128, -1.3442], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8701, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9321, -0.2686, -4.4171, -0.6677, -0.9596, -2.3114,  0.7417, -2.5730,
        -1.0338,  0.6708,  3.9629, -3.5831,  0.5804, -1.6588, -2.4320, -0.6763,
         3.2479, -1.8391, -1.4962, -3.4973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3898,  -3.9161,  -1.0997, -13.2163,  -4.4142,  -5.9232,  -1.0565,
          0.1382,   2.7433,  -6.6108,   0.4054,  -2.6890,   0.3052,  -4.2068,
          1.0713,   1.8749,  -3.9572,  -2.3629,  -3.5667,  -4.4425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.7890,  -8.1487,  -9.8944,  -4.1622,  -1.8715,  -2.5919,   4.7803,
        -13.7199,  -1.5217,  -2.5686,  -3.5858,  -0.7115,  -0.0525,  -5.9417,
         -5.4113, -17.2103,  -9.6756, -12.1511, -13.1813, -10.0513],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2370,  0.2153, -1.7178, -1.4738, -0.1406,  2.9627, -1.8599,  1.4335,
        -2.5658, -0.6706, -3.8311,  1.9585, -2.2791, -2.7083,  0.3719, -4.9298,
        -0.9031, -6.6914,  1.2914,  0.6672], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  5.7759,  -5.3625,  -0.7496,  -1.4637,   0.3701,  -4.2964,   2.6778,
         -1.3662,  -3.1704,  -0.7500,  -3.9395,  -3.7733,  -1.8270,   1.6468,
         -3.6259,  -1.0941,  -3.0223,  -1.8983,  -4.3977, -54.4566],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5505, -5.2571, -6.3543, -1.1416, -2.9564, -0.2825, -3.7803, -4.0072,
        -2.1728, -2.2912, -1.1086, -4.1163,  2.9014,  2.3566, -1.7778, -0.3395,
        -0.8534, -3.5598,  1.5562, -1.4621], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0408,   4.1842,  -4.0941,  -2.6025,  -5.0048,  -4.1923,  -1.8842,
          1.3925,  -5.2686,  -3.4920, -10.4222, -14.1726,  -6.9960,  -6.7279,
         -3.4925,  -1.8899,   2.6493, -15.5018,  -2.7262,  -2.0546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0628, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7460, -2.5789, -4.2576,  1.7127, -1.0269, -6.3469, -3.8510, -1.7092,
        -2.7236, -6.3619,  1.5299, -0.0614, -5.9837, -1.4669, -2.0032, -4.4076,
        -1.9227,  1.0482, -2.0180,  1.7620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-26.9508,  -5.6364,  -4.3814,  -5.1887,  -6.2727,  -9.0063,  -8.7416,
         -2.1196,  -4.1229,  -3.1198, -20.7847,  -7.9381,  -6.1671,  -6.2217,
         -0.3074,  -7.5451,   3.0608,  -7.3429,  -2.9281,  -3.1007],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0089,   1.7349,   0.2146,  -6.0052,  -0.5847,  -9.7072,  -9.0881,
          2.9931,  -4.6233,  -3.3312,  -5.4024, -17.8558,  -2.5905,  -6.7339,
         -0.5553,   0.6056,   4.3003,  -6.7973,  -1.7674,  -3.4192],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1593,  -2.8187,  -0.8623,   0.7276,  -3.9717,  -2.5913,  -3.5821,
         -5.1624,   0.2908,   2.2656,  -1.8202,  -1.9060, -12.1778,  -4.2083,
         -5.1453,  -1.2580,  -3.5612,   2.9159,  -1.6099,  -4.5775],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0903, -6.9484, -2.8285, -6.5871, -3.8335, -0.0592,  2.4282, -6.2747,
        -0.4193, -4.2915, -4.5571, -3.0820,  1.3772, -2.8037, -1.2647, -0.6681,
        -2.7260,  1.5207,  3.5999, -3.2474], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4378, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4052, -2.0628, -1.9230, -1.5414,  1.7353,  3.7558, -3.6146, -0.7041,
        -3.6202, -0.9713, -4.8484,  1.8969,  2.0400, -2.2686, -0.2851, -2.8624,
        -0.9339, -3.4726,  2.5707,  1.8485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3075, -4.1025, -2.7399, -9.1518, -3.6257, -5.0462, -2.7586,  0.4564,
         3.7398, -8.3519, -7.5472, -1.3560, -1.3129,  0.5033,  4.1523, -3.1083,
         0.0164, -1.1698, -1.3082,  0.5682], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7412,  -3.3747,  -0.9719, -11.7191,  -1.6938,  -8.8166,  -0.5330,
         -2.5439,  -3.0961,   0.1497,  -2.2281,  -0.9072,   2.7058,   3.6357,
         -2.2831,  -0.6221,  -1.2347,  -2.1049,  -1.8456,   2.8825],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1098, -0.8372, -2.7289,  1.8632,  4.2338, -2.1147,  0.9296, -2.6630,
        -0.5589, -0.3874, -2.3475, -3.1676, -1.8349, -3.1300, -1.3489, -5.8182,
         1.6070,  1.6771, -2.1381,  1.2595], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1196, -0.0354, -3.6386, -0.0572, -3.6937, -5.6472,  3.4052, -2.9695,
        -0.1213, -9.0199, -4.1193, -3.7445, -4.5610, -0.4949,  2.0440,  1.3161,
        -5.4412, -2.6318, -0.9000, -3.4447], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.1869, -3.2885,  0.3203, -1.5175, -1.1015, -0.4140,  3.2912, -4.0239,
         1.5960, -2.5119, -4.9884, -3.1590,  0.6384, -8.6950, -2.4809, -2.4737,
        -2.6305,  2.1461,  4.1160, -1.9616], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5946, -3.1108, -0.6907, -5.4624, -1.4793, -1.5355, -2.9546,  1.5260,
        -2.1327, -0.4964, -4.7866,  2.5944,  1.2359, -2.3168,  0.7341, -1.4460,
        -0.8627, -2.1875,  0.8262, -5.4974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4319, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0887,   2.8247,  -2.8142,  -3.2848, -18.2431,  -3.4394,  -7.4220,
         -1.1293,  -0.0862,  -2.5040,  -5.5109,  -2.4725,  -2.1015,  -6.7330,
         -3.3189,  -1.8339,   0.7156,  -1.3626,  -0.8990,  -1.9922],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9464,  -4.9116,   1.4892,   3.2860,  -2.8052,  -2.0485,  -2.5851,
         -5.8055,  -0.7815,  -1.2334,  -4.4084,  -1.8169, -12.3503,  -5.6986,
         -5.7606,  -8.0159,  -4.5948,  -0.9487,  -0.4300,   0.6538],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.1160,  -4.7546, -12.6207,  -4.9166,  -7.2317,  -3.7228, -19.2914,
         -0.2773,  -5.3980,  -4.6147,  -8.2721,  -9.0397,  -4.1796,  -5.6024,
         -2.6853,  -0.3575,  -1.8433,  -4.0269,  -1.5976,  -2.7239],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3955, -2.4544,  0.3520, -3.2951, -0.6513, -4.5676, -0.8176, -0.1986,
        -2.5437,  1.4100, -1.1811, -0.9827,  2.0566,  1.2140, -2.8906, -1.6501,
        -3.5139, -1.4904,  0.2730, -4.6764], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1206, -1.8652, -3.2977,  1.9431,  4.5470, -1.8481,  0.3550, -1.8455,
        -1.6648, -4.3094,  3.7445, -2.4048, -0.2769, -2.3385, -0.6456, -6.3345,
         2.0830, -0.3714, -5.5037, -2.2869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3748, -0.4503, -4.3779,  2.3438,  2.7050, -2.5382,  1.7352, -2.1404,
        -0.6005, -1.9123,  2.1560, -4.2536, -0.5145, -2.0246, -0.6522, -3.9989,
         2.5920,  2.5894, -2.3002,  0.3376], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4624, -3.4316, -2.8333, -0.4954,  3.6168, -1.7623,  1.0546, -2.8300,
        -1.4450, -3.7468, -0.3107,  2.3030, -6.5476, -1.5020, -2.3731, -1.7940,
        -2.1532,  2.7387, -2.9126,  0.0842], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0793, -2.5296, -1.3695, -2.7807, -3.1899, -4.6892, -0.7194, -5.1111,
         0.5288, -3.8645,  0.1738, -3.0572, -3.0783,  0.4633, -5.1799, -2.1628,
        -3.7205, -2.2226, -9.9841,  0.1131], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0165, -2.4264, -1.0936, -3.4431,  2.3625, -3.7868, -3.1582,  1.3383,
        -3.3124,  0.3200,  0.3282,  4.0586, -1.7786, -0.8151, -2.8227, -1.6937,
         0.0389,  3.0140, -2.3623,  0.0351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0090, -0.3292, -3.9369,  1.9791, -0.5618, -3.0861, -1.1622, -1.0692,
        -0.0657, -5.8064,  5.5194, -3.6764, -1.0994, -0.8933, -2.6660,  0.1373,
         3.0799, -3.4317,  0.1501, -7.0120], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.2433,  -9.5239,  -3.2443,  -3.2397,  -4.9336,  -2.6886,   2.9340,
         -4.5412,  -0.0227, -10.8120,  -2.7117,  -7.5514,   0.1785,  -0.9917,
          4.4183,  -4.3006,   0.0916,  -2.7107,  -0.4077,  -5.4832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6451,  0.0941, -6.9979, -3.3386, -3.1737, -5.4787, -0.3561,  1.4026,
        -3.6528,  0.5454, -2.4837, -2.5166, -3.0050,  1.9856, -1.7523, -0.7523,
        -3.7279, -3.2935, -5.9551,  3.3220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0981,  -5.6441,  -0.1066,  -3.8343,  -0.6703,  -1.7781,  -4.8612,
         -2.1810,  -2.4823,  -3.7876,  -0.0887,  -9.8247,  -7.0955,  -7.4885,
         -8.5675, -45.6722,  -5.0727,  -4.1263,  -1.7392,  -4.5550],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8114,  1.7088, -2.7142, -2.6321, -4.7677, -4.6834, -2.6608, -2.2576,
        -5.4517, -2.9331, -9.1906, -4.0424, -7.1905, -6.8592, -2.6266, -1.0330,
         2.8681, -4.2818, -4.0959, -3.1485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.0556,  0.8869, -2.1172, -3.1184, -1.0199, -1.0883, -0.2902, -3.2881,
         0.3647, -3.4120, -5.2842, -0.1167, -4.5196, -2.8398, -0.3592,  1.8226,
        -5.6847, -2.0402, -4.7249,  0.1829], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9854,  3.6986, -1.8105, -0.2202, -3.5028, -1.7952, -3.6445,  0.4515,
         1.8224, -1.6353,  0.5793, -2.1665, -3.8266,  0.0910,  0.6036, -2.0417,
        -0.4368, -2.2552, -1.1974, -0.1098], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8205, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3990,  -6.7136,  -5.9129,  -3.7419,  -3.9660,  -3.8532,  -4.6040,
         -4.5655,  -6.0140,  -3.2767,  -7.1231,  -7.1222,  -4.4115,  -7.2927,
         -4.5870,  -3.6836, -10.0496,  -3.5555,  -7.0141,  -2.2753],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6413,  -8.2828,  -4.9359,  -9.4747,  -4.5320,  -3.8437,  -7.7842,
         -4.6524,  -5.0882,  -7.1512,  -2.5295,  -4.5004,  -5.1863,  -2.9098,
         -3.4640,  -2.8340,  -5.2071, -11.8344,  -2.7602,  -7.0773],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5345, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1219, -8.4647, -2.2099, -0.3133,  2.8054, -4.8543, -3.6107, -4.1155,
        -8.2669, -7.3594, -0.6756, -3.6219, -1.8591, -8.0147, -6.6487, -4.6008,
        -5.4637, -2.8127, -3.9886,  3.3528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1517,  4.3962, -3.3220,  0.5856, -2.9005, -0.7628, -1.7115,  3.2158,
        -2.8851, -0.7035, -2.7291, -1.4563, -1.0438, -3.5204, -6.6003,  0.3063,
        -2.5649,  0.8940, -6.3813,  1.7310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0981,  -4.8948,  -0.4421,   0.0632,  -5.5303,  -3.3697, -26.7001,
         -3.0180,  -7.5617,  -1.6026,  -1.6013,   4.3713,  -2.6242,   0.0298,
         -2.9285,  -5.4079,  -6.3515,  -0.1312,  -3.2978,  -0.8560],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5134, -2.8605, -2.8744,  2.1566, -2.3673, -0.2369, -1.2049, -0.6930,
         1.7362,  4.0148, -3.7535,  0.3202, -1.3054, -0.2311, -4.6687,  2.9455,
        -0.9790, -3.6237,  0.0467, -3.8890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1905, -1.1461, -1.1383, -6.2371, -0.4688, -1.9684, -3.2861, -2.0908,
        -5.5521, -0.8293, -4.9313,  2.1755,  1.0344, -2.4851, -0.1575, -1.0947,
        -2.0307,  1.5901,  3.9860, -1.8641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4453, -2.0170, -1.0536, -2.5921,  2.6086, -3.3843,  0.3079, -2.0965,
        -0.5488, -7.5008,  0.9035, -2.5933, -1.2029, -1.4559, -0.3191, -3.6139,
         0.9887,  3.0147, -1.6601,  0.7609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6743,  1.5734,  0.4178, -2.8332, -3.9933, -2.8162, -1.7954, -1.3067,
         2.6018, -7.3288, -2.1663, -4.9668, -0.1919, -5.4655,  0.7703,  1.2691,
        -2.7010,  0.8402, -2.1349, -4.0452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9438, -0.6269, -0.3900, -3.1491, -2.6073, -1.8725,  0.2290, -0.3232,
         3.8402, -5.3139, -0.3709, -3.1437, -0.6909, -5.2509,  0.8805,  0.4062,
        -3.3215,  0.6026, -1.1383, -1.3019], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5743, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2370, -3.0453, -3.6190, -0.9475, -2.7907,  0.1287,  2.1011, -2.0228,
        -0.2803, -2.1739, -9.3712,  2.6239,  2.6972, -3.2366,  0.5857, -5.8298,
        -2.6152, -6.4089, -1.2203, -1.5403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1821, -4.4067,  1.6831, -0.1049, -2.9665,  0.2959, -2.1694, -0.4921,
        -1.3728,  2.9303, -3.4738, -1.3087, -3.7965, -2.5606, -1.6640,  3.1048,
        -3.4050,  0.6359, -1.2921, -1.4612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5327,  -1.8133,   0.2284,  -9.1677,   0.8233,  -5.4755,  -4.2034,
         -2.1983, -15.1933,  -5.9573,  -3.4673,  -4.0769,  -1.2547,   0.7803,
         -1.8359,  -3.3476,  -1.8789,  -1.7130,  -9.8537,  -0.6208],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6506,  -0.7413, -12.9744,  -4.7515, -14.2977,  -1.1772,  -4.9851,
        -36.8542,  -3.7234,  -4.5423,  -5.4159,  -3.8104,  -3.8165,  -1.9807,
         -1.5621,  -1.4483,   0.3618,  -2.0933,  -0.9303,   1.1498],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8395, -2.2885,  2.8970, -3.4303,  0.1078, -3.2527, -3.7580,  0.4184,
         1.7186, -4.9127, -2.7922, -0.2801, -2.4936, -5.8837,  0.9231,  1.7384,
        -2.3737,  1.1920, -1.4076, -1.1517], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4435, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2954, -2.5340, -2.1302,  0.0231,  2.8987, -2.3695, -1.5761, -2.7763,
        -1.0294, -4.1707, -1.6179,  1.5295, -1.9437,  0.6700, -2.2171, -3.4300,
        -7.2260,  1.1305,  1.4788, -2.6387], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0414, -0.7138, -1.6852,  0.8580,  1.2687, -3.5963, -2.2792, -9.8558,
        -5.5558, -3.0784, -2.6450, -1.2845, -0.3431,  0.1077, -3.4133,  0.0884,
        -0.6346, -2.8082, -0.6625,  4.7147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3645,  1.8860, -7.6947, -2.9574, -1.4767, -3.1439, -4.7344, -1.7545,
        -0.6648, -3.5632, -3.5639, -3.6730, -1.5336, -6.7267,  1.6206,  1.9724,
        -2.4626, -0.9634, -2.4680, -2.6327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.4584,  -4.5547,  -0.4709,  -0.8364,  -0.0281,  -4.8970,   0.1643,
         -1.6801,  -5.5279,   1.5138,   2.8506,  -4.3093,  -2.7962, -17.8239,
         -9.9556,  -8.7180, -20.6079,  -6.3330,  -0.6666,  -0.7051],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5138, -1.8994, -3.8578, -4.3814, -0.2022, -3.5636, -1.3820, -0.2795,
        -0.6605, -4.2002,  1.5653,  1.4975, -2.4981, -1.8508, -7.9911, -5.3358,
        -3.8061, -4.2945, -1.1392, -1.1934], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8579, -0.9317, -6.2642,  2.1366,  0.8246, -2.8436, -0.4773, -2.4978,
         0.0844,  0.7855,  4.1370, -2.1347, -0.6526, -4.1947, -3.7019, -1.6645,
         1.8153, -1.9215, -2.9612, -2.2623], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1189,  1.9622, -2.3246, -7.8392, -1.2709, -3.4584, -3.7910, -0.7138,
         3.4134, -2.6236, -0.8364, -5.0140, -2.3970, -7.0872,  2.7333, -1.6238,
        -2.8297, -4.2604, -1.8115, -5.6206], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1535,  -2.6853,  -1.6449,   3.2071,  -4.4048,  -0.3200,  -2.3768,
         -4.3315,  -1.1206,  -0.3160,  -2.7135,  -3.9909, -10.6423,  -6.0271,
         -4.2740,  -5.0351,  -0.8206,  -0.1045,   4.1997,  -3.5728],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5064, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5959, -3.5684, -1.3572, -3.0293, -0.3070,  2.0929, -1.9398, -0.4928,
        -1.2506,  0.3979, -0.8456,  4.1746, -1.7290, -0.4521, -1.3843, -0.1885,
         0.5008,  5.2750, -3.1616, -1.2556], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  -4.1157,   -4.8646,  -12.2592, -125.5921,   -2.6942,   -8.9200,
          -4.4258,  -22.9070,    0.4823,   -6.5953,   -5.4940,   -2.9157,
          -3.0504,   -3.1631,   -6.0198,    4.5254,   -4.2274,   -1.0322,
          -2.7895,   -5.7265], device='cuda:0', grad_fn=<SumBackward1>) tensor(-11.0892, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0399,  -4.6615,  -3.6091,  -3.8171,  -1.8817,  -5.3687,  -6.2344,
         -5.2576, -10.8998,  -3.4036,  -5.8193,  -0.7345,   1.1798,  -4.3662,
        -15.5977,  -1.2964,  -0.0896,  -2.6935,  -3.8074,   3.4872],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1826,  -4.0300,  -0.3121,  -2.2813,   1.1516,  -4.0906,  -4.1023,
         -2.2104,  -2.5405,  -2.6175,   1.1554,   2.4250,  -2.5105,  -4.3808,
        -16.5860,  -5.6069,  -3.3259,  -5.0280,  -0.1412,  -2.3955],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8622, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3856, -10.2235,  -6.7854,  -2.7456,  -5.6873,  -0.9453,  -1.5150,
          0.9454,  -3.7957,  -1.8731,  -2.7928,  -2.0981,  -0.6767,   3.1518,
         -4.7602,  -2.9252,  -8.0177,  -5.7108,  -5.2199,  -6.6625],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6002, -3.6075, -4.2377, -2.1484, -2.7824, -0.0382,  1.0670, -3.5919,
         0.3443, -2.5174, -2.7713, -1.6722,  1.1434, -2.3746,  0.6343, -2.5096,
        -3.0805, -1.2327,  2.6689, -2.7163], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1919, -3.8806, -2.0141, -4.1721, -1.4290, -5.0840,  1.7895,  2.5092,
        -2.3373,  0.6161, -1.5434, -1.1036,  0.6856,  3.2048, -3.1892,  0.6823,
        -2.8570, -2.5285, -5.1073, -1.8945], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1731, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3752,  1.9797, -1.8919,  1.6082, -1.9351, -1.1902,  1.6475,  2.9156,
        -1.6109, -2.1219, -1.2827, -0.8285,  1.0145,  0.6514, -2.5100, -4.5387,
        -4.6956, -7.0546, -5.8839, -8.9660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6659, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4737, -6.8620, -4.9564, -5.0945, -4.8935, -1.0299,  0.1095, -3.6545,
        -4.5876,  0.9452, -2.2635, -1.2140, -2.3786,  2.9271, -3.5684, -0.5173,
        -4.6112, -1.0963, -3.6300,  0.6729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4588, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9635, -12.6748,  -2.9232,  -6.3861,  -1.7273,  -6.3598,   1.5757,
         -4.4966,  -6.8022,  -4.3380, -13.0161,  -4.7721,  -5.9070,  -0.7846,
         -0.0540, -15.6168,  -4.1156,  -1.7146,  -3.4309,  -1.8372],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8672, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6331, -13.5340,  -3.7039, -15.7314,  -0.0860,  -2.5378,  -0.0554,
         -1.4261,  -6.2317,  -3.8263,  -5.0732,  -6.8908,  -5.0658,   1.3514,
         -5.6213,  -2.3580,  -3.2540,  -1.9545,  -4.7390,   1.7484],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8199, -0.7735, -3.6173,  0.1529,  2.8414, -2.4923,  1.2536, -2.3084,
        -1.3128, -6.0261,  3.1193,  0.4106, -3.4516, -0.5999, -1.5602, -0.7057,
        -3.8669,  0.6814, -6.2669, -3.4890], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3606, -0.6847,  1.4745,  3.8295, -1.9260,  0.4015, -1.4745, -1.8595,
        -2.6828,  3.8954, -4.5936, -1.0413, -3.7603, -4.3891, -0.4568,  1.3535,
        -4.1920, -3.1414, -3.1946, -2.8308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1265,  3.2873, -4.8234, -1.6607, -3.3537, -1.5341, -1.7461, -1.3401,
        -0.8367, -2.0091, -1.3438, -5.0232,  0.9068,  2.3516, -3.4813,  1.2009,
        -2.5342, -0.4027, -4.7223,  2.7756], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1581, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8869,  -1.7925,  -1.4042,  -2.3962,  -0.8810,   3.7127,  -2.3854,
         -2.1853, -13.0137,  -3.7847,  -7.2104,  -0.6986,  -1.3979,   4.3107,
         -3.6914,   0.1650,  -3.7769,  -3.3450,  -3.7707,   2.4745],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2440,  -2.5084,   0.0546, -10.5691,   1.5063,   1.5152,  -6.9314,
         -0.3224,  -2.3073,  -0.4060,  -4.9979,   1.3311,   1.7219,  -2.4017,
          0.3710,  -3.0576,  -1.7776,  -3.1247,   2.6615,  -3.7589],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.2433,  -9.5239,  -3.2443,  -3.2397,  -4.9336,  -2.6886,   2.9340,
         -4.5412,  -0.0227, -10.8120,  -2.7117,  -7.5514,   0.1785,  -0.9917,
          4.4183,  -4.3006,   0.0916,  -2.7107,  -0.4077,  -5.4832],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8225, -0.9648, -4.4873,  1.7129,  2.1595, -3.8837, -1.5060, -2.4845,
        -1.4566, -2.3903,  1.5312, -3.6438, -2.1438, -3.7763, -1.8473, -1.9436,
        -2.7505,  1.1315, -5.6062, -2.9385], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9055, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0219,  -5.7390,  -5.3199,  -7.2894,  -4.8074,  -6.9880,  -4.7865,
         -4.9250,  -2.9994,  -5.3395, -10.4890,  -7.4001,  -4.0223,  -5.5018,
         -5.2005,  -3.9255,  -6.6449,  -5.9131,  -4.0765,  -3.4250],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8073,  1.4615, -1.3200,  0.5288, -0.4204, -1.8964, -4.1793,  0.3727,
        -2.4944,  1.4335, -3.2492,  0.3620, -5.4827,  2.5697, -2.5859, -3.7543,
        -0.6265, -1.4278, -0.4456, -0.0852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3778, -0.8666,  4.9842, -8.3698,  0.2433, -2.8624, -0.8771, -3.0847,
         2.4034, -2.1433,  0.4570, -3.4470,  0.4565, -3.4669,  1.5263,  2.0915,
        -3.7840, -0.0709, -3.2152, -0.8089], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6347,  -0.4865,   1.4595,  -3.5696,  -1.9331,  -4.3677,  -2.0189,
        -18.9062,  -4.5501,  -1.1186,  -3.1778,  -4.5273,  -1.5641,  -3.0056,
          3.4195,   3.6942,  -5.6999,  -1.9363,  -2.2742,  -0.6517],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3068, -7.0849, -2.4419, -1.1601, -3.5478, -1.7909, -3.9319,  0.1933,
        -5.8155,  2.5826,  3.2265, -2.8581, -1.2861, -3.6060, -1.2443, -3.0730,
         3.0128,  0.3497, -5.4565, -0.1963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  5.2414, -11.5803,  -1.2530,  -3.6981,  -2.6299,  -1.3951,  -2.3207,
         -5.0564,  -4.4310,  -7.0762, -20.0076,  -2.1231,  -4.9027,  -1.7904,
         -4.4622,  -2.4995,   3.4262,  -3.2927,   1.9191,  -2.0263],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4866,  -0.1140,  -1.1078,  -0.4737,  -0.5357,   4.9681,  -1.6515,
         -3.7562, -15.8321,  -4.9080,  -4.5514,   0.0673,  -3.1744,   4.9595,
        -14.3761,  -0.4499,  -2.2314,  -3.7659,  -1.5572,   2.2790],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2374,  -1.0924, -10.0928,  -3.1347,  -4.8211,  -1.9167,   0.1800,
          4.0935,  -9.7252,  -1.4201,  -2.1617,  -6.7031,  -0.6851,   2.2561,
         -2.6511,   0.6948,  -2.7301,  -1.5760,  -4.7382,   1.8813],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3790, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.1797,  -5.3265,  -2.4420,  -4.9654,  -2.8728,  -6.3013,  -4.8185,
         -2.8298,  -5.9795,  -1.4455,   1.0727,   3.9153,  -4.4508,  -2.4342,
         -2.6833,  -0.7357,  -7.8954,  -1.7971,  -1.6725,  -3.5669],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3209, -1.5920, -1.2923, -3.6403, -0.7744,  1.2258, -2.1343, -1.0350,
        -7.2080, -6.7653, -3.5329, -4.3223, -1.4917,  1.7337,  0.2332, -4.6469,
         0.8770, -4.0880, -5.1587,  0.4971], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6598, -2.3913,  2.0495,  2.8078, -2.7283,  0.5696, -1.3947, -2.6079,
        -6.4907,  0.7106, -0.3505, -2.4429,  0.7059, -1.5203, -1.5739, -3.9513,
         4.7027, -2.5510, -1.9863, -4.4852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2294, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2534, -2.1058, -4.6173,  0.1593,  3.9880, -4.1413,  1.3074, -3.5849,
        -7.6849,  0.5229, -0.7157, -3.4128, -0.5470, -3.5510, -0.7722, -3.7216,
         0.2141,  2.3003, -2.7778,  0.9865], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5325, -0.9287, -0.8731, -2.7197,  4.1520, -2.9488, -0.6479, -3.5955,
        -0.0904, -4.6210,  2.7720, -1.6576, -2.7879, -0.2149, -1.5045,  0.3370,
        -6.2855,  0.5006,  1.7854, -2.7928], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6421,  -5.4983,  -9.6811,  -1.7983, -13.7044,  -8.6282, -34.9453,
         -5.7114,  -2.6669,  -4.8409, -10.9205,  -3.2179,  -6.9513, -29.9811,
        -14.5851, -12.7174,  -5.3961,  -6.2063,  -7.7004, -16.5324],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-10.0521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0069,  1.3992,  2.9674, -1.9264, -1.3242, -4.2920, -1.6051, -3.8792,
        -2.5071, -0.7060, -3.5700, -5.6251, -2.8222, -1.1987, -1.1442,  4.1554,
        -4.6669, -5.2209, -6.2237, -3.8102], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4148,  -2.1994,  -4.0381,   0.0458,  -3.4935,  -0.5360,   3.0755,
         -5.1244,  -6.1957,  -4.5569,  -0.4580,  -5.0767,   0.4156,   3.1167,
         -2.1291,  -0.7344,  -2.5923,  -1.4837, -10.0484,   2.0873],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5430,   3.2683,  -1.7456,  -0.0945, -17.7103,  -4.2122,  -7.2057,
         -2.9301,   1.9797,  -1.0374,  -4.3610,  -0.7445,  -1.9476,  -1.0204,
          2.2844,   3.3336,  -3.5658,  -1.7352, -21.0680,  -2.6267],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.9214, -1.1889, -2.0156, -1.3029, -3.7066,  2.0222,  2.3142, -2.2883,
        -0.0597, -1.5524, -2.3633, -1.6885,  4.5648, -4.2105, -2.7278, -2.6208,
        -4.1882, -3.5834,  0.1944,  3.1436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3837,  4.3120, -4.6753, -0.3255, -3.0522, -0.3139, -3.4106,  2.5086,
         2.9942, -2.0084,  0.8979, -1.8578, -3.1719, -4.2417,  0.9642,  1.9796,
        -3.2110, -0.7181, -1.7446, -1.5221], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6836,   1.9111,  -5.8637,  -0.6341,  -2.7959,  -3.0220,  -0.1074,
          3.1833,  -4.4315,  -0.3214, -10.6066,  -3.1023,  -0.7139,  -0.8577,
         -5.8650,  -1.8766,  -3.1150,  -3.3660,  -1.9856,   3.6061],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0824, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4042,  -2.5686,   1.5670,   1.6055,  -4.2221,  -0.7545,  -0.6522,
         -3.1961,   1.1682,   3.2850,  -2.0167,   0.4488, -14.3285,  -6.3409,
         -2.8498,  -5.4701,  -0.0750,  -0.6799,   2.9754,  -3.1244],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1104, -3.3575, -0.6561, -1.7124, -4.2686, -0.3158,  2.9798, -4.2213,
         0.1930, -3.0017, -0.7204, -4.8394,  1.9870,  0.6243, -9.0771, -0.5729,
        -3.1872, -1.5610, -3.8696,  2.0788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2620,  -3.3867,  -3.4318,   0.3445,   3.3030,  -1.6302,  -1.4305,
        -13.6238,  -2.9214,  -5.9021,  -4.5857,  -0.4473,   3.9750,  -6.1072,
         -0.5423,  -2.2386,  -1.5144,  -2.9551,   0.4933,   3.1296],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8464,  -0.4091,  -1.7100,   3.5882,  -2.5297,   0.7078,  -4.6280,
         -0.7782,  -4.7307,  -0.2027,   2.1940,  -1.2212,  -0.0765,  -1.4170,
         -3.7703,   0.9964,   3.2013,  -2.0798,   0.0711, -18.3462],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4045, -1.1301, -2.9200, -0.6046, -4.3345, -0.4817,  1.8926, -2.4055,
        -0.4992, -0.8335, -0.2519,  1.3918,  4.4774, -2.5337, -1.2867, -4.7082,
        -0.5604, -4.7676,  2.2568,  2.3211], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3281, -2.1133,  0.1448, -3.5838,  4.2470, -2.2326, -1.1394, -2.8084,
        -4.2427,  0.6744,  1.6776, -3.1886, -1.5452, -3.3066, -1.7270, -4.7151,
        -7.9018,  1.1732, -2.1858,  0.1483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3657, -5.2698,  0.0939, -3.8175, -2.0327, -1.1727,  2.9703, -2.0049,
        -1.1790, -2.4346, -2.9895, -4.3719, -2.4129,  1.1543, -2.7776, -2.1568,
        -4.0544, -0.3865, -4.6471,  0.4288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6324,  -2.6513,  -3.0478,   1.2254,   3.5121,  -3.3060,  -0.4264,
         -1.1405,  -2.4333,   0.6349,   3.4086,  -4.2937,  -1.5197, -26.0823,
        -10.3180,  -7.6109,  -5.3084,  -1.7818,  -2.5064,   4.9222],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1798, -1.8778,  3.8865, -4.5014, -2.2213, -3.7778, -3.4445, -3.2377,
        -2.1931,  0.4844, -6.9905, -0.5329, -1.9683, -3.3654,  1.9232,  3.5592,
        -2.9810, -0.6981, -2.4630, -0.2701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5425, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4005,   1.6858,   3.6197,  -1.9128,  -1.4244, -14.2413,  -3.6354,
         -8.6912,  -2.6987,  -5.5071,   5.3290,  -6.1126,  -2.5084,  -2.7141,
         -3.0637,  -5.6227,  -0.1676,   2.9170,  -1.7594,  -1.0062],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3987, -1.3093, -6.6772,  2.3427,  0.6851, -3.1061, -0.0255, -1.1353,
        -3.5263, -1.1433,  0.2138, -3.5429,  0.5339, -3.0433, -3.0262, -3.6871,
         2.7856,  2.1299, -1.9895, -2.5567], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3738, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7550, -1.3783, -0.4530, -1.1214, -0.5999,  2.5831, -3.8661, -0.8449,
        -2.3032,  0.6916, -6.3572,  2.3680,  2.8084, -1.8992, -0.5888, -3.8952,
        -1.0802, -2.6835,  1.7930,  1.8816], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1891,  -2.9232,  -1.2602,  -5.0853,   1.6351,   2.9341,  -3.0251,
         -0.4742,  -4.6956,  -4.6409, -12.8458,  -4.1950,  -4.8096,  -3.4068,
         -2.6429,  -0.6747,  -3.6240,  -1.6286,   1.5850,  -2.1485],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5369, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4876, -5.8070, -1.6645, -3.4473, -0.9732, -3.0619,  1.7763,  3.3014,
        -3.0018, -0.0309, -2.8981, -2.0212, -5.0351,  3.9141, -3.2162, -3.9715,
        -3.0410, -0.5792, -4.5100,  2.0591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1962, -2.4037, -0.3107,  4.4948, -5.4875, -2.8258, -3.7360, -0.2652,
        -7.4362,  2.1960,  2.3556, -4.6241, -1.5260, -3.7999, -1.3955, -3.5463,
         1.7790,  2.9013, -3.3215, -2.1530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5092,  -1.6137,  -6.3982,   2.3698,  -4.2769,   0.5148,  -1.0515,
         -0.1341,  -5.1337,   2.9284,  -2.3301,  -4.1813,  -8.3506,  -6.2557,
         -5.4105,  -2.5627,  -0.3872,  -4.3187,  -5.1759, -11.1991],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1281, -4.0452,  0.3135, -1.4079, -2.3766,  2.9678,  2.1690, -1.9197,
         0.3170, -1.0445, -1.6615,  0.7506,  2.6907, -3.6581, -0.4281, -2.6238,
        -1.2439, -4.9249, -0.7054,  1.2303], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6471,  -0.0831,  -1.6433,  -1.8119,  -0.0562,   3.9658,  -4.4725,
         -2.5540, -11.7027,  -9.0824,  -3.5156,  -5.1416,  -1.0143, -12.3776,
         -4.8613,   0.3635,  -2.8589,  -0.5387,  -3.3658,  -0.9417],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0624,  -5.0855,  -0.7738,   0.5841,  -3.8317,  -1.1643,  -1.6774,
        -28.2850,  -5.3994,  -0.7455,  -6.5655,  -3.6180,  -4.6254,  -5.1222,
         -7.9465, -10.1316,  -5.3106,  -3.9821,  -3.7976,  -2.0286],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2285, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3144,   1.1933,   2.7187,  -1.9620,   0.8478, -15.0952,  -5.7074,
         -3.8024,  -5.6328,  -0.1667,  -8.2375,   3.4254,  -4.5353,  -2.0738,
         -3.5256,  -5.0725,  -6.8678,   0.2935,  -8.5356,  -5.6102],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6330, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1412,  -0.3857,   2.6757,  -1.3939,  -1.1719,  -1.5870,  -1.8661,
          0.2776,   2.6550,  -2.9346,  -3.1998, -16.8287,  -8.1298,  -3.4161,
         -5.8417,  -0.8521, -10.5821,  -0.5797,   2.4879,  -2.5044],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2569, -2.7696, -1.1962,  2.1934, -4.6219,  0.0272, -3.3774, -1.8080,
        -4.7495,  0.9995,  1.1350, -3.0759,  0.1438, -0.8803, -1.4831, -0.7389,
         3.9847, -2.4708, -1.0288, -2.1768], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1265, -2.7444, -5.1959,  0.0445, -1.6666, -3.9857, -2.4248, -1.4191,
        -5.2205,  2.0437,  2.7778, -3.2410, -0.0440, -0.7905, -3.4540,  2.2347,
         1.9178, -2.9017,  0.0217, -2.2402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3707, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9001, -0.2440, -1.4975, -2.4903, -0.4344,  4.8887, -3.6693, -2.2768,
        -3.0681, -2.2672, -4.9552, -0.8154,  0.1400, -2.7350, -2.3381, -1.3321,
        -2.2032, -4.1736,  4.9751, -1.8650], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4131, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1254,  0.4631, -2.2536, -0.7554, -3.8654,  1.6296,  2.2032, -4.1002,
        -1.4061, -3.4864, -2.2728, -3.7455,  0.8731, -0.2372, -1.5727,  1.2956,
        -1.8219, -4.1566,  0.5227, -0.1577], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6373, -1.0198, -2.1030, -0.0397, -4.4451,  2.4800,  2.2079, -2.8691,
        -1.2539, -1.8007, -0.0264, -1.8482,  5.0033, -2.3778, -0.3131, -3.5991,
         0.2918, -3.9476, -5.1761, -1.7360], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4105, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9228, -0.7771, -2.3220,  2.2034,  1.0761, -5.4672, -0.0833, -1.9060,
        -3.3042,  0.5671,  3.0893, -1.8664, -4.0310, -3.7586, -3.4905, -6.0285,
        -0.4936, -2.0414, -6.6449, -1.6936], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4305, -1.2704,  0.6954, -6.0848, -4.0436, -2.4742, -4.5356, -0.5416,
         1.8223, -2.9262, -2.1204, -1.8347, -4.4222,  1.5888,  2.6164, -5.7375,
        -2.0151, -3.2667, -5.1513,  0.7742], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5456, -8.3962, -6.9417, -2.3025, -8.8942,  0.0939, -4.9410, -7.3949,
        -1.8271, -2.5768, -4.3778, -2.4791,  3.5571, -3.2520, -2.4979, -1.9759,
        -5.9951,  0.1704,  2.8455, -3.4477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.0856, -6.3879, -0.7509, -2.6706, -7.2630,  1.1306,  2.1860, -4.7801,
        -1.6212, -5.8694, -6.4626, -4.4776, -4.0751, -0.6276,  0.5584,  3.0766,
        -3.4485, -1.7278, -1.1548, -5.2413], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4192, -1.7685, -0.9442, -2.5074, -1.3059, -0.1795,  3.9001, -2.3953,
        -1.2166, -2.1867, -0.6893, -2.5945,  3.5677, -2.5866, -2.0442, -3.9004,
        -4.7279, -3.5541,  1.7348, -5.4282], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5483, -0.9104, -3.0221,  1.2791,  1.6655, -3.2954, -1.0982, -2.8311,
         0.2987, -1.9571,  0.2550, -2.2917, -1.1040, -3.4005, -0.4436, -3.8863,
         2.1934, -1.2621, -4.8731,  1.1055], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6125,  -2.2050,  -0.0936,  -3.4295,  -1.3188,  -0.6298,  -0.9404,
         -2.5589,   2.3630,  -3.1299,   1.5539, -11.3259,  -5.3082,  -3.1833,
         -4.4717,  -1.3488,  -1.5109,  -0.7628,  -5.0219,  -5.3444],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6190,  0.2284,  3.1606, -3.4399, -3.2911, -2.9853, -6.6592,  0.5559,
         2.2007, -2.4469,  0.1573, -7.2259, -5.7440, -3.5596, -5.1218, -1.3521,
        -0.7695,  4.3070, -4.1482, -0.7076], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9730, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2341, -1.5098, -3.4610,  2.0322,  2.1209, -2.0301,  1.4639, -2.0371,
         0.3458, -9.0533,  2.8685,  1.4779, -3.7518, -0.5575, -1.4465, -8.7011,
        -4.2753,  1.4403,  1.2646, -2.7456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4894, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.7808, -3.7599, -1.6755, -4.3419, -3.8157, -4.6655,  0.6809,  0.9524,
        -3.5119,  0.6580, -3.3653, -0.4717,  0.2399,  2.9330, -2.5853,  0.0398,
        -3.3627, -2.8240, -6.2135, -1.5521], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0404,  -2.0884,   1.1200,   2.5565,  -2.3618,   1.1432,  -3.6491,
         -0.7863,  -3.9288,   2.3827,   0.8346,  -2.1604,   0.5300,  -2.4110,
         -4.4053,  -0.9148,   3.2999,  -2.5107,  -7.6592, -36.3731],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7847, -4.6346,  0.1488,  2.8404, -4.8112, -1.7561, -1.6018, -0.3871,
         0.3188,  2.5025, -1.7765, -5.4140, -2.3247, -1.0875, -4.6083,  2.4720,
         1.4776, -1.9764,  0.0081, -3.3878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.7373,  -3.1162, -10.3107,  -0.3329,  -6.9993,   1.1511, -17.8091,
         -3.6889, -17.4440,  -8.6457,  -6.3235, -20.0567,  -3.5636, -17.3880,
         -3.5281,  -7.3654,  -0.1303,  -2.8224,  -3.1680,  -2.5373],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7908, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3067,   0.8930,  -0.7890,  -0.6180,   1.0632,   3.0110,  -3.1257,
          0.1231,  -0.5338,  -1.1371,  -3.8773,   4.2581,  -1.6049,   1.4135,
         -2.8452,  -2.8734,  -4.1543,  -3.2616,  -1.6732, -14.2064],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7122, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0372,  -5.4918,  -2.5706,  -1.6525,   3.8521, -17.1129,  -3.4937,
         -4.9127, -10.2400,  -2.9438,   0.3232,  -6.8669,  -3.6725,  -4.5294,
         -4.9824,  -6.0147,  -2.9530,  -6.0529,  -0.1794,  -1.8595],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1374,  -0.8475,  -4.3144,   1.1933,   2.7187,  -1.9620,   0.8478,
        -15.0952,  -5.7074,  -3.8024,  -5.6328,  -0.1667,  -8.2375,   3.4254,
         -4.5353,  -2.0738,  -3.5256,  -5.0725,  -6.8678,   0.2935],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3228,  -4.3950,  -7.3436,  -2.5717,  -1.8744, -12.4078,   0.5324,
         -4.4529,  -4.5922,  -7.0009, -38.8195,  -7.1098,  -7.9458,  -6.3155,
         -4.1797,  -0.5843,  -5.4769,  -0.7386,   2.2604,  -7.4138],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0376, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0222,   4.9349, -12.0532,  -2.8283,  -3.0778, -10.4816,  -7.2251,
          0.5991,  -3.3114,   1.7050,  -2.2825,  -1.2277,  -1.8768,   2.1935,
         -3.2138,  -2.5894,  -0.8431,  -5.1713,   0.9479,   2.8612],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4270,  3.4548, -2.5306,  0.2886, -3.9161, -0.3498, -7.4413,  2.5687,
         1.3909, -2.6304, -2.4841, -4.0357, -1.3309, -2.9793,  1.4484,  2.9227,
        -2.7608, -0.7897, -1.7796, -1.5974], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1755, -4.8065, -2.0297, -4.6478, -2.3639, -4.5729, -2.2653, -2.5382,
         1.3794, -4.2930, -1.0668, -3.5625, -4.7622, -1.5680,  3.1123, -3.6736,
        -2.3143, -2.4208, -5.2671, -0.6888], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5339,  0.7719, -1.3843, -3.6580, -0.4685, -3.4999, -2.5627, -1.6944,
         3.4344, -4.0684, -0.1395, -0.2763, -0.6150, -0.8607,  4.2067, -2.2404,
         0.3063, -2.3131, -0.7621, -5.3431], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6974,   4.5387,  -2.5887,  -1.5446,  -1.8224,  -6.6650,  -0.6326,
          1.7008,  -1.8087,  -4.8387, -30.6598,  -4.9068,  -3.9558,  -5.2267,
         -0.5941,  -5.7802,   2.2114,  -3.8958,  -1.6255,  -1.9930],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1771e-01, -3.9990e+00, -2.1659e+00, -7.1460e+00, -2.0169e+00,
        -3.7215e+00, -8.8603e+00, -3.0999e+00,  4.5593e-03, -4.8824e+00,
        -2.1655e+00, -1.5888e+00, -9.1155e-01, -4.3380e+00,  1.6322e+00,
        -4.1961e-01, -2.5311e+00,  5.7166e-01, -4.1358e+00, -1.0596e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5107, -1.5520, -3.2492, -1.8978, -4.7159,  1.2835,  0.7034, -1.7853,
        -0.1614, -1.9181, -2.3433,  1.1998,  3.0603, -2.5097, -1.2168, -8.4574,
        -4.7648, -3.5261, -3.5441, -1.4768], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3639, -1.4403, -0.1271,  3.5729, -3.6620,  0.9115, -4.1020, -1.2379,
        -5.0337,  1.1996,  1.2260, -7.0620,  0.0783, -0.7637, -1.3130,  2.5073,
         4.0230, -3.2760, -2.2417, -4.8256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1473,  -3.4589,  -0.4380,  -1.9114,  -6.9282,   1.1360,   0.9315,
         -3.4667,   0.4231, -11.2589,  -6.0992,  -3.9919,  -4.6013,  -4.5152,
         -3.8559,  -0.9493,   3.2790,  -2.5091,  -0.5443,  -2.4961],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5554, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2857, -1.9902, -2.7014, -0.8455, -2.7508, -0.8439,  2.4723, -3.2448,
         0.4695, -1.8647, -1.5466,  0.8777,  1.9356, -4.3873, -1.3243, -0.5543,
        -7.6291, -0.2052,  2.4971, -9.7949], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5527,  -6.8907,  -6.0998,  -6.3365,  -2.0090,   1.0767,   0.7833,
         -3.2814,  -2.0859,  -2.8224,  -4.3540,   0.5054,   1.3989,  -4.2403,
         -1.9766, -15.3609,  -5.1987,  -5.6760,  -4.6842,  -4.2516],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.1511, -12.9768,  -3.1559,  -4.0465,  -2.0218,  -3.2477,  -2.1942,
         -6.5987,  -2.3610, -22.8996,  -2.7104,  -7.4174,  -0.9185,  -0.9799,
          2.5954,  -9.7589,  -2.8562,  -1.9996,  -0.3683,  -6.3139],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3546, -2.5032, -5.9370, -0.0136,  2.5724, -1.9307,  0.2313, -1.7353,
        -4.0558,  2.6415,  2.2888, -3.5360,  0.6884, -1.1897, -1.4927,  2.1106,
         3.2018, -2.6379,  0.8658, -1.7787], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1685,  -2.9893,  -2.8774,   1.2346,   2.9786,  -4.0100,   0.2194,
         -3.8067,  -4.0086,  -4.0842,   2.3263,  -3.5025,  -5.8627,  -0.3340,
        -10.0374,   0.6382, -27.7273,  -6.0544,  -8.5647,  -2.1395],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6039,  2.3660, -2.8936, -3.5647, -4.4262, -0.5629, -2.8761, -2.7229,
         1.8140, -0.6862,  0.2709, -0.7540, -2.9236,  1.4343,  3.7208, -2.3235,
         0.5220, -1.4382, -2.4031,  2.7678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7038, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4106,  -0.1000,  -2.0701,  -4.0123,  -3.4269,   0.5939, -16.6591,
         -0.4351,   0.6739,  -3.7377,  -0.6887,  -3.3698,  -1.1590,   0.5216,
          4.2601,  -2.3585,  -1.1087,  -3.2991,  -3.6448,   0.0738],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1178, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2687, -1.5491,  3.2670, -3.1296, -1.7082, -3.5250, -1.8806, -3.8210,
        -0.4180, -5.5095, -1.3583, -3.3943, -0.9460, -5.3814,  0.3667,  2.8410,
        -2.1556, -0.1463, -2.0602, -1.9364], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8357, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2470, -2.1470, -0.7016, -5.8854,  3.5843, -5.3255, -4.2451,  1.4700,
        -4.2810, -2.2667, -3.5643, -4.9714, -0.3467, -2.8796,  0.4602, -1.6481,
        -2.2769,  1.5703,  4.0081, -1.6620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5119, -0.4053, -4.0947,  1.3092,  3.1770, -3.5718, -1.4960, -3.2624,
        -2.2525, -4.5326, -0.0706,  0.8572, -2.4561, -2.4159, -0.6375, -1.4879,
        -1.2191,  5.1987, -2.5211, -2.7546], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6679,  2.3005,  0.2371, -2.2607, -0.3681, -2.0989, -1.3204,  0.8157,
         3.2787, -3.6552, -5.7007, -4.0219, -1.9352, -5.2614,  0.2487,  2.8550,
        -4.1077, -0.6097, -1.1293, -0.8756], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8612,  -2.5901,  -4.1696,  -3.4651,  -0.8440, -13.6543,  -1.6688,
         -4.1018,  -4.1730,   0.7890,   1.1991,  -6.8349,  -3.2823, -10.1232,
         -4.7788,  -5.5507,  -6.1409,   1.2296,  -6.1472, -10.9108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2718,  -3.6460, -21.4232,  -2.0566,  -6.1786,  -0.6536,  -5.4916,
          2.8912, -10.1237,  -3.8106,  -0.8663, -12.2332,  -8.3964,  -2.8482,
         -2.6883,  -0.8433,  -2.5060,  -0.1165,  -2.6319,   3.0098],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3108,  -2.8063, -22.3172,  -5.5427, -18.8926,  -2.9744, -13.0687,
         -7.4007,  -8.2236,  -4.6244,  -4.3463,  -2.1659,  -1.2426,   2.8435,
         -8.8510,  -2.2761,  -2.8393,  -2.8884,  -0.7568,   1.1244],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4063, -3.7928, -0.4954, -1.5906,  0.9987, -5.9680, -4.3186, -1.1806,
        -0.7405, -5.1573,  2.8304,  0.4595, -4.1786, -2.7895, -4.6805, -1.6231,
        -3.2620,  2.9106, -1.2191, -0.5813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5483,  -9.8310,  -7.2812,  -6.0863,  -2.6218,  -3.3455, -11.5462,
          0.4490,   5.5370,  -2.9508,  -2.3271,  -3.3923,  -0.4286,  -4.9183,
         -2.0637,   1.3248,  -3.0563,  -2.2655,  -1.0661,  -4.1987],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1996,   2.5756,  -2.6256,   0.2192,  -1.1561,  -5.6131,   1.3969,
          2.2729,  -7.1405,  -3.2220, -11.2747,  -8.4432,  -3.4226,  -4.6842,
         -0.2571,   0.5306,   3.3528,  -3.9573,   0.0191,  -0.9593],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1760, -5.0065, -2.5182, -2.6981, -4.3793, -2.4662,  1.4203, -3.4597,
        -1.0155, -4.6872, -3.1409,  0.3790,  0.0080, -3.8792, -0.9556, -3.7666,
        -4.7085, -1.0056,  1.3839, -3.9144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5230, -0.3284, -5.8802, -4.6062, -4.8887, -5.1004, -5.3680,  0.4283,
        -5.7083, -3.0822, -5.9146, -9.4936, -6.3180, -5.4926, -6.4467, -1.3594,
        -1.5165,  0.7673, -6.1925, -4.1751], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.2099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8170, -15.1290,  -5.8347,  -5.1955, -12.3065,  -7.5245,  -0.6188,
         -9.6287,   1.0516,  -7.0314,  -1.9085,  -1.8775,  -1.7975,   0.4440,
          1.7590,  -2.6260,   0.5028,  -9.2520,  -3.0707,  -5.2370],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0013, -3.6763,  1.4229,  3.3565, -3.7993, -0.4656, -4.8922, -5.5390,
        -0.3974,  1.1700, -4.3686, -2.4738, -9.9288, -6.7067, -3.8539, -5.5877,
        -1.5126, -0.2392,  3.5790, -4.2551], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8094, -3.3352, -4.3294, -5.1441, -6.7808, -7.0755, -4.7491, -4.7550,
        -4.5953, -2.7472, -3.4252, -7.0906, -6.7155, -3.9915, -3.3395, -3.2697,
        -5.4371, -8.0066, -4.8000, -8.9310], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1164, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6664,   1.5945,  -5.1806,  -5.1064, -12.2531,  -7.3388,  -5.2346,
         -4.3981,  -1.9805,  -2.9679,   2.6024,  -3.4643,  -1.5242,  -2.9325,
         -3.3193,  -1.1517,   1.1697,  -2.9596,  -1.3070,  -2.6366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0028, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5736, -4.5780, -1.8005, -4.9574, -2.0317, -7.7889, -1.3401, -1.6150,
        -5.2537, -4.4918, -0.4362, -0.9540,  1.6176, -4.0278, -2.5159,  0.2100,
        -3.9674, -1.0229, -5.5373,  1.2962], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2797e+00,  1.2591e+00,  3.3880e+00, -1.7776e+00,  1.5209e-02,
        -2.8273e+01, -4.7919e+00, -3.3598e+00, -5.4123e+00, -5.5029e-01,
        -3.3381e+00,  4.1860e+00, -7.8276e+00,  2.7977e-01, -2.0449e+00,
        -4.7866e+00,  1.4850e+00,  7.3106e-01, -3.3636e+00, -1.4762e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3126, -5.7443, -6.1078, -5.5091, -7.0186, -3.1898, -4.7212, -1.9222,
        -6.3529, -6.5725, -5.4937, -5.5971, -4.1288, -5.0420, -2.7628,  0.5408,
        -6.5906, -3.3215, -5.8803, -5.1068], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4048,  -3.7144,   1.2495,   1.5675,  -3.3288,  -0.1846,  -2.5515,
         -0.0743,  -4.3636,   4.4056,  -4.5149,  -1.8322,  -1.4796, -21.2975,
          0.3614,  -1.2171,  -2.3885,  -3.2774, -17.7586,  -3.8929],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7493,  -2.4578,  -5.3423,   1.4737,   3.1572,  -2.2166,  -1.2567,
        -10.1361,  -8.1304,  -5.9385,  -6.2200,   0.0710,  -1.0402,   4.0593,
         -6.1839,  -2.3272,  -2.2624,  -0.7297, -10.0855,   1.2823],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3608, -8.0356, -7.7539, -5.2121, -5.7966, -4.5599, -4.9092, -6.1066,
        -5.7124, -5.5209, -5.8721, -6.2734, -4.4417, -4.1973, -4.2548, -3.5788,
        -3.7649, -5.8921, -3.4182, -4.9426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7230, -0.5478,  1.7407, -2.4984, -1.0552, -2.1201, -1.7435, -3.6460,
         2.8629,  2.6245, -3.0648, -1.6408, -3.5462, -0.4628, -4.0688,  2.5159,
         0.7412, -4.2368, -1.3344, -2.5877], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3396, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3667, -4.4660,  1.6662,  3.7564, -2.8660,  0.9054, -0.4527, -4.9713,
         0.2809,  0.8221, -3.8111, -1.4131, -1.7403, -1.0956,  2.5151,  4.3131,
        -3.2937, -0.0844, -4.2178, -2.8134], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9167, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5608, -0.3612, -2.8275, -0.2656, -5.1457,  3.1329, -2.8582, -3.5584,
        -1.1886, -1.7267, -2.8968,  0.9533,  2.5084, -3.0441, -1.9504, -3.7839,
        -1.0379, -4.6676, -1.5072,  1.5331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1556, -5.7511, -4.6382, -4.7273, -2.8025, -4.6510, -1.4393, -0.4371,
         1.7541, -4.3183, -2.6320, -1.4263,  0.4731, -2.6828,  2.3147, -2.5029,
         0.7483, -4.3624, -1.8477, -2.1442], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6853, -8.4547, -5.0769, -1.8977, -1.1058,  4.7242, -3.7665, -6.0077,
        -2.7533, -3.2701, -5.0487, -0.0648,  0.1796, -3.1471, -3.9240, -2.7608,
        -1.4457,  1.0817,  3.7275, -2.6021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5649, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3421,  -1.3386,  -1.6803,   2.3111,  -1.4917,  -4.9389,  -4.1710,
        -11.0550,  -3.6444,  -8.5962,  -1.1257,  -3.1928,   4.3825, -20.5274,
         -0.6969,  -1.3994,  -5.7208,   0.2793,   1.4517,  -1.6905],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9086,  -6.9424,  -6.9934,  -0.5665, -11.9070,   3.1058,  -2.9363,
         -1.3876,  -3.3466,  -1.3462,  -3.5333,   0.9396,   3.3082,  -1.3674,
          0.5991,  -1.7369,  -3.3069,   0.9595,   2.3408,  -5.6599],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3843, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7644,  -2.0513,   1.0492,  -1.4307,   0.5533, -15.7189,  -1.5294,
          0.6012,  -4.0458,  -0.6474,  -1.5915,  -4.1412,   1.2974,   0.4204,
         -4.6515,  -4.8939,  -8.9248,  -6.6639,  -3.1484,  -4.3742],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7569,  -4.2932,  -0.1160,  -5.5764,   3.3268,  -7.5373,  -0.9687,
         -0.5358,  -7.0634,  -0.6333,   2.0363,  -2.4273,  -1.6800, -13.5056,
         -4.2794,  -4.5861,  -7.3179,  -1.9760,  -6.2477,   0.1767],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7077,  -4.4933,   0.4742, -11.5829,  -2.2604,  -8.0133,  -1.2560,
          0.6629,   2.6172,  -4.0704,   0.4783,  -1.7060,  -0.7781,  -5.8277,
          2.4529,   2.9885,  -2.2554,   1.1959,  -1.9616,   0.0117],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5808, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.1291,  -3.5180,  -9.0668,  -1.0572,  -0.3703,   2.7304, -17.4838,
         -3.3612,  -4.2847, -25.4412,  -5.7007, -10.6413, -59.4838,  -2.4384,
          1.4595,  -3.5883,  -2.4406, -15.0016,  -4.9655,  -3.2536],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.1018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5189, -12.0750,  -4.7908,  -4.9812,  -0.7456,  -4.2296,   3.5679,
         -3.9558,  -3.8073,  -8.2719,  -8.4627,  -3.6673,  -1.7997,   2.2988,
         -1.9096,  -1.1759,  -1.0970,  -3.6550,   1.3729,   2.8976],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1667, -4.3769,  1.0043, -3.9669,  0.3796,  0.9623,  2.7383, -2.8696,
        -1.1103, -1.5940, -1.4204, -5.6488,  2.1506, -0.9800, -3.0570,  0.5846,
        -1.3265, -4.0793,  1.8239, -0.2422], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9931, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0099,  0.0710, -9.7449, -2.8407, -5.5709, -4.6386, -5.1137,  1.7025,
         3.2267, -3.9964, -1.2179, -4.3115, -0.4705, -3.0559,  2.2733,  2.6214,
        -3.7996, -1.5124, -3.2716, -1.1002], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5671, -3.1578, -3.9289, -0.9306, -4.0043, -7.4893, -2.8473, -2.1928,
        -1.1214, -1.1897, -1.5878, -5.0638,  1.4539,  0.1247, -3.9033, -3.7529,
        -1.6482, -1.0050, -1.4438,  1.0077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2624, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3942,  -3.3422,   3.8938, -16.9965,  -2.8662,  -3.2813,  -5.3642,
         -2.3889,   0.4682,  -2.5785,  -1.7359, -13.6112,  -6.4203,  -4.5136,
         -4.4914,  -0.8060,   0.7496,   4.8964,  -5.7884,  -0.8765],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5259, -2.2584,  0.7801, -3.4325, -2.3045,  0.7402,  4.2721, -1.1691,
        -0.8238, -3.7971,  0.2026, -5.4300,  1.3532,  1.9195, -3.4961, -0.8974,
        -2.3478, -0.3548, -4.3083,  3.7070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5575, -2.2732, -0.5408, -4.2112, -0.7474, -2.4186,  2.1831, -2.1698,
        -2.1972, -1.2074, -3.5076, -4.1059,  1.1445,  2.4074, -4.8520, -1.9939,
        -3.4226, -8.3591,  1.2651,  3.1686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7434, -0.6549, -3.7392,  0.2557, -4.5952, -0.6259, -1.2592, -2.0407,
         0.4904,  2.0312, -2.6930, -0.2103, -2.7015, -1.1285, -6.3863, -1.8470,
         0.5088, -4.0313, -4.3154, -1.5284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3874,   0.9600,  -1.8239,  -0.8627,  -0.3581,   3.6458,  -1.8721,
         -0.2415,  -1.3526,  -1.2341,  -9.4178,   0.5604,   0.9228,  -2.0178,
         -3.6543,  -4.1835,   0.4015, -70.5942,  -1.9456,  -1.3732],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5969, -0.6983, -2.8419, -0.2628, -4.8747,  0.8168,  0.3034, -3.7978,
         0.4865, -3.5575, -3.9041,  1.6581,  2.3062, -6.8794, -3.5384, -3.9240,
        -2.7697, -6.5179, -3.8170,  0.4992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2191,  -2.1423,  -1.7820,  -3.0895,   1.7706,   2.7390,  -1.9520,
         -1.2985,  -3.8207,  -1.7517,  -4.5393, -11.1767,  -2.7497,  -2.1050,
         -3.1415,   0.8387,  -6.7467,   0.7838,   2.6423,  -5.0068],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.9632, -1.7586,  0.5139, -2.1052, -4.9666,  0.4071,  3.2747, -3.1875,
        -1.0971, -2.1937, -4.9722, -4.2715,  1.1003,  1.9528, -2.0850, -1.7973,
        -4.7839, -1.2242, -5.3771, -0.9237], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2863e+00, -2.0653e+00, -3.8956e+00,  2.3125e-01,  2.0354e+00,
        -2.5620e+00,  1.4708e+00, -1.5070e+00, -1.2644e+00, -5.2625e+00,
         3.0389e+00,  3.5275e-03, -5.2542e+00, -1.2945e+00, -2.3576e+00,
        -2.5209e+00,  6.7503e-01,  3.7228e+00, -2.1420e+00,  8.1680e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.9709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2415, -3.0436,  2.8912,  0.9948, -3.8632, -1.4050, -0.3092, -3.5820,
         1.2133,  4.0563, -3.1948,  0.7570, -2.0566, -1.0689, -6.7013,  1.5942,
         0.6714, -2.5470, -1.2979, -2.9685], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2281, -1.6948,  0.6859,  2.7780, -5.6468, -1.7110, -1.1013, -1.3761,
        -3.4791,  2.0730,  1.3955, -2.4069, -1.4093, -3.8593, -1.6753, -3.2875,
         2.3417,  2.8931, -3.5001,  0.2201], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2494, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-24.2389,  -1.8856,  -2.9718,  -7.5388,  -0.3495,   2.3761,  -1.8662,
         -2.7400,  -4.1469,  -1.8085,  -8.9082,  -2.4971,   0.2815,  -7.6491,
         -2.4125,  -2.5452, -15.1806,  -3.6037,  -4.8406,  -4.8201],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3754, -0.3723, -3.7406,  1.2696, -1.6150, -0.5446,  2.0451, -1.3992,
        -2.8071, -0.3113, -2.7526, -1.0201, -2.6701,  0.1398,  2.1077, -3.2718,
         0.8685, -1.0359, -0.2255,  0.4550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6252, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.3646,  -5.0127,  -3.5850,  -4.8071,  -0.4701,   0.9295,   2.2721,
         -4.8873,  -0.2281,  -1.5343,  -5.6836,   1.0232,   3.6083,  -2.9133,
         -2.7723, -10.8773,  -6.0666,  -3.8181,  -4.9334,  -1.5256],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1823, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9126, -1.1692, -0.6488, -6.6438,  1.9624,  2.8746, -3.5172,  1.1081,
        -1.9199, -0.2230, -6.3407, -0.7646,  1.4047, -2.6809, -0.6685, -0.6968,
        -2.6707, -5.0227,  3.8266, -2.2969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7986,  2.9696, -2.4084,  0.4022, -1.3855, -2.0103,  0.9362,  3.0878,
        -2.4818,  0.7507, -2.3220, -2.2526, -5.0008,  2.4584,  0.8464, -2.0843,
         1.2988, -2.0860, -1.3084, -1.5130], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1477, -3.5749, -3.0711, -2.6988, -3.9020, -6.7711, -6.7441, -2.4298,
        -0.7750,  0.9004,  4.4218, -3.5124,  0.5878, -2.3050, -1.2523, -7.4259,
         0.3415, -0.2709, -2.3819, -0.8357], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9373,  -8.4289,  -7.9821,  -8.0409,  -6.7714,  -6.8257,  -7.6432,
         -7.2078,  -6.3799,  -8.3336,  -7.2669,  -5.8314, -11.2566,  -6.9321,
         -5.9278,  -7.1460,  -7.6401,  -7.2685,  -6.4285,  -9.1023],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9301, -1.6114, -1.3111,  0.8088,  3.5343, -2.5903, -1.9631, -3.6196,
        -1.4159, -3.1082, -1.0691,  1.0714, -2.8983,  1.4389, -1.5378, -1.7356,
         0.7501,  2.4695, -1.6395, -0.3800], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6941, -1.6832,  0.2120,  0.3888,  3.6217, -1.6807, -0.7856, -3.3014,
        -3.5448, -7.1965, -3.2761, -0.2812, -1.8670, -0.3083,  0.0930, -1.3825,
        -2.0783,  4.8773, -2.6240, -2.4655], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2988, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0067, -5.7231, -3.1792, -6.0926, -0.5055, -8.2888, -2.5202, -4.2257,
        -0.7497, -2.7490, -3.6919, -3.0026, -0.5934,  2.2111, -4.4101, -1.6036,
        -2.7658, -3.1174, -4.1460, -0.1878], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5222,  -4.9794,   1.7598,   2.5216,  -1.4930,  -0.6338,  -2.9469,
          0.0886,  -5.3073,   5.9467,  -1.5454,  -3.6080, -15.3190, -20.2444,
         -3.3576,  -7.0923,  -4.5317,  -5.0835,   0.4305,   4.0405],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5869,  -7.8990,  -0.4077,  -1.1325,  -0.3165,  -0.7849,   5.0908,
         -3.1857,  -8.8653, -10.2465,  -5.1278,  -2.7608,  -5.0960,  -3.9371,
         -0.5087,   2.0429,  -8.9132,  -7.1478,  -2.4827,  -0.9214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3286, -5.5535, -3.3501, -3.2393, -4.1856, -5.8530, -2.4100, -4.1692,
        -4.0359, -4.7699, -3.7759, -0.2818, -3.2611, -1.5224, -4.4689, -1.8839,
        -4.6376,  2.7391, -3.5164, -3.2139], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2742, -1.9533, -2.8713,  0.6766,  3.5442, -2.4188, -0.0405, -2.0933,
        -0.5501, -0.2894,  3.6472, -2.6042,  0.0899, -2.9822, -1.3913, -3.4812,
        -0.7862,  0.9439, -2.7543,  0.4320], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5122, -0.4244, -6.8349,  5.5917, -1.6742, -0.1274, -2.6340, -2.4861,
        -7.9527,  1.1778, -0.7640, -2.7748, -0.5360, -0.6398,  0.5806,  0.1506,
         4.0396, -4.4098, -0.6519, -1.9321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1907, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5516, -2.8455, -5.1549, -4.9041, -2.9409, -0.8027,  2.5791, -2.6633,
        -2.1556, -2.5167,  0.0207, -7.2342,  2.1576,  2.3057, -4.0423, -1.2985,
        -3.5581, -2.4673, -3.9403,  1.7817], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8064, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4468, -9.0043, -3.7264, -8.5740, -2.5933, -1.0431,  2.7797, -7.0262,
        -0.8308, -6.1232, -2.8231, -5.7466, -4.6775, -1.7809, -3.2967, -1.6867,
         0.3878, -5.2563,  1.5469,  2.6503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7955,  0.3894, -3.4370, -0.0503, -1.4700, -2.1834, -7.3257, -6.9586,
         0.5134, -2.6829, -0.0822, -2.1816, -2.0232, -4.8136,  3.1107,  0.6519,
        -3.0978, -0.6064, -4.5083, -5.0625], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7473, -2.7353, -1.3607, -1.8722, -0.4261,  1.4612,  1.8325, -2.4065,
        -0.7539, -1.5092, -0.2366, -4.8363,  1.3490, -6.6519, -1.3871, -2.3566,
        -4.8181, -0.3811,  2.0831, -5.9380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7667,  -1.0241,  -6.5783,  -5.1199,  -3.5039,  -3.2878,  -1.1579,
          0.8098,   2.6107,  -3.0422,  -1.1957,  -1.6242,  -4.7382,  -0.4793,
          2.4305,  -1.8781,  -1.3035, -14.0036,  -5.0471,  -3.5565],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2591,   2.6804,  -5.7084,  -1.8211, -19.3876,  -4.9755,  -7.9501,
         -0.6700,  -3.1521,  -0.4153, -15.2701, -14.7469,  -3.9104,  -4.7011,
         -1.5187,  -8.2486,   2.3167,  -2.2959,  -0.0960,  -3.0883],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1896, -0.3829, -1.8176, -4.8783, -4.2552,  2.6676, -1.6366,  0.1613,
        -1.5071, -3.3263,  0.3844, -1.2292, -3.2237, -0.4472, -1.6842,  0.0568,
        -2.4558,  3.1956, -1.7919, -0.8269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2552,  3.2149, -2.5810,  0.5322, -0.8520,  0.0094,  2.0571,  3.4644,
        -4.3325, -4.4428, -3.0056, -1.2988, -5.2975,  1.0372,  0.1866, -1.6631,
        -1.3032, -0.8900, -2.6745,  0.7047], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1872,   0.9713,  -2.8661,  -0.8447,  -1.8260,  -2.1928,   0.8482,
          3.8542,  -1.9927,  -1.5880,  -3.0528,  -3.8004,   1.3850,   3.2783,
         -6.2624,  -2.0323,  -2.7591, -17.2743,  -3.5044, -21.0908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3227, -2.8869, -0.4208,  4.6658, -4.8457, -2.1592, -2.4392,  0.8108,
        -1.5972,  4.0292, -7.5827, -0.4544, -2.6208, -0.7722, -2.5518,  2.0268,
        -4.3715, -4.1978,  0.9327, -3.5887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5459, -4.1801, -1.4298,  2.5355, -3.2477, -1.6618, -2.7063, -1.8673,
         1.7364,  3.3757, -4.6054,  0.0432, -3.7745, -1.7371,  0.9693,  2.1104,
        -3.3072, -0.7524, -2.3846, -1.2684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6805, -0.0786, -1.7702, -0.7556, -7.4621,  3.3048, -2.7515, -0.0630,
        -1.8827, -1.0239,  0.1343,  2.6163, -3.1584,  1.6486, -5.2749, -3.8760,
        -6.2677,  0.6967, -2.2906, -5.7560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0346, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1033,  2.2893, -1.6887,  0.1884, -4.4801, -1.4360, -2.9284, -1.8709,
        -3.1774, -4.0000, -2.5894, -4.3104,  0.1817, -4.9168,  1.8021,  2.5161,
        -6.1945, -0.3780, -3.4665, -1.2841], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0423, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9377,  -2.9137,  -2.8166,  -4.2037,  -2.0343,   2.5728,  -7.5677,
         -2.7582,  -6.6910, -14.0611,  -6.6243,  -4.5662,  -4.4202,  -9.6454,
         -4.3686,  -8.4264,  -6.1564,  -7.2768,  -5.2766,  -1.7237],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4367, -3.2125,  3.2645, -1.4896, -0.2252, -1.8965, -2.7845,  1.6657,
         2.6146, -3.0396,  0.6422, -4.3873,  0.0314, -3.6799,  1.1317, -4.5024,
        -1.3643, -0.2605, -5.2167,  1.5403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9322, -2.7364, -6.1238,  2.3934, -3.4483,  0.5433, -2.2132, -4.8582,
         0.6243, -0.6179, -4.6068, -0.1175, -4.4965, -3.9520, -5.4342, -3.1112,
         2.4538, -3.6204, -1.4988, -2.2833], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3018, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1556,  -2.6672,  -1.2223, -11.9341,  -7.4358,  -3.8798,  -5.0153,
         -3.7059,  -2.9507,  -3.8245,   3.6996,  -4.4322,   0.7422,  -1.5646,
         -1.4732,  -0.3062,   2.0635,  -2.1815,   0.4325,  -3.7550],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7484,  -0.8277,  -0.0740,   3.2026,  -3.5830, -16.0186,  -2.8582,
         -5.6408,  -2.0370,  -2.1043,   1.4996,  -7.4358,  -5.1322,  -3.8253,
         -2.5006,  -1.6294,  -2.3849,   1.9685,  -2.7641,  -1.0464],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1001, -5.6861,  2.7584, -1.2551, -1.2870, -3.0253, -4.8676, -0.0717,
         0.6395, -5.2643, -4.8705, -0.8020, -2.6732,  0.2488,  2.4284, -3.4778,
        -3.0543, -2.6835, -6.7041, -2.8989], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7122,  4.7895, -4.0156, -0.5926, -0.9320, -1.9357, -1.3627, -0.9910,
        -3.2375, -1.1391, -3.7191, -3.5830, -8.6552,  2.6467,  0.3393, -3.5579,
        -0.1128, -0.4762, -4.9607,  0.7451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8044, -4.8981,  1.4602,  1.5473, -1.9764, -0.5168, -1.5601,  0.3233,
        -1.5332,  4.3728, -2.4263,  0.4653, -2.6172, -2.9667, -7.3420, -0.6252,
         0.7885, -4.4200,  0.3136, -1.5132], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1964, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6475, -1.6884, -4.6512, -5.6228, -7.6744, -1.1912, -4.7026, -4.2386,
        -2.0290, -5.6571, -4.2561,  2.9242, -3.4289, -4.2083, -0.8478, -3.4653,
        -1.0827,  0.0843, -2.9275, -2.2373], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5806,  -2.1543, -15.7815,  -3.3307,  -6.3954,  -3.6617,  -6.1842,
         -5.1032,   2.5277,  -2.5766,  -2.0000,  -3.0518,  -3.7055,  -3.0665,
          2.3459,  -3.1418,  -0.5609,  -2.9000,  -4.0329,   2.3200],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0093,  -1.7822,  -2.8915,  -5.2506,  -4.4026,  -1.4971,  -0.6511,
         -0.9684,  -7.3421,   3.4675,  -3.5267,  -4.4639, -20.1383,  -4.6884,
         -2.8561,  -5.1190,  -1.0993,  -5.2870,   1.7842,  -1.8290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3567,   2.0348,  -2.2503,  -3.5079, -12.3697,  -7.0131,  -3.1595,
         -4.4228,  -0.7578,   0.4467,   2.7572,  -7.7956,  -0.1296,  -1.9313,
         -0.2968,  -1.7111,   3.8432,  -3.3279,   0.0609,  -0.9080],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6873, -5.3833, -1.8916,  2.7921, -5.7412, -6.7425, -1.6764, -4.2516,
        -2.4521, -7.3773,  1.3502, -2.6433, -0.5416, -2.3651, -0.5925, -3.9679,
         2.2319,  0.7479, -2.6188, -0.2117], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9671e+00,  5.2417e-01, -1.5425e+00,  1.7550e-01, -3.9109e+00,
         3.8327e+00, -2.5730e+00, -3.2636e-01, -4.0393e+00, -3.1823e+00,
        -5.0910e-01,  3.0254e+00, -3.6657e+00, -3.0617e-03, -8.1162e-01,
        -2.1036e+00, -7.4958e+00,  2.6691e+00,  4.7029e-01, -1.9199e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1984,  0.4750, -2.6644,  0.3777, -6.8579,  2.8524,  1.6692, -3.2889,
         1.3230, -1.5162, -0.3055, -2.8956,  5.1783, -0.7775, -0.0749, -1.5809,
        -2.6810,  1.0308,  2.4082, -1.9662], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6246, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8945,  0.5939,  3.6640, -4.4455, -3.4472, -2.8093, -0.0220, -1.3469,
         1.2865, -7.3603, -5.9257, -0.5728, -2.2666, -0.8407, -4.1300,  2.0892,
         1.3188, -4.7707, -1.0242, -3.0741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0329,  3.6446, -2.9752, -2.3437, -4.0450, -3.3860, -5.9310, -0.3861,
        -4.5199, -1.0688, -2.0051, -6.1817, -2.1758,  0.1085, -2.9079,  0.0316,
        -2.8558, -3.2748, -2.9040,  1.9916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5732, -5.5359, -6.3074,  0.4113, -4.0088, -4.0008, -4.7968, -7.2625,
        -7.2426, -6.1391, -2.1950, -0.8679, -0.1075, -2.9642, -2.3654, -4.0030,
        -4.6712, -1.2695,  1.1890, -4.6034], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6157, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1887,  -1.2064, -11.1972,  -3.6921,  -3.5041,  -2.0010,  -0.6671,
          4.2912,  -5.8133,  -1.2307,  -2.8139,  -0.6155,  -3.5355,   1.8269,
          3.4205,  -3.4476,  -0.1696,  -3.0577,  -0.9198,  -3.8850],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8587, -3.2774,  2.5962, -6.7089, -2.0873, -2.1448, -1.3622,  2.1813,
         1.9761, -4.5825, -0.9487, -3.8677, -1.1258, -1.9498,  4.7233, -2.4408,
         0.9958, -2.9401, -4.2990,  0.0270], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2036,  3.5749, -0.8871, -1.9140, -1.2157, -2.2266,  2.4375,  3.2510,
        -1.9903,  1.1671, -2.3433, -1.7082, -9.1189, -1.6143, -0.2712, -2.3164,
        -1.0003, -1.1372, -1.8154,  2.6323], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9850, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3782,  -3.5191,  -2.7102,  -0.8253,   1.4059,  -4.3168,   0.0310,
         -1.5271,  -2.7075,   2.1348,   2.0169,  -4.0746,   0.2172,  -2.1667,
         -0.8719, -11.5986,  -0.5795,  -0.1928,  -1.9967,   0.3556],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4858, -0.7017, -1.2979,  0.0714, -4.7203,  1.3993,  0.9260, -2.5469,
        -2.7232, -1.3227, -0.7575,  1.0808,  3.0071, -3.7245, -4.0682, -7.6045,
        -7.4039, -3.1991, -5.3392, -1.2223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3823,  3.9383, -3.6976,  0.1633, -2.2942, -0.3912, -4.3388,  1.3706,
         0.6468, -2.7176, -2.0905, -1.8145, -0.5877,  0.9327, -1.7154, -2.4076,
        -2.0163, -3.2822, -3.8124, -1.1954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2813,   5.1243, -23.9282,  -1.5720,  -2.9688, -12.6055,  -9.6331,
        -27.1022, -14.2115, -23.1571, -12.0266,  -4.4530, -12.7707,  -3.8489,
         -7.3632,  -7.6621,  -1.2782,  -8.4174,  -5.3186,  -4.5714],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.0523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2919, -4.9827,  0.2428, -2.1847, -1.8138,  0.5390,  3.6082, -1.6795,
         0.2042, -0.5727, -1.5381,  1.8628,  2.9175, -3.2449, -1.2783, -3.8637,
        -1.7710, -3.3852,  2.4464,  2.9402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.6640,  -1.9505,   0.5546,  -3.1954,  -0.8636,  -5.5746,  -6.1387,
          2.6838,  -3.6666,   0.1435,  -2.1440,  -1.7441, -11.3182,   4.2049,
         -1.9713,  -1.1179,  -1.8040,  -1.0147,  -7.9332,  -0.7131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9950, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8817, -2.1465, -4.5454, -1.5376, -1.4395,  1.1747, -4.8211, -0.3182,
        -1.8729,  0.9379, -6.7907,  2.3961,  0.8566, -3.8342, -1.0974, -3.2072,
        -2.1182, -3.3060,  2.7098,  2.9223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.3456, -2.4411, -0.5001, -1.8953, -1.3968, -3.8376, -0.3178,  2.0822,
        -2.6605, -8.6609, -2.2901, -1.4993,  0.8150, -1.5382, -5.4604, -0.3199,
        -3.9914, -1.7163, -1.3934,  1.5726], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4078,  -2.4161, -11.9665,  -5.1900,  -1.7847,  -3.9132,  -1.9892,
         -1.2734,  -1.5012,  -0.6772,   3.7742,  -2.0162,   0.3524,  -2.3401,
         -2.4944,   1.1781,   3.6595,  -2.5587,   0.6463,  -1.4947],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.8221, -2.8242,  0.2555, -1.5122, -0.1812, -1.5454,  4.8090, -3.5421,
         0.2669, -2.6528, -0.1197, -9.5482,  2.7942,  2.2881, -3.9645, -0.8200,
        -1.7986, -1.5815, -4.5431, -0.4060], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1648,  2.2369, -2.7072, -0.1445, -4.3105, -0.2338, -4.5420,  1.2532,
         2.7283, -2.2065, -0.8923, -2.5656, -3.6968,  1.6924,  2.8252, -3.3510,
        -1.2685, -3.6000, -3.3887,  2.2776], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7782,  -5.1084,  -0.5542,   0.0246,   0.7514,  -3.8517,   0.4757,
         -2.0830,  -2.5976,   0.8937,   2.1035,  -2.9165,  -2.5117, -10.0489,
         -6.7512,  -5.5969,  -5.5087,  -3.7397,  -2.0327,  -1.7266],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7702,   2.5757,  -3.2064,  -1.0100,  -1.0034,  -0.6056,  -0.0485,
         -4.3736,  -4.0800,  -1.3960, -12.0106,  -3.6093,  -7.2730,  -2.9708,
         -0.6973,   3.1437,  -3.0487,  -9.2106, -17.5753,  -7.3441],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8454,  2.4280,  2.3247, -5.7662, -1.9208, -1.8161, -1.1711, -5.3367,
         1.5365, -6.2074, -3.8763, -0.8743, -3.4722,  2.8975,  3.6211, -2.3510,
         0.9603, -2.5789, -3.6908,  1.7922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3173, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1542e+00, -2.6778e-01, -2.0000e+00, -2.6982e-03, -8.2980e+00,
        -9.1668e-01, -2.3126e+00, -5.0372e+00, -8.2031e-02,  2.1539e+00,
        -5.9395e+00, -3.0752e+00, -1.2722e+01, -6.4955e+00, -5.7499e+00,
        -6.0146e+00, -4.2777e+00, -5.1353e-01, -2.2590e+00,  3.7226e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7492, -2.5301, -0.1895, -0.4846, -2.9534,  1.0529,  4.3636, -2.4834,
        -0.7696, -1.9960, -1.2686, -3.4874,  3.8620, -4.9313,  0.4614, -3.5418,
        -2.2496, -6.7543, -0.9371,  0.6539], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.3974, -36.5347,  -3.2765,  -5.3136,  -6.0018,  -5.7363,  -6.1908,
          1.4685,   3.4831,  -1.5943,  -0.2828,  -3.1076,  -0.0396,  -3.8459,
         -1.9277,   2.5568,  -2.5048,  -2.7671,  -0.8676,  -1.0271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4176e+00,  4.1229e+00, -3.0984e+00,  6.9628e-01, -2.1710e+00,
         2.2656e-03, -5.3608e+00,  2.6007e+00,  1.4983e+00, -2.2135e+00,
         1.2119e+00, -1.5879e+00,  2.5595e-02, -2.4727e+00,  2.7071e+00,
        -5.6623e+00,  3.4220e-01, -1.5848e+00, -8.4692e-02, -3.8971e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.0172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1954, -3.0833, -1.5155,  0.6703,  2.0304, -2.5862,  1.6848, -3.6923,
        -0.9426, -3.3851,  3.3317, -2.4941,  1.2081, -3.6815, -0.0370, -3.9577,
         2.1261,  3.1748, -2.4657, -1.2845], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8788,  0.0853, -3.7048, -4.6026, -0.1225,  3.7755, -2.7551, -2.3452,
        -4.9376, -3.8072, -6.3176, -1.8446, -2.4953, -2.9963, -2.2706, -4.8098,
        -1.5320,  0.3638,  3.8679, -3.0670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2197, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8713,  0.0236, -4.4553,  2.6905,  1.4133, -3.7234, -1.3744, -2.5807,
        -0.9273, -6.3392,  3.4859, -0.9682, -0.7352, -9.1827, -6.9738, -4.6141,
        -3.0070, -1.8884, -1.5131,  4.0928], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3136, -6.0018, -5.7363, -6.1908,  1.4685,  3.4831, -1.5943, -0.2828,
        -3.1076, -0.0396, -3.8459, -1.9277,  2.5568, -2.5048, -2.7671, -0.8676,
        -1.0271,  0.8621,  4.0748, -3.0252], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9141, -4.0061, -3.5892, -3.4432, -4.8048,  0.0420,  3.1089, -2.2815,
        -0.1811, -0.3812, -2.6378,  2.5715,  3.8126, -1.8991,  1.3000, -3.0734,
        -0.2188, -0.0663,  5.0190, -2.7763], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7211, -21.3369,  -4.4950,  -4.6045,  -8.1288,  -5.5759,  -4.7294,
         -0.2130,   0.2050,  -2.3526,  -8.3921,  -2.8452,  -2.9912,  -3.9145,
          2.4214,   2.7695,  -2.7723,   1.3020,  -1.3613,  -1.3306],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0827, -13.4963,  -1.3480,  -0.8678,  -4.6798,  -3.8675,  -4.8962,
         -0.4612,  -4.1547,   0.7204,   3.7761,  -2.9413,  -1.0520,  -3.5382,
         -0.8233,  -2.9219,   0.2761,   1.8479,  -5.2465,  -0.1999],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.2481, -3.0672, -0.3624, -3.2602, -5.1435,  0.5007,  3.1108, -2.4804,
         0.2478, -0.4837, -1.1788, -5.5144,  2.8162, -0.2083, -2.4051,  0.2088,
        -3.1112, -0.7297, -1.9657,  4.4820], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0538,  -1.1734,  -3.3488, -14.1178,  -3.4356,  -1.0446, -10.4639,
         -5.2059,  -2.7860,  -6.8680,  -7.1976,  -1.6382,  -4.3285,  -1.3801,
         -9.6314,  -8.5518,  -4.1394,  -3.0769,  -1.9332,   1.3838],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8671,  3.1908, -3.2388, -2.0403, -4.7142, -0.9728, -2.6309,  1.2309,
        -4.3465, -5.4796,  0.0485, -1.4399, -1.8319,  1.4152,  2.8853, -3.0098,
         0.0325, -5.2993, -1.5061, -2.2854], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.6414,  -3.2054,   0.1498,  -7.1873,  -4.0191, -27.0624,  -9.4073,
        -10.3657,  -5.0942,  -0.1673,  -6.4744,   3.5456,  -4.0396,  -1.5783,
         -2.9946,  -0.6856,  -3.6234,   0.3613,   3.2354,  -2.7906],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5032,  -3.2976,  -0.4371,  -3.7177,  -3.4327,   0.3700,   1.4530,
         -4.4106,  -3.6705,  -5.5713,  -2.6992,  -6.2628,  -3.1722,  -5.0606,
         -6.6421,  -5.3367, -44.8434,  -3.5975,  -5.5419, -15.4466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8570,   2.2340,  -6.8450,  -2.8052, -12.9525,  -3.8116,  -7.4472,
         -1.5812,  -0.3121,   3.1549, -10.0683,  -1.7546,  -2.8907,  -2.1154,
         -4.7848,   0.8828,  -5.8219, -10.6365,   0.7081,  -2.7596],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.6049, -3.1345, -1.3621, -3.6376, -3.7482, -3.2411,  0.1297,  1.9206,
        -1.9326, -1.5504, -2.2253, -3.1401, -4.5005,  1.3436, -1.0228, -3.4952,
        -1.4515, -3.3209, -0.9200,  1.1772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5253, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9555,  1.3986, -2.4335, -3.0249, -1.7305,  3.1886, -3.4944, -1.5032,
        -2.3154, -1.7631, -2.3491,  0.2403,  0.9874, -1.3408, -1.7323, -2.5836,
         0.5507, -5.8389,  2.3038,  0.2246], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8097, -0.4208,  3.9205, -2.1056, -0.6669, -1.0493, -1.8195,  1.0592,
         3.5589, -2.3847, -0.3729, -3.9042, -1.0474, -2.1696,  2.4455,  2.3375,
        -5.7240, -0.9681, -2.8085, -3.4453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5715,  -0.6509,  -3.9503,   0.2377,   1.1727,  -4.7141,  -1.0069,
         -2.1787,  -1.6646,  -1.6117,   4.5984,  -2.2721,  -4.0099, -12.7457,
         -2.2566,  -4.4386,  -1.1047,   0.7158,  -1.7809,  -3.6746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8442, -2.5868, -7.5347, -0.2038, -2.4194, -4.2112, -2.3470, -2.0174,
        -2.3605,  1.7632,  3.9516, -3.0043,  0.0907, -3.5268, -3.4212, -6.2574,
        -6.6189, -7.3985, -5.5645, -1.7154], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2113, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2596,  -3.2647,   0.3154, -19.8964,   1.0296,   1.1089,  -6.3321,
         -1.4747,  -4.0361,  -3.0400,  -4.4087,   2.9967,  -1.1059,  -1.9500,
        -11.7916,  -1.0514,  -6.0744,  -4.0470,   2.9043,  -2.1202],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0166e+00,  2.3402e+00, -3.0429e+00, -1.1381e+00, -3.2727e+00,
         1.9453e-03, -2.0967e+01,  3.0562e-02, -2.3868e+00, -8.9750e+00,
        -1.3340e+00, -3.0200e+00, -3.3504e+00, -8.0916e-01,  2.2975e+00,
        -3.1531e+00, -1.5363e+00, -3.3494e+00, -1.4518e+00,  6.3049e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0195, -6.2885,  2.4728, -3.6137, -4.6583, -3.7017, -3.1238, -4.5177,
        -3.1185, -7.7866, -7.6225, -2.7953, -2.5589, -1.2225,  0.5017,  1.3733,
        -3.4132, -2.1063, -7.4550, -6.6094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4768,  -7.2127,   1.3733,   1.1400,  -3.5391,   0.4902,  -2.9990,
         -0.4724,   0.9915,   3.9406,  -2.8389,  -0.5887,  -2.7278, -15.0399,
         -3.1825,  -8.0177,  -0.1813,  -1.4613,  -0.7193,  -3.9152],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.8906,  -6.6805,  -3.6097,  -0.3054,  -0.0527,  -4.6585,  -5.0524,
         -3.1976,  -1.4168,  -1.3949,  -0.5645,   0.4643,  -3.8597,   1.1050,
         -9.4335,  -6.2198,  -3.5132,  -4.4062,  -1.3283,  -2.2722],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4144, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.2566,  -3.9403,  -2.2764,  -3.7482, -27.1506,  -4.6206, -28.1158,
         -4.6508,  -5.6412,   0.2064,  -4.3413,   1.1997,  -7.6788,  -1.3937,
         -3.4631,  -0.8409,  -3.3617,   2.2078,   2.7371,  -3.6192],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7617, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0366,  0.6547, -5.5572, -1.8656,  2.0466, -3.8890,  1.2822, -1.7633,
         0.0252, -1.2983,  4.5098, -3.3264, -3.2594, -2.1990,  0.0442, -5.4293,
         2.6282,  1.7194, -2.7374, -0.8655], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6787,  2.7400, -6.4265,  0.1513, -1.0601, -2.5805,  1.9728,  2.4114,
        -3.3834, -0.5364, -0.3703, -1.7057,  2.1346,  2.6635, -1.6512, -2.0877,
        -0.9104,  0.3952, -2.9470,  5.1716], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1568e+01, -3.7327e+00, -1.1961e+00, -2.2368e+00, -5.9924e+00,
        -3.6202e+00,  1.0112e+00, -2.7332e+00, -1.9162e-03, -2.9100e+00,
        -2.2798e+00, -6.6422e-01,  5.0918e+00, -2.5792e+00,  8.7829e-01,
        -2.5603e+00, -1.7967e+00, -3.2426e-01,  2.8500e+00, -6.5180e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.7628,  -3.0150,  -2.1913,  -2.9841,  -2.8828,  -4.6127,  -4.1166,
          3.7475,  -5.9886,  -2.5647, -23.4914,  -5.6084,  -2.9570,  -2.7399,
         -0.8014,  -0.7991,  -1.0299,  -2.1674,   0.2614,  -1.0677],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0123, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3411, -1.6945, -2.5073, -3.3620, -3.9509,  2.9272, -1.7169, -1.3280,
        -2.4789, -2.2804,  1.2714,  3.1823, -3.9367,  0.2703, -4.8174, -0.6858,
         0.5700,  4.3219, -4.1840, -1.9353], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4170,  -0.3891, -23.2832,  -0.6606, -34.5985,  -5.8566,  -1.3606,
        -32.5221, -10.7211,  -5.8959, -10.3676,  -6.2511,  -1.0304,  -2.4121,
          3.1124,  -4.8713,  -0.7556,  -1.4518,  -1.1016,  -6.2129],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4637,  1.6266, -1.9278,  0.6373, -2.1810,  3.3412, -1.0293, -0.4137,
        -3.1501, -0.3730, -4.0077, -0.4775, -4.7066, -2.9754, -1.8048, -2.9451,
        -0.0576, -3.6125,  0.3563,  2.7813], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0914,  3.7800, -2.0016, -1.6235, -1.4467, -1.5304,  0.5857,  4.4222,
        -1.8360, -0.1029, -0.3670, -2.6412,  2.0713,  3.3122, -3.4113,  0.1072,
        -2.1243, -1.7282, -1.5676,  3.6747], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3841, -0.4541, -0.5413, -4.1654,  1.3300,  2.6381, -3.3585,  0.1556,
        -2.1516, -2.6534, -2.9836,  2.7300, -4.4475, -0.3340, -0.7707, -5.8811,
         1.6342, -5.0855, -4.3251, -3.2814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7455,  -5.6582, -16.6321,  -5.5565,  -1.8559,  -3.5089,  -0.9827,
         -3.4868,  -3.8221,  -3.6522,   0.6187,  -4.0048,  -1.4968,  -3.8617,
         -4.4255,  -3.6366,  -0.4927,  -3.5093,  -1.4477,  -5.7273],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9880, -0.7293,  3.1750, -1.1958,  0.3263, -2.2423, -2.6115,  0.3097,
         1.9623, -2.0721,  0.0564, -5.6759, -6.9640, -3.6997, -3.9390, -0.4141,
        -1.4149,  2.4157, -6.6770, -0.9795], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.2472,  -1.1554,  -0.5898,   1.3079,  -7.5806,  -0.9114,  -2.5791,
         -6.7429,  -2.6082,   0.8676,  -3.0941,  -1.4209,  -3.2179, -17.5127,
         -6.5798,  -2.8462,  -4.0897,  -5.3177,  -0.4513,  -2.9332],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8245, -2.7317,  1.3636,  2.7786, -5.3525, -0.2981, -1.5687, -2.4085,
         1.1886,  2.8375, -2.4087,  1.2016, -2.2766, -0.3049, -4.5836, -9.2080,
         0.6848, -4.2101, -0.5894, -3.5101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2874,  -0.0926,   2.6225,  -5.8816,  -1.6667,  -2.1101,  -1.8418,
         -3.2982,   0.3691,   0.6783, -13.9646,  -1.5041,  -3.9741,  -5.4499,
          0.2570,   1.1705,  -2.9207,  -2.2969,  -4.8750,  -3.7058],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0980, -1.4740, -0.9881, -2.3471,  2.4934,  2.4412, -2.7345,  0.8705,
        -3.4920, -3.0080,  0.9393,  3.0247, -3.1309, -1.6058, -1.1204, -3.7776,
        -0.6090,  1.7649, -4.3170, -1.0483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1108, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9389, -6.8982,  1.3356,  1.9804, -2.9075,  1.1310, -2.8844, -1.3099,
        -2.3284,  4.4798, -2.9269, -4.0782, -3.0966, -5.5025, -5.0129,  0.1195,
         1.0304, -2.7211,  0.8888, -1.8868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6263, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1703, -7.3343, -2.5502, -0.9432, -1.7960, -5.2135,  2.2291, -5.9130,
        -3.3270, -4.3485, -0.5987, -4.8732, -0.6682,  2.2586, -2.5529,  0.4953,
        -1.1332, -3.1286,  0.9237,  3.4645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6126,  -5.5367, -15.5198,  -4.7568,  -3.9067,  -5.4609,  -2.2863,
         -5.6874,   1.8800,   3.0915,  -3.1436,   0.1503,  -1.6043,  -2.2964,
          1.6046,   4.7237,  -3.3686,   0.8014,  -3.9706,  -1.6357],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0317, -3.2779, -0.7563, -4.5446,  0.5066, -4.4680,  2.5627,  1.0563,
        -2.3893, -0.0632, -2.8895,  0.6999, -1.4799,  3.8007, -3.2530, -0.0427,
        -1.1939,  0.0860, -8.0486,  6.2611], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5073,  -4.6628,  -5.3728,  -0.3928,  -1.0870, -18.9140,  -0.2240,
          0.6941,  -2.8089,  -0.5063,  -1.4472,  -1.0350,  -4.4860,   3.1793,
          1.6294,  -1.8769,  -1.3958,  -3.1943,  -1.3376,  -5.9855],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8585, -3.8455, -3.4041, -3.0392, -2.2897, -0.9605, -2.4586, -1.7590,
        -2.8880, -1.0509, -7.6680, -1.2630,  2.8009, -1.4969, -1.6200, -2.5498,
        -1.0990, -4.0829,  0.9047,  1.1186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1557, -3.4110, -1.0007, -3.4199,  0.9733,  3.0447, -3.6124, -0.5280,
        -1.5599, -1.8122, -5.4916,  1.5532,  1.8135, -2.3479,  1.5271, -1.1898,
        -0.9461, -6.6636,  2.9937,  0.9815], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9626, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3291,   2.7881,  -3.9202,  -2.7315,  -4.2968,  -2.4506,  -3.0798,
          0.9555,   1.6241,  -7.6716,  -1.0642,  -0.5785,  -4.1335,   0.2028,
          1.8981,  -7.2689,  -1.8725, -17.4899,  -7.4318,  -4.6933],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9443, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2402, -2.5101, -0.6794,  0.5616,  2.9757, -3.4921, -0.7935, -4.2217,
        -0.3435, -2.5968,  0.7647,  0.7961, -2.6322, -1.0153, -1.3624, -2.1195,
        -3.4607, -0.1455,  2.2441, -1.1784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9484, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9312, -2.2982, -1.2810, -2.1077, -1.3170,  1.2059,  0.4012, -1.6903,
        -0.3480, -0.4958, -1.3667, -3.0969,  3.2657, -1.4568,  0.1659, -3.3172,
        -0.3803, -4.1438,  2.7879,  0.8384], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4448,  3.6813, -1.3684,  0.5635, -2.5764, -2.6181,  0.3392,  2.2648,
        -4.2804, -1.7282, -2.1553, -0.9155, -4.5048,  1.4535, -0.5183, -0.0443,
        -0.7344, -3.1696,  0.2363,  2.7870], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5869, -3.4119, -0.1265, -0.7745, -1.0995,  5.3379, -1.6459, -1.1427,
        -2.7173,  0.7621, -4.8912,  2.9683, -0.7068, -4.0381, -1.0599, -0.9387,
        -0.3419,  0.8186,  4.4027, -4.5459], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1871,  2.9145, -3.0849,  0.7367, -0.7899, -4.6893,  2.5242,  2.8529,
        -2.6120,  1.6860, -2.0059, -1.5013,  1.2092,  3.9871, -1.6827, -0.2297,
        -2.2737, -0.2680, -5.6914,  5.0983], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6197,  -2.8736,  -4.4865,  -0.5333,   0.5144,  -0.1491,  -3.6160,
         -0.3263,  -1.4608,  -3.7061,   1.7454,  -0.0842,  -5.4208,  -4.2375,
        -17.6054,  -4.4646,  -6.5895,  -3.8788,  -4.4362,  -7.8174],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0873,  -1.9641,  -1.4675,   1.5777,   3.9289,  -2.2461,  -6.2916,
         -4.1748,  -3.0977,   0.6064,   2.4850,  -7.0985,   0.4031,  -1.1393,
         -5.0256,  -0.1728,   3.0288,  -2.0398,   1.3321, -25.5663],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3418, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2195,   2.3898,  -6.5997,  -1.9819, -12.1046,  -6.8731,  -9.3871,
         -3.9706,  -4.0924, -11.3372, -15.2627,  -8.9457, -11.5847,  -7.1935,
         -4.8201,  -0.5983,  -3.7791,  -0.8710,  -6.7562,  -6.1688],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4500,   2.0839,  -1.7064,  -7.5886, -12.3948,  -4.8740,  -8.7967,
          0.1357,  -2.9049,   2.3947,  -5.2615,  -5.1650,  -2.7271,  -0.8765,
         -5.0687,   2.2176,  -2.5901,  -2.6458,  -1.7326,  -6.6584],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3644, -1.4810, -2.5125,  0.8910,  2.5894, -1.6772,  0.9580, -1.2054,
        -4.2646,  2.2888,  0.9202, -3.2789,  0.8457, -3.7345, -1.7578, -4.2353,
         0.4564,  2.4078, -2.6885, -0.1927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5416,  3.9888, -4.7005,  0.3022, -1.8110, -0.9555, -5.1896,  1.4838,
        -0.3722, -5.4318, -0.2912, -1.6403, -2.7357, -0.0840,  3.5197, -7.6472,
        -1.9732, -3.4684, -1.6348, -2.6806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6906, -12.2439,  -5.7983,  -9.7056,  -5.1077,  -4.7662,  -0.4038,
         -0.9270,   1.3933,  -3.4807,   0.5927,  -1.2826,  -0.2075,  -6.9336,
          1.9422,  -2.4299,  -2.8623,   0.1262,  -3.9874,   1.2589],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8757, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1548, -2.7363, -4.2346,  1.2458,  1.4257, -2.4539,  1.0008, -2.1563,
        -0.3934, -2.2196,  3.2337, -2.1427, -0.5137, -6.2015, -5.3223,  1.3026,
         2.3165, -5.7214, -4.7816, -1.4297], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5033,  -4.7159, -14.8963,   1.6677,  -2.0761,  -0.2007,  -2.2673,
         -2.4192,  -4.7387,   0.6730,   2.6562,  -2.6075,   0.6434,  -1.5297,
         -3.4066,   1.9763,   3.3049,  -3.7734,  -0.2440,  -4.0550],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7861,  -0.7917,  -2.2704,  -4.5029,   0.5237,  -1.6300,  -4.0358,
         -3.2725, -13.3080,  -6.6498, -11.0019,  -6.8286,  -4.6297,  -2.4462,
         -1.9663,   4.0091, -16.7188,  -1.2105,  -1.7959,  -8.3024],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7807, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.2008, -3.8130, -1.5061, -8.4614, -3.7987, -7.0171, -0.2143,  0.1146,
         3.5296, -4.2568, -1.6794, -1.6301, -2.6200, -6.2463, -5.5022, -5.3527,
        -2.7260, -3.0359, -1.2569, -2.6049], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7427,   3.0190,  -3.6166,  -2.5429, -17.3507,  -4.3254,  -6.0932,
         -8.8007,  -6.5473,  -0.3646,  -2.5169,   0.3407,  -4.3705,  -3.6384,
         -2.7733,  -3.5304,   1.0124,   1.1607,  -2.9607,   0.3329],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5534,  -0.1718,  -0.6087,  -3.2455,   1.5213,   2.4739,  -4.8338,
         -4.2483,  -0.5612,  -0.9273,   0.1013,   4.8565,  -1.5446,  -3.9366,
        -18.0417,  -2.5975,  -7.0414,  -1.0714,  -0.1872,   3.6178],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6363, -3.2182, -3.5082, -6.6217, -8.9053, -6.1018, -3.1413, -3.2847,
        -0.2047, -1.6925,  3.8442, -3.5480, -2.0153, -2.9693, -0.7108, -5.3635,
         2.9489, -3.8945, -1.4095, -4.8771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4693,   0.0589,  -2.7743,  -0.7282,  -5.0132,   2.2835,   2.7775,
         -2.4133,  -0.9319,  -1.2075,  -2.1235,   0.8904,   1.4196,  -3.1730,
         -0.4432,  -3.7075,  -4.5983,  -1.0674,   3.3433, -10.9032],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2470, -1.8521, -0.4100,  3.8953, -1.7349, -1.0789, -2.2792, -0.2323,
        -1.6905,  1.5572, -3.4021, -2.9001, -9.8683, -8.9339, -3.5674, -5.8557,
        -0.3339, -1.8755,  2.1145, -5.1929], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9884,  -8.1090,   2.3835,   1.4608,  -3.2893,  -0.7055,  -2.4838,
         -1.7805,  -4.1018,  -1.4159,   2.9080,  -3.4180,  -1.3288, -10.6707,
         -6.4974,  -3.2445,  -5.2177,  -0.2844,   0.0118,   1.2549],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6213,   2.9754,  -7.5194,  -0.2869,  -2.3610,  -0.1185, -10.2590,
          2.8654,   1.6044,  -3.1795,   0.6286,  -1.7360,  -0.1189,  -6.6205,
         -5.8755,   2.1527,  -5.6947,  -1.4657,  -3.4200,  -5.3730],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8094e+00,  1.9472e+00, -9.4932e-01, -1.7223e+00, -4.6638e-03,
        -3.1165e+00, -2.1057e+00, -5.9297e+00,  4.8319e-02,  2.2133e+00,
        -5.3009e+00, -1.2300e+00, -3.1257e+00, -2.7294e+00, -3.5315e+00,
         2.6383e+00, -3.6956e-01, -3.0900e+00,  1.8097e+00, -2.3733e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5866, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6461,   0.5581,  -2.5995,  -0.7346,   2.0700,   3.9022,  -2.1921,
         -2.0862, -13.2416,  -3.8392,  -5.0843,  -0.1575,  -0.6175,  -0.6531,
         -7.4014,  -0.3908,  -3.0699,  -3.4393,  -3.9008,   2.3621],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 6.4796e-01, -3.1700e+00, -4.3306e+00,  4.3706e-01,  4.1034e+00,
        -2.9432e+00,  6.5761e-01, -1.7356e+00, -3.6401e+00,  2.6573e+00,
         2.4299e+00, -1.7715e+00, -1.7697e+00, -1.2233e+01, -5.0505e+00,
        -3.4355e+00, -3.8236e+00, -3.1123e-01, -1.2517e-01,  2.0471e-03],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6010,   2.0338,   3.4561,  -4.8694,  -1.9203, -13.2564,  -8.5774,
         -7.9434, -18.9715,  -5.2140,  -0.5559,  -2.1253,  -1.9910,  -5.6566,
         -1.0834,  -3.0229,  -4.4639,   0.5777,   2.1434,  -4.9824],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5352, -4.7882, -4.6270, -0.2858, -1.8299,  4.9028, -3.7393, -0.2120,
        -2.2736, -0.1126, -3.4941,  2.1099,  1.0442, -4.0558, -1.3449, -3.0442,
        -1.2900, -2.7376,  3.8359, -2.2774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2111,   0.0731,   1.4194, -12.1985,  -3.0674,  -3.5737,  -9.2634,
         -1.2141,  -1.8438, -10.6681,  -2.7851,  -3.1364,  -4.3913,  -6.0943,
         -1.6950,  -1.8863,  -4.7698,  -0.5278,  -0.7270,  -6.8438],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8702, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5509, -2.5437,  1.2196,  2.8867, -2.1338,  1.1876, -4.4481, -2.0196,
         0.8798,  2.3918, -2.5133, -0.6122, -4.8753, -1.6143, -4.2455,  2.6998,
        -2.4319, -2.7491, -2.2745, -1.7199], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2733, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9135, -0.4148, -2.6528, -1.4001,  2.1209,  4.1540, -4.7594, -0.2861,
        -1.9787, -0.3253, -4.9503,  2.3992,  1.3956, -3.0597,  0.2477, -1.2709,
        -0.8844, -5.0641,  2.4172,  2.3681], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3792, -0.6152, -3.6462,  0.1060, -1.1481, -0.1810, -3.7942,  3.9474,
        -1.9302, -2.7532, -1.9537, -2.6368, -4.5218,  0.1509,  2.4024, -1.8288,
         0.6317, -1.7114, -3.5736,  2.0467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9815, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2725, -3.2652, -1.7921, -2.6244, -0.3224, -4.9333, -0.1451,  1.7845,
        -1.3597,  0.2147, -3.9345, -1.6650, -0.2951,  4.3620, -3.0194,  1.4532,
        -3.4200, -8.1037, -5.7478,  1.2708], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4635, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6270, -2.0005, -3.7300,  0.0415,  2.8635, -5.5804,  0.2217, -2.3004,
        -4.3693, -0.5682, -3.3183, -3.3295, -0.6249, -3.9319, -2.2960, -6.5467,
         0.5105,  0.8460, -5.9719, -0.7868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3867, -1.9137, -3.1893,  2.1958,  2.8740, -3.0473, -4.0595, -1.6365,
        -1.7933, -5.6938,  1.3586,  2.5433, -3.1833, -0.8587, -0.1237, -7.1034,
         1.8512,  3.5743, -1.5488, -1.2512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2196, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8149, -3.7549, -0.3239, -3.2735,  0.6945, -4.6724,  1.8854,  1.8972,
        -4.0115,  0.6197, -1.8546, -3.5226, -0.6967,  2.7878, -2.0258,  0.2324,
        -2.1556, -3.5147,  1.4383, -3.6033], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1888, -1.6372, -1.2499, -2.2581, -6.0479, -0.3481,  0.2234, -3.6115,
        -1.4880, -9.0432, -5.2968, -3.8344, -5.4493, -2.9236, -6.5800,  1.5460,
         3.5389, -3.2935, -0.5492, -2.4109], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6743, -3.8868, -4.4153, -7.7006, -3.9459, -5.5913,  0.1666, -1.2396,
        -6.5616, -1.5488, -3.2639, -5.1582, -0.1031,  0.0866, -4.0289,  0.1954,
        -2.6969, -1.3266, -4.5269,  1.6630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.6665,  -4.2835,  -0.5923,  -3.6466,  -3.3010,  -2.2382,   0.4272,
         -1.4239,  -2.3345,  -1.3039,  -2.0685,   1.5846,   0.6149,  -5.3636,
         -1.5802,  -0.5623,  -0.6525, -10.3387,   5.5113, -16.6854],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0770, -4.7409, -1.7765, -4.0519, -5.5628, -5.3890,  1.2054, -5.8192,
        -0.5956, -4.9524, -6.4975, -2.1718,  1.9056, -6.1622, -3.7859, -1.3234,
        -3.5406,  0.2203,  1.1030, -3.5193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0639,  -2.2491, -10.7312,  -3.8962,  -4.8002,  -1.5916,  -4.2105,
          4.1388,  -8.8946,  -3.5508,  -2.1262, -17.8664,  -4.3434, -29.7154,
         -5.5071,  -1.9483, -23.4330,  -3.0410, -19.0234,  -6.1246],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.6489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3490, -4.6611, -4.5257, -0.7286,  1.7690, -1.9501, -1.5544, -0.2915,
        -2.2494,  1.8474,  3.5706, -2.1938, -3.2103, -2.0667, -0.2642, -4.2428,
         2.1286,  0.8599, -3.8875, -0.0645], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.6440,  -2.1571,  -1.8517,  -7.3064,  -6.8952,  -9.1493, -10.4767,
         -4.7232,  -0.1224,  -1.5409,   1.4941,  -3.0388,  -2.0084,  -4.1550,
         -0.9938,  -5.1617,   3.4625,  -2.5979,  -0.2370,  -3.3449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9580, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3462,   0.1918,  -1.8639,  -2.0758,  -4.0288,   1.4652,   1.5959,
         -2.0593,   0.9648,  -0.3988,   0.2159,  -2.6214,   1.7122,  -6.8830,
         -1.7920, -10.8489,  -3.2346,  -4.8649,  -1.1591, -14.8691],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4125,   2.6377,  -7.6930,   0.7886,  -2.1923,  -6.0260,   0.8042,
          2.4162,  -3.9743,  -1.8699, -14.3726,  -6.5428, -10.3310,  -5.6824,
         -4.4205,  -0.6788,   0.0599,   0.4227,  -4.8103,   0.6566],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1110, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3581,   0.9562,  -3.6263,  -0.2678,  -1.9179,  -5.6414,   1.2746,
          1.4730,  -4.1916,  -2.0337, -12.5749,  -6.6201,  -6.8567, -10.0610,
         -5.0430,   0.3214,  -4.3789,   0.3220,   0.1834,  -2.2424],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3493, -2.0254,  1.6477,  0.2698, -3.6981,  0.2208, -3.3479,  0.6167,
        -5.8082,  2.5903,  2.0995, -2.2731,  0.2898, -2.5971,  0.4773, -1.8977,
         5.3949, -3.0601,  0.6386, -2.9641], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7388, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6087, -5.5981, -7.1613, -5.0803, -4.1256, -5.3096, -3.1485, -4.9441,
        -4.4886, -4.7038, -6.3098, -9.7641, -4.1614, -6.1345, -4.1482, -3.4217,
        -6.8941, -5.2882, -5.0367, -7.4285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.3878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4986,  -5.8299,   0.6435,  -1.9398,  -4.0653,   1.2284,  -2.4656,
         -4.2835,  -3.5506,  -2.4254,  -3.6358,   2.3931,   4.0250,  -2.8627,
         -1.6899, -11.0725,  -1.8447,  -4.8277,  -1.9871,  -0.0193],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3003,  0.2469, -1.9546, -1.8585,  0.1236,  2.9932, -6.6742, -0.9975,
        -3.5219, -5.4285,  1.1297,  3.6304, -3.8201, -1.3849, -8.7751, -6.1211,
        -3.0634, -4.7301, -0.6270,  0.1233], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1445, -0.5494, -4.4359,  2.4304, -0.0662, -3.1099, -0.0353, -1.7138,
         0.0987, -1.5701,  2.4155, -2.6454, -0.6513, -4.8762, -2.6464, -2.9787,
        -0.8915,  1.7265, -3.2809, -1.2652], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2524, -1.4725, -2.9860,  1.0412,  2.6283, -2.0361, -0.6978, -2.2509,
        -1.4860, -4.4731,  1.9489,  2.6377, -3.3802, -0.9502, -0.8644, -1.8376,
        -4.1537,  1.8668, -0.1530, -5.4610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2166, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8772, -2.4911,  5.3876, -2.8732, -0.5334, -1.8974, -2.4611,  0.9614,
         0.8050, -1.1969, -1.2448, -4.5795, -2.6083, -4.1005, -0.3157,  0.2132,
        -3.0580, -3.6784, -1.0302, -7.8190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7341,  1.7246, -0.0560, -1.1185,  0.6705, -1.8303, -4.0364,  0.9633,
         2.8143, -2.1810, -2.9793, -1.6328, -3.4899, -6.7995, -1.3903,  0.1372,
        -2.0193, -0.4626,  0.0692, -0.7423], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8587, -0.8031, -1.9106,  3.8283, -7.4438,  0.3254, -2.4650, -3.9820,
         1.0689,  3.1947, -1.5911, -1.3459, -2.0314, -1.0423, -3.6012, -1.3654,
         0.7164, -1.4556, -1.6049, -0.9625], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7567,  -5.2520,  -2.9211, -15.0237,  -7.5546,  -5.2476,  -6.3330,
         -3.2366,  -4.0079,   1.7548,  -4.8299,  -2.5779,  -3.4233,  -1.9280,
         -6.6965,   1.2801,  -3.4171,  -2.4138,  -6.8822,  -7.1188],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3788,  -4.8248,   4.7538, -17.2262,  -3.0321,  -2.9371,  -4.6133,
         -3.1084,  -0.4035,  -2.0268,  -1.3377,  -5.0760,  -3.5235,  -5.8694,
         -1.1344,   0.6741,  -1.1740,  -0.6514,  -0.7933,  -4.3884],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8536, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9308,  -4.5702,   0.5299, -11.5366,  -2.3048,  -7.9413,  -1.2394,
          0.6212,   1.1107,  -4.0896,   0.5819,  -1.6728,  -0.7406,  -5.8099,
          2.4604,   2.9310,  -2.2342,   1.2871,  -1.9422,   0.1028],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.2177,  -3.5281,  -9.0344,  -1.0449,  -0.2621,   0.5126, -17.6670,
         -3.2775,  -4.2714, -25.2078,  -5.3340, -10.6708, -61.6707,  -2.5053,
          1.1811,  -3.6562,  -2.4046, -15.1566,  -5.0087,  -3.2716],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.3248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-41.0825, -21.7215,  -2.7466,  -5.6348, -19.6952,  -1.9521,  -2.3042,
        -15.1262,  -2.4996,  -0.6266,  -7.3945,  -1.1905,  -2.1586,  -4.3497,
          2.0010,   3.0430,  -4.7946,  -2.5482, -12.5159,  -3.0711],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.3184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5450, -12.8690,  -5.2202,  -6.0173,  -4.8573,  -1.0287,   1.3963,
         -5.0589,  -2.5742,   0.5046,  -2.0554,  -2.5592,   1.0628,   2.0057,
         -2.1530,   0.1761,  -2.7355,  -2.2109,  -3.5924,   1.3258],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0681,  -7.6884,  -7.4721,  -5.9295, -11.6019,  -7.5446,  -7.5877,
         -7.0830,  -7.8256,  -7.6980,  -6.7020,  -6.3917,  -9.6484,  -8.0286,
         -7.2815,  -7.1771,  -7.3854,  -8.1887,  -6.4596,  -8.3685],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7065, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6677,  3.2690, -3.6192, -2.2370, -1.1046, -1.3503,  1.4595,  3.4792,
        -3.3982, -0.7447, -3.0030, -0.8747, -3.5329,  2.8019,  1.2969, -2.1208,
         0.9469, -1.3015,  0.2420, -1.1887], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1280, -2.4927, -0.1371, -4.6074,  2.7212,  2.5432, -2.5895,  0.8533,
        -1.6743, -3.1318,  1.9270,  3.1831, -3.6137, -1.3373, -1.4563, -3.7895,
         2.5535,  2.5787, -2.0249,  0.1609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4218, -10.5154,  -3.6541, -17.9932,  -1.1487,  -2.6974,   1.9483,
         -4.2335,  -2.3035,  -9.4002,  -4.8575,  -3.6538,  -6.5309,   4.3334,
        -11.6911,  -2.1180,  -2.5877,  -4.7160,   0.6078,  -6.6967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5198,  -0.7574,  -1.1474,  -6.9466,  -5.5445, -13.3627,  -5.4817,
         -2.9799,  -2.6401,  -2.1688,  -1.5612,  -0.9745,  -1.3971,  -2.0137,
         -4.1683,  -1.0145,  -4.7372,  -0.1617,   2.8405,  -3.8887],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8788, -3.8594, -2.5760, -3.5129, -3.3622, -2.2612, -4.5869, -5.3238,
        -4.6959, -0.8901, -3.0004, -2.3266, -7.1221, -5.6074, -3.9841, -6.5117,
        -2.3969, -0.8355,  2.5784, -3.3111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4732, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8011,  0.5250, -2.4662, -0.4124, -3.5443,  2.3888,  2.0636, -3.7842,
         0.1579, -1.8741, -1.2570,  1.9399,  2.7700, -3.7675, -2.3444, -8.3161,
        -3.1862, -7.3456, -0.4392, -1.0009], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6347, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1750,  -2.4461, -17.9142,  -8.3841,  -4.5459,  -5.0135,  -1.8051,
         -0.1653,   1.7181,  -2.3528,  -5.9976, -50.3154,  -6.5739,  -5.9709,
         -7.4673,  -3.6621,  -3.3783,  -4.4594,   4.0794,  -7.0581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4828, -2.1854, -1.7978,  1.1807,  0.8031, -3.0145, -2.6467, -2.6202,
        -0.4094, -4.0205,  0.1340,  3.0718, -2.1785, -1.0698, -1.2786, -1.2518,
         1.3842,  3.6697, -1.7370, -2.1992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1817, -0.0138,  0.6518, -0.8766, -1.1540, -0.6292, -5.1411,  1.0062,
        -1.8462, -4.1284, -2.9485, -8.3913, -5.3326, -4.9882, -4.8207, -1.3643,
        -7.5894, -1.7136,  2.2266, -4.7886], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7512, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5449,   1.4790,   2.4794,  -2.9848,  -1.7060,  -9.8946,  -5.9541,
         -5.8928, -13.1220,  -4.6995,   0.2936, -10.6704,  -2.3730,  -3.3732,
         -0.3285,  -2.0676,  -3.7853,  -1.5045,   2.0442,  -3.4522],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8934, -4.9446,  2.8323, -0.4990, -5.3049, -1.6644, -2.0376, -1.4202,
        -5.8400,  1.2394,  1.2157, -4.4737,  1.0392, -3.2054, -0.4658, -3.6441,
         1.7998,  2.3440, -4.1738, -0.5573], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4183, -1.5876, -0.3892, -0.6890,  4.5995, -1.9709,  0.6271, -3.3907,
         0.5806, -5.3582,  1.4037,  1.7323, -3.9759, -2.0160, -3.3445, -2.9191,
        -2.0826, -4.2849,  3.0728, -4.8489], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8838, -3.0974, -0.1740, -4.5131,  2.7369,  0.8987, -4.5819, -3.2671,
        -1.9186, -0.3103, -2.5577, -0.6444, -4.8478, -0.1123, -4.0434, -3.1558,
        -2.1028, -2.8823, -4.7135, -0.8934], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3372, -0.2612, -4.3655,  0.4422,  2.7946, -2.6100, -0.5135, -3.9180,
        -2.2543, -3.4583,  1.3976,  2.8428, -2.9277, -2.0065, -1.2307,  0.2167,
         1.1565,  4.8242, -2.2252, -0.8762], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2737,  -3.4053,  -2.3136, -11.6822,  -4.2855,  -1.4694,  -2.3133,
         -2.0903,  -1.0165,  -2.8507,   0.2697,   0.4415,  -3.0657,   1.5134,
         -1.8469,  -1.5139,  -6.9246,   0.9465,   1.7621,  -3.2817],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2740, -2.2828, -2.1932,  2.0531,  3.0918, -2.9307, -0.0229, -0.7393,
        -1.9667,  1.7341,  2.0111, -2.4247,  0.7167, -3.3235, -2.0958, -4.5696,
         3.0772, -0.9620, -2.1284, -0.6892], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1320, -3.3695, -4.4997, -2.1542,  0.6872, -2.7210, -0.1074, -0.8621,
        -3.7437,  2.5834,  3.6028, -3.7438, -0.4218, -3.6872, -1.4726, -4.1682,
         2.1426,  0.7812, -1.9585,  0.2430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.9876,  -4.4898,  -3.1887,  -5.3820,  -1.3743,  -5.0177,   3.8112,
         -8.0442,  -4.2568,  -6.5614,  -5.2649,   1.2431,   0.5392, -10.9089,
        -14.3343, -35.9306,  -7.7054,  -7.6206,  -5.2555,  -3.0316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.7880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4972, -4.2374,  1.2024, -0.6341, -2.6652, -2.9362, -9.0063, -5.9343,
        -4.4199, -6.5157, -3.2064, -3.2815,  1.9620,  3.4835, -2.7928,  0.8512,
        -1.9096, -2.9055, -4.2005,  0.5525], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6493,  0.2761, -3.9766, -0.1164, -1.6056, -3.8068,  0.5724, -3.5291,
        -3.9476,  0.9931,  3.1812, -3.4488, -1.1926, -1.8741, -2.4628, -6.6044,
         2.1039,  1.3553, -2.8624, -0.5837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2002, -3.9062, -5.8310, -3.8845, -1.9706, -6.2762, -4.9422, -9.1191,
        -5.9957, -5.7365, -6.5961, -1.9446, -1.2840,  1.2246, -3.2938, -2.9289,
        -2.2459, -5.6056, -3.1283,  1.7504], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9809,  -0.8835,  -2.3173,   2.1169,   3.4719,  -1.5684,  -0.1822,
         -2.3517,   0.3065, -12.9623,   0.8356,   1.7347,  -2.4438,   0.0468,
         -4.7329,  -0.7751,  -3.2719,   0.0962,   1.9430,  -5.9828],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5451, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5076,  -5.0604,  -3.2830,   1.6544,  -3.7126,  -0.1702,  -2.0927,
          0.1156,  -5.0125,   5.0855,  -8.8613,  -0.3582,  -1.7974,  -6.1030,
          1.4750,   2.0683,  -5.2112,  -0.7684, -23.3912,  -6.7726],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1852, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1567, -2.8553, -1.6848, -4.3782, -4.3543, -1.9955,  0.0076, -1.8953,
         0.8676, -0.9054, -2.6216,  1.8003,  2.6048, -2.2128,  0.5928, -0.6924,
        -3.4897,  1.4899,  3.6644, -3.2110], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0013, -4.4084,  0.7481,  1.3237, -2.6017,  0.5432, -1.7661, -1.5858,
        -0.3017,  3.8481, -2.2535, -0.6636, -1.3139, -1.5812, -2.0425,  3.9187,
        -1.4856, -0.6552, -1.9250, -1.0762], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.9467e+00, -3.4324e+00,  9.4304e-01, -2.7946e+00,  1.1862e-01,
        -8.8304e+00,  5.1068e+00, -1.0989e+00, -6.7808e+00, -3.1773e+00,
        -2.0601e+01, -1.9479e+00, -5.3765e+00,  1.5427e-02,  1.0754e+00,
         2.1699e+00, -4.5498e+00, -5.8850e-02, -1.1436e+00,  1.2285e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5490, -5.1700,  1.2992,  1.2466, -3.7150,  1.1560, -0.5611, -1.6580,
        -5.0596,  4.4607, -1.6730, -2.7851, -1.7358, -2.1459, -4.0101,  2.0946,
         0.3408, -5.2960, -2.9123, -3.4839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6428, -31.8821,  -6.5473,  -0.8107,  -5.0452,  -1.2757,  -2.6163,
         -4.5865,   2.0695, -11.0104,  -2.8316,  -4.4967, -22.2873,  -5.8624,
         -4.0940,  -5.3253,  -4.0419,  -3.9416,   1.5279,   3.6274],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5326, -1.8840,  0.8609, -1.6676, -0.8989,  0.9738,  4.1360, -2.9870,
        -0.1203, -2.1090,  0.2725, -8.8574,  5.9877, -3.0020, -3.4684, -4.2935,
        -2.3977, -3.2758, -3.9723,  0.4760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1847, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9166,  -1.8729,   1.8119, -13.3358,  -0.5034,  -0.6949,  -2.8178,
          0.1176,   2.0184,  -2.8926,  -4.1030, -10.6547,  -4.4891,  -4.6084,
         -1.6395,  -3.7943,   4.1056,  -3.5602,  -6.9931,  -2.2785],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8551, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4567, -4.5526, -0.4241, -5.5840, -1.5134, -1.4697, -6.3897,  1.7911,
         2.1709, -2.2236,  1.0574, -2.1444, -0.5445, -6.7910,  1.5113,  2.1578,
        -2.5822, -2.3609, -1.3834, -2.0383], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7885, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4381e+00, -1.4525e+01, -4.2029e+00, -2.2709e-04, -7.6487e-01,
        -4.0569e+00, -3.3407e+00, -1.1214e+00, -2.7324e+00, -2.0880e+00,
         1.4934e+00,  3.5711e+00, -3.6012e+00,  4.8178e-01, -1.4146e+00,
         5.3106e-02, -5.5673e+00,  2.9374e-01, -2.0384e+00, -4.1626e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5117,  -4.2403, -10.1974,  -1.7667,  -4.4716,  -6.2416,   2.5646,
         -1.9819,   0.7878,  -3.7247,  -4.1945,   1.0203,   3.9788,  -2.3597,
          0.6023,  -4.3567,  -1.1052,  -4.7104,   1.7766,  -3.9577],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3043,  -3.8474,  -3.6440,  -4.5952,  -4.1499,  -3.6827,  -2.7369,
         -1.0613,   0.9220,  -5.5729,  -1.7484,  -1.6161,  -3.1772,  -3.7521,
          2.6370,  -3.9137,  -3.4844, -10.5631, -20.7560,  -2.2233],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5035,  -5.7095,  -3.6595,  -2.6057,  -5.4471,  -5.7419,  -4.2854,
         -6.4571,  -9.0424,  -3.6823,  -6.1767,  -3.9603,  -5.8381,  -6.6430,
         -3.7211,  -6.0981,  -3.6456,  -1.8670,  -2.6894, -10.5398],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8179, -1.0815, -1.1100, -1.8998,  0.9875,  1.7080, -3.5760, -1.5195,
        -4.5103, -2.5354, -3.1667, -4.2538, -6.4159, -5.7065, -2.1493,  0.4737,
         0.4232,  4.7578, -2.8397, -0.3626], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7297, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.3902,  -1.4693,  -3.5201,  -5.9731,  -2.8609,  -4.8041,  -4.8030,
         -4.9180,  -8.6951,  -1.0687,  -0.1227,  -9.7547,   0.7279,   0.6506,
         -4.5435,  -2.3227, -27.5275,  -6.2202,  -4.9512,  -5.5581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9894, -3.5173, -1.6061, -4.5003, -6.0347, -0.2714,  2.3258, -2.5659,
         0.2554, -2.9775, -0.4610, -3.1454,  0.8290,  2.7196, -5.7863, -1.6317,
        -3.0253, -0.3660, -5.5580,  3.1285], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7681, -6.0863,  1.3477,  1.2877, -2.3779, -1.7807, -0.4574, -0.6133,
         0.5743,  4.8637, -1.3158,  0.2293, -3.7921, -1.9007, -5.3893, -1.4082,
         0.0729, -5.0723, -2.0588, -2.8290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3737, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.5396, -6.0309, -3.8377, -4.5746, -2.4059,  0.4810,  4.0862, -6.7341,
        -0.0937, -1.7200, -3.8126,  1.2654,  2.8874, -4.1097,  1.0685, -2.2127,
        -0.4821, -1.4519,  1.6181, -2.3680], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7210, -3.0381, -3.8435, -4.7745, -1.1455,  2.3679, -2.5025, -2.6558,
        -4.1392, -1.2598,  0.5421,  3.9518, -2.7017,  0.5068, -1.9455, -1.6689,
        -0.0081,  1.4460, -1.4445, -0.4743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5254, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8466e+00,  6.4237e-03,  2.4722e+00,  4.5700e+00, -2.5942e+00,
        -1.5895e+00, -1.2673e+01, -4.1107e+00, -6.3929e+00,  1.3567e-02,
        -1.4325e+00,  5.0040e+00, -1.4736e+01, -1.2135e+00, -2.6318e+00,
        -7.2044e+00, -4.5662e+00, -1.2300e+00, -7.1962e+00, -2.4248e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1430, -2.0298, -2.4925,  4.7051, -1.8200, -0.2542, -2.0357, -5.9346,
         2.1877, -1.7107, -4.0040,  0.3550, -3.6547, -1.4569, -0.8400,  2.2518,
        -1.5973, -0.5472, -2.7100, -4.8809], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1993,  -3.2846, -14.0483,  -5.8252,  -4.6771,  -5.4191,  -0.5941,
         -2.4586,   1.1330,  -1.9393,  -6.6984,  -0.9323,  -2.6391,  -3.2424,
         -2.3522,   3.7242,  -2.0118,   0.6709,  -2.6446,   0.2025],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4673, -3.8279, -0.1998, -1.5314, -1.4588,  1.3580,  4.2481, -4.3013,
        -0.2539, -1.5927, -0.6472, -3.8460,  2.7461,  0.8173, -4.2227,  0.7567,
        -0.8906,  0.1467, -2.6198,  4.8069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0448, -1.3248, -3.1704, -2.7708,  3.1800, -2.4838, -0.6178, -2.6527,
        -2.1123, -4.0636, -9.1258, -2.2958, -0.5913, -3.3320, -0.3017, -5.3723,
         2.6844,  2.1553, -3.0665,  1.0135], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.5557, -10.4618,  -5.2974,   0.0900,  -5.0989,  -4.6340,  -9.2406,
         -9.7083,  -4.1836,  -5.3331,  -3.0838,  -2.9853,   2.6150, -10.6892,
         -4.5917,  -4.1878,  -3.1603,  -2.7251,  -3.4335,   2.4344],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9547,  -1.8861,   1.9303,   2.6165,  -3.1295,  -0.1835, -10.9783,
         -4.8674,  -7.9886,  -6.4440,  -0.3787,  -1.7955,   2.1198,  -9.1213,
         -1.5377,  -2.2386,  -2.5325,   0.1228,   3.6046, -16.1394],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2758, -10.6752,  -4.3573,  -4.6678,  -7.5495,  -3.9378,  -6.7371,
         -6.2940,  -6.6039,  -3.6070,  -6.7928,   0.5193,  -4.3547,  -4.4795,
         -5.4796,  -5.5314,  -5.8462,  -6.5482,  -6.7164,  -3.4471],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9668, -16.7523,  -8.2903,  -6.3354,  -7.1533,  -7.9494,  -7.0718,
         -8.2872,  -7.6629,  -7.0482,  -9.2840,  -6.5873,  -8.3297,  -7.5710,
         -7.4157,  -7.6183,  -7.7974,  -6.9343,  -7.0987,  -7.0390],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7151,  1.0081, -1.8540, -1.0224,  1.4630,  3.2427, -0.9076, -2.0132,
        -3.0702,  0.2096, -5.8177,  1.8421,  0.7131, -2.0621,  0.5248, -0.7513,
         0.5924, -2.0760,  5.4596, -1.8409], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4320,  1.8332, -0.7033, -4.6986,  1.2113, -2.5839, -1.4219,  1.7008,
         4.1684, -1.4551, -0.8858, -1.8843, -3.5846, -4.6698,  1.6658,  2.3540,
        -2.8974,  0.6882, -2.1889, -0.9880], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7503, -5.7813, -3.3486, -4.1939, -2.9535,  0.2496,  3.7277, -4.1257,
        -0.3672, -2.4574, -1.0063, -2.7958,  2.8732, -5.4312,  0.4607, -2.8852,
         0.3692, -3.8000,  1.0755,  3.0718], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3508, -2.2052,  1.1360,  3.5843, -2.5112, -0.2778, -3.3058, -1.8202,
        -3.1066,  1.3165, -1.9534, -2.5568, -2.0284, -4.2928,  1.9242,  4.0427,
        -3.1607, -1.6766, -3.1729, -2.9529], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0091,  2.5434, -1.8685,  1.7205, -2.0202, -2.7034,  1.7617,  3.4053,
        -3.6643,  0.9040, -2.6260, -0.2277, -6.5376,  2.2930,  0.8979, -2.2022,
        -1.3231, -1.5762, -0.3941, -0.6957], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0104, -2.3932, -2.0767, -8.5859, -4.7382, -6.3897, -4.8070, -4.2409,
        -0.6268, -2.4936, -1.5381, -6.0319, -1.6668, -3.0993, -7.0564,  0.1499,
         2.9129, -4.2256, -1.5916, -8.8852], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3488, -2.9479, -0.1414, -4.1174,  0.9995,  2.5801, -2.5081,  0.6737,
        -2.3516, -0.5818, -7.4438,  5.4625, -3.3064, -0.1902, -3.3112, -2.4669,
         0.1887,  3.1623, -4.1513,  0.1782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-18.5234,  -2.0900,  -0.3066,  -1.9326,  -2.0268,   1.4146,   3.8962,
         -1.9708,  -0.0681,  -1.9477,  -0.1399,  -4.6998,   2.8790,   2.0396,
         -2.0534,   0.4241,  -1.1474,  -3.9306,   0.3607,   2.4935],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3665, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5070e+00,  3.2485e+00, -4.3879e+00,  4.6074e-03, -3.5848e+00,
        -1.5542e+00, -1.2106e+00,  1.5890e+00, -4.4174e+00, -3.8217e+00,
        -2.7026e+00, -5.5940e+00,  2.3402e+00,  1.0343e+00, -3.1091e+00,
        -1.8228e+00, -3.4951e+00, -5.0276e+00, -3.0978e+00,  2.0380e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-57.6817,  -3.7111,  -1.3618,  -1.8067,  -2.0566,  -8.7292,   3.8994,
         -5.6310,   0.8512,  -2.4507,   1.1329,  -4.8282,   2.4122,  -1.4754,
         -3.8537,  -0.3379,  -3.1140,   0.4310,  -3.6367,   2.1924],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1155, -0.2890, -3.2549, -1.5151, -0.9791, -5.2738,  2.3065, -7.7706,
        -4.8527,  1.1529, -4.1180, -0.2387, -3.7358,  1.3616,  2.7705, -4.1048,
         0.9098, -2.9852, -1.2031, -3.5256], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2074, -1.5973, -0.5738, -1.8985,  0.2014,  2.3433, -1.9290, -2.2209,
        -1.6236, -1.5120, -5.5186, -0.5207,  0.4342, -6.1415, -1.0569, -0.9153,
        -1.4241, -3.3619,  3.2851, -5.5576], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6398, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5801, -2.0716, -1.7981,  0.8587,  3.3711, -1.9379,  0.3911, -3.5332,
        -1.2243, -5.9357,  3.1614,  0.3521, -2.2271,  0.6539, -1.3603, -3.2319,
         2.5177,  2.8966, -2.8137,  1.4052], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4973, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8544, -6.5337, -4.3880, -2.4635, -6.6020,  4.3614, -4.0361, -3.2679,
        -3.9792, -0.7443, -4.1855,  0.4575,  0.3383, -2.2766,  0.6451, -3.2553,
        -2.2491,  1.8302,  3.0443, -1.2160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9897, -1.5616, -2.5427, -3.3146, -3.9794,  3.0084, -1.4972, -1.3115,
        -2.4323, -2.2290,  1.3646,  3.7534, -3.5957,  0.3469, -4.7988, -0.7812,
         0.5694,  4.6232, -4.0392, -1.7944], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9609e+00, -5.5907e+00, -6.0898e+00, -2.7144e+00,  4.6852e-01,
         3.5595e+00, -6.5819e+00, -4.4821e-02, -2.3314e+00, -3.7322e+00,
         4.9447e-03, -9.8350e-01, -3.1504e+00,  3.1736e-01, -2.1346e+01,
        -3.8697e+00, -8.1934e+00, -1.4441e+00, -4.9341e+00,  1.9079e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7651,  -1.7997,   3.7280,  -8.1612,   0.6482,  -3.2112,  -3.3668,
         -0.1461,   3.8225,  -6.6685,   0.5828,  -2.4562,  -4.0294,  -4.8730,
         -1.6854,   1.0107,  -2.7820,  -3.0098,  -5.9381, -18.0360],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6884, -2.1198, -4.1345, -2.3906, -0.6714, -6.2700,  1.8016,  2.5094,
        -4.3733, -0.7965, -2.0442, -0.3423, -1.8691,  3.3776, -4.3864, -0.8317,
        -4.3052, -2.6498, -3.2928,  1.6483], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5226, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0424, -6.1743,  0.1684, -1.5523, -4.1750,  1.6706,  3.0976, -1.8236,
         0.9499, -2.2424, -0.3312, -2.9459,  4.7922, -7.3069, -1.4875, -2.4764,
        -7.7908, -1.0950, -1.8564, -4.1677], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0931e-01, -1.9045e+00, -1.4837e+00,  1.7234e+00,  4.4720e+00,
        -2.0486e+00, -6.3022e+00, -4.1800e+00, -3.0745e+00,  5.6809e-01,
         3.0340e+00, -7.2997e+00,  3.8676e-01, -1.1286e+00, -4.9801e+00,
         1.3731e-02,  2.8664e+00, -1.9215e+00,  1.3777e+00, -2.5599e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2685, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4981, -5.2596,  0.7816,  2.5126, -3.1281, -1.0557, -2.7533, -9.8950,
        -1.4067,  0.9933, -4.2108, -1.8840, -1.6141, -1.9840,  2.3986,  3.6230,
        -2.3794,  1.0436, -1.2855,  0.2107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0372, -0.7021, -3.4064, -1.4513, -2.9130,  2.8687, -3.0055, -1.4720,
        -2.7814, -1.0940, -6.5039,  2.3379,  0.4134, -3.6569,  0.5433, -2.4360,
        -1.3627, -0.2242,  4.0820, -2.4519], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5627, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5627,  -1.8750,  -3.1651,  -0.2658, -19.5095,  -0.0595,   1.0829,
         -4.6443,  -0.5574,  -4.3146,  -4.2615,   0.4154,   2.2819,  -5.0016,
         -2.9310,  -1.1059,  -2.7262,  -5.6384,  -1.0474,  -0.8921],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8043,  0.9532, -1.7034, -1.8829,  1.1253,  2.5578, -1.4959,  0.4723,
        -1.4204, -0.4149, -1.5122,  3.7022, -1.8429,  0.1297, -2.3223, -1.1506,
        -1.2331,  5.1211, -2.6163, -2.1820], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2497, -1.6301, -2.0152,  0.8048,  4.5734, -2.4614,  0.7128, -2.7435,
        -1.7848, -5.2594,  0.3680,  1.6968, -2.2602, -0.8028, -1.2631, -3.2763,
         1.9347,  2.6271, -4.1267,  1.2090], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6973, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2585,  0.7307,  1.6336, -3.2298, -0.7355, -2.8233, -1.2281, -4.4963,
         1.3801,  2.0753, -4.9934, -5.5432, -2.2011, -3.9550,  2.6441,  2.5958,
        -2.1998,  0.3260, -1.1239, -1.0027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3703, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9385, -2.6586, -0.9954, -4.2779,  0.6456,  1.8999, -2.3906, -0.8404,
        -2.5735, -0.8461, -9.1859,  1.5491, -1.2617, -2.9012, -1.4015, -2.2516,
        -2.2200, -5.4377,  2.5420,  2.2811], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5110, -0.5502,  2.5799, -2.1195, -6.1181, -1.8999, -0.9704, -4.2310,
         2.2915,  1.8003, -2.1500,  0.7214, -3.4921, -1.3779,  2.7064,  4.2083,
        -2.4607,  0.9058, -2.1500, -0.6003], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.7289, -7.4102, -0.6233, -3.5069, -0.6539, -4.5610,  0.9585, -2.1172,
        -4.1769, -0.1153, -8.8878, -5.0200, -3.9093, -4.6289, -0.2058,  0.4827,
        -2.1496, -6.6177, -2.4607, -2.3950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2699,  -2.8989,  -1.0604,  -1.3989,  -1.1768,   1.3581,  -1.4769,
         -4.2813,   0.1235,  -8.2130,  -4.3621, -11.2080,  -1.5477,  -2.7944,
          4.0104,  -5.8388,  -3.3768, -10.5282,  -5.1333,  -1.9366],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0571,   0.7658,  -2.4900,   0.1280,   0.8079,   1.6444,  -3.5235,
          0.7291,  -3.3219,  -0.9731,  -4.0205,   2.0290,  -2.9053,  -1.6243,
        -14.2265,  -3.0389,  -5.9051,  -1.4065,  -4.5398,   2.7995],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8524, -3.9940,  0.9833,  2.4550, -2.5900,  2.1832, -1.9052, -3.3164,
         1.3506,  4.2395, -4.5260, -0.7862, -2.4400, -4.8923,  2.5122,  3.0859,
        -6.5863, -1.5172, -0.7155, -2.4103], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0361, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.3287,  -6.1501,  -2.2893,  -2.3262,  -0.0129,  -9.7119,  -1.0436,
         -4.8345,  -4.8949,   0.1134,   1.3157,  -4.9844,  -3.4302,  -4.8058,
         -2.6487,  -8.0539,   2.7116,  -2.5379,  -0.3128,  -3.5158],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1501e+00,  2.7060e-01, -3.1869e+00, -3.6608e+00, -1.8321e+01,
        -2.6228e+00, -6.5323e-01, -9.5070e-01, -4.6454e-01, -5.4661e-01,
        -3.2236e+00,  1.3551e+00,  1.6115e+00, -3.6099e+00,  1.6868e-01,
        -4.8602e+00,  8.8562e-03, -5.7526e+00,  4.3921e+00, -9.7057e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1583, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.8629,  1.4132, -3.9916, -0.7743, -2.7476, -2.7277,  2.4538,  4.4945,
        -2.6053, -0.4035, -3.7748, -2.8471, -3.5152,  0.5242,  1.9687, -5.5896,
        -1.5775, -2.9223, -3.8880,  1.1987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6415, -0.6449, -0.4108, -6.1003,  2.3710,  0.5955, -1.8675, -0.5851,
        -1.0626, -2.9319, -4.1326,  1.7888,  2.7905, -3.0978, -0.9213, -1.1164,
        -0.5202,  1.8220,  4.5720, -2.4690], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7298,  3.8287, -2.9247, -0.9243, -1.3311, -1.1654, -5.7551,  4.8430,
        -3.9556,  0.5108, -2.3909,  0.2206, -3.2985,  2.5217,  2.6604, -3.2125,
         0.7452, -3.3461, -1.8736, -4.6570], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8887, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1382,   1.2618,   3.9584,  -2.7137,  -2.2266,  -8.7329,  -1.9127,
        -24.7800,  -2.9741,  -0.8124,   2.8104,  -5.0769,  -0.2352,  -4.3864,
         -0.4048,  -7.0929,   2.4219,   1.7681,  -3.4085,  -0.1712],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6923, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1531, -3.4973, -6.9445, -3.3911, -6.5042, -1.2692, -0.3154,  5.5593,
        -6.1155, -2.7942, -4.4608, -2.1893, -1.1432, -4.1627, -3.4463, -2.8661,
        -0.4881, -0.8579,  0.9921,  5.4225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0212,  -0.5424,  -4.6781,   2.5743,   2.6375,  -5.0478,  -0.9530,
         -2.2464,  -2.3769,   0.1267,   3.1066,  -4.0697,  -2.4772, -11.5129,
         -2.9287,  -7.3531,  -0.5862,  -0.4001,   2.7811,  -4.4301],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0199, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3345, -4.2818, -0.7511,  2.2608, -3.7250, -3.3554, -3.4466, -2.6194,
        -2.6510, -1.0129,  1.8002, -1.5353, -4.2142, -5.2994, -3.7927, -6.6585,
         0.7375, -5.2200, -1.9647, -1.9226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3919,  2.5342, -2.4439, -1.5578, -3.1652,  0.4105, -8.9125,  1.5218,
        -0.4594, -4.2958,  0.0555, -3.2748, -2.8430,  1.2543,  3.7076, -0.9987,
         1.3949, -1.3790,  0.4706, -5.1895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6095,  0.2635,  0.4316, -2.7485, -0.6859, -2.7357, -1.1459, -4.2289,
         2.0668,  3.2218, -2.5403,  0.9388, -3.5998, -1.9177, -4.5662,  0.9583,
         2.6370, -2.3926, -0.2025, -1.6088], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7533,   0.4260,   2.8006,  -1.7655,  -0.1003,  -1.2060,  -2.8578,
          0.7748,   2.1776,  -5.5100,  -2.0482, -13.9393,  -5.2523,  -6.4566,
         -0.5322,  -3.2328,   5.6827, -13.2991,  -1.1513,  -1.6283],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7584, -4.6594, -2.7741,  1.4888, -5.5692, -3.0461, -3.5172, -7.7293,
        -4.4328, -4.8898, -3.9876, -3.1924, -9.5326, -8.0970, -4.4084, -4.2923,
        -2.2411, -2.9245,  3.0822, -4.2316], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.3159, -15.0478,  -2.5518,  -2.1937,  -2.8002,  -3.5196,   1.6794,
         -1.7288,  -2.0544,  -4.1705,  -1.5610,  -3.5131,  -2.9658,  -4.4906,
         -2.7064,  -5.9716,  -2.0595,  -5.0972,   1.5692,   2.9342],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6467, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6122, -7.4062, -7.1506, -4.0589, -5.3731, -1.7450, -3.2792,  3.9591,
        -3.7376, -2.2218, -4.2408, -4.9351, -2.4335,  0.6150, -5.1992, -1.4501,
        -6.8613, -6.2359, -4.9328, -4.0089], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1417,  -0.2077,  -2.9437,   5.1418,  -6.8080,  -0.8732,  -2.5981,
        -13.5653,  -5.6547,  -7.5372,  -4.3439, -17.1502, -18.8273,  -2.8694,
         -5.0395,  -4.6269,  -5.1673,  -2.9838,  -0.9570,   4.1886],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7482, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2239, -0.2783,  4.5433, -3.7793,  0.6394, -2.2945, -0.0983, -2.4249,
        -0.1901,  3.8144, -1.2896,  0.5045, -2.7680, -0.2464, -5.7792, -0.0788,
         1.6283, -1.0754,  0.7298, -0.8289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.6387e+00, -3.2266e+00,  1.0345e+00, -2.8026e+00,  1.1763e-01,
        -8.9764e+00,  5.1747e+00, -1.2888e+00, -6.7679e+00, -3.2609e+00,
        -2.1106e+01, -1.9976e+00, -5.2871e+00,  5.3531e-02,  1.1922e+00,
         3.8203e+00, -4.3219e+00, -1.0022e-02, -1.1073e+00,  1.2491e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4657, -5.0398,  1.3420,  1.2348, -3.6162,  1.2191, -0.5411, -1.5735,
        -5.1767,  5.0058, -1.7558, -2.6655, -1.6814, -2.1025, -3.9503,  2.4384,
         0.0589, -5.4125, -2.9173, -3.5288], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3852, -10.0755,  -1.9280,  -2.1730,  -0.7192,  -2.6906,   5.2088,
         -3.5892,  -5.6675,  -4.0294,  -2.8744,  -3.9271,  -5.2494,   2.0491,
         -6.6340,  -0.3359,  -0.3955,  -4.2612,   1.5722,   1.4613],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7454, -0.3758, -1.1984,  2.2908, -3.6362, -0.3102, -1.0885,  0.4443,
        -5.8007,  4.5886, -3.8128, -0.0633, -4.4228, -1.4189, -3.3762, -2.1051,
         2.4099, -1.7130, -1.3202, -0.9405], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1797, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9368, -11.8642,  -2.9652,  -6.0439,  -1.6729,  -6.1976,  -0.7375,
         -4.6484,  -6.9804,  -4.2344, -12.7828,  -4.1896,  -5.6093,  -0.6492,
          0.1007, -17.2689,  -4.1907,  -1.5467,  -3.4982,  -1.9356],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6686,   0.7045,  -4.6719, -10.9909,  -1.8013,   1.7265,  -2.4556,
         -0.4243,  -1.0513,  -5.5817,   1.6318,   1.2398,  -2.5882,  -0.8910,
         -9.5855,  -5.0309,  -4.9219,  -4.2816,  -1.8849,  -1.0243],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5764, -2.9526, -0.6739, -4.9162, -2.7233,  2.2391, -5.5480, -1.8263,
        -4.8293, -4.1267, -4.8261,  2.6880, -2.1070, -0.6699, -2.2834, -1.1834,
        -6.0745,  1.8382, -0.5109, -3.7156], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6807, -4.2626, -1.5079, -3.1981, -1.7803, -0.9363, -0.0401, -3.9073,
         2.0725,  1.9553, -4.7564, -0.8238, -3.1196, -1.1169, -0.2719,  4.0849,
        -1.2041, -0.1285, -1.2346, -1.5557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4206, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4758, -4.7228, -6.6782, -5.1424, -6.5168, -2.4315, -6.1948,  4.0549,
        -6.1080, -2.5243, -2.7319, -1.0332, -5.8264, -0.9229, -1.4344, -3.8587,
        -2.7355, -1.8541, -0.9858,  2.5550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6814, -2.5475, -8.5871,  0.8774,  1.4667, -2.1162, -0.2938, -9.6014,
        -8.4758, -4.7228, -6.6782, -5.1424, -6.5168, -2.4315, -6.1948,  4.0549,
        -6.1080, -2.5243, -2.7319, -1.0332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2619, -2.8865, -1.0790, -1.5877,  1.8297, -1.7557, -1.5744, -3.5757,
        -1.1562, -3.6071,  0.1654,  3.1373, -2.9888, -0.3751, -1.9144, -1.6119,
        -2.0800,  3.3652, -2.9936, -1.6486], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2800, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2370, -1.0362, -0.8853,  2.4878, -6.8592, -1.0543, -2.5941, -2.8169,
         2.1927,  3.3078, -3.0396, -0.8966, -2.4716, -1.8487, -4.2266,  2.2059,
         2.1894, -3.8398, -2.8152, -6.0885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.2739, -2.7595,  1.1814, -1.9833, -1.6057, -0.2906,  2.6174, -1.2309,
         0.8084, -1.3672, -1.7790, -5.0752,  0.7709,  2.5145, -2.4157,  1.7997,
        -2.2563, -4.0190,  1.6925,  2.1436], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5418, -3.1774, -0.7369, -2.6687,  0.3069,  0.0177, -8.7956, -1.4018,
        -1.8356, -4.0792,  0.8093,  2.7879, -3.1538,  0.0715, -3.4807, -0.1798,
        -2.6808,  1.6435,  3.2329, -4.6557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1168, -0.3753, -3.7972, -0.5038, -4.9122,  1.3721, -0.4761, -3.9501,
         0.4901, -3.2993, -3.9350,  0.3261,  0.8262, -2.7209,  0.6855, -3.5239,
        -2.2697,  0.6454,  2.9903, -2.6209], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3583, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7483,   0.4601,  -4.1622,  -0.3389,  -3.5317,   1.1668,   1.0015,
         -2.7009,   0.5200,  -1.7442,  -0.0281,   0.9233,   4.7670,  -2.6726,
         -0.2995, -12.0610,  -2.4825,  -4.1515,  -1.3470,   2.0922],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3169, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9619, -4.5297,  0.7189,  1.2818, -1.3790, -0.0846, -1.8146, -1.3266,
        -5.8770,  0.5626,  1.1811, -1.7122, -0.0338, -0.9144, -1.1634,  1.6494,
         3.6981, -3.3808, -1.0976, -3.3751], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1104, -1.1310, -2.0532, -1.2437,  3.8576, -4.3910, -0.5909, -1.9204,
        -3.2233, -4.1639,  0.7315,  2.2186, -3.2069, -1.1418, -2.1961, -6.4044,
        -0.1143,  2.2474, -2.9321,  0.3634], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3202, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6335, -1.6275, -4.3239,  2.2104,  1.9744, -2.8712, -1.5172, -3.0853,
        -1.8568, -4.5777, -1.2111,  2.6772, -2.5471, -0.0286, -1.1430, -2.3070,
         2.4668,  0.6995, -3.6807, -1.5072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8097,   0.4745,  -5.0198,  -1.6579, -11.1208,  -6.0350,  -4.9434,
         -4.8334,  -1.2718,  -0.3222,   1.8478,  -3.8075,  -0.2929,  -4.1662,
         -1.7539,  -0.0903,   2.7562,  -2.6239,   0.4264,  -2.9726],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2299, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5931, -6.8484, -5.0023, -5.0874, -4.5634, -0.9810,  0.6024, -5.7771,
        -4.4478,  1.1519, -2.2950, -1.1716, -1.9430,  3.0449, -3.5000, -0.3234,
        -4.6816, -0.4988, -3.5974,  1.1609], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9368, -11.8642,  -2.9652,  -6.0439,  -1.6729,  -6.1976,  -0.7375,
         -4.6484,  -6.9804,  -4.2344, -12.7828,  -4.1896,  -5.6093,  -0.6492,
          0.1007, -17.2689,  -4.1907,  -1.5467,  -3.4982,  -1.9356],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3466,  -0.0701,  -0.4753,  -1.4401,   1.6562,   4.4909,  -2.0779,
          0.3017,  -0.2957,   0.4728,  -4.6950,   5.1194,  -8.4985,  -2.2935,
        -33.3760,  -4.3180,  -9.8422,  -0.3603,  -4.3727,   3.1946],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9613, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3202,  1.7074,  2.7678, -2.2063,  1.4410, -2.0800, -0.1371, -2.6680,
         1.9530, -5.7508, -5.7045, -1.4983, -5.1366, -1.7973, -7.1008,  2.9941,
        -2.4456, -0.8036, -2.7853, -3.8249], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7654,  0.3381,  0.9389, -1.7187,  1.1892, -2.7469, -0.2387, -6.9263,
         2.2421,  1.6690, -1.8321,  0.4985, -1.9232, -2.0029, -6.9516,  2.1913,
         2.1446, -2.5552, -0.8992, -2.6101], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4403,  -3.1605,   0.3666,   3.0350,  -2.7798,   1.5298,  -3.1917,
         -0.0759,  -2.8160,   3.9156,  -3.1412,  -3.2066, -12.0472,  -2.8170,
         -4.1401,  -2.3029,   0.8166,   5.2549,  -4.8000,  -1.0198],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3625, -10.2017,   3.9086,  -0.8567,  -3.2975, -14.4238,  -8.3533,
         -2.6129,  -5.5639,  -1.3650,   0.7963,   4.6243,  -3.3757,  -0.7911,
         -1.6113,  -2.2494,   1.6687,   3.7207,  -1.7378,  -0.9846],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1534, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9478,   0.8867,  -5.9860,   0.8252,   1.0905,  -3.3389,  -0.5025,
         -1.9859,  -0.4256,  -5.9433,   5.3326,  -6.3945,  -5.1216,  -6.8069,
        -29.4566,  -2.0281,  -5.4949,  -2.4433,  -2.5512,   4.2927],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0731, -2.7778, -4.7060,  2.1807,  2.8844, -4.7229, -2.5948, -3.7618,
        -1.1066, -6.3909, -0.6938,  0.0262, -2.5502,  1.0121, -2.2141,  0.4212,
        -4.4600,  2.3095, -2.2666, -2.5045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4907, -0.4893,  1.6687, -3.3314,  0.0967, -1.4207, -1.5672,  2.0018,
         4.3304, -2.7337,  0.5135, -9.1259, -5.8795, -3.5378, -4.5521, -0.6698,
         0.3461,  0.5579, -5.7027, -4.8802], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8318,  -1.4659, -16.9182,  -9.8905,  -4.3872,  -4.1261,  -3.6697,
         -1.3384,  -0.8493,   0.2472,   5.5294,  -3.6608,  -2.0718,  -3.7451,
         -2.7985,  -3.9371,  -1.2299,   1.9065,  -6.0171,  -5.1276],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3281,  -6.0743,  -5.3567,  -4.8491,  -6.1128,  -5.4593,  -6.1971,
         -6.0743,  -3.1268,  -3.8901,  -3.5309,  -3.7521,  -5.8234, -10.4221,
         -4.2481,  -7.9471,  -5.6188,  -6.3058,  -3.9279,  -4.2203],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0379,   1.4837,   2.4648,  -1.8920,  -0.2670, -11.0436,  -3.4417,
         -6.1797,  -0.3173,   0.8301,   1.6463,  -5.2122,  -1.7755,  -1.4371,
         -5.4178,   1.5371,   3.7364,  -2.1816,  -3.4425,  -3.0961],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1103,   0.8239,  -1.3752,  -3.0648,   1.9700,   2.4285,  -6.7006,
         -3.9596,  -8.2032,  -6.0587,  -2.9330,  -5.6098,  -1.3656,  -3.4305,
          0.5505, -15.1924,  -2.1900,  -3.8097, -12.1348,  -1.0433],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4747, -0.6270,  1.6680, -2.1185,  0.1327, -1.0110, -1.0890, -5.4874,
        -2.4384,  0.1234, -2.8801, -0.3575, -4.7036, -4.3118, -0.6566,  0.7736,
        -4.6542, -1.3767, -3.0565, -3.5577], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6023, -5.8721,  3.2243, -0.9815,  0.1626, -0.8534, -5.7826,  0.9490,
         3.0213, -2.8123, -0.3502, -9.9344, -2.7479, -6.5817, -0.3228,  0.0492,
         3.7545, -7.6449, -2.0852, -2.6247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0017, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7917,  2.4812,  3.5125, -3.1429, -1.3660, -1.3700, -1.8027, -3.1353,
         1.1649,  2.7273, -4.8082,  0.1778, -0.9508, -1.0458,  0.3716,  3.1922,
        -2.6417, -1.4221, -4.4208, -2.5329], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3327e+00, -5.0758e+00,  4.1063e+00, -2.6249e+00,  9.5720e-01,
        -2.4904e+00, -2.8020e+00,  1.8117e+00,  3.1952e+00, -2.8734e+00,
        -1.9185e-01, -4.2050e+00, -1.5584e+00, -6.0666e+00,  3.3491e-03,
        -1.4711e-01, -2.2164e+00,  1.9794e-01, -4.8369e-01, -1.3211e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2059, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.0687,  -9.5058,  -7.8515, -10.8511,  -5.1519,  -0.2775,   0.5649,
         -7.7354,  -8.9703,  -1.9567,  -2.3362,  -2.6791,   0.2214,   3.1836,
         -1.8995,  -6.0268,  -4.6435,  -2.8109,  -5.0566,  -1.2452],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8376,  -1.2527,   1.3615,  -1.9415,   0.2971,  -2.1629,  -5.9872,
          1.2246,   1.7027,  -3.0169,   0.0479,  -6.6409,  -9.6044,  -3.0371,
         -5.4133,  -0.5487, -17.3762,   4.6216,  -4.2836,  -3.1459],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3276, -4.4030, -0.5053,  1.4921, -1.8194, -0.6864, -3.3287, -3.1993,
        -2.9175,  2.2245,  2.5229, -3.3790, -0.9201, -0.9576, -3.1942,  0.2103,
         3.3257, -1.9525, -2.1727, -2.9720], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3940, -1.6438, -2.4133,  1.2209,  1.7878, -2.3316, -0.8772, -3.7551,
        -0.7971, -4.2793,  0.5607,  0.8931, -3.5236,  1.8640, -3.6123, -0.1915,
        -3.6288,  0.7703, -4.4558, -2.7552], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9055, -4.5282,  1.8877,  3.3228, -1.6458, -1.6275, -1.2053, -1.7491,
         1.6400,  3.6361, -2.5942,  0.5462, -2.1249, -2.7159,  1.1003,  3.5874,
        -4.0060,  0.7012, -8.3204, -5.9330], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0467, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9757,  1.8033, -1.8622, -1.5713,  2.4049,  2.9114, -1.7285, -0.9714,
        -1.0188, -3.9259,  0.7840,  0.7582, -2.4577,  1.3870, -0.6050, -3.1494,
         2.9938,  3.6268, -3.8114,  0.7195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0921, -2.9412, -2.7303, -2.7481,  0.6051,  2.4552, -1.8725, -1.3444,
        -1.2562, -0.6163, -7.1186,  1.4109,  2.1987, -3.9544, -0.6181, -1.9463,
         0.3173, -4.3048,  2.3483,  2.7349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0800, -3.9063, -1.7645, -2.2950, -0.4714, -4.7016,  3.1047, -7.3771,
        -3.1845, -2.8029, -2.3209, -5.5368, -1.0161,  1.3602, -3.1045, -5.2583,
        -1.8637, -1.0079, -4.7077,  1.9320], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2501, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9514,  -3.8654, -11.2531,  -1.3767,  -3.8714,   3.8464,  -5.3131,
         -3.8404,  -2.1628,  -1.7851,  -4.2256,   1.3908,  -2.2379,  -4.0577,
          0.5908,  -3.7479,  -0.2092,  -1.8508,   2.4230,  -2.2805],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0177,  1.5505,  2.1786, -2.5073, -0.2478, -1.7559, -0.9706, -3.2678,
         4.5710, -2.8752,  1.1810, -5.3663, -3.8338, -2.9094,  1.2705, -1.2116,
        -0.5400, -0.4338, -2.1920,  1.7045], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4431,  2.7641, -0.1771, -2.9759,  1.1769, -2.7378, -1.0825, -4.8219,
         1.2229,  1.7198, -2.5466,  0.6776, -2.8391, -0.1396, -4.8238,  0.5839,
        -0.0220, -1.6891,  0.6641, -2.0449], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5722,  0.1970, -3.8183, -0.1366, -3.0954,  1.2958,  0.1529, -1.5470,
        -0.9576, -3.3367, -1.8568, -2.7815,  4.6605, -2.6757, -0.6885, -3.1613,
        -1.1172, -3.2261,  0.2121,  3.0236], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4567, -6.7230, -4.7895, -6.8769, -3.8841, -0.2770, -1.3518, -3.4779,
        -4.0900, -3.9302, -3.4195, -3.3590,  1.9514, -4.6775, -4.0186, -2.9492,
        -3.0069,  1.4842, -5.0629, -3.5819], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3217, -2.5146, -5.5791, -3.0012, -5.0707,  1.0870, -4.1251, -2.4292,
        -8.0889, -5.3721, -5.1997, -5.8342, -3.1726, -1.5385, -1.8268, -3.8836,
        -1.9023, -5.5950, -9.1947, -3.3681], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0965, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7610,   3.7323,  -3.2025,  -0.0838, -34.5419,  -2.4538,  -8.2329,
         -0.2686,  -1.8752,   2.9209, -13.2437,  -2.3449,  -2.3743,  -7.9276,
         -4.7233,  -0.6895,  -8.4953,  -8.8304, -23.2769, -15.0499],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4100, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0576,  -4.3028,  -3.0051,  -2.6470,  -3.1531,  -2.3001,  -7.9255,
         -0.7704, -10.5632,  -0.7566,   1.5843, -11.4993,  -1.3521,  -3.0711,
         -4.6147,  -0.2132,   1.0572,  -2.7323,  -1.2887,  -9.7656],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1730,  4.1455, -1.6993,  0.0888, -3.1078, -0.4607, -5.1151,  0.7676,
         1.4848, -6.5389, -1.1074, -2.8504,  0.2753, -4.3913,  2.8422, -0.4297,
        -7.6002, -1.4949, -4.9942, -1.3327], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1345, -2.3673, -2.7486,  1.0875,  3.3185, -3.0223, -1.3910, -2.7258,
        -2.8905,  0.1724,  1.6376, -4.1030, -0.0777, -2.5704, -0.2214, -0.6775,
         1.9424, -2.0494,  0.6895, -2.4293], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.7696, -1.3560, -1.8294, -3.7763, -0.5842, -4.1125,  1.5904,  1.6002,
        -2.2139,  0.2384, -2.4073, -6.3380,  2.2910,  3.5594, -2.3774, -1.9773,
        -3.3629, -4.2394, -4.9973,  1.7684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8437,  -2.6921,  -4.0430,  -0.0152,   0.2153,  -1.7563,   0.2288,
         -1.7526,  -1.0705,   0.5555,   4.7895,  -1.7209,   0.9195,  -2.0677,
         -0.0801,  -2.1855,   2.4821, -12.0820,  -1.6914,  -4.7143],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5506,  -3.8099,  -0.6427,  -3.6860,  -0.1969,  -3.8819,   2.4985,
         -3.0672,  -9.7372,  -2.8577, -11.5390,  -1.8842,  -4.6969,  -0.6491,
          1.3334,   1.7609,  -4.2609,  -0.5345,  -2.2109,   0.5614],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0454,  0.3925, -1.5714, -0.9085,  1.4228,  2.9532, -2.1450, -0.6398,
        -3.7690, -2.5678, -4.8784, -0.7661, -3.4278, -2.0695, -1.1001, -3.6334,
        -5.2252, -9.9937,  1.5236,  2.6264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8411, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8241, -0.3834,  1.3359, -0.9549, -3.7434, -1.5990, -3.5758, -1.0406,
        -3.7504,  1.4744,  2.8390, -2.7474, -0.6421, -2.8886, -1.1458, -4.1512,
        -7.8450,  0.9782, -2.4276, -2.0078], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7050, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8707,  -1.2713,  -0.5190, -20.8013,  -2.7919,  -0.6294,  -4.4770,
         -0.3735,  -1.3641,  -7.4130,   2.3780,   3.0205,  -6.4180,  -0.9090,
         -3.6305,  -5.3849,  -4.4832,  -0.4390,  -3.1533,  -0.3643],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3095,  1.3067,  0.4687, -2.6448,  0.5827, -2.3951, -0.4674, -4.5808,
         2.9055,  1.2944, -4.5113,  1.1023, -2.2603, -0.3679,  1.3454,  1.5302,
        -4.3225,  0.0845, -1.9918, -4.2125], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7270, -3.4500, -0.1889, -3.0124, -0.5791, -4.7802,  1.2390,  3.4475,
        -3.2857, -2.5914, -3.3906, -0.7230, -4.1342,  2.8730,  1.1834, -2.8477,
         0.7673, -4.0433,  0.3870, -4.9588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0923, -0.9312, -0.2380,  1.2452,  0.0400, -2.7715, -2.4607, -3.4697,
        -0.3178, -3.2323,  2.0551, -7.0802, -6.2971, -0.5482, -2.9897, -2.1872,
         1.6399,  3.5746, -1.6235, -1.6409], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5881, -10.1665,  -8.2424,  -6.9987,  -8.1893,  -7.1532,  -7.4009,
         -6.3921,  -7.3322,  -6.7522,  -6.8854,  -8.3036,  -7.5645,  -6.7856,
         -7.3438,  -8.6324,  -5.9302, -13.3427,  -6.8488,  -8.0665],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3150,  -3.7231,  -3.8591,  -4.9781, -16.7124, -72.7132,  -3.7389,
        -12.8233,  -5.7733,  -7.2635,  -4.1910,  -9.4795,  -1.8155,  -0.1873,
         -0.6014, -14.6050,  -3.8762,  -2.5197,  -2.2559,  -0.5608],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.8496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6898,  -9.5077,  -2.2500,  -5.5524,  -0.1566,  -9.8398,   0.6706,
         -2.9663, -10.4235,  -0.8271,  -4.8501,  -4.2951,  -4.3629,   1.3541,
         -0.6998,  -2.3808,  -0.7698,  -2.3193,  -2.4560,   2.0170],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0653, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1082, -4.2726,  1.3121,  3.4587, -3.0118, -2.3614, -9.2920, -6.3230,
        -3.5090, -3.6401, -0.2735, -1.3880,  3.2703, -3.8575, -1.3373, -1.5389,
         0.0882, -3.0156,  0.4447, -7.0038], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1679, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3459,  0.7354,  3.5362, -2.9391, -0.4595, -1.0602, -1.5879,  1.7096,
         4.3265, -3.1886, -3.7770, -5.0262, -1.8938, -3.0544,  2.6199,  1.0496,
        -5.0847, -0.0304, -1.2512, -4.8138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0309,  1.3512, -4.1797, -0.3976, -2.5301, -1.5149, -0.1209,  3.8101,
        -2.7831,  0.8962, -0.1897, -1.5000,  1.1692,  3.9516, -2.7788,  0.3489,
        -3.4046, -2.2568, -3.7750, -1.5225], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2194, -2.1513, -3.7869,  2.9973,  2.9281, -2.2258,  0.9260, -2.5688,
        -0.8737,  1.5546,  2.9690, -2.1538, -0.9615, -1.8050,  0.1965,  0.6533,
         4.4265, -3.0182,  0.8073, -3.2056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7164, -20.3457,   0.9712,  -3.4515,   0.1936,  -2.8889,  -0.0530,
         -3.5866,   2.2669,   2.4058,  -2.8177,   0.8762, -12.5438,  -3.2470,
        -15.9865,   0.7749,  -2.9019, -26.3574,  -2.8728,  -6.6232],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0452, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1485, -4.7601, -5.2467, -2.8197, -1.4306, -1.4395, -0.5797, -8.6695,
         0.0274, -0.7107, -1.9485,  0.7555, -0.8147, -2.2452,  1.9015,  4.5805,
        -3.5278,  1.0636, -2.1351, -0.8640], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1405, -4.0010, -2.0468, -2.4573,  1.1833,  1.9382, -2.1540,  1.0192,
        -1.8636, -0.6626, -0.3154,  5.6092, -2.1309,  0.1367, -2.9048, -1.2133,
        -5.1358,  1.0150,  1.9744, -2.2304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7690, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0619, -2.1465,  0.7874, -2.8775, -2.0395, -1.8088, -3.2016,  2.6873,
        -1.4945, -1.9771, -1.1378, -1.9308,  0.0259, -0.5590,  3.5033, -5.1401,
         1.5052, -3.1683, -0.0181, -8.2337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2079,   4.0481,  -2.6231,  -1.2351,  -3.0066,   0.3715, -10.8200,
         -1.6654,  -0.7757,  -2.6890,  -0.7385,  -2.6382,  -1.6241,   1.7671,
          3.2495,  -2.3149,  -0.5617,  -4.1193,   0.0520,  -5.6271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4371, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8391,  -0.6013,  -0.9302,  -7.4201,  -4.8908, -11.0277,  -5.2182,
         -2.9863,  -2.3212,  -1.8716,  -1.6108,  -2.3039,  -1.4900,  -2.0228,
         -4.1122,  -0.7559,  -4.6884,  -0.5382,   2.8108,  -3.8467],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5468, -0.2847, -4.8250, -1.8456, -3.2121, -1.1521, -4.4001,  0.9938,
         2.1617, -1.9648, -0.6491, -1.8859, -2.3417,  2.9416,  3.7350, -1.9863,
         0.7993, -1.6565, -1.2063,  2.2555], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5264, -1.2108, -4.3771,  2.6224,  2.2522, -7.1196,  0.3744, -1.9331,
        -5.8963,  1.5163,  1.7995, -2.1849, -0.9787, -3.2377, -5.5585, -3.4043,
        -4.5583, -3.3364, -4.2292, -1.9501], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7887,  -2.7785,  -5.1291,  -2.4816,  -1.1507,  -0.4478, -18.3228,
         -0.7320,  -1.4689,  -3.2363,   0.2615,  -2.8928,  -2.0246,  -4.5098,
          2.2619,   1.8023,  -2.8150,   0.7275,  -1.1104,  -0.8217],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9509, -2.0086,  2.4008, -5.0337, -2.5211, -2.9994, -0.8394, -5.4697,
        -0.6949,  0.5682, -1.7807,  0.4779, -0.7664, -1.2973,  1.6419,  4.2144,
        -3.3260,  0.4327, -2.5603, -3.0037], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1662e+01, -4.8057e+00, -2.2152e+00, -1.9863e+00, -1.7859e+00,
        -2.6382e+00,  2.2168e+00, -1.4261e+00, -2.6048e-01, -4.3248e+01,
        -5.2818e+00, -6.8754e+00,  1.0122e-02,  1.0538e-01,  5.1216e+00,
        -4.1315e+00,  4.6626e-01, -1.6459e+00,  6.8259e-02, -4.0866e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7012,   1.3197,   0.7481,  -2.3935,  -0.1327,  -1.6340,  -1.0607,
          2.0758, -29.7071,  -2.7261,  -2.0440,  -3.5488,  -4.0150,  -3.4209,
          0.5860,  -3.3090,  -0.6781,   0.1851,  -1.8562,   2.5692],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6313, -5.0975, -3.5939, -7.1838, -5.5613, -4.6082, -4.4703, -5.5992,
        -5.0739, -3.4536, -4.3738, -4.2635, -5.0617, -6.8619, -5.9550, -7.3200,
        -5.7230, -3.1781, -3.3271, -7.6678], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4101,  -3.7708,  -1.1676,  -3.8564,   0.8819,   1.1194,  -3.6996,
          0.1432,  -3.1349,  -3.6356,   0.1851,   2.3546,  -2.1031,  -0.7172,
         -1.1896,  -0.5762, -12.6241,   0.7215,   0.5236,  -4.4524],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7704, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7211, -2.1471, -4.1922, -2.1538, -0.9924,  2.1452, -4.3687, -0.1706,
        -1.6842,  0.7011, -6.4846,  2.6965,  0.5267, -4.0119, -0.7597, -3.2983,
        -1.7053, -3.1283,  3.0166,  2.9104], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4411, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.9660, -2.4161, -0.4930, -1.7937, -0.9872, -3.6053, -0.4329,  1.7743,
        -2.5556, -9.4251, -2.1836, -1.3636,  0.9445, -0.1745, -5.2819, -0.1809,
        -3.9365, -1.3293, -1.1494,  1.8458], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.5954,  -4.8597,  -9.0514,  -5.0995,  -6.7542,  -0.3460,  -5.2833,
          4.9845,  -2.9519,  -1.3692,  -3.3488, -12.0306,  -4.5011,  -0.2454,
         -6.3079,  -1.5527, -20.9408,  -8.6704,  -4.6834,  -3.7749],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7646, -4.2920,  1.5533,  0.0093, -3.4163, -0.3879, -2.4696,  1.0673,
        -4.8224,  2.6403,  2.3959, -3.2249,  0.6558, -2.4014, -1.5524, -3.2653,
         2.2944,  2.5082, -2.8249,  1.7326], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8429, -7.6852, -7.7424, -6.6984, -9.0290, -7.5854, -8.4983, -6.1869,
        -5.4776, -8.7797, -7.9560, -6.8745, -6.2560, -9.3136, -7.2686, -6.6851,
        -8.7337, -6.5944, -8.4820, -7.6925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4045, -2.6708, -3.9295, -2.8260, -6.3628, -8.2965, -3.8081, -6.3342,
        -3.5807, -3.7311, -1.4381, -6.3713, -3.4734, -3.2074, -2.2356, -1.2086,
         3.2654, -3.2871, -1.6931, -9.5684], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1875, -2.5782, -3.4048, -5.2033, -7.1209, -2.8846, -4.6828, -2.9660,
         0.7998, -0.8963, -6.6350, -3.8228, -1.6474, -3.7854,  1.2875,  0.5780,
        -2.9018, -3.7936, -6.5272, -6.4507], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0177,  -1.8214,  -4.0540,  -1.1061, -10.7489,   2.0699,  -3.8181,
         -0.5015,  -1.9558,  -1.7170,  -2.2456,   2.8321,  -4.5790,   1.0989,
         -5.8422,  -1.2176,  -4.2224,   1.5213,   1.3337,  -3.3345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1163, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5639,  0.6823, -2.8363, -4.1915,  0.0552, -9.2897, -4.9833, -3.7803,
        -4.4273, -0.1038,  0.5582, -0.1757, -6.5381, -2.2275, -2.3111, -0.2401,
        -3.6555,  0.3746,  1.2278, -4.6463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5536, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7526,   0.8360,  -0.9598,  -0.9497,   0.6563,   4.5037,  -1.9071,
         -1.5101,  -4.1433,  -1.4468,  -2.6677,  -1.0265,   2.2311,  -2.9922,
         -0.7951,  -2.6248,   0.7678, -10.0917,   2.1444,  -0.4605],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1594, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2182, -0.1529, -3.3348,  2.6408,  0.3413, -4.3290, -1.0308, -3.1077,
        -1.0847, -2.8582,  2.4522, -1.9555, -2.3329, -3.0770, -3.5409, -2.2497,
         3.1044, -1.5166,  1.2516, -2.2846], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7172, -1.4463, -3.7038, -4.9019,  1.5465,  3.8467, -4.7824, -3.6268,
        -8.6983, -5.3124, -4.5663, -5.3766, -5.1110, -4.0300, -1.0841, -0.7490,
        -4.6505, -2.9047, -2.5663, -8.7448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5290, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.6682,  -4.6258,  -2.7238,  -3.8362,  -1.1991, -11.8497,  -7.8523,
         -3.6427,  -4.4559,   0.0226,  -7.9324,   5.1006,  -3.3293,  -1.1907,
         -2.6779,  -1.0980,  -4.7027,  -2.7017,   2.3710,  -6.6317],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2312, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1875,   1.5373,   3.1983,  -6.3768,  -5.0026, -14.1114,  -3.9441,
         -6.2457,  -2.1056,  -0.2389,   5.4640,  -3.5606,  -0.8612,  -1.9146,
         -1.5238,   0.4805,   2.2706,  -2.3761,   0.1051,  -4.0552],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7133, -1.8055, -1.6571,  1.8635,  1.0153, -4.1198,  1.2621, -3.7704,
         0.6584,  1.2546,  1.6866, -2.9027, -0.7507, -1.4736, -1.1232, -5.4212,
         2.3048, -1.3927, -2.9708,  0.7087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5257,   1.4351,  -1.8546, -12.2943,  -3.4704,  -1.8547,  -5.4151,
         -8.3370,  -7.8206,  -4.1878, -11.3401,  -7.5232,  -1.7352,  -1.9578,
         -1.5242,   0.8745,   3.8588,  -6.8089,  -1.5605,  -3.0569],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8332,  -0.1285,  -2.2064,   0.2440,  -2.2361,  -4.3789,  -1.8715,
          1.8863,  -6.1404,  -3.6970, -28.7420, -13.2976,  -4.9900,  -5.0200,
         -1.1672,  -0.4149,   3.9319,  -5.6381,  -0.6885,  -1.9271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6824, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4110,  5.3946, -6.7133, -0.1422, -2.7434, -5.0850,  1.9310,  0.0070,
        -3.8912, -3.4923, -3.2475, -1.2717, -3.9603,  0.2160,  1.0348, -2.0568,
        -1.0766, -2.1603, -1.4082,  3.0417], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7198, -0.5435,  2.4471, -2.4898,  0.0120, -3.5053, -5.7934,  1.7596,
         2.7821, -1.8161, -0.6053, -5.7691, -3.9449, -4.8469, -1.1846, -5.4888,
        -5.7062, -2.3763, -1.0939, -2.4048], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7341,  -3.6147,  -4.4652,  -0.1974,   0.2862,   4.9693,  -3.5904,
          0.4969,  -0.6666,  -4.1097,   1.0111,   3.3744,  -3.4535,   0.5376,
         -2.8605,  -1.4778, -26.6824,  -0.7921,   0.8606,  -3.2443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3838e+00,  2.0104e+00,  1.4570e+00, -2.6950e+00, -1.4910e+00,
        -1.6123e+00, -9.9741e-01, -4.6280e+00,  1.2773e+00,  2.2680e+00,
        -1.4874e+00,  1.2326e+00, -2.8645e+00, -1.9676e+00, -3.8788e+00,
         3.4139e+00, -2.0913e+00, -2.8874e-03, -2.5306e+00,  3.2385e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.8324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6691,  -0.0398,  -1.3105,  -1.9348,   0.7155,  -2.5671,  -0.5186,
          1.1603,   2.4243,  -2.5267,  -1.1640, -10.2127,  -3.2493,  -4.5990,
         -0.5061, -11.6872,   0.9254,  -1.1436,  -4.5578,   0.2102],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3625, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.8477,  -4.8913,  -1.7159,  -4.8914,  -0.1651,  -2.1484,   3.5075,
         -1.4546,  -2.7016,  -3.6351,  -5.5338,  -1.7052,   0.1335,  -4.8045,
         -2.0746, -22.6359,  -5.4783,  -4.4326,  -4.4561,  -0.1120],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1776,  2.9743, -1.2893, -2.6423, -0.0936, -0.8222, -1.3226,  0.5602,
         3.3089, -3.1403, -0.1924, -2.1458, -1.7308,  0.2593,  2.0035, -3.1723,
        -0.0533, -3.1482, -0.1722, -3.2159], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6202,  -0.8886,  -3.5285,  -6.6784,   1.7873,   0.4290,  -0.0202,
         -0.1777,  -1.0191,  -6.1677,   2.5536,   2.0931,  -5.6405,  -1.3098,
        -13.5838,  -4.8656,  -7.9337,  -6.3434,  -3.1944,  -0.2307],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3165, -7.2263, -4.5735, -4.2856, -2.6235, -2.7707,  0.7237, -5.0701,
        -1.9194, -2.1470, -2.6056, -4.9730,  1.3285,  2.8745, -2.5273, -1.0776,
        -4.0499, -4.7255,  1.4149,  2.3807], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3085, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8043, -18.7309,  -3.1953,  -4.7745,   1.3904,   1.8469,  -7.0210,
         -1.4669,  -3.4140,  -1.9076,  -0.6482,   1.4809,  -4.1749,  -3.6760,
         -0.3757,  -1.2345,  -7.1044,   2.1143,  -3.5016,  -2.5636],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9880, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0242, -6.9882,  2.1055,  0.4639, -2.7084,  1.6086, -2.5951, -0.6352,
         0.8361,  2.7174, -1.8400,  1.0864, -1.7185,  0.1419, -1.8793,  4.9955,
        -7.8075, -5.3766, -5.4165, -4.4527], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.2297,  -5.2174,  -4.4939,  -4.3794,  -0.2265,  -1.2535,   5.6311,
         -2.1193,  -2.2347, -12.4721,  -2.6205,  -5.3821,  -0.5817,  -7.4170,
          2.9984,  -2.4156, -10.5412,  -2.1235,  -3.5684,  -2.9305],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2249,  2.7611, -3.7946,  0.7948, -4.3150, -2.7746, -3.4632,  1.9439,
        -2.7354, -1.9522, -1.3219, -5.5853,  1.9104,  1.9658, -2.9843,  0.7193,
        -3.8348, -3.3002, -5.0519,  0.8557], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6107, -1.2999, -7.6235,  1.1972,  0.7675, -2.3233, -0.9342, -0.7056,
        -0.4718,  0.3063,  2.0576, -2.8395, -0.0328, -3.3985, -2.1621, -3.7714,
         0.7353,  1.6865, -3.3330,  0.0169], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1869, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4421, -3.9338, -8.7767, -5.0012, -7.9854, -2.2499, -7.3969,  0.0685,
        -5.4902, -2.9482, -3.0101, -8.8265, -9.1966, -3.6830, -4.0464, -2.4493,
        -7.8508, -4.9090, -5.8598, -6.5328], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0969,  -6.1604,  -1.3981,  -2.7073,  -3.0624,   0.8527,   2.0412,
         -1.4424,   0.4853,  -2.5539,   0.9689, -11.2138,  -0.7052,   0.6104,
         -3.6467,  -0.6496,  -2.1189,   0.3301,  -4.9492,   2.0844],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7666, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9699, -6.7668, -0.8610, -2.5555,  4.4347, -4.5911,  0.4877, -1.5136,
        -2.1224, -4.4977,  1.2790, -2.2876, -5.7184, -3.0876, -2.5391, -0.9974,
        -3.5316,  2.5500,  2.7715, -3.7432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0130, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9048, -2.8706, -0.8462, -6.3517,  0.7405, -1.3381, -6.4408, -3.6776,
        -7.7255, -6.2263, -4.5218, -5.4235, -4.4584, -3.9951, -1.6300,  3.5552,
        -4.7437, -2.5217, -3.1797, -2.2642], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5656,  0.2643, -1.7350, -4.7560,  1.5688,  3.0958, -6.7897, -1.5307,
        -0.9336, -2.4269, -6.0988, -2.4571,  0.4165, -3.4850,  0.9659, -0.8836,
        -0.7934,  0.5809,  0.8323, -3.2875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8673, -1.1665, -2.7610,  1.8986,  2.6543, -1.6111,  0.6969, -5.4035,
        -3.0134, -4.4661, -1.2930, -2.2792, -4.8017, -2.0434, -1.8942, -5.0226,
         1.9606, -4.9373, -5.6451, -0.3304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9295, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0757, -1.4856, -0.8024,  0.3035,  4.6965, -1.5168, -2.7208, -2.6291,
        -2.2278, -6.3844, -0.7965, -2.9851, -3.1614, -4.7500, -0.8334, -1.4903,
        -8.5552, -0.1364, -0.0119, -2.8559], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8205,  4.2625, -3.5987, -0.1130, -0.7610, -1.4494, -0.3998,  4.5228,
        -2.6464,  0.4926, -3.0260, -0.3655, -3.5133,  3.0443, -1.2969, -3.4484,
         0.1229, -3.3666, -3.3181, -4.8396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0201, -6.0535,  0.2432, -1.6016,  4.6899, -3.3229,  0.1904, -1.5886,
        -1.9362, -2.7396,  1.3467, -1.9208,  0.0164, -5.1354, -2.5627, -6.4468,
        -1.1833, -2.8520, -2.8930, -2.8604], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7816, -1.5681,  0.4620, -2.4693, -1.1241, -3.0645,  4.2136, -2.6088,
        -2.3909, -2.2344, -0.6453, -4.4131,  0.3797,  1.4056, -6.7279,  0.0487,
        -2.0313, -0.7594, -5.2105,  2.8686], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2544, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2502,  -5.6297, -52.4673,  -3.1405,  -8.1741,  -2.8375,  -4.2493,
          3.0206,   3.1227,  -4.7452,   0.6993,  -1.4101,  -1.6098,  -0.5824,
          3.3122,  -2.6511,   0.1051,  -3.2098,  -0.1288,  -2.9015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2390, -6.3557,  0.2223,  0.4691, -1.9059,  0.3195, -0.2410, -4.4972,
         3.3434,  3.7596, -2.2249, -1.1977, -2.5680, -0.9125, -7.1261,  4.5615,
        -0.7962,  0.7131, -1.0049, -3.4958], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9426,  -4.8352,  -2.6383,  -6.0113,  -1.8284,  -5.4494,   1.7377,
          2.9434,  -5.4105,  -2.7651, -11.9599,  -5.3738,  -7.0562,  -5.9048,
         -3.3461,  -2.9979,   2.3065,   4.6452,  -1.9210,   1.0923],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8858, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3653,  -3.5164,  -5.4339,  -3.5221,  -2.9055,  -8.7503,  -6.1992,
         -4.0154,  -5.9412,  -4.1244,  -2.7715,  -3.8244,  -4.2719,  -1.5260,
         -4.2674, -12.1803,  -4.8084,  -2.4951,  -4.3201,  -4.9070],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1500, -3.0802, -0.7665, -7.7419, -1.9336, -0.2306, -4.9369, -1.8606,
        -1.7936, -1.2162, -0.7580,  2.0706, -1.3398,  0.7151, -1.7313, -5.9884,
         1.8480, -2.6263, -5.4805, -1.6797], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4441e+00, -6.0572e+00, -2.5386e+00, -2.3401e+01,  4.2038e+00,
        -8.6184e+00, -4.0114e+00, -3.2171e+00, -2.8485e+00, -2.6607e+00,
        -7.9777e-01, -7.1903e+00, -4.9300e+00, -2.3794e+00, -2.0523e+00,
         4.2692e-01,  7.8561e-01, -2.1622e+00,  2.0849e-02, -3.3052e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6589, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7341, -2.3068, -2.0532, -5.1144, -0.4556,  1.5431, -3.7714, -1.4026,
        -3.6902, -4.8812,  1.1515,  0.2804, -3.2275, -0.2278, -3.3817, -3.7302,
        -0.0224,  0.5029, -2.4930, -1.9916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4351e-01,  2.5402e-03, -3.2903e+00, -3.4882e-01, -1.1441e+00,
        -2.7467e+00, -4.4498e+00, -2.9920e-01,  2.0345e+00, -2.8048e+00,
         7.6971e-01, -1.5466e+00, -1.6255e+00,  7.3286e-01,  4.1970e+00,
        -2.1951e+00, -3.3821e+00, -2.9404e+00, -5.3755e+00, -3.5539e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.4143,  -1.8375,  -0.9676,  -2.6252,  -2.3349,   3.5503,  -2.9724,
         -0.7292, -10.8901,  -3.5525,  -4.5055,  -1.6703,  -1.1480,   4.2773,
         -8.6128,  -6.1806,  -3.8578, -10.4982,  -4.4368,  -4.2890],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2996, -0.6606, -3.9610, -0.8938, -3.8924,  0.2585,  3.8284, -3.4571,
         0.2055, -1.3677, -0.7819, -1.2821,  4.2502, -2.2877, -0.2582, -3.7012,
        -1.6157, -1.8488,  0.6156, -5.0637], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3004,  0.4146, -2.3453,  0.3956, -0.7431, -3.9108,  2.0798,  2.7171,
        -2.4758,  0.9093, -3.1620, -1.4077, -6.3454,  3.2716, -1.9386,  1.3049,
        -1.7221,  0.7224, -2.2853,  3.8504], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6360, -1.8995, -5.5951,  1.6631,  1.6342, -1.9720,  0.6718, -1.9353,
        -1.3823,  0.6596,  2.8359, -2.4162,  0.2308, -3.5794, -0.2390, -5.1760,
         2.4105,  2.3376, -5.2755, -1.1731], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2455,  -3.5765,  -0.6247,  -2.9638,   1.0824,  -4.3711,  -7.3432,
         -2.2645,  -2.5230,  -3.8210,   0.7018,   2.4757,  -7.2435,  -4.2183,
         -4.5735, -20.8397,  -1.7598,  -4.0456,  -0.8482,   0.5512],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7161, -2.8658, -1.1974, -5.5548,  0.2337,  2.6140, -7.6707, -0.2431,
        -2.7135, -3.4777, -0.0811,  2.1673, -2.5312, -0.8802, -1.3991, -1.5781,
        -5.0662,  0.6838,  0.6147, -2.4244], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2888, -1.1553,  2.3964, -2.9010,  0.0352, -2.0406, -4.2326,  1.1612,
        -9.1772, -2.1281, -5.5077, -2.8574, -2.3755, -4.0544, -2.1318,  1.2351,
        -1.7849, -2.1892, -2.7202,  0.2113], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2775,  1.6757, -2.2706,  0.3334,  0.0759, -0.9062,  1.7049,  4.7124,
        -1.9540,  0.5954, -2.5259, -0.2796, -3.9868,  0.4346,  2.1878, -2.2666,
         0.8600, -2.4638, -0.2704,  0.7432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4045e+00,  2.4451e+00, -2.8778e-01, -2.9758e+00, -5.5423e+00,
        -2.8013e+00, -5.7125e-01, -8.5784e+00,  9.8001e-01,  3.8750e-03,
        -3.3869e+00, -7.6205e-02, -2.4601e+00,  6.7041e-02,  8.2987e-01,
         5.8024e+00, -2.7645e+00, -1.7305e-01, -1.6304e+00, -2.8314e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0557,  -3.0011,  -4.7439,   0.2091,  -1.2825,   5.0706,  -2.4500,
          0.7594,  -1.6989,  -4.9496,   1.1815,  -2.1977,  -5.4584,  -2.8038,
        -25.0847,  -5.4886,  -6.9187,  -5.4485,  -2.1288,  -1.3729],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3545,  1.9432, -3.6805,  0.0112, -3.3632,  0.0267, -3.4029,  1.0565,
         2.5899, -3.0263, -0.0561, -3.2967, -8.8718,  1.9977,  2.9881, -1.7011,
         0.5071, -9.5366, -6.3865, -4.8369], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2471, -1.3725, -1.4638,  1.1715,  1.8268, -3.6118, -2.1522, -7.9432,
        -6.8345, -3.5580, -4.1536, -3.4382, -1.6696, -4.1401, -3.4737, -3.5104,
        -1.5071, -2.4305,  1.1884,  2.9444], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3188, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5887,  -5.6824,  -7.2769,  -8.5824,  -5.4269, -10.1291,  -4.4997,
         -5.9029,  -5.7589,  -6.7122,  -4.7723,  -6.1650,  -2.7545,  -7.1155,
         -3.0671,  -3.2102,  -3.9896,  -5.0464,  -6.7467,  -7.7743],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.6543,  -6.6331,  -4.3454,  -6.0557,  -6.3187,  -3.4478,  -1.9820,
          1.8186,  -4.2995,  -3.4988,  -3.6100, -13.4507,  -2.7320,  -1.0266,
         -3.0378,  -2.5327,  -8.2224,  -4.4405,  -8.0865,  -0.4524],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7019, -14.9911,  -2.4185,  -1.9712,  -2.6983,  -3.5061,   2.0331,
         -1.4312,  -2.1387,  -4.1342,  -1.3873,  -3.4595,  -2.9426,  -4.8352,
         -2.6383,  -6.0113,  -1.8284,  -5.4494,   1.7377,   2.9434],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3901,  -0.8835, -23.5883,  -2.5350,  -7.8433,  -0.0928,  -1.0706,
          0.1387,  -2.8882,  -1.8195, -10.2866,  -7.3999,  -3.5739,  -3.7436,
         -0.1614,   0.0270,   4.3013,  -5.1724,  -1.6636,  -0.2905],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9236,  -5.4236,  -2.9319, -12.6099,  -6.6541,  -3.8777,  -3.6026,
         -0.4099,  -1.6802,   4.6804,  -2.5623,   0.3922,  -4.1738,  -0.7699,
         -5.7515,  -1.9705,   2.7825,  -4.0594,  -1.9207,  -2.6429],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9555, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3664, -20.6658,  -5.6061,  -2.5153,  -5.0982,  -2.5843,  -0.0959,
         -2.0966,  -5.5450,  -4.2303, -17.7140, -15.1051,  -4.0646,  -5.6895,
         -0.2969,   0.6814,  -1.8096,  -5.8420,  -0.6623,  -2.8161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4681,   1.0835,  -8.9703,  -0.0510,  -6.5716,  -1.0139, -12.9074,
         -0.9624,  -0.5511,  -3.1882,  -0.7385,  -2.0065,  -2.5639,   2.1824,
          2.2589,  -7.4041,  -3.2898,  -3.6016,  -0.6232,  -3.8696],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5660, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2458,  -0.0758,  -2.5983,   3.9452, -13.1591,  -0.3713,  -3.6818,
         -2.1158,  -1.7540,  -0.7719,  -2.5978,  -2.9755,  -2.0615,  -2.2840,
         -7.4168,   0.3021,  -0.5044,  -3.3496,  -0.3235,  -1.4198],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.8274,  -3.9890,  -5.2350,   0.0489,   1.0830,   4.6093, -16.9716,
         -1.7442, -23.3202,  -4.4046,  -3.4363,  -2.7810,  -2.6222, -13.6224,
         -6.8920,  -3.5133,  -3.4330,   0.1669,  -6.9597,  -3.6769],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6468,  3.7496, -3.3499, -0.3289, -3.7853, -4.8535, -2.1867,  1.3065,
        -4.6773,  0.3668, -4.4023, -1.1755, -6.2026, -0.0581,  1.1198, -6.0720,
        -0.5262, -3.2102, -4.2498, -3.6861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4560, -4.6353, -1.5835, -9.4017, -5.7595, -4.0877, -3.3008, -1.5464,
         0.1180, -2.0355, -2.4778, -0.9275, -1.9591, -9.8942,  0.8582,  1.0191,
        -5.2190, -1.5530, -2.1961, -1.8623], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9674,  2.7274,  2.5427, -1.7315, -0.8407, -1.9799, -0.5147, -3.0544,
         2.6718,  0.5495, -4.2211,  0.6496, -2.4255, -1.8265, -3.9537,  2.1552,
        -5.9827, -8.0339, -2.2736, -4.5331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7021, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8013,  2.6988, -2.7998, -3.7680,  1.3893, -3.3423, -1.6936,  0.0620,
         2.6950, -1.9567, -1.0730, -0.8352, -2.9314,  1.1606,  3.6946, -3.2839,
         0.7022, -2.8887, -0.2810, -3.6532], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9453, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.6017,  -2.2438,  -0.5575,  -2.8128,  -0.2264,  -4.7148,   3.8870,
         -5.6346,   0.3856,  -2.9812,  -5.7522,  -1.4814,   4.7216,  -2.7026,
         -4.0539, -43.9649,  -3.0661,  -5.1922,  -0.3437,   0.3785],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5875, -1.9168,  1.6209, -2.7433, -3.7957, -3.2239, -0.5089,  2.3208,
        -3.4894, -2.5649, -1.7887, -0.4196, -7.9442,  4.3762, -2.8410, -0.8433,
        -4.7152, -0.4224, -7.4420,  1.4705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2169,  -4.2228, -15.6294,  -0.2548,  -8.3272,   3.0378,  -5.8445,
         -4.6826,   0.2861, -10.9802,  -2.2517,  -6.1971,  -0.1173,  -0.7986,
          4.6016, -10.4664,  -1.8051,  -2.4931, -12.1193,  -9.5415],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0409,   1.2430,  -1.7623,  -1.8868,  -7.5139,   0.6577,  -3.0137,
         -3.8611,  -0.3519,  -4.0707,   0.6824, -17.8333,   0.6566,  -0.2473,
         -3.6034,  -1.1901,  -1.9065,  -2.4457,   1.7804,   1.4346],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2636, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.6933,  -3.7360,  -4.0630, -16.1096,  -8.7137,  -9.5090,  -6.4996,
         -5.0964,  -2.2204,  -6.9855,   1.4634,  -4.4822,  -3.3043,  -4.9332,
         -2.7072,  -5.0484,   0.2031,   1.5415,  -4.8299,  -1.2264],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1676, -2.9361,  0.2122,  2.6508, -1.7210, -2.5372,  0.7690, -1.3817,
        -0.0095,  1.8404,  1.6988, -2.6757, -1.2611, -1.0586, -1.9166,  2.6250,
         3.1567, -3.7197, -1.6168, -4.3273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6688, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4577e+00, -6.5728e-01, -1.9756e+00,  4.0246e+00, -2.7015e+00,
         1.9076e-01, -1.8038e+00,  8.1559e-03, -1.9464e+00,  4.8709e+00,
        -3.1767e+00, -3.1157e+00, -2.2760e+01, -3.3924e+01, -3.4866e+00,
        -1.6354e+01, -4.1607e+00, -6.1480e+00, -4.3349e+00, -3.1243e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1017, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5752,  -2.7642,  -6.6286,   0.6926,   4.6721,  -6.5806,  -0.8046,
         -1.3461,  -3.3895,  -0.4494,   3.6168,  -7.2139,  -2.0310,  -5.1569,
        -35.4681,  -2.3868,  -4.1677,  -1.1616,   1.5161, -28.7953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0711, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1805, -0.0448, -2.6670, -4.4634,  2.9297, -2.6550, -1.1182, -3.4481,
        -0.9177, -5.5837,  0.1963, -2.5952, -2.1313,  0.1014, -0.1530, -1.9190,
         2.0744,  4.5378, -2.2187,  0.5583], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2822, -1.8321,  5.2100, -9.0951, -1.3045, -3.6127, -0.1016, -5.8016,
         0.5209,  3.2157, -1.7424, -0.1833, -1.0453, -1.4306,  2.1239,  4.5683,
        -3.9148, -1.3154, -1.6025, -0.8080], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2022, -0.9846, -1.4288, -4.4266,  4.1745, -4.5368, -0.3020, -1.5371,
        -0.7902, -3.2744,  2.8032, -3.8398, -0.6791, -1.1224, -1.0515, -3.8062,
         2.3179, -1.5688, -0.2024, -5.6953], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6211,   0.7842,   2.7285,  -1.7080,   0.2637, -12.3146,  -3.7941,
        -12.5070,   0.3117,  -7.5779,  -0.6521,  -2.1429,  -0.7921,  -5.1249,
        -23.8329,  -1.7446,  -4.6761,  -3.5086,   0.9084,   5.2897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1606e+00, -8.1092e-03,  4.7010e+00, -6.2618e+00, -8.8088e-01,
        -3.6474e+00, -1.3490e+00, -5.5639e+00, -6.3957e-02,  1.3342e+00,
        -2.6500e+00, -6.5209e-01, -3.4228e+00, -6.0977e+00, -1.2517e+00,
        -9.7053e+00, -5.7854e+00, -9.6384e-01, -1.1204e+01, -4.9369e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9785, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3447, -12.3502,  -4.1991,  -3.7510,  -5.7742,  -0.6921,  -5.2307,
         -1.3767,   2.9287,  -4.0893,   0.9223,  -3.9589,   0.2694,  -4.9819,
          2.6963,   3.8738,  -2.9128,   0.2412,  -1.5267,  -4.0118],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0859, -4.2095, -3.7097, -1.6418,  1.1479,  3.3270, -1.2980,  0.7037,
        -2.3183,  0.1215, -4.8659, -2.1716,  3.1747, -2.5981,  0.6395, -3.5667,
        -0.8810, -2.8098,  1.5865,  2.8430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1072,  0.8089, -4.3029, -1.6377, -5.6232, -2.8788, -7.8373,  1.8541,
         0.1171, -6.3477, -0.8934, -4.2370,  0.4248, -3.5985,  0.5536,  3.3353,
        -4.4722,  0.2220, -2.1491, -1.1378], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8058, -3.5310, -4.0647, -1.1219, -0.0592,  5.5394, -6.0550,  1.3352,
        -2.2417, -0.0175,  0.4861,  3.3039, -3.0274,  0.8424, -3.0387, -0.8899,
        -5.5130, -4.5940,  2.4756, -5.2520], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6614, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3976, -2.5301, -1.5149, -0.1209,  3.8101, -2.7831,  0.8962, -0.1897,
        -1.5000,  1.1692,  3.9516, -2.7788,  0.3489, -3.4046, -2.2568, -3.7750,
        -1.5225,  1.5515, -2.1980, -0.0467], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6646, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0106, -19.8249,  -1.1940,   0.1213,  -0.6819,  -4.9922,  -0.2428,
         -3.3466,  -1.1328,  -6.6555,   1.7076,   2.1845,  -1.5557,   0.7977,
         -1.8610,  -2.9099,   1.9251,   2.7099,  -3.0437,   0.9918],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8949,  0.4054, -1.6214, -1.5630,  0.6805,  2.5055, -2.7500, -0.6824,
        -3.5343,  0.1395, -7.0614, -1.4425,  0.4134, -2.6605,  0.6628, -3.2507,
        -3.4693,  1.8848,  3.9898, -4.6304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1990, -0.0850,  1.4533,  3.5604, -2.2939,  1.0659, -2.2938,  0.3485,
        -1.9520,  4.0019, -2.3493, -1.1279, -8.2045, -4.9079, -3.2268, -6.4759,
        -0.4049,  2.3484,  2.4185, -5.5302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3815,  4.0258, -1.9394,  0.1370, -2.6849, -3.0995,  0.1963,  3.3890,
        -6.1040, -0.4881, -0.3767, -0.6830,  2.6972,  3.4269, -2.1297,  1.4412,
        -1.2257, -2.7722, -4.2950,  2.5804], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4643, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9195, -5.3525, -6.1714, -4.4097, -7.3189, -8.6402, -6.1151, -4.4430,
        -3.7400, -2.3807, -4.1652, -7.1002, -3.6851, -6.6676, -4.9624, -0.9176,
        -3.3133, -3.5754, -2.6932, -4.7841], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.7678, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3460,   2.8145,  -1.5317,   0.3244, -10.6776, -10.0436,  -5.4477,
         -7.8230, -11.2115,  -8.2909,  -1.0235, -48.3385,   1.1975,  -1.2684,
         -8.1681,  -1.9906,  -4.9512,   0.3854,  -5.4154,   0.1126],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5447, -2.9370,  1.9568, -1.8933,  0.6943, -7.7663,  3.1543,  1.7156,
        -4.5529,  0.1908, -1.9984, -0.4767, -4.0580,  3.2826, -6.8255, -4.9799,
        -0.0703, -2.3550, -2.0501, -0.9931], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7963,  -7.0965,  -1.7083,  -2.2019,  -4.8821,   0.7203,   2.3992,
         -5.7503,  -5.9461, -21.2359,  -6.3251,  -3.7642,  -4.0281,  -1.8335,
          0.1118,   4.6640,  -3.2223,   0.2773,  -2.4066,  -6.1974],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.0069,   2.4295,  -2.9505,   0.4930,  -3.2027,  -1.7820,  -7.8048,
         -1.7746,  -0.2767,  -2.6651,  -4.0875, -23.7390,  -3.0346,  -4.4472,
         -1.6860,   0.9051,   4.5800,  -7.6246,  -3.2469,  -2.4159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5758, -6.2376, -2.5539, -4.5798, -3.0133, -2.8293,  0.5582, -1.4398,
         1.0977, -3.2716,  0.1004, -3.6355,  1.6475,  1.1428, -2.1960,  0.0503,
        -1.2812, -1.9863,  2.6677,  4.9825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3676, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1118, -4.1243, -5.4881, -7.0142, -4.7253, -4.0092, -5.7637, -4.1926,
        -4.5458, -9.4933, -7.2840, -5.3713, -4.5315, -5.0590, -7.7979, -3.7906,
        -5.0045, -1.9002, -5.0950, -0.6788], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2766,   0.5111,   3.0174,  -2.2501,   0.1315,  -0.6039,  -0.0167,
         -3.7897,   4.0213,  -1.9865,  -0.2286,  -3.7688,  -4.3160, -10.1214,
          2.0584, -10.1548,  -4.5997,  -5.5539,  -5.9348,  -3.7008],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6539, -0.3161, -2.0134, -3.9252, -0.1503,  2.3526, -1.6779, -0.1119,
        -4.6981, -2.4657, -4.7865, -1.9484, -5.6327, -5.2113, -1.3691, -2.3680,
         0.3611,  2.0313, -3.9416, -2.3294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4135, -1.8585, -1.8723,  2.3341,  4.4400, -2.2735,  0.7204, -0.9731,
        -0.4863,  1.5763,  3.7270, -3.0909, -2.9619, -2.4509, -3.1910, -4.2795,
         1.3073,  0.3808, -2.7106,  0.0755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9716,  -1.2791,   0.9042,  -4.8946,  -2.2052,  -0.2350,  -1.8107,
         -0.2507,   0.7587,  -2.8067,  -4.1976, -14.5039,  -4.1345,  -5.7854,
         -4.2659,  -6.3001,  -9.3002, -17.8425,  -2.2011,  -1.6140],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8634, -4.6240, -1.8093, -3.8202, -0.6395, -6.2027,  1.8427,  2.3313,
        -1.4775,  0.4859, -1.8711, -1.8432,  1.7617,  2.4552, -3.8455, -6.1679,
        -3.5206, -4.0978, -3.2554,  1.6530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7096, -2.9242, -0.3683, -7.8277, -5.1512, -8.5197, -2.1722, -5.7173,
         2.8507, -4.3021, -3.6661, -6.8565, -2.4525, -4.6771, -2.4332,  0.2417,
        -1.7573, -1.7898, -1.1976, -2.1048], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9249,  0.7133, -1.8497, -1.5804, -6.7084,  3.1795,  0.1474, -2.2713,
         0.4092, -0.7086, -2.3730,  2.6150,  3.4159, -2.6030,  1.9227, -1.8483,
        -5.3276, -4.8411, -0.8091,  0.6149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9450,   2.0387,  -2.2683,  -2.2658,  -3.4957,  -0.5633,   1.4464,
          3.8743,  -2.4941,  -0.3368,  -8.5380, -11.9820,  -5.1874, -15.1544,
         -8.9131,  -8.5611,  -0.6987,  -5.2188,   4.5192,  -5.5784],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7161, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1118,   1.3436,  -3.6744,  -0.5076,  -1.5616,  -3.7812,   0.7032,
          0.9508,  -2.7624,  -0.8576, -13.2620,  -4.1338, -14.1632,  -1.2123,
         -7.9980,   5.4283,  -3.8447,  -0.4410,  -7.0132,  -3.6426],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0271, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7904,   1.3803,  -6.8249,  -3.4952,  -1.3092, -12.9968,  -2.6932,
         -6.1845,   0.1046,   1.8656,   6.0184,  -3.1185,  -2.2346,  -2.8723,
         -4.5391,   0.8914,   3.3001,  -2.6946,   0.7818,  -2.2899],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0350, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9747e+00,  1.8883e+00, -5.5403e+00, -2.0618e+00, -2.7246e+00,
        -8.5093e-01, -4.7539e+00, -4.7619e+00, -4.1777e-04, -3.2634e+00,
        -2.9935e+00, -4.3486e+00, -3.2280e-01, -2.7390e+00,  1.6714e+00,
         3.4464e+00, -2.9322e+00, -6.4156e-01, -2.0971e+00, -1.1224e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5052,  -1.4935, -21.4750,   0.7345,  -3.7340,  -3.0735,  -1.6941,
        -12.1037,  -2.1124,  -6.2686,  -0.3511,   0.8771,   0.3081,  -9.4365,
         -0.1894,  -2.5707,   0.4767,  -4.0306,  -0.5784,  -9.4238],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9482, -6.0901, -2.9717, -2.5540, -0.6147, -0.0131,  2.6421, -2.5846,
         0.0652, -2.8111, -0.3153, -3.0541,  0.0732,  0.7291, -3.6573, -0.4698,
        -3.2620,  0.3192, -4.9664,  5.0865], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6199, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4630, -2.8224,  0.6481,  0.6926, -6.4443,  0.6785, -2.5577,  1.1343,
        -5.2554,  1.7758,  1.1304, -7.6574, -0.5580, -2.8037, -3.9008, -2.4018,
         0.9673, -4.4153,  0.1673, -1.7318], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1631, -7.4972,  0.8117, -2.2433, -4.7903, -3.1868, -0.8686, -5.3748,
        -0.2483, -0.9701, -2.8216, -0.5737, -1.4077, -1.9663,  1.4030,  2.8705,
        -2.2350, -1.5820, -3.0140,  0.3856], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6573, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1835, -1.4545, -5.1628, -5.2757, -4.9177, -5.3471, -6.1838, -4.4064,
        -7.2925, -8.7069, -6.3020, -4.4571, -3.7105, -2.4088, -4.1296, -7.1087,
        -3.6454, -6.6163, -4.9562, -0.9071], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0073, -3.2971, -4.4265, -4.6678,  0.9622, -5.5931, -0.5028, -1.6930,
        -1.2783,  0.9132,  4.5832, -1.5306, -0.1333, -0.5944,  0.0410,  1.5702,
         4.0830, -1.3266,  1.4133, -1.4447], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0692,  -4.9215,   1.8768,   1.9171,  -2.4253,  -0.7710,  -3.1604,
         -2.3041,  -4.3256,   4.5217,  -3.7934,   0.7053,  -2.9864,  -3.1167,
         -2.6087,   3.8703,  -0.9853,  -1.1338, -31.2790,  -7.8944],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0923, -8.2265, -4.7403, -7.0218, -2.6124, -2.6422,  0.6760, -3.3599,
        -0.7126, -4.6183, -4.1199, -1.4202,  2.2308, -4.0326, -2.2900, -7.7785,
        -6.2341, -4.2781, -4.5358, -2.1477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2023, -2.0517, -2.3038, -2.4698,  0.2765,  3.9230, -2.2708,  0.5989,
        -1.2268, -3.4433,  1.4196,  4.4281, -2.6861,  0.7746, -1.2496, -0.6219,
         1.1948,  2.4704, -1.9773,  0.2701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3574, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6169, -1.8580, -7.9699, -0.5360, -1.8652, -4.4994, -0.1228,  1.1588,
        -3.9450, -2.0625, -3.6965, -1.8572,  0.2234,  2.1618, -1.8314, -0.1079,
        -0.9562, -0.4904,  3.2090,  2.2650], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3199, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4006,  -1.6750,  -3.3244,  -0.8113, -11.7922,  -4.4616, -25.9988,
         -1.0333, -13.4671,   4.2745,  -2.2154,  -4.6440,  -5.8591,  -9.0445,
         -2.8297,  -4.6393,  -0.9984,   2.4742,  -4.0920,  -3.1786],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5495, -1.2046,  0.2224, -4.3960,  3.3412, -1.4674, -2.5790, -0.4163,
        -2.9941, -1.9072, -1.1812,  2.8233, -2.9340,  1.0865, -1.1556, -0.4818,
         2.2186,  3.4673, -4.7452,  0.6750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5539, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5987, -0.1817, -3.5524, -0.9167, -1.8251, -2.0311,  2.6985,  3.1017,
        -2.5096,  0.0699, -0.8934, -0.0754,  0.4166,  2.9042, -2.9817, -1.4083,
        -1.9863, -4.9070,  1.6097,  1.6578], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6204, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1260, -3.0194, -0.7067, -2.3579,  0.0624, -3.8242,  2.6103,  2.3203,
        -2.8592,  0.1023, -3.1500, -4.1094, -5.0161,  2.3786,  0.2276, -5.7217,
         0.5171, -3.3023, -0.9094, -3.3997], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3016, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2026,  1.4018, -7.2050, -6.0421,  0.2519, -4.3886, -5.2595, -3.8393,
        -0.3880, -6.6322, -1.9495, -7.1892, -1.7047,  1.3602,  1.1878, -2.3853,
         1.8776, -1.8402, -0.7886, -7.4178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4039,  -1.8145,  -3.5434,  -2.8383,  -7.8797,  -1.9691,   0.6351,
         -5.3732,  -4.3079, -26.8787,  -8.0903,  -6.9263,  -4.5635,   0.1828,
         -1.1944,   4.2404,  -3.0896,   0.0951,  -1.9737,  -4.2997],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3990,   0.0951,   1.7619,  -2.8401,   2.3555,  -3.8278,  -2.0081,
         -5.3589,  -8.4671,  -7.6453,  -8.1077,  -1.4396, -33.2263,   1.9589,
          0.3071,  -2.9395,  -2.6826, -11.6941,  -3.4520,  -6.2090],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0684, -12.9020,  -6.7174,  -1.6300,  -3.3233,   0.5750,   1.0500,
         -1.8417,  -1.8598,  -1.1848,  -3.2505,   0.2443,   2.7953,  -4.7133,
         -2.0633, -33.5955,  -4.5612, -16.7496,  -0.8277, -19.5259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8575, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6286,   0.9963,  -3.0259,   0.7468, -12.1525,   1.7222,  -1.7599,
         -2.9800,   0.3486,  -3.2529,  -0.1523, -14.8438,   2.2360,   2.8572,
         -3.1383,  -0.2981,  -2.3053,  -1.0828,   0.6760,   4.6628],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6687, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9131,  -2.2913,  -3.9671,   1.4942,   2.3450,  -3.9722,  -1.7122,
         -1.6926,  -1.3662,  -1.6388,   2.1993,  -5.0668,  -0.0695,  -1.7337,
         -5.5357,   1.5962,  -5.4551,  -2.9646,  -3.1319, -12.6229],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8232,  -1.6634,   2.1415,   2.3211,  -3.0920,   0.0310, -11.6060,
         -4.7972,  -7.8783,  -6.2773,  -0.2406,  -1.8078,   4.6278,  -8.9716,
         -1.2145,  -1.9153,  -2.4221,   0.4028,   3.9714, -17.7308],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2606, -10.2641,  -4.3102,  -4.6330,  -7.6749,  -3.9138,  -6.7358,
         -6.3054,  -6.5861,  -3.4745,  -6.8615,  -0.2296,  -4.2550,  -4.4449,
         -5.4996,  -5.4887,  -5.7960,  -6.5431,  -6.6453,  -3.3710],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4147, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1684, -5.9378, -2.9558, -5.4971, -0.7315, -1.2863,  0.4000, -5.1249,
        -2.6817, -3.1611, -1.5092, -2.0708,  2.7212, -3.6588,  0.8672, -4.3072,
        -2.6710, -3.3835,  1.8416, -2.7056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3510, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5469, -4.6141, -1.3488, -5.6749, -0.0550,  0.1334, -4.3585,  0.0602,
        -1.5488, -1.8781, -0.1817,  4.4708, -2.5360, -2.9316, -3.3978, -1.5766,
        -4.4744,  0.3868,  0.6979, -2.1426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5309,  -5.2495,  -3.5341,  -0.8346,   2.0128,  -0.2718,  -1.3503,
         -0.3351,  -7.5437,   1.6683,   3.4241,  -2.6174,  -2.7184, -22.6806,
         -4.1321,  -4.6815,  -7.3692,  -4.4178,  -4.3681,  -4.6492],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8822,  -6.7250,  -5.2943,  -3.9345,  -6.2911, -11.4448,  -7.5767,
         -5.2489,  -3.1189,  -3.1167,  -7.0859,  -5.6509,  -3.7423,  -4.9093,
         -4.9430,  -4.2165,   1.1242,  -3.3324,  -3.0215,  -6.0679],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9739, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2230, -2.4675, -3.5380,  0.0958,  2.0591, -1.4968,  0.2498, -3.0579,
        -4.4758, -4.6688,  0.3102,  1.8139, -5.3605,  0.6344, -3.3124, -3.4661,
         0.7009,  2.7304, -2.3412, -2.0602], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.9127,  -2.3564, -11.0442,  -6.9804,   5.3808,  -2.5288,  -1.6005,
        -27.9646,  -7.6976,  -3.3206,  -3.3809,  -0.3804,  -0.6061,   4.9101,
         -4.4107,   0.1015,  -1.7379,  -0.9945,  -5.5755,   2.3089],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4640,  -2.1704, -15.1079,  -2.8324,  -5.6407,  -0.4465,  -0.8315,
          5.7454,  -1.0453,  -0.3793, -21.2808,  -2.9158, -20.2351,  -1.2552,
          0.2243,  -1.8031,  -4.9651,  -0.1032,  -3.2941,  -1.0791],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0940, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.4942,  -2.7783,  -7.0894,  -0.2629,  -0.3296,   4.3052,  -5.4015,
         -0.2124,  -3.4087,  -5.0073,  -4.4448,   2.2420,   0.6152,  -2.6562,
          1.3857,  -1.9594,  -1.4683,   2.4468,   3.5038,  -1.9700],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6817,  -1.5299,   0.3245,   3.7628,  -2.5075,  -0.5364,  -1.7255,
         -1.9332,   1.9487,   3.9512,  -1.4274,   0.4654,  -1.8107,   0.7876,
         -1.6584, -47.3498,  -4.9572, -13.4425,  -2.8298,  -5.4183],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9963,  4.3767, -9.3463, -1.4712, -2.3477, -5.5469,  1.7861,  3.2417,
        -7.6717, -4.6298, -7.6946, -5.3186, -4.2461, -2.7918, -2.7416,  0.2222,
         3.7368, -7.8455, -4.2892, -0.7862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6333, -4.3013, -0.8852, -6.3927, -1.4227, -1.3298, -7.2858,  0.1665,
        -0.9156, -2.0464,  1.8451, -0.8298, -2.5874,  0.3413, -1.2172, -1.1610,
         1.0268,  2.5881, -1.8248, -1.5896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5636,   2.8731,  -1.7987,   0.6793,  -1.8729,  -5.7253,   1.7354,
          2.8432,  -3.1401,  -3.9952, -11.5470,  -6.6416,  -3.8286,  -3.4747,
         -1.1158,  -3.3331,   1.4897, -11.0500,  -2.1673,  -1.0767],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5855, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8785, -1.0458, -4.9294, -8.3671, -7.0769, -1.8935, -2.1788, -0.7805,
        -3.4884, -0.8302, -5.1674,  1.1872,  2.3389, -2.3909, -4.8900, -4.1194,
        -1.7144, -2.3264, -7.8069, -2.5610], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.0960, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6968,  2.3991, -1.4439,  0.9093, -2.1143, -2.1946,  2.3055,  3.0518,
        -2.1111, -1.4748, -6.6024, -7.4016, -3.0622, -4.0216,  0.0868,  0.6323,
         4.9820, -8.6106, -1.7095, -1.1546], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2539,   1.6834,   1.5968,  -4.0131,   0.1242,  -1.8069,  -9.2527,
          1.5604,   2.8964,  -2.0574,  -0.9639,  -7.5733,  -6.1371,  -3.2106,
         -4.2825,   0.0985,   0.3040,   1.6617, -10.3665,  -2.0613],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8472, -4.1023,  2.4183,  2.7471, -2.8961, -0.2611, -2.6105,  0.3408,
        -5.3315,  3.3290,  1.2857, -2.1128,  0.5918, -0.6746, -1.2937, -6.7608,
         0.7611,  0.1621, -2.5496,  0.2701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7257,  0.4107, -6.0078, -7.6129, -4.5589, -2.8285, -2.6491, -3.5524,
        -4.2261, -3.7449, -6.1065, -2.0412, -1.2161, -3.8702,  2.2916,  2.0558,
        -2.6145,  1.4490, -6.2896, -4.0283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9076, -1.1227, -3.7609,  1.0456, -1.4969, -1.4101, -4.5414,  2.4916,
         2.5481, -3.6879, -1.4420, -1.6203, -1.9208,  0.4569,  3.9320, -3.9557,
         1.0039, -1.5336, -1.3291, -3.7534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1502, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.4586, -4.9119, -2.6568, -2.1926, -1.4256, -3.6456,  2.6652,  4.4758,
        -4.5137, -1.2968, -1.0399, -1.3516,  1.2942, -0.8214, -4.3333, -0.0063,
        -2.7399, -1.2621, -2.9623, -0.2534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1260, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9166,  -1.8186,  -7.6764,  -1.1449,  -1.2542,  -3.0871,  -5.4874,
        -12.8019,  -2.5383,  -4.9487,  -0.6227,   0.3407,  -1.9465,   1.1628,
         -2.5741,  -0.3471,  -2.6189,   3.0212,  -2.5670,   0.9249],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7236,   0.2823,  -4.1235,   2.5143,   2.3973,  -1.4578,  -0.0465,
         -2.3434,   0.7189,  -4.9859,   2.9452,   0.4241,  -2.5476,  -0.1669,
         -2.5544,   0.9097, -24.8478,   0.1742,  -3.9931,  -7.1766],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.9753,  -1.9733,  -2.9812, -17.8446,  -3.1043,  -3.1476,  -0.1538,
          0.1557,   4.4235,  -5.3054,   0.8910,  -1.1847,   1.2762,  -0.9722,
          4.4636,  -7.4076,  -2.4474, -11.8878,  -1.9971, -22.3213],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9394, -0.8136, -2.4801,  0.1399, -4.7310,  0.9715, -0.5227, -3.7677,
         0.9508, -3.5020, -3.7217,  2.1916,  1.6682, -6.8170, -3.3325, -3.6909,
        -2.6538, -6.1019, -4.1739,  0.1897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1557,  -0.4206,  -4.2356,   2.1266,   3.1508,  -5.0491,  -3.4519,
        -14.4863,  -5.9670,  -3.9655,  -4.1391,  -2.3203,  -1.0461,   3.2896,
         -9.5199,   0.6306,  -4.1080,  -1.4985,  -3.6532,   1.4275],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6696, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9330e+00, -1.6434e+00, -2.3812e+00, -1.7017e+00,  1.2396e+00,
         2.5881e+00, -9.6214e+00, -5.1986e+00, -5.9154e+01, -4.7563e+00,
        -1.2316e+01, -1.1226e+01, -6.8812e+00, -7.8678e+00, -1.6526e+00,
         4.7380e-03,  2.6704e+00, -5.7901e+00, -1.1333e+00, -8.8905e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8990,  -0.1711,  -0.7256,   5.0435,  -2.9980,   0.2561, -10.7220,
         -6.1340,  -3.2157,  -2.9887,  -2.0371,   1.1243,  -0.6828,  -8.3804,
         -2.5796, -17.7665, -15.1179,  -3.9974,  -4.2860,  -0.3042],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4928e-01,  1.6855e+00,  1.1487e+00, -3.0067e+00,  5.7233e-02,
        -1.1369e+00, -1.7842e+00, -1.2222e-02,  4.8526e+00, -3.8069e+00,
        -9.9714e-02, -1.9564e+00, -1.1315e+00, -2.2637e-03,  4.8205e+00,
        -2.3027e+00, -3.7765e-01, -2.7035e+00, -2.4261e+00, -2.3250e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.5578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8465, -2.0804, -0.5364, -6.1897,  2.9858,  2.2843, -1.9169,  0.9610,
        -2.5730, -1.3824,  1.9062,  2.8238, -2.7295,  0.4286, -2.2103, -0.8152,
        -1.0531,  2.8076, -2.0936, -1.0013], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6115, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5389, -2.7579, -1.8926, -1.5350, -1.6706,  1.5709,  3.3718, -2.2707,
        -0.8487, -4.0285, -1.2082, -2.7329,  2.6223,  1.7242, -2.8756, -0.5273,
        -1.5497, -1.7148,  1.5962,  5.0022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2876, -1.6731,  0.2838, -2.0429, -1.5034,  0.9615,  4.0334, -4.4310,
        -1.5586, -2.2252, -0.0618, -5.4367,  3.2391,  1.7039, -2.4537,  2.0575,
        -3.1588, -1.4905,  2.9701,  4.3078], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  -4.5069,   -4.7620,  -12.2032, -109.1650,   -2.5502,   -8.6480,
          -4.6376,  -27.1022,   -1.1676,   -6.9501,   -5.4171,   -3.1908,
          -2.8381,   -2.7747,   -5.7568,    4.7594,   -4.0141,   -0.5127,
          -2.7396,   -5.7480], device='cuda:0', grad_fn=<SumBackward1>) tensor(-10.4963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5069,  -1.7850,   2.3359,  -5.5678,  -0.6080,  -1.5233,  -6.3234,
          2.0110,   3.4283,  -2.1575,   0.6427,  -3.0483,  -1.0670,  -4.7262,
          0.1427,   1.8638,  -2.7130,  -1.4599, -12.5063,  -3.1862],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3173e+00, -6.7050e+00, -4.7907e+00, -1.7165e+00,  2.3340e-01,
         2.0177e+00, -7.6582e+00, -4.1423e+00, -4.1163e+00, -1.3761e+00,
         1.8816e+00,  4.6973e+00, -2.7306e+00,  6.9324e-01, -3.1286e+00,
        -4.7516e-03, -6.9560e+00,  2.5415e+00, -1.8171e+00, -2.9197e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1657, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.4281, -12.5949,  -2.9120,  -5.2103,   0.6491,  -3.3702,   5.5351,
         -6.2286,   0.6163,  -1.8170,  -0.6492,  -8.0309,  -0.3090,   0.0550,
         -4.6422,  -0.6256,  -2.0768,   0.1802,  -5.1159,  -0.7277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6851, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7034, -10.6108, -32.8029,  -6.4262,  -4.9066, -23.5698,  -9.8900,
         -2.2072,  -3.6166,  -6.0106,  -3.4163,   0.8738,  -2.8483,  -2.1067,
         -3.5616,  -3.2385,   1.4930,   2.9957,  -6.7918,  -1.8347],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2090, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9141,  0.1676, -5.0091,  1.9900, -1.9460, -5.5062,  0.1371, -2.9445,
        -1.0756, -5.1360,  1.8955, -1.5204,  0.5146, -4.3905, -1.2864, -2.6713,
        -1.7437, -4.0493, -3.9423, -2.5801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7230,  0.6174, -1.5403,  4.2501, -3.9296, -5.2171, -0.5583, -3.2609,
        -1.3144, -4.3422, -1.4365, -3.6287, -7.2750, -2.8609, -5.0327, -5.8768,
        -4.5281, -0.2655, -3.3291, -4.2695], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0672,  -1.2830,  -9.4513,   1.6552,   3.8402,  -5.9808,  -0.6752,
         -2.0978,  -2.0835,  -3.6168,   2.6789,  -2.3949,  -0.9970,  -3.5333,
        -10.7741,  -6.5501,  -7.6341,  -0.7062, -14.5167,   3.6903],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7761, -9.3543, -5.3835, -4.0920, -4.2358,  0.0982,  0.5694,  3.6234,
        -2.8484,  0.1736, -1.5404, -1.8236,  0.6701,  4.2331, -3.4211,  1.4247,
        -2.8644, -2.3923,  0.5913,  4.3760], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8847,  2.1390,  2.3980, -4.3187,  0.2088, -0.8659, -1.0324, -3.6206,
         4.7203, -2.3575,  0.8296, -3.1769, -0.9663, -3.5068, -0.0274,  1.5747,
        -2.7273, -0.2434, -1.8452, -0.2406], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2712, -0.1575, -2.6699, -0.6778, -0.6365,  2.4663, -2.4719, -0.1797,
        -2.1096, -0.3203, -8.7654,  2.4092,  0.1244, -2.6715, -1.0747, -1.7667,
        -2.7298, -1.9624, -3.3226, -4.3205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7596, -1.0693, -0.8259,  1.2415,  3.6235, -2.1509,  0.2833, -1.4330,
        -4.1119,  2.2250,  3.3670, -1.9476,  1.4230, -2.0444, -1.0000, -5.3176,
         1.9217,  1.4911, -3.3063, -0.1563], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.1597,  -9.7369,  -7.7243, -10.0944,  -6.9169,   0.4539,  -6.7599,
         -3.2622,   2.0965,  -2.6683,   0.4279,  -1.7896,   0.1293,  -1.6811,
          1.6694,  -6.7715,  -0.7666, -19.4088,  -8.4025,  -5.6878],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1850,  -0.3951,  -0.4507,  -1.0997,  -3.0034,   1.5714,  -3.6185,
         -1.4096, -21.7294,   1.0781,   0.5791,  -4.0980,  -1.9488, -10.8289,
         -3.0188,  -5.8446,  -1.7000,   2.0549,   4.7869,  -2.4425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8794,  -0.2872, -22.0417,  -3.3102,  -5.6115,  -5.8041,  -4.7319,
         -6.0341,  -2.2446,  -2.7062,   2.5734,  -3.3768,  -2.8551,  -2.3067,
         -1.0175,  -0.7593,   5.1899,  -3.6507,  -2.7855,  -8.7279],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0352,  -3.9590, -15.1150,  -7.7515,  -5.1340,  -1.3757,  -5.2185,
          4.2225,  -7.6701,  -1.3843,  -1.8241,  -0.1864,  -6.2271,   3.1245,
         -2.8623,  -0.1866,  -3.8583,  -2.5498,  -2.0958,   1.5151],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2326,  1.3874,  5.3876, -2.9793, -0.3268, -2.5941,  0.6691, -3.1929,
         2.8679,  1.8254, -6.1776, -0.3306, -1.3872, -2.4116,  0.6734,  2.4992,
        -1.4503, -0.7938, -5.1514, -5.3806], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8549, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5231,   1.5306,  -1.9801,   0.9578,  -2.1314,  -1.1166,   1.7201,
          4.2447,  -3.3527,   1.0543,  -1.5851,  -4.1417,   1.4358,   2.7892,
         -2.5152,   0.2447,  -2.9799,  -0.9728,   0.0885, -18.1332],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5782, -2.5531, -0.6076, -4.1353,  1.9288,  2.6365, -1.8931,  0.4310,
        -8.9364, -5.3574, -3.5066, -4.7904, -1.8646, -0.7485,  2.8055, -3.9742,
        -2.3501, -4.7313, -6.6034, -2.3324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6080, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.8897e-02, -4.7779e+00, -3.0807e+00, -4.4979e+00, -3.8270e+00,
        -3.1472e+00, -7.1078e+00, -6.7138e-01, -7.4364e+00,  1.9688e+00,
         3.5528e+00, -3.9162e+00, -2.8982e+00, -4.5391e+01, -6.1646e+00,
        -7.0162e+00, -3.5113e+00, -9.5183e-01, -4.3508e+00,  2.1677e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1668,   2.8290,  -2.7582,  -2.9955,  -4.8340,  -4.2470,  -2.7662,
         -5.7050,  -1.2500,  -1.4294,  -0.6267,  -0.5547,   2.8150,   4.6598,
         -1.7553,   1.0016, -10.8004,  -3.2665, -20.2051,  -1.0012],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5861, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7704, -1.1694, -4.8919,  2.2934,  2.8537, -1.5781, -2.5901, -4.5674,
        -0.9663, -4.1375, -1.2318, -1.0043, -4.2699, -0.2494, -2.1931,  0.6964,
        -2.6725, -2.0303, -4.0119, -1.1545], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8425,  -4.0380,  -0.4297,  -4.7176,  -8.5463,  -4.5137,  -1.4114,
          2.2220,  -7.7955,  -5.0800, -21.3317,  -3.1512,  -4.5044,  -0.9690,
          2.2160,  -2.7046, -18.2043,  -1.1692,  -4.4468,  -6.2544],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9864, -2.3420,  0.9100, -1.9859, -0.3106, -4.1889,  3.9851, -2.2336,
        -0.1876, -1.6819, -0.1799, -5.3712, -0.7499, -0.2426, -7.3374, -2.6365,
        -2.2800, -2.7806, -2.8343, -1.1346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5055,  -1.2669,   1.9043,   2.9267,  -3.7023,  -0.2807,  -4.3694,
         -1.3314,  -2.6338,   0.8102,   2.8432,  -1.2032,   0.7803,  -2.6170,
         -4.1746,   1.6015,  -1.6506,  -3.4633,  -0.4624, -16.0434],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6919, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6581, -3.0524,  0.8160,  2.6033, -2.2579,  0.0109, -3.1305, -4.2513,
        -3.7859,  0.6228,  3.3537, -3.2968, -1.5928, -5.1264,  0.8644, -6.9667,
         2.1001,  2.7833, -2.9951, -1.5489], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.4524, -2.7628, -6.2813,  2.1638, -3.3997, -1.0980, -1.8548, -1.4629,
        -2.1912, -1.5368, -4.0119, -1.1170, -2.3238, -5.7907, -2.5250, -3.5542,
        -3.4456, -3.3037, -9.3764, -6.1479], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4854,   2.0863,  -2.8335,  -0.0188,  -2.2129,  -0.1294,  -6.5752,
          1.9981,   0.8835,  -3.4067,  -0.6263,  -1.3743,  -0.1020,  -6.1455,
          0.5703,  -5.2317,  -1.8682,  -3.4865,  -1.6705, -10.1999],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1432,  -6.5442,  -9.8943,   1.2037,   1.3992,  -3.3374,  -1.7739,
         -0.6622,  -0.4879,   3.1628,   4.0496,  -3.6743,  -1.7660, -14.7913,
         -4.5203,  -4.9469,  -3.7758,   0.1214,  -0.6329,   5.2318],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3391, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2489,  -5.9408,  -2.0357, -10.5587,  -0.2912,  -2.3069,   1.3711,
         -2.2044,  -2.0650,  -0.2292,   0.4890,  -1.7946,  -2.0916, -10.9360,
         -8.4915,  -4.5872,  -9.7842,  -2.0988,  -1.1512,   3.4882],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6234, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7639,  1.4552, -1.6376,  0.1646, -3.4563,  0.6294, -4.0788, -0.6069,
        -3.4958,  0.4413, -4.0752, -5.5857,  2.7950, -1.5057, -0.9203, -3.4951,
        -1.6594, -5.0875, -0.6522, -0.8547], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4342, -4.8858,  1.0824,  0.4992, -3.4366, -1.2992, -2.8505, -0.4088,
        -4.5058,  1.3489,  1.8405, -3.3672,  1.0408, -2.3463, -1.3211, -4.5865,
         1.7482,  1.3878, -1.9585, -0.6825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1435, -6.2897, -5.4533,  0.7042,  0.5313, -0.9806, -0.8288, -1.1935,
        -2.4394,  2.7820,  3.5479, -6.1308, -0.6059, -1.7398, -1.8337,  1.9663,
         4.2952, -2.1730,  0.3520, -2.5069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7986e+00, -1.1298e+00, -2.1657e+00, -9.5106e-01, -7.9336e+00,
        -2.6092e+00, -3.1141e+00, -3.5964e+00,  1.1310e+00,  2.8543e+00,
        -2.7194e+00, -4.3174e-03, -3.5634e+00, -1.1234e+00, -4.9127e+00,
         2.5588e+00,  1.7596e+00, -2.2874e+00,  5.9482e-01, -1.1448e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1787,  -2.1442,  -5.3336,  -7.8632, -23.9467,  -9.7465, -13.8770,
         -7.6229,  -8.1498,  -7.0775,   0.4030, -21.3291,   0.6961, -12.1158,
         -0.8533,  -2.7530,  -1.4928,  -1.1480,   0.5821,   2.3908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3057, -2.3243, -0.7920, -3.5919,  2.2292,  3.7387, -1.2894,  0.1966,
        -1.9345, -3.6736,  0.1874,  0.6871, -3.4405, -0.8115, -4.7344, -1.2443,
        -6.5943, -1.5476, -2.4608, -3.5671], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.9172,  -4.4761,  -3.1307,  -3.0204,  -2.9458,  -3.7115,  -1.4515,
          3.7436,  -8.4871,  -0.9330,  -6.2689,  -1.7054, -12.5878,  -3.4856,
         -1.7070,  -1.9165,  -0.2105,  -0.7695,   0.5565,   0.3458],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0541, -2.7146, -0.0828, -3.8179,  1.1026,  2.7097, -5.3022, -0.8340,
        -1.1220, -5.4612, -0.2114,  0.7723, -2.3851, -2.6284, -6.3254, -2.7614,
        -4.8260, -4.2510, -7.1358, -6.8916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6056, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6447,   1.9935,   1.5690,  -3.1821,   0.2231,  -2.7961,  -0.6470,
         -0.8354,   2.7010,  -1.5097,  -1.7648, -13.9646,  -2.5375,  -5.0462,
         -9.2423,  -4.6518,   2.9432, -10.5058,  -3.7952,  -5.5138],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.5926,  -8.6077,  -2.0894, -10.5627,  -5.0810,  -7.2401,  -0.1903,
        -22.2792,   0.6249,  -0.5857,  -2.6576,  -2.9748,  -2.2307,  -2.5407,
         -7.6878,   0.6361,  -0.1903,   0.9801,  -1.6748,  -4.8964],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4920, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.2541,  -2.4264,  -3.9652, -10.2445,  -6.9685,  -4.4811,  -3.8367,
         -0.9978,   5.2274,  -6.0718,   0.5084,  -2.1613,   0.2283,  -5.3567,
         -0.5792,  -0.4705,  -2.4933,   0.3751,  -0.6408,  -1.1848],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8465,  -1.6748, -14.8133,  -7.4823,  -5.3348, -15.9220,  -6.0949,
         -2.8939,  -8.5639,   1.3399, -16.7266,  -7.0864, -28.3293, -14.1290,
         -3.9756, -14.9071,  -1.5925,  -3.3173,   3.2943, -15.6608],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.3358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7392,   3.5804,  -6.4404, -12.4550, -13.7862, -41.2244,  -4.3435,
        -12.3681,  -5.7465,  -5.5311,  -1.5381,  -8.0726,   5.2645,  -2.0589,
         -0.5691,  -1.8717,   0.4144,  -5.1717, -42.6266,  -0.4135],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8349, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9674, -1.1151,  3.2659, -2.5121, -1.2830, -3.0336, -1.5736, -4.0243,
         0.1247,  1.9725, -3.3194, -1.0076, -1.0582, -7.5564,  1.2931,  2.3997,
        -5.9864,  1.3869, -3.6054, -3.9631], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3829,  0.7928, -6.9161,  0.0911, -1.9429,  0.1183, -4.0642,  2.2293,
         0.1839, -1.9942,  0.2279, -3.8775, -3.4326,  1.4732,  2.9634, -4.0842,
        -1.8698, -0.4050,  0.2507,  0.6837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9922,   0.6268,  -5.7887,   0.4422,   0.4793, -12.3043,  -1.7855,
         -3.0733,  -2.7336,  -2.9072,   3.0709,  -3.1957,  -4.2795,  -1.1318,
         -3.1173,  -1.6165,   2.6060,   3.9338,  -1.2837,  -0.5078],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7779, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6809, -8.4402,  0.3229, -2.2162, -2.7218, -3.7774,  4.4659, -4.5406,
         0.7064, -4.5825, -0.4178, -3.2359,  0.7892,  2.8634, -4.3932, -1.0546,
        -1.9807, -2.5469, -0.6706,  1.0266], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3862, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9350,  1.6653, -5.6321, -0.4955, -0.8667, -3.8298,  1.4270,  1.6629,
        -4.6044, -3.8915, -3.1444, -2.6154, -3.7018,  2.1920,  2.1476, -7.2404,
        -0.6224, -1.0898, -1.2446,  0.1302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2042, -1.5147,  1.1494,  2.2690, -1.0862, -1.4757, -0.6955, -5.5828,
         2.3981,  2.7799, -3.4127, -1.2443, -5.9968, -1.1588, -4.3021, -1.8446,
        -0.7962, -5.9631,  0.9341, -1.0094], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.1105,  -1.9263,  -0.7778,  -1.5006,  -5.2647,   1.5560,   1.5887,
         -3.3558,  -1.4236,  -8.8102,  -3.6268, -10.3477,  -0.4543,  -4.6570,
          3.8214, -10.2998,  -2.3133,  -2.0700,  -4.6910,  -4.8049],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1425, -3.0379,  0.1183, -0.9996,  3.7878, -2.4317, -0.4277, -2.4902,
        -1.2962,  0.9586,  4.4492, -2.2055,  1.6793, -2.6298, -0.4654, -2.7624,
         4.7915, -0.4068, -0.7715, -0.7679], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4025, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5933,  -6.1185,   2.4809,   0.8094,  -2.7746,  -0.2535,  -2.2012,
          0.0716,   1.4216,   3.9388,  -2.0095,  -0.1577,  -4.2312,  -3.8606,
         -1.0530,   1.8217,  -1.5328,  -2.2502,  -2.4274, -10.0733],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9086, -1.5293, -3.9557,  1.6514, -1.8975, -2.2727, -1.9702, -3.3072,
        -1.6306, -3.9920,  2.2724, -3.5137, -0.1135, -2.6905, -3.8433,  1.2012,
         4.4263, -1.5299,  0.8109, -2.0642], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.2363,  -2.8915,  -1.0527,  -2.2730,  -1.5247, -20.0362,   1.0562,
          0.8385,  -6.1782,  -1.0736,  -3.3381,  -2.9937,  -0.4302,   2.8725,
         -1.8838,  -1.9891,  -0.6698,  -1.2209,   1.8300,   5.1603],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7409,  1.2712, -6.5436, -2.4798, -0.5399, -6.6886,  0.6307,  3.7167,
        -6.1231, -1.3224, -2.0084, -2.0931, -0.4501,  4.7892, -1.9076, -0.3057,
        -3.4259, -3.7503, -0.6472,  1.5940], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2772, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.3513,   1.1917,   0.1036,  -2.8358,   1.9093,  -2.1720,  -3.3439,
          1.6464,   3.5641,  -1.8620,   0.0962,  -1.6885,  -0.0459, -14.2659,
         -1.6168,  -0.9583,  -2.5269,  -0.1308,  -3.8738,  -2.1642],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8162, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5479,   0.1692,  -1.6925,   3.9145,  -4.4047,  -1.7759,  -4.6929,
         -1.4075,  -3.0799, -14.1170,   0.4647,  -1.7387,  -7.9137,   0.1970,
         -2.9943,  -0.6032,  -4.5388,   2.9932,   3.4669,  -3.5522],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1427, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6326, -0.4672, -1.9370, -6.3710,  1.9994,  2.8823, -6.3881, -3.9130,
        -9.2115, -6.5884, -4.0025, -3.9806, -0.6934, -1.3146,  4.0619, -4.5444,
         0.7989, -1.4595, -0.0590, -0.2894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4055, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6600,  0.6038, -2.3275,  1.1283, -1.0192, -2.6644,  2.2955,  2.1780,
        -3.2845, -0.7438, -3.9777, -0.5652, -4.0909,  2.4231,  2.5395, -1.8336,
        -0.6071, -1.3169, -1.6528, -0.1420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7099,  0.3093, -4.1384, -3.6222, -6.9063, -1.8851, -2.3326, -5.7094,
        -1.8916, -1.1579, -3.2525, -0.1583,  1.8397, -4.2917, -0.1414, -4.5338,
        -3.2193, -3.5088,  0.7925, -5.8605], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9765, -7.0293, -1.7045, -3.0687, -4.6321,  2.4183,  3.7099, -1.5906,
        -0.0276, -0.0965, -0.8566,  1.7302,  4.0697, -2.0090,  0.5195, -2.0147,
         0.1551, -4.4445,  0.6527,  2.5614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.2102,  0.5520, -1.3177, -0.7980, -4.4212,  2.7007,  2.7186, -2.0451,
         0.1120, -2.1353, -1.8935,  2.3671,  4.6883, -2.5187,  0.0859, -2.2942,
        -1.0708, -2.9929,  0.4717,  2.5321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9932, -0.0260,  1.1763, -2.9322, -2.1300, -2.1635, -4.0549,  1.8768,
         1.9383, -5.1806, -2.6477, -1.3803, -2.0140, -6.8567,  2.8625,  1.1797,
        -2.0621,  0.9826, -1.8876, -1.9058], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9260,  4.1138, -2.3762, -2.1298, -1.7622, -2.7675,  0.4237,  3.2077,
        -3.4112,  1.6913, -3.6111, -0.8811, -2.9327,  1.1281, -2.5113, -1.1620,
        -4.1416, -1.9458, -7.3848, -2.0118], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1729,  0.9739, -2.2526,  1.2757, -1.0345, -1.0327,  1.8118,  4.6130,
        -4.3181, -0.4788, -3.7468,  0.3035, -3.0370,  2.0348,  2.3809, -2.6174,
        -0.6855, -0.8879, -2.7555, -3.3630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8568,  -1.7543,   2.0269,   2.6429,  -3.0579,  -0.2404, -11.0481,
         -4.6864,  -8.5270,  -6.2941,  -0.0544,  -1.6403,   1.2071,  -9.0020,
         -1.1862,  -2.1276,  -2.4311,   0.8758,   4.0893, -17.6709],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9868, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3598, -4.1668,  1.8266, -0.6718, -2.5094, -3.0584, -8.9587, -5.7794,
        -4.1308, -6.1662, -3.4337, -3.1837,  2.2592,  4.6071, -2.6000,  1.3041,
        -1.7149, -2.9343, -3.9818,  0.5803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2036, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6357, -5.8879, -3.9190, -2.0583, -4.8628, -3.1918, -1.3463, -0.8128,
        -2.4745,  4.7441, -4.1246, -1.2970, -3.5531, -2.1437,  1.0904,  4.3060,
        -3.0862, -0.1887, -5.1770, -3.9608], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1290, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2720,  2.7695,  3.1090, -2.6429, -1.6512, -2.6555, -0.9113, -4.5007,
         5.0094, -6.1869, -0.2844, -2.2975, -1.8501, -4.1333,  3.0270,  1.3479,
        -1.8497, -2.3446, -0.5910, -4.9178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1413, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.3350, -3.7472, -0.5939, -1.2771, -1.6163,  0.7337,  3.9897, -2.2202,
         0.3699, -2.9708, -1.1117, -5.0445,  1.9013,  1.8099, -1.7480,  0.5885,
        -0.8237, -2.0336,  3.4162,  3.1826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 8.4773e-01, -7.4071e-01, -2.7134e+00,  1.6141e+00,  3.0077e+00,
        -3.0898e+00, -4.1758e-01, -2.9228e+00,  4.1492e-01, -3.4895e+00,
         5.1147e+00, -3.2327e+00,  9.1568e-01, -2.7601e+00, -5.6669e+00,
         2.8474e+00,  4.5436e+00, -2.6412e+00,  1.1842e-02, -1.7400e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1470, -15.6891,  -2.8881,  -6.3941,  -0.7675,  -4.3812,   5.0120,
        -14.3297,  -1.5315,  -3.0112, -12.4887,  -2.9663,   0.1726,  -6.8573,
         -3.7655, -13.8851,  -6.5626,  -4.3445,  -3.7732,  -2.6680],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5762,  -6.3059,  -1.3918,  -5.8650,   4.7453, -11.2941,   0.1380,
         -1.9192,  -0.1184,  -3.6292,   2.8742,  -1.9745,  -4.6141,  -0.5811,
         -2.6151,  -1.0358,  -0.1857,   4.4872,  -0.9649,  -2.7858],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8306, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2978, -3.2392, -4.5623, -0.1707,  2.6165, -9.7225, -2.8210, -2.7897,
        -3.2662, -4.3119, -0.7450,  0.5055, -2.1216,  1.5646, -1.6609, -2.4335,
        -2.9797,  3.8335, -1.9941, -5.3635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3480, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8466, -3.8271,  1.8859, -2.5670, -0.2261, -5.3783, -1.1744, -3.1548,
         2.2885,  3.1363, -1.6054,  0.1890, -2.2582, -3.0618,  1.1974,  2.6316,
        -1.7801, -0.4134, -3.2287, -2.0902], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1142, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4062,  0.2433, -1.6223, -3.3929,  1.8610,  2.1308, -4.2265, -0.2618,
        -3.2490, -3.5795, -5.1071,  1.0308,  2.0540, -2.4382, -0.6765, -3.9928,
        -3.8655,  0.9226,  2.3975, -7.0463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.2362,  -4.0383,  -5.4627,  -2.1049,  -2.5240,   3.2730,  -3.8770,
         -2.5415,  -3.6448,  -4.8155,  -0.9303,   0.7451,  -1.9369,  -3.4031,
        -13.9102,  -5.9062,  -4.5823,  -5.1915,  -0.4446,  -2.2894],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6411, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6692, -4.3034, -3.6433,  2.2845, -3.3186, -0.6188, -2.4864, -0.4316,
        -0.0167,  4.8031, -3.1740, -0.6221, -2.1291, -4.0154,  2.3288,  3.7578,
        -3.9558,  0.1783, -0.8042, -2.4993], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9668, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0581,  -7.0004,   0.0294,  -0.3778,   4.0417, -14.2442,  -2.4375,
         -3.8859, -13.8627,  -7.0112, -23.6632,  -7.0474,  -0.9536,  -5.8775,
         -2.6328,  -1.2435,   2.5835,  -1.7658,  -0.9495,  -1.7861],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4754,   0.6289,   2.5778,  -3.8446,  -2.0508,  -8.5631,  -6.0498,
         -3.8792,  -4.5683,  -1.4962, -15.7872,   4.5125, -10.3473,  -0.0168,
         -1.5461,  -1.6190,   1.2239,   1.5909,  -3.2185,  -1.1176],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3369, -3.4357, -4.4692, -1.0526, -0.3594,  0.7844, -2.2431,  1.5608,
        -0.9416,  0.4559, -0.5477,  5.2035, -2.4798, -0.2352, -4.3130, -0.0735,
        -2.7838,  1.8880, -5.9117, -4.9199], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2094,  -3.2191,  -2.5275,   3.1357,  -2.4048,  -0.8986,  -3.2064,
         -3.7113,  -5.0985,  -1.1943,   2.8947, -10.3845,   0.0963,  -5.7323,
        -20.1604,  -3.2089, -33.0281,  -8.7806, -58.6475,  -7.7761],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.2031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1625,  1.0034,  1.9229, -5.9145, -1.0537, -0.7669, -2.9280,  2.5166,
         2.9923, -2.7160,  0.2742, -3.0088, -0.6201, -1.0273,  4.1865, -2.3740,
        -0.4261, -5.7308, -3.3754, -3.9968], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2102, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1965, -6.3176, -4.2820,  5.5081, -3.0131,  0.9518, -1.2308,  0.3567,
        -1.5313,  2.8838, -5.0501, -5.2005, -1.1342, -4.8437, -1.4490,  1.2693,
         2.2598, -2.7702, -0.1937, -6.2635], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5623, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4624, -11.5545,   0.8593,  -1.0687,  -1.7108,   1.1452,  -0.2853,
         -2.4697,   2.5877,   3.7790,  -2.8943,  -0.6769,  -2.9189,  -1.2403,
         -0.4462,  -2.7613,  -5.2050,  -4.4174,  -2.0654,  -6.9192],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9863, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0699,  3.5502, -1.5135,  0.2720, -3.4051, -0.0779, -4.0300,  2.5135,
         2.4442, -2.4454, -0.5091, -1.2110, -1.5005,  1.8820,  0.0571, -1.9773,
         0.1180, -6.8530, -3.6207, -8.0023], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.0456, -2.8058, -0.0758, -3.0800, -4.9211,  0.8582,  3.5641, -2.5419,
         0.5637, -0.1391, -0.9541, -5.3619,  2.9847, -0.1166, -2.1775,  0.6276,
        -3.0549, -0.3661, -1.8190,  4.6744], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3141,   1.7347,  -1.9906,   2.0792,  -2.0049,  -1.5441,  -0.2433,
          4.0698,  -2.8057,  -1.8299,  -3.6573,  -2.9472,  -5.7370,  -8.9528,
          3.5528,  -9.4144,  -3.2963, -21.9573,  -1.5424,  -3.6414],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9147,  0.8654, -3.0505,  2.1242, -1.4106, -2.5360,  1.0964, -4.5576,
        -2.4542, -0.3581, -0.1283, -5.7026, -0.4561, -1.7991, -2.7047, -0.7120,
         4.2531, -2.0680, -1.6415, -3.0558], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7474e-01, -2.8256e+00, -9.7883e-01,  9.9242e-01, -5.7718e+00,
         3.0761e+00,  3.2118e+00, -6.2899e+00, -5.9205e-04, -2.6111e+00,
         1.1119e+00, -5.0709e+00,  2.1744e+00, -4.5223e+00, -3.3208e+00,
        -5.3021e-01, -3.9478e+00,  5.7511e-01, -4.5166e+00,  1.1032e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4308, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2375, -2.8416, -3.7434,  0.0103, -2.7446, -1.9654,  2.9501, -6.3913,
         0.7415, -2.0044, -3.5520,  0.8745,  1.8437, -3.1134, -3.5165, -1.2293,
        -1.3901, -4.0694,  1.6651,  2.8825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5064,  2.2676, -1.4837, -1.9857, -7.4296, -6.0207, -4.2330, -3.1214,
        -0.5378,  2.2551, -0.2284, -3.0322, -1.4781, -3.1012,  0.4341, -4.2255,
         2.1670,  3.3550, -4.0138,  1.2393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2170, -1.9685, -3.3891, -3.0008, -4.3822, -8.9942, -3.9903, -7.3061,
        -2.3266, -0.2817,  0.9793, -4.7014, -3.3805, -3.8998, -6.5674, -0.3366,
        -0.1670, -2.6958, -3.9479, -8.2699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9884, -5.5842, -4.0307, -2.3766, -0.3450,  1.4435, -2.6508,  0.6633,
        -0.1796, -1.4717,  2.3411,  2.5290, -3.3605, -0.1939, -4.1064, -1.5923,
        -8.1760,  4.9342, -1.3024, -0.1996], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2824, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5266, -1.8861, -0.8280, -3.0122, -0.1340, -7.1554,  2.8159,  3.0232,
        -2.3739, -0.9552, -0.6581, -0.6874,  2.2986, -0.8177, -2.9802, -1.1305,
        -2.7818, -0.1984, -4.7019,  2.5316], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.0880,   0.4604,  -0.9091,   4.9906, -13.1217,  -2.8577,  -5.8397,
        -10.8721,  -9.7786, -23.9520,  -8.0857, -26.6370, -20.6764,  -6.7832,
         -9.3514,  -5.6286,  -2.7023,  -3.1274,   0.0983,   1.0444],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.5909, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7575, -8.0472, -2.4483, -3.9243, -4.7310, -3.7434, -2.4887, -0.9754,
        -4.7092,  2.8353,  0.7308, -2.6694, -0.3586, -1.7738, -1.6630,  1.6092,
         2.1008, -2.2401, -2.2249, -1.2546], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5713,   1.4737,   3.0671,  -2.2784,  -1.8625,  -0.5808,  -0.8853,
          3.0382,   3.6053,  -0.9908,   0.3127,  -2.9652,  -3.3750,   2.2137,
          3.8061,  -2.3575,  -1.5544, -11.4011,  -4.5994, -14.9902],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6448, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9834, -5.8872, -1.8987, -1.7543, -4.2730, -1.8138, -4.0242, -1.1583,
         0.9045, -4.3474, -3.5704, -1.3645, -0.3478, -6.7933,  2.0068,  2.6617,
        -4.1207, -0.2579, -3.1328, -1.0837], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9136, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2172,   4.0104,  -2.4214,  -2.4435,  -3.7620, -13.6481,  -3.8500,
         -5.8190,  -1.5162,  -0.9474,  -1.1923,  -4.4984,  -2.1976,  -3.7616,
         -3.4350,   0.4090,   1.0975,  -2.3501,  -2.7671,  -5.8079],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8403, -6.8983,  1.5851,  2.1764, -2.7721,  1.5096, -2.8765, -1.0855,
        -2.0799,  4.6463, -3.0570, -4.0203, -3.1795, -5.3015, -4.9717,  0.2463,
         1.0818, -2.5979,  1.2543, -1.5963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0575, -2.6232, -1.6225, -2.0570, -1.5068,  1.7860,  4.1150, -4.1160,
        -0.3701, -2.5867, -1.5197,  0.0529,  1.5636, -1.7656, -1.9664, -3.2509,
        -7.0507,  1.9336,  0.9015, -3.9016], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9038,  2.1780,  1.7459, -5.2832, -1.6417, -9.6905, -3.2244, -5.0552,
         0.3976, -0.8934,  5.9636, -2.3032, -1.0266, -1.7023, -1.3141, -5.5326,
         2.7563,  2.9368, -4.4711,  0.9313], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6066, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.8125, -7.5598,  0.2639, -3.5987, -2.0799, -3.3363, -1.3547,  3.5672,
        -1.0087,  0.3601, -1.8843, -3.2900,  2.2379,  3.7310, -3.4493, -0.6994,
        -3.8885, -3.9229,  1.3632,  2.5844], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8076, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9730,  -4.1272,   1.4594,   0.1746,  -2.5321,   0.2628,  -1.2731,
         -0.8039,  -0.5642,   3.5687,  -1.2313,   0.1675,  -1.5090,  -2.8101,
          0.6109,   1.6572,  -5.2503,  -5.3321, -11.0774,  -6.5261],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8054, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6041, -0.8557, -2.6867, -0.6969, -4.5119,  1.3397,  1.4052, -2.2038,
        -1.6802, -0.9777, -5.3891,  2.0737,  2.9396, -2.9250, -2.5554, -6.4382,
        -6.8422, -2.6956, -4.1878, -0.0998], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1796, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5929,  -3.9533,  -5.2215,  -2.5126,   0.6366,  -7.0831,  -2.1680,
         -2.1758,  -0.7727,  -4.7889,   0.4642,   1.2321, -10.0041,  -2.3094,
         -2.6537,  -3.7149,  -2.7033,   2.1872,   2.7118,  -2.2941],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3720, -3.3447, -2.0896, -1.7574, -5.7807,  2.6373,  2.9259, -2.6166,
        -3.1085, -3.8233, -6.7838, -1.1975,  3.9951, -4.7761, -1.6172, -3.9831,
        -4.1826, -0.0931,  2.0479, -3.4016], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0661, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.2054, -6.0332, -0.5474, -1.9049, -5.7029,  2.3047,  1.2945, -4.8046,
         1.1825, -0.9210, -0.8260,  2.9243,  4.2538, -1.5421, -1.2462, -2.0197,
         0.2565, -2.9941,  4.7854, -6.2283], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7032,  1.5359,  1.6896, -2.8292,  0.6036, -1.8119, -2.7519, -0.3605,
         4.3431, -5.7928,  1.8658, -2.1122,  0.2782, -5.4681,  2.5153,  2.6796,
        -1.8393,  1.6550, -2.2924,  0.7835], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8723, -3.1977, -9.3114, -3.9283, -5.2050,  0.0224, -2.1371,  5.0823,
        -2.7091, -0.9471, -1.2190, -1.7072, -3.7974, -2.3074,  3.6434, -1.7921,
        -0.8750, -4.0998, -5.1102,  0.4895], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2989, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9176,  3.2246, -1.5055,  0.1196, -3.0816, -0.0412, -2.7733,  3.1107,
        -2.2367, -4.3114,  0.3748, -1.9618, -0.9407, -4.3336,  2.4532, -0.0083,
        -4.5816, -1.5944, -1.8851, -1.4408], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.9887, -3.5539,  1.3542, -1.2629, -0.6669, -2.9658,  1.7816, -2.1566,
         0.7309, -3.5094, -1.0900, -6.0543,  4.5007, -4.5210, -4.4593, -3.9179,
        -8.2080, -2.8796, -1.4682,  2.6423], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1129,  -0.6747,   1.7791,   4.6161,  -4.2897,  -0.4327, -12.0269,
         -6.6052,  -3.5620,  -3.1639,  -0.6634,   0.2174,  -5.0038,  -2.4004,
          1.7288,  -1.5414,  -5.8049,   2.0089,   2.8651,  -2.3760],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7721, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8552,  -2.2717,   0.2464,  -0.8678,  -2.3374,   0.7819,   4.7204,
         -1.9223,  -0.8840,  -1.3173,  -2.3305,   1.3572,   1.1025,  -2.7667,
         -6.8865, -11.4347,  -6.0777,  -3.6712,  -3.8984,  -0.4407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1449, -3.8076, -6.1720,  1.2970,  1.2818, -1.7878, -0.3460, -1.3351,
        -3.5517,  1.6241,  0.1242, -3.4865, -2.2505, -6.1762, -7.4090, -3.3186,
        -4.0633, -0.4216, -0.5515, -3.4350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0370, -4.1651,  2.4542,  3.3045, -5.7889, -3.3574, -1.6850, -2.9342,
        -4.5413, -0.1823,  0.1419, -1.2138, -0.3872, -1.6127, -2.4488,  3.0024,
         4.4263, -3.0082, -3.0099, -4.6664], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3354, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5594,  -0.3662,  -0.6407,  -3.3978,   1.9601,   1.8987,  -2.3734,
         -0.0933, -11.7127,  -6.3771,  -5.5137,  -6.8571,  -1.9234,  -7.2106,
          4.4611,  -3.4527,  -1.3165,  -1.3760,  -3.3544,   1.6529],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3776, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7969, -5.3011,  2.2127,  1.6216, -1.9219,  1.3736, -1.9802, -2.3894,
         2.2141,  4.6198, -4.1705, -0.7938, -7.1941, -1.3098, -6.5978,  2.5114,
         1.7214, -3.7421,  0.2567, -1.6334], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9048,  -3.7313,  -0.3754,   2.8114,  -1.9417,  -1.0744,  -0.4118,
          0.0429,  -2.0906,   4.8959, -10.3438,   0.4829,  -1.3975,   0.8081,
         -3.5831,   2.2624,   0.4492,  -4.7713,  -0.9574,  -4.2295],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.9897e+00, -1.4145e+00,  4.2813e-01, -1.2673e+00, -1.8341e+00,
        -7.2356e+00,  1.5358e+00,  1.0329e+00, -2.7952e+00,  2.8478e-01,
        -4.1916e-01, -3.1285e+00,  2.9753e+00,  3.5940e+00, -2.5785e+00,
         1.5901e+00, -1.6982e+00,  3.9757e-03, -5.4764e+00,  3.1272e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.5550, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.9556, -1.9679, -6.9024,  0.0601, -2.4055, -1.8304, -0.6534, -1.3718,
        -1.1557,  1.2095,  4.7673, -1.5384, -7.5632, -8.2026, -7.4828, -4.9646,
        -7.4138, -6.1309, -1.8425, -1.5856], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3719e+00,  2.3237e-02,  2.4471e+00, -2.2948e+00, -4.3580e-01,
        -1.7988e+00, -3.1140e+00,  1.6075e+00,  2.4549e+00, -6.2137e+00,
        -1.1162e+00, -4.1797e+01, -3.4163e+00, -2.2819e+01, -2.8562e+00,
        -3.4456e+00,  2.3824e+00,  7.7086e-01, -4.1002e+00, -1.4058e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3750, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0443,  -3.4021,  -0.0227,   2.8351,  -2.2961,   0.5953,  -2.0337,
         -0.1900,  -4.0012,   5.0989,  -4.3536,   0.0341,  -2.4275,  -2.5740,
          1.5183,   3.5086,  -1.3902,  -1.9396, -10.9582, -10.6243],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2832, -3.2315, -1.4426,  4.8605, -5.3167, -1.2916, -3.1272, -2.2349,
        -4.0881,  3.2033,  2.6961, -3.4757,  0.0264, -2.5686, -1.9842,  0.6335,
         3.1416, -4.4655, -1.5287, -3.7912], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8737,  0.7828, -6.6849,  3.1366,  3.0402, -2.3224,  0.3565, -2.5544,
         0.3746, -4.7122,  2.1045,  3.4803, -2.5711,  1.3066, -1.9913, -4.2791,
        -1.9752,  4.7029, -3.4196,  1.0393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2405,  -2.6629,   1.0238,   3.8788,  -4.8552,   0.3869,  -1.5238,
         -3.3889,   1.1036,   2.3492,  -2.2587,   1.1213, -13.1780,  -5.9422,
         -3.1975,  -3.6755,  -0.2352,  -0.2119,   0.8349,  -3.6314],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7652, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2360,  2.5677,  3.0451, -2.9440, -1.1932, -1.6243, -0.3160, -1.1112,
         5.3870, -1.9973, -0.1403, -3.5232,  0.3979, -3.4272, -4.3221, -1.8139,
        -1.5720, -2.8911,  0.1964, -5.8480], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8734, -1.8654, -8.7237, -4.0350, -1.9878, -2.5659, -2.2778, -5.2540,
         1.1856, -0.0843, -2.7745, -1.9096, -3.9759, -0.0409, -2.6506,  4.8814,
        -2.5303,  0.4915, -4.6303, -0.1694], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6119,  3.7559, -2.0951, -0.3828, -4.4838, -0.9916, -1.5934,  2.8105,
         1.8046, -1.4814,  1.5511, -2.0058, -0.1897, -3.5979,  2.4908,  1.5498,
        -2.2471, -0.7550, -3.5476, -2.6672], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.1801,  -8.8465,  -0.2405,  -6.5663,  -1.4466,  -7.7544,  -0.5251,
         -0.0145,  -1.6083,   0.3684,  -4.0456,  -3.8785,  -3.8452,   0.1369,
          2.5721,  -3.9565,   0.9377,  -6.2888,  -2.2054, -10.4383],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7233, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.1503,  -1.3205, -14.9284,  -4.7359,  -4.1153,  -3.8296,  -0.6375,
          0.1894,   2.2866,  -4.4246,  -0.9415,  -0.7573, -18.9857,  -4.4895,
         -3.5244,  -5.6327,   0.6590, -30.8934,  -2.5802,  -8.2463],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3516,  -3.4759,  -4.2348, -26.4498,  -7.2138, -24.1995,  -6.1405,
         -9.5394,  -8.4877,  -8.3261,  -5.6184,  -6.3302,  -4.6719,  -2.8536,
         -1.2128,   3.2889,  -2.4769,  -0.8961,  -2.8740,  -2.4238],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3457,   2.7498,   1.3223,  -2.2697,   0.5711,  -2.4899,  -2.1011,
         -3.9645, -60.0183,   1.6501,  -3.0000,   1.2058,  -1.3108,  -0.1332,
          2.0964,   4.1721,  -2.1549,  -0.7559,  -8.2890,  -3.1314],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9599, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9364, -2.9338, -2.6695, -2.6332,  0.8731,  2.6142, -1.6389, -1.2412,
        -1.0946, -0.4908, -7.0888,  1.2115,  2.3061, -3.9814, -0.3144, -1.9692,
         0.5992, -4.1578,  2.6186,  2.9143], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0217, -3.8786, -1.5607, -2.2534, -0.5336, -4.5077,  2.6126, -7.3552,
        -3.1419, -2.7273, -2.1865, -5.5292, -1.1406,  1.3261, -3.0237, -5.4052,
        -1.9231, -0.8259, -4.5525,  2.1935], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2196, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0178,  -0.8422,  -3.5748,  -4.0119,  -7.0792,  -4.4004,  -8.0043,
         -3.5985,  -2.5451,  -0.5796, -13.9650,  -5.9500,  -4.9265,  -5.4647,
         -4.7649,  -3.4052, -11.7487,  -5.8114,  -8.1986,  -7.7937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4515,  -3.5552,  -4.2839,  -3.3914, -16.4522,  -2.6417,  -7.9713,
         -0.2817,  -2.3970, -11.9349,  -5.0443,  -6.5944,  -1.9535,  -4.6076,
         -0.8967,  -2.2554,  -2.0434,  -0.7936, -16.6961,  -3.3932],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7599,  3.2172, -2.0118,  0.6580, -2.9797,  0.9756, -9.4774,  2.5503,
         1.2944, -2.7196,  0.3879, -0.6069, -0.5649, -6.2121,  5.1525, -0.2013,
         0.0861, -3.0494, -0.4112, -5.9284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6612,   2.2177,   4.0001,  -2.4010,   1.0572,  -1.3069,  -4.6320,
          1.2932,  -2.5720,  -3.8220,  -2.1590,  -3.0220,  -0.7650, -11.8580,
         -1.6027,  -0.4842,  -3.6883,  -0.3563,  -0.7880,  -1.9999],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8275, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6596, -4.9061,  2.2599,  3.2864, -1.9558,  0.4873, -1.0762, -0.5827,
         2.1376,  4.8632, -2.6998, -6.4231, -3.2031, -1.3905, -4.9219,  0.6762,
         1.9652, -4.0523, -0.9892, -3.1683], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.2675,  -3.4881, -11.8442,  -3.3874,   0.5671,  -6.2551,  -1.6680,
        -20.3778,  -9.0766,  -4.7511,  -3.6487,  -0.8518,   0.2310,   2.3331,
        -25.4663,  -0.8275,  -1.7629,  -4.9928,  -6.0534, -15.3807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1438, -1.8202, -0.0752, -1.6460, -0.7132,  0.0151,  4.9865, -1.4442,
         0.7254, -2.4122, -0.8197, -5.7437,  2.2760, -1.6185, -3.6056, -2.7620,
        -1.6022, -2.2238, -4.7946,  4.6516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.6527, -0.8050, -2.6684, -1.4683, -4.3433,  2.4867,  1.3161, -2.3293,
         0.3781, -0.7955,  0.6737,  0.2102, -2.3986, -1.7643, -4.1604, -1.6575,
        -2.9523, -1.9415,  4.4430, -1.4186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2581,  -1.3291,  -3.7555,  -0.0438,   4.2817,  -3.6417,   0.5760,
         -1.4513,  -4.4883,   1.8208,   3.0489,  -3.5179,  -1.0620,  -7.5606,
         -6.8479, -21.8905,  -4.7590,  -5.4522,   0.0433,  -1.5755],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8673, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7398,  -2.9248,  -3.1538,  -3.4898,   1.3713,  -0.3056,  -9.2163,
         -3.2178,  -5.6629,  -0.8093,  -4.3255,  -7.0872, -17.0825,  -4.7958,
        -12.7431,  -6.0643,  -4.1965,  -5.8056,  -4.7190, -26.6754],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3322, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9499,  1.1418, -3.0252, -1.2232,  0.4262, -3.6194,  1.8842,  3.0764,
        -3.7303, -0.3394, -1.2901, -0.4156, -8.9255,  1.1304,  1.5281, -3.2903,
        -0.4635, -3.4189, -1.7157, -6.1019], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3211, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7440,  -1.4278,  -5.6517,  -4.6881,  -4.7965,  -3.4815,  -0.8517,
         -3.3240,  -2.8852,  -3.0900,  -0.6471, -11.9477,   2.8938,   1.0731,
         -7.6670,  -1.6807,  -3.8650,  -1.3936,  -4.2461,   1.7139],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3838, -2.2754, -0.0928,  0.3529, -5.5040, -2.6023,  1.1317, -3.6463,
        -2.0846, -2.6113,  2.0388,  2.3841, -1.4841,  1.3507, -2.4092, -1.1289,
        -0.0751,  3.6641, -2.3547, -5.8356], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0601,  1.6460, -1.5711, -2.7786,  1.6204,  3.3070, -3.1672, -1.4513,
        -2.3349,  0.3827, -3.3117,  4.3188, -6.7995, -2.5041, -0.7146, -1.6299,
        -5.2924,  2.6260, -5.4046, -3.2991], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4709, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2631,  -5.1328,  -5.9977,  -6.4958,  -6.8970, -10.1914,  -2.4540,
         -5.0692,  -4.6752,  -5.0516, -10.1691,  -6.4703,  -4.7699,  -6.0697,
         -2.7403,  -6.4160,  -3.9675,  -2.8935,  -6.7595,  -5.8108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.7009,  -4.4695,   0.0558,  -2.1454,  -1.7405,   2.6724,  -2.8639,
         -3.2897,  -3.8013, -15.7410,  -5.5179,  -3.6953,  -2.1656,  -1.8892,
         -0.7801,   4.5408,  -1.8972,  -1.1202,  -0.9485,  -3.4369],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.6950, -36.3799,  -8.5097,  -2.2067,  -7.4957,  -1.1511,   1.3516,
          3.6214,  -7.3795,   0.4621,  -2.3119,  -6.2373,   1.8353,   2.8390,
         -3.5345,  -2.9844, -10.1598,  -3.8233,  -5.3028,  -5.4142],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9189,  -1.8305,  -3.0152,  -5.3431,   0.5097,   1.8323,  -3.2078,
         -0.4111, -10.1311,  -6.1076,  -3.2787,  -4.5232,  -1.1692,  -2.1876,
          4.6988,  -1.6909,  -0.0689,  -1.9332,  -1.2135,  -3.6985],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2844, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1300,   3.3524,  -1.8591,   1.6256,  -2.6541,  -4.0946,   0.1470,
          2.3631,  -2.3170,   0.7427, -14.8076,  -3.8631,  -4.6638,  -1.1695,
         -0.4335,   3.7709,  -4.9661,   0.4463,  -3.3867,   0.3680],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7290,  1.3006,  1.7929, -3.0014, -1.0181, -0.8809, -1.1090,  1.8344,
         3.8732, -1.9364,  0.1358, -2.5621, -3.7591,  1.3887,  2.7981, -3.3719,
        -2.4191, -0.0122, -0.8816,  2.6622], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0571, -19.9222,  -2.7065,  -4.7150,  -0.1496,   0.6486,   4.3623,
        -10.5583,  -3.5369,  -4.2485,  -2.2198,  -0.5024,  -0.1164,   4.1664,
         -1.1197,   1.1189,  -2.4657,  -1.8652,  -4.0716,  -6.0696],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.1320, -2.8523,  1.4374, -2.4750, -2.6862, -4.2268,  1.1016,  1.8502,
        -4.1763, -3.3886, -1.5321,  0.1078,  2.4252,  4.3699, -2.2507, -0.7140,
        -7.4516, -5.5672, -4.6667, -5.4831], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0345, -2.6742,  4.2360, -3.7759,  0.5406, -7.3236, -0.7028, -4.0084,
        -0.4491,  2.9421, -2.5441, -0.6099, -1.5233, -5.6874, -5.0866,  1.0135,
         2.0273, -1.6551,  1.4687, -3.8952], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3836, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4496,  -3.1931,  -1.8733,  -2.2909, -16.0742,  -6.0728,  -0.3505,
          0.1839,  -3.7432,  -1.7807,  -0.6912,  -1.0951,   2.9595,   4.2556,
         -1.1005,  -1.4815,  -0.8167,  -3.2267,   0.0772,   3.7495],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4557, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7256, -0.7518, -4.3810, -1.3503,  0.3613,  4.0552, -2.4028,  0.1554,
        -2.8310, -0.2402, -4.4740,  2.9456,  3.0166, -3.3878, -1.6946, -1.4407,
        -3.3460,  1.6555,  2.8894, -2.3488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2898, -2.4868, -2.8861,  0.9568, -1.9994, -3.4823, -1.0319, -4.6252,
        -8.9964, -4.7015, -8.3572, -3.7391, -1.8814,  2.5523, -2.1579, -2.9269,
        -3.0472, -5.9192, -0.0181,  0.9910], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.7523, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9807, -2.0555, -3.4366,  2.0097,  1.5440, -3.6118,  0.5199, -2.8817,
        -2.5871, -5.8803, -0.3579, -2.6522, -3.5874, -6.8656, -0.7782, -1.2001,
        -1.1751,  4.0653, -1.1997, -0.7450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7763, -0.9135, -1.2192,  1.6182,  4.2607, -1.3623,  0.0920, -3.0789,
        -0.4713, -5.1066,  1.1702,  2.5550, -3.5128, -0.5391, -5.1539, -3.4414,
        -7.1758, -2.2836, -2.0388, -5.4601], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5642, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9576,  -3.2785,  -2.4750,  -6.0710,  -4.0852,  -3.1079,  -0.4940,
         -3.5019,  -4.1239,  -3.8846,  -4.9162,  -3.9276,   0.0865,   1.6483,
         -3.6431,  -3.3076,  -5.5540, -12.8490,  -5.2230,  -1.7730],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9881,  4.5399, -2.6682,  1.1531, -1.7512, -0.2520, -3.2019,  2.5478,
         1.5508, -2.3333, -1.1023, -3.6766, -2.2547,  0.2607,  3.3411, -1.9542,
         1.1615, -1.5805, -2.8597,  0.0688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9312,  0.1282, -5.6714, -2.3237, -2.4653, -3.1610,  1.4034,  2.0622,
        -1.9366, -1.2736, -3.4833, -1.3712, -3.0962, -1.1278,  3.1328, -2.9259,
        -0.6075, -2.7696, -0.0378, -8.1979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8506,  -2.5493,   2.6152,   4.1299,  -3.5194,  -0.1010,  -1.1230,
         -0.1823,   0.0614, -10.6459,  -6.5431,  -2.7036,  -3.4363,  -1.9419,
         -1.1637,   4.7072,  -3.2279,  -0.0465,  -3.5146,   1.8267],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.9351,  -4.8711,  -4.3398, -10.5227,  -5.0328,  -5.2020,  -5.6767,
         -6.0370,  -4.3560,  -4.5995,   1.0904,  -6.5066,  -1.8539,  -5.1777,
         -3.9711,  -5.8198,  -0.1167,  -3.5785,  -2.3491,  -4.1040],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0805,  -1.0980,  -4.8631,   3.8761,  -7.1961,  -4.0326,  -6.7884,
         -5.4570,   1.7458,   1.0349, -10.8500, -15.1681, -38.5839,  -8.0755,
         -7.7060,  -5.2230,  -2.3990,  -3.1481,  -2.8443,  -2.1137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1985, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2647, -2.5360, -0.2444, -5.3607, -2.5837,  2.4687, -1.8046, -0.2760,
        -1.3978, -0.2192, -0.1235,  4.2193, -2.1703,  0.5945, -2.3484, -1.1031,
        -3.8734,  3.0325,  0.3984, -2.2981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6321e-01,  1.9743e+00, -5.1786e+00, -1.3760e+00, -4.2734e+00,
        -1.3198e+00, -8.0170e+00, -1.4792e+00, -3.3327e+00, -4.1372e+00,
         5.1372e-03, -3.0307e-01, -5.0534e-01,  9.0155e-01,  4.4693e+00,
        -1.8108e+00,  8.3118e-01, -1.5913e+00, -4.9169e+00,  1.9940e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2188, -0.9577, -3.1614,  1.3891,  2.9031, -4.4819, -0.3311, -1.2346,
        -0.6549,  2.7460,  1.6328, -2.3055,  0.2171, -2.8103, -2.7932, -4.8751,
        -0.7737,  1.2036, -2.3872,  0.7667], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7345, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9965,  -1.6852,  -7.3349,  -2.0298, -11.6203,  -6.4854,  -9.0763,
         -5.2296,  -4.1351,  -0.2461,  -1.6325,   3.7129,  -4.0670,  -3.1656,
         -4.2032,  -1.7862,  -5.0146,   1.4772,  -2.5558,  -1.8159],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5359,  -5.1259,  -0.5847, -13.9101,  -2.6914,  -7.3479,  -0.7543,
         -1.9989,   4.3439,  -6.2827,  -0.6092,  -3.3023,  -0.1079,  -2.6997,
          0.8478,   4.4427,  -4.5105,  -0.6772,  -8.1137,  -5.8796],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6713, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0887,  0.6014,  0.3763,  4.5582, -1.9986,  0.6182, -3.3537,  0.1218,
        -3.4856, -0.2598,  4.4668, -2.4630,  0.4746, -1.7739, -0.3953, -4.5253,
         2.1742,  2.2357, -0.9177,  0.2803], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.6560,  -5.6522,  -6.7152,  -7.5267,  -8.5952,  -6.3261,  -6.0203,
         -9.5062,  -7.8376,  -8.6022,  -6.6520, -13.1961,  -8.6719,  -6.2509,
         -9.8953,  -7.2168,  -7.2191,  -7.1056,  -7.8824,  -7.8492],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.7689, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7311, -5.4666, -3.7090, -6.1551, -4.8110, -4.0032, -5.2345, -2.2878,
        -2.6261, -3.1763, -3.9922, -5.0069, -3.0867, -1.6809, -3.7400, -2.0494,
        -4.7858, -3.9804, -5.4614,  1.6054], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.8189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1382, -0.5348, -2.4676,  1.8599,  3.9024, -3.0686, -1.0575, -2.9773,
        -3.1643, -4.9832,  1.2043,  2.3652, -2.1003,  1.0047, -3.5646,  0.1114,
        -4.4451,  2.0005,  2.8647, -2.4549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7683, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6478, -21.7587,   3.8398,  -8.6472,  -1.7151,  -7.4301,  -4.8127,
         -1.0563,  -6.3295,   4.5848,  -2.4017,  -3.2655, -17.9014,  -7.1561,
         -4.6740,  -6.2716,  -0.7663,  -0.2722,  -0.4684,  -4.5452],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1661,  0.3222, -0.0414,  3.6193, -5.4324,  1.3759, -0.1065, -0.6385,
         1.2771,  5.0742, -3.1244, -1.7625, -3.8496, -1.7132, -7.8679,  4.8861,
        -0.3372,  1.2470, -0.5206,  0.1867], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5286, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.0435e-01,  1.8409e+00,  2.7476e+00, -2.0462e+00, -1.0938e+00,
        -3.2764e+00, -1.4643e+01, -6.1331e+00, -8.4284e+00, -7.9000e+00,
        -4.2533e+00, -3.9558e+00, -4.7897e+00, -4.8927e+00, -4.8335e+00,
        -7.5283e+00,  4.9691e-01,  4.4422e-03, -9.1508e+00, -2.5989e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.9824,   0.7137,  -5.1598,  -0.7689,  -2.3335,  -4.3108,   1.0846,
          3.3628,  -1.4920,  -0.2785,  -2.4828,   0.7532, -26.6170,   1.7741,
         -2.1361,  -3.4065,   0.1616,  -2.5116,  -2.5493,   1.0187],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2106,  3.3873, -3.6641, -2.1722, -4.5172, -7.2119, -4.8644, -4.3381,
         0.5550,  0.3690, -0.4024, -1.6138,  0.8641, -2.2896, -4.1201,  1.8961,
         1.9437, -2.4267,  1.3830, -2.2883], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2595,  1.8601,  1.3497, -2.1673,  1.3463, -3.0049, -0.4775, -3.9975,
         4.3431, -5.5092, -0.8667, -1.7411, -2.4408, -4.6684,  2.2719,  2.8252,
        -1.8549,  1.1680, -1.7813,  0.0234], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9277,  -3.5829,   1.3845,   3.3324,  -1.6655,   0.2419,  -2.1699,
         -0.8817,  -3.9913,  -1.6220,   2.8626,  -3.0376,  -0.8919,  -2.4790,
         -2.3645,   0.6126,   3.4358,  -1.7683,   0.1923, -23.3500],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8335, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4516, -3.5279, -1.8965, -3.4637, -3.4476, -8.3089, -0.8233, -0.6155,
        -4.3095, -0.1047, -1.2312, -2.9864,  1.1279,  2.3965, -2.1116, -0.3897,
        -1.4103, -3.2242,  1.8121,  2.4463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5760, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6344,  -2.4752,   1.8466,   3.9781,  -1.6021,  -7.9686, -21.5072,
         -7.0504,  -3.3667,  -4.0207,  -0.9618,   1.8366,  -1.7624,  -2.4163,
          0.4733,  -1.8795,  -3.9846,   1.3619,   0.7844,  -3.4057],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4507,  -0.0753, -12.2651,  -5.0099,  -3.4649,  -4.5098,  -1.5245,
         -5.0363,   4.3795,  -5.5564,   0.8216,  -1.4726,  -3.1938,   1.7604,
         -0.3466,  -3.5482,  -1.3305,  -2.3748,  -0.6145,  -0.7345],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3273, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-37.1061,  -4.5725, -10.8612,  -3.0385,  -0.5936,  -2.9294,   4.8525,
         -8.0658,   0.3449,  -3.6032,  -0.2465,  -5.5319,   2.3998,   2.6623,
         -6.1900,  -0.3688,  -2.9454,  -1.9461,  -3.2683,   1.2553],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6306, -3.9273,  1.1170,  2.9630, -3.3338,  0.0474, -3.8098, -9.9328,
        -3.6978,  4.1719, -1.2454,  1.9345, -1.5495,  0.0872,  1.5285,  4.8525,
        -2.7878,  0.6910, -2.9291, -1.5165], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5505, -1.7405, -0.1800, -3.8472,  1.2460,  2.8862, -2.6344,  1.1463,
        -1.8908, -0.2767, -3.8479,  0.4069,  3.1082, -5.0315, -0.1022, -1.7208,
        -1.5523, -0.5663,  3.9187, -3.2550], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6692, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6305, -0.1960, -2.8804,  5.5572, -0.7393, -1.2410, -3.6794, -0.4712,
        -3.6918, -2.0041,  2.4217, -3.3074,  1.4237, -2.4031, -3.0627,  1.2836,
         3.9937, -1.4739,  1.7872, -1.2651], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5880,  3.6070, -6.2619,  0.2031, -2.9771, -1.2054, -2.0222,  2.0752,
        -4.7207, -0.9386, -2.0587, -1.0857, -5.5284, -2.4133,  2.0095, -2.4355,
        -0.0777, -2.3715, -3.0974,  1.4798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2616, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.2456,   3.8946,  -1.4105,   0.2671,  -2.8503,  -1.5282,  -2.4667,
          4.6785,  -3.7189,  -0.3313,  -2.4375,  -1.0196, -16.2579,   5.9182,
         -3.4880,   1.7843,  -2.7100,  -0.4796,  -4.8177,   0.9809],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2374, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5409, -2.1857,  1.3343, -3.8297, -3.4760,  1.7238,  3.1673, -2.3988,
         1.3352, -3.7389,  0.0425, -2.1557,  4.4265, -2.3927,  0.3052, -2.4693,
        -0.3668, -3.5095,  2.9883, -2.2825], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7810,  -3.0788,  -4.5769, -11.2904,  -5.8337, -14.8496,  -1.7443,
         -9.4233,  -5.2203,  -7.4241,  -6.1626,  -3.8554,  -2.3583,  -5.3246,
          0.4354,   4.6025,  -2.5767,   0.8118,  -2.6463,  -4.7581],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2492,  -1.4008, -13.8581,  -0.1731,  -0.3604,  -3.7127,  -1.5746,
         -2.8873,  -8.3105,  -4.1403,   0.0335,   0.5714,  -3.4048,   0.7652,
         -1.4552,  -3.7018,  -5.4178,  -2.1946,   2.8682,  -1.5992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6601, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0822,  -2.5317,   4.7969,  -3.8329,  -1.7200,  -2.5642,  -0.6853,
         -7.5980,   1.2054,   0.3455,  -4.8912,  -0.9543,  -0.1166,  -1.9660,
         -1.9617,   4.0053,  -3.6749,  -1.2309, -24.3189,  -6.0944],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4753, -3.0304, -4.7384, -1.4338, -2.2268,  2.1350,  3.7191, -2.2119,
         1.4207, -2.1324,  0.1806, -3.1471,  5.6889, -2.2581,  0.2071, -2.0688,
         0.1510, -3.7645,  4.4489, -1.6149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7576, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9352,  -1.4470,  -6.4437,   2.0693,  -4.0489,  -3.6752,  -2.8368,
         -8.7002,  -5.0536,  -3.7802,  -4.3306,  -0.8224,  -1.0528,   4.1860,
         -4.8368,  -1.2625,  -3.3528, -22.6222,  -9.2800,  -5.2081],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6982,  -1.3402,   2.9327,  -1.1181,  -4.8404,   0.8701,  -3.4934,
         -1.5377,  -3.1995,   5.4142,  -2.6534,  -1.0808, -14.4964,  -6.4382,
         -4.1327,  -4.1637,  -1.5329,  -5.7028,   3.1541,   3.7163],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7147, -12.2980,  -4.5082,  -9.1396,  -5.3040,  -4.6250,  -0.1105,
         -1.4838,   5.8111, -13.8189,   0.4501,  -2.7274,  -4.7011,  -0.4865,
         -1.9080,  -9.2751,  -1.5476, -11.0208,  -4.7469,  -3.4187],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5432, -1.2351, -0.1175, -5.8030, -1.9739, -3.0384, -4.1931, -2.3973,
        -0.2993, -5.0330, -2.5428, -9.2999, -6.5025, -5.0621, -5.1319, -1.4588,
        -0.0921, -1.3939, -3.9238, -2.0164], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-60.3571,  -8.8590, -25.0422,  -5.0077,  -2.8006, -30.5013,  -2.0831,
        -57.6729,  -3.7003,   0.7172, -10.6569,  -0.3245,  -1.6865,  -1.3547,
         -7.0170,   3.3634, -17.6378,  -0.6953,  -2.6507,  -3.6510],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-11.8809, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7122,  3.4311, -3.7745, -0.9280, -0.6887, -2.7846,  2.2933,  4.0784,
        -2.2560, -1.9816, -9.1315, -3.4352, -3.3239, -5.2421, -3.2447,  0.5140,
         3.6545, -5.6225,  0.0377, -3.5139], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1532,  0.5388, -4.5508, -1.5368,  1.7589,  2.5337, -1.2015, -0.7436,
        -8.7654, -5.5534, -6.7541, -9.8155, -4.7624, -1.2849, -0.6456,  1.0146,
        -3.4898,  0.7696, -1.9059, -0.0530], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3943, -1.3018, -6.6019,  2.9464,  1.4367, -3.4610, -0.8088, -0.6222,
        -0.2835,  0.2317,  4.1004, -2.4708,  1.3121, -3.0069, -3.6071, -2.1007,
         0.7920, -6.8637,  1.1710, -0.5226], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-13.3403,  -2.9917,   0.2622,   0.4356,   5.4536,  -1.7182,   1.0244,
         -2.1102,  -2.8423,   2.2541,  -0.3608,  -3.9587,  -1.4648,  -1.5743,
         -2.9223,  -3.6271,   4.3995,  -1.0664,   1.5218,  -0.3830],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1504, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2524,  -6.2516,  -0.7023,  -1.0147,  -1.7400,  -1.9787, -18.1881,
         -4.1019,  -1.6635, -15.6957,  -3.7182,  -9.9589,  -0.3074,  -2.6625,
          6.3350, -10.1936,  -1.9003,  -2.2435, -18.5955,  -6.7402],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3017,  3.9668, -4.4121, -3.3593, -2.1612, -2.0276, -5.7581,  2.7180,
         3.0725, -2.5599, -3.1766, -3.8585, -6.7712, -1.0105,  4.3647, -4.7597,
        -1.6490, -3.9496, -4.1856,  0.0335], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1026, -4.5579, -1.5988, -5.3324,  1.0449,  0.3521, -1.7852,  0.6711,
        -0.2820, -2.3719,  1.6877,  4.4422, -4.2115,  0.0796, -3.3663, -1.0572,
        -3.7295, -0.7973,  3.4757, -4.7013], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5269, -2.0768,  2.1786,  3.6978, -1.4362,  0.3188, -2.9474, -1.5884,
        -3.5983,  0.0433,  3.5437, -5.2224, -0.1155, -2.4416,  0.0991, -2.5993,
         1.5269,  4.6301, -1.6530,  0.5864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0825,  1.7985,  2.5179, -4.3539,  0.6611, -2.1944, -4.1561,  1.5892,
        -3.2036, -3.2528, -0.1812, -3.6180,  0.2303, -5.5867,  2.5962,  2.2606,
        -2.3168, -0.2049, -1.3529, -2.9164], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0883, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5841, -4.6584, -2.4716,  0.4400,  3.5854, -3.9766, -2.1388, -2.5251,
         0.0360, -3.8181,  2.0982,  4.3236, -4.4149, -3.6382, -4.5629, -0.2846,
        -2.8207, -3.8946,  3.4892, -4.6000], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6708, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8749,  0.9971, -2.9605,  0.4959, -5.9486,  2.2814,  2.7210, -2.5697,
        -2.3774, -1.9888,  0.1670,  0.3650, -4.8088, -4.4516, -2.9348, -5.0058,
        -3.2541, -6.6457, -0.9525, -2.9779], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0109,  0.8691, -1.3784, -0.0274, -1.2324, -3.6891,  2.4359,  2.7316,
        -3.7540,  1.1084, -2.4944, -1.8945,  0.8822,  5.2449, -2.6227,  1.5557,
        -1.8097, -0.5694, -6.9453,  0.5646], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0328, -2.3210, -0.2543, -9.9723,  1.7285, -2.6458, -3.2024,  0.4877,
        -1.6886, -0.2628, -0.5197,  4.2517, -3.3020,  0.7849, -1.8327, -0.8557,
         1.5201,  1.1415, -1.8822, -3.9997], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0477,  1.4526, -2.8189, -2.6984, -2.7251, -1.4326,  1.9351, -2.1431,
         1.3179, -2.0701, -5.2281,  1.1834, -2.5245, -4.4753, -3.0304, -4.7384,
        -1.4338, -2.2268,  2.1350,  3.7191], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1714,  1.4481, -2.7010,  0.1351, -5.3897, -2.1270, -2.7989,  0.2324,
         0.7527, -0.7094,  0.9233, -0.3970, -0.3664, -1.5990,  2.0693, -5.8364,
         2.3344, -2.8488, -1.6772, -5.5287], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1456, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5349,  1.2854,  1.9739, -1.8637,  1.3987, -1.1269, -0.6996, -3.1097,
         5.7008, -4.2200, -1.9515, -2.1855, -0.1680, -3.3837,  3.7971,  1.6532,
        -8.4489, -0.8505, -3.9959, -2.1220], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1426, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3272, -4.5083, -1.4156, -4.4154,  0.0939, -3.8155,  1.0488,  2.7999,
        -2.6680, -0.5249, -1.1192, -0.6091,  0.1448, -5.1191, -3.5657, -1.3652,
        -1.7146, -1.0353, -6.9557,  2.6099], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5404, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3261, -2.4597, -3.3112, -0.1860,  2.4623, -3.1246, -1.9260, -5.3479,
        -3.9484, -2.3494, -2.1502, -1.2591,  0.2572, -3.5164, -2.4489, -0.9523,
         3.3897, -2.0474, -0.6349, -5.6541], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8267, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8220,  -0.6226,  -0.5271,   4.1058,  -2.5462,  -2.2605,  -6.2360,
        -12.2232, -12.5742,  -5.5707,  -4.5713,  -0.0882, -15.5477, -11.9061,
         -4.0020,  -3.0943,  -1.7207,  -0.0196,  -3.8733,   1.7178],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4736,  2.2277,  3.8308, -2.5143, -0.0819, -3.1823,  0.7931, -5.7069,
         2.3467, -1.1336, -2.8184, -0.7381, -3.5664, -0.2307, -2.3050,  4.6447,
        -4.1455,  1.6861, -2.2599,  0.8227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9588,  2.4969,  1.8783, -2.5722,  1.3571, -3.1875, -0.9823, -5.5970,
         0.7334,  1.6930, -1.9187,  1.5977, -0.4920, -0.2716,  2.0662,  3.3903,
        -3.7995, -1.5827, -3.0547, -1.4712], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0650, -4.3515, -4.6623, -6.1116,  0.7621, -2.7581, -3.2691, -2.1417,
        -4.8595, -0.4007, -0.4161, -5.5257, -1.4381, -4.0699, -2.6226, -4.6584,
         2.6960, -4.0272, -0.4487, -3.8160], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.6592, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4890, -1.6180,  4.6969, -2.9819,  0.8864, -2.6966, -0.0950, -5.2399,
         5.2500, -4.2074,  0.9771, -2.4048,  0.9640, -4.9623, -1.3503,  3.4847,
        -3.5339, -0.7806, -2.9674, -1.7705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8930, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4219, -3.0381, -0.3148, -2.2899, -2.2163, -3.8681,  2.3945,  3.2534,
        -2.5875,  0.6295, -3.3207, -0.1067, -3.8316,  2.3236,  1.0402, -2.7356,
         0.2782, -2.3634,  0.3922,  1.6734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2265,   3.2916,  -7.9059,   1.0956, -12.5693,  -4.6803,  -4.3045,
         -3.8266,  -1.5821,   2.8128,   1.1041,  -3.2125,  -0.0574,  -2.6356,
         -2.6465,   0.7184,   2.8080,  -3.1903,  -2.3195, -10.1724],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7109,   2.2259,   3.9320,  -4.7641,  -4.3873,  -6.0709,  -1.5561,
        -27.6509,  -0.3579,   0.3542,  -6.3257,  -0.1156,  -0.9338,  -2.9538,
          1.0870,   2.6084,  -1.7236,  -0.5832,  -0.7545,  -0.5549],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.9337, -0.7769, -1.7035, -0.3572, -2.7398, -0.7485, -4.0723,  5.3230,
        -2.6892, -1.5158, -1.2249, -1.6198, -5.6704, -0.6932, -0.4681, -1.0375,
        -1.9399, -0.8545, -2.0100,  1.8377], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5082,  -8.9390,   1.1648,   4.1407,  -5.2390,  -1.6397,  -1.3691,
          0.4059,  -1.7886,   4.7835,  -2.4758,  -1.0699,  -0.0547,  -3.9659,
          2.6018,   4.6513,  -2.1677,   0.9715,  -6.8331, -13.8624],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9804,  1.2556,  3.0543, -1.7829, -1.4437, -2.4500, -2.7254,  1.0668,
         4.3006, -2.7190,  0.6018, -1.5957, -1.1051, -7.1595,  1.0283, -0.1907,
        -1.5187,  1.2055, -0.8878,  0.0987], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6474, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6208, -3.1994,  4.3186, -4.3193, -0.6497, -1.2762,  0.4132, -3.7549,
         6.2309, -7.6311,  1.5095, -2.4291, -3.0848,  2.0170,  3.4053, -2.7346,
        -2.5726, -2.7904, -2.2855, -5.4783], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1728,  -2.3835,  -3.9284,   1.5684,   2.3782,  -4.9662,  -1.5899,
        -16.8373,  -7.2027,  -4.4530,  -4.4284,  -2.1570,  -3.4334,  -0.6918,
        -23.0878,  -1.5131,  -2.2958,  -6.0091,   1.5762,  -4.8899],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-21.3130,   0.9329,  -3.9674,  -5.0078,  -1.8130,   3.6347,  -4.6372,
         -0.3173,  -3.0672,  -4.7171,  -5.3202,  -0.7702,   1.0321,  -0.7385,
          1.0863,  -0.1181,  -3.8527,   2.5143,  -2.6576,  -5.7947],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8922,  3.0459,  2.9078, -2.7200,  0.5071, -1.1100, -0.9343,  1.1520,
         4.6686, -1.0892, -1.0676, -3.8022,  0.9229, -4.9460,  2.8342,  1.4003,
        -3.3507,  0.1014, -3.0508, -0.9821], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4657, -3.2692,  2.0439,  2.0065, -1.6864,  1.5348, -1.9668, -4.5171,
         2.3624,  3.6343, -2.7247, -1.5260, -1.0645, -1.7111, -6.9054,  5.1951,
        -1.3214, -1.7979, -1.7150, -1.5117], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2462, -3.4406, -0.8673, -3.9704, -0.2749, -2.1923,  1.5811, -3.7456,
        -1.4875, -2.8340, -0.8376, -4.8084,  2.3674, -3.3859, -0.5122, -2.5548,
        -1.4345, -3.0488,  2.8312,  3.7819], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0278, -3.9956, -1.7834, -2.6311,  1.3714,  3.8200, -1.8338, -1.3793,
        -2.7408, -1.9498,  1.1314,  3.5144, -8.5331, -0.6702, -2.5948, -1.1837,
        -3.8767, -1.1622,  2.5316, -3.2339], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0665,  -5.1497, -14.6367,  -5.6228,  -1.2586,  -5.3514,  -4.0152,
         -1.6061,  -2.0154,  -1.4172,   1.6786,  -5.0766,  -4.4185,  -0.7991,
         -7.1193,   1.6051,   2.7500,  -4.5199,  -2.8925, -15.6772],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6658,  -0.6921,  -5.4494,   0.7354,   3.1259,  -2.6154,  -0.1627,
         -4.1484,  -0.3051,  -4.9066,   0.0817,   2.9211,  -1.8721,  -0.4722,
         -2.9448,   0.3104, -49.1733,  -0.6887,   1.0451,  -6.7202],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4955,  0.5952,  4.6766, -3.8251,  1.4338, -4.5880, -1.8491, -4.3361,
         2.1042,  1.4958, -0.6716,  1.2078, -0.0794, -0.4125, -2.1616,  4.1270,
        -5.2019, -0.0853, -2.8019, -0.6692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0201,  -2.8611,  -0.3972,  -2.7218,  -2.0969,   4.6978,  -6.8197,
         -7.1958, -16.3191,  -3.4028,  -7.5207,  -0.1578,  -1.5802,   1.9937,
         -9.2292,  -5.4418,  -9.4819,  -3.1774,  -5.3217,  -3.4724],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9743, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9700, -0.0815, -2.9814,  2.3238,  2.4798, -1.4566,  0.9342, -1.5308,
        -0.4167, -2.7041,  4.2852, -5.4092, -1.7299, -5.2218, -0.4455, -3.9154,
         2.2796,  3.7068, -2.4476,  1.2743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7513, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2550,   0.1797,   3.1966,  -1.3344,   0.7754,  -1.5937,  -3.8365,
          0.8711,   5.4268,  -2.4800,   2.1658,  -9.2105,  -5.9285,  -3.0396,
         -3.1646,  -0.7514,   0.2782,   3.3933, -13.2019,   0.0632],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8387,  -1.8794,  -0.8108,  -4.4613,   2.9790,   3.0545,  -4.0181,
         -1.1976,  -0.5198,  -2.3988,   1.3938,   4.2548,  -3.6026,   0.0914,
        -14.8014,  -4.3903,  -4.8951,  -5.5169,  -0.3969,  -0.3991],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5970, -1.3218, -1.7906,  3.1488,  2.2779, -1.5964,  0.5909, -1.1430,
        -3.6643,  1.6843,  4.1121, -3.1641,  0.3995, -1.5381,  0.1504, -4.8219,
         3.1633,  3.1100, -2.8836, -4.8177], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.2994,  -1.1719,   5.3498,  -2.2283,   1.6218, -10.6919, -11.1033,
         -2.7348,  -6.2554,   0.7363,  -1.8206,   2.7531,  -3.4901,  -3.5594,
         -3.6901,   0.6468,  -5.1389,   1.3122,   4.1607,  -3.5498],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8777, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1147, -2.2754,  0.0322, -2.8579,  0.2777, -4.5225, -2.8516,  2.4249,
        -3.5150,  0.5010, -4.0913,  1.3996, -4.2128,  2.6827, -2.3939, -4.8542,
        -2.6316, -5.3042, -3.3727,  1.4298], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0234,  -4.5548,  -3.9515,  -7.7020,  -5.5894,  -8.6568,  -1.4179,
         -4.5554,   1.6700,  -4.7641,  -2.9558,  -4.1028,  -3.4998,  -8.9223,
         -4.5024,  -5.3292,  -3.3933,  -5.9364,  -4.3500, -17.0447],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0291, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.3674,  -3.3859,  -0.5122,  -2.5548,  -1.4345,  -3.0488,   2.8312,
          3.7819,  -2.2305,  -0.6668,  -2.1235,  -7.1670,   1.1320,   2.7897,
         -4.2600,  -3.8509, -10.3142,  -8.3445,  -3.7188,  -6.1377],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7646e+01, -1.0740e+01, -5.0948e+00, -6.5695e+00,  5.3660e-01,
        -2.2683e+01,  2.3407e+00, -8.1313e+00, -6.3266e-01, -3.3625e+00,
        -1.0551e-02, -4.4456e+00,  1.9227e+00,  1.1469e+00, -2.5482e+00,
         3.7976e-01, -1.6870e+00, -3.8056e-01, -2.7081e+00,  4.4188e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7947, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9943, -1.9239, -4.8845, -2.5327,  2.5885, -2.3921,  0.3647, -2.7299,
        -2.5113,  0.2272,  3.3557, -2.6176, -1.5560, -3.6436, -0.7728, -7.9419,
         1.1143,  0.8253, -2.9586,  1.4732], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8191,  -6.3564,   4.3760,  -5.5921, -16.6561,  -3.1332,  -2.5797,
         -2.5119,   0.9334,   2.3811,  -2.0372,  -1.5694,  -2.6761,  -0.4587,
         -2.5427,   0.0639,   2.7620,  -3.5144,   0.9345,  -1.8940],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3871,  3.1487, -2.3258,  1.2400, -1.9015, -1.0411, -5.3475,  1.0510,
         0.4678, -1.8162,  0.1340, -6.5352, -3.6913,  1.6465,  4.0014, -6.0909,
        -0.6107, -1.5393, -1.6158,  2.0778], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8064,  -1.1517,   1.7920,  -8.5721,  -5.6697,  -3.5262,  -3.6384,
         -0.1419,   2.5808, -13.0209,  -3.0368,  -4.4677,  -3.2275,  -4.0528,
         -6.6026,   1.8854,  -3.1763,  -0.9970,  -2.4356,  -4.7428],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0198, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0475,  -3.7728,  -0.8853,  -2.5996,   3.1548,  -5.3175,  -4.2245,
        -11.2645,  -3.2370,  -5.7019,  -2.2635,   1.0620,   0.9756,  -2.7727,
         -0.5421,  -0.3950,  -3.8738,   1.7291,   3.0984,  -0.9649],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.6280,   1.9548,   2.0394, -14.9615,  -3.1539,  -5.2947,  -3.2939,
         -5.3288,   1.8109,  -0.1769,  -4.0345,   0.5971,  -1.2141,  -2.5656,
          1.8869,   4.6781,  -2.8858,   1.5543,  -1.9401,   0.6460],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4399,  -8.3505,   2.3171, -15.7652,  -2.6737,  -7.9678,  -9.6684,
         -5.9607,  -6.1217,  -5.4436,  -4.1449,  -1.7612,  -1.4773,   4.2650,
         -3.6745,  -1.4076,  -4.0127,   0.9693,  -5.2757,   1.9400],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2027, -3.3549,  1.0900, -2.8462, -2.0720,  1.8225,  3.9898, -3.1620,
         0.1326, -2.6261, -2.7344, -5.7985,  0.4631,  0.4354, -2.4790, -2.3667,
        -3.0294, -1.2195,  1.9033,  4.5232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8548, -1.2411, -4.0879,  0.8311,  1.7910, -3.3975,  0.5979, -2.4426,
        -0.3249, -0.6178,  1.2042, -3.0315, -0.3345, -2.1945, -2.2757,  1.1707,
         2.6778, -2.4378,  0.1561, -6.9243], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.2866, -13.2124,  -0.5547,  -2.2936,  -1.7658,   1.1164,  -5.2201,
         -2.6227,  -2.7962, -10.2517,  -5.2713,  -4.6190,  -4.4903,  -0.8730,
         -2.7988,   1.9685,  -3.6655,  -2.7268,  -3.8310,  -1.4834],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7037,  -3.8831,  -3.4131, -10.6077,  -5.2470,  -8.8711,  -4.7844,
         -3.3984,   0.3693,   0.1844,   4.7037,  -5.7485,  -0.1657,  -1.4758,
         -0.2143,  -0.9864,   0.1104,  -1.5581,  -0.3855, -10.3501],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3228,  2.9136, -3.6735, -1.1791, -2.2711,  0.9034, -6.7325,  1.5619,
         1.7923, -1.0948, -0.6790, -2.1976, -3.2100,  2.2218,  4.1176, -2.4254,
         0.3961, -4.6444, -3.8241, -3.8208], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2084, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.4150,   4.3404,  -9.8807,  -1.8117,  -5.3142,  -2.0011,  -3.5538,
          0.1459,   0.9579,  -0.8485,  -0.9891,   0.0265,  -4.7745,   2.0874,
         -1.5246,  -4.6797,  -3.4887, -11.5123,  -6.3556,  -6.3292],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7545, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1837,  -2.4470,  -2.0714,  -1.9983,  -4.9772,   1.7665,   2.4557,
         -1.8972,   0.0802,  -7.8395,  -6.6126,  -6.5462,  -7.3072,  -8.2076,
         -2.1050, -21.7333,   1.3581,   1.5486,  -3.0268,   1.8130],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4966, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8744,  -3.4876,  -4.3688,   0.3392,  -6.0991,   1.4495,   3.0869,
         -2.4111,  -1.4956,  -0.0477,  -3.9752,   1.8035,   2.0317,  -3.2034,
         -0.6297, -36.8508,  -5.6884,  -6.7537,  -3.5604,  -2.7107],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0444,   1.2611, -10.2272,  -2.6656,  -0.3186,  -2.1165,  -5.9606,
          3.1467,   2.0044,  -2.6650,   0.0374,  -0.8978,  -1.3008,   0.5849,
          2.6147,  -2.7781,  -0.1988,  -4.4208,  -1.5751,   0.7161],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6398, -1.5151,  0.2055, -0.6645, -0.9916, -6.6196,  2.0509, -0.0657,
        -2.5770, -0.0335, -3.2900,  0.9145, -7.5630,  2.7658,  2.2221, -3.2653,
         0.6017, -2.3324, -2.9745,  2.1508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9170, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6619e+00, -4.4484e+00,  1.0202e+00, -8.0277e-01, -9.0281e-01,
        -3.1999e+00, -1.3994e+00,  1.2593e+00, -4.8243e+00,  2.9929e-03,
        -2.9490e+00, -4.2980e-01, -7.9871e+00,  5.5726e+00, -3.5850e+00,
         5.2852e-01, -3.1907e+00, -7.8720e-01, -3.8047e+00,  3.5134e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1876, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 6.7760e-01, -9.1008e-01, -9.5395e-01, -6.6387e-01, -2.6071e+00,
         7.0389e-01,  2.8970e+00, -5.2859e+00,  3.2157e-03, -1.0825e+00,
        -8.7509e-02, -1.4655e+00,  4.0741e+00, -7.0309e-01, -5.4803e+00,
        -6.5554e-01, -1.4650e+00, -4.0356e+00,  2.6068e+00,  3.8743e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.5280, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6753, -2.9167, -4.3277,  1.6834,  3.3649, -2.8797,  2.1435, -2.1708,
        -0.5837, -4.2448,  2.3304,  2.4716, -1.5813, -1.0101, -1.3802, -2.4115,
         2.1181,  3.3698, -4.0312, -2.1771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5279, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0890,  4.3530, -2.6557,  1.2199, -1.7735, -0.2654, -3.1741,  2.6773,
         1.8720, -2.2409, -1.0462, -3.6848, -2.3156,  0.2995,  3.3994, -2.0706,
         1.2560, -1.6010, -2.7863,  0.1950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.7910,  -3.3975,   0.5979,  -2.4426,  -0.3249,  -0.6178,   1.2042,
         -3.0315,  -0.3345,  -2.1945,  -2.2757,   1.1707,   2.6778,  -2.4378,
          0.1561,  -6.9243,  -3.4210, -12.6320,   0.2277,  -1.3526],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2019, -3.1829, -6.2768,  2.2020, -4.2317, -4.9819,  1.5440,  1.8303,
        -2.7224, -1.1504, -0.9491,  0.3372,  1.0864,  5.2202, -2.2452,  0.2198,
        -2.9305, -0.0501, -6.0693,  3.0148], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9769, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0270, -2.3136,  2.1294, -1.3413, -0.2343, -3.9970, -0.7904, -2.9670,
         0.0486,  4.5771, -3.6314, -0.4288, -1.4500, -3.0829,  2.4137, -0.3275,
        -2.6378,  0.1980, -2.6279, -1.2568], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.1391,  -2.3188,  -1.6582,  -4.6964,  -0.6737,  -2.9624,  -1.5217,
         -1.7033,  -1.5295,  -8.4395,   0.4503,   1.4509,  -2.5796,  -0.0854,
         -4.2824, -15.6382,  -6.7134, -21.2201,  -2.2100, -24.6992],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0446, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6011,  1.4123, -0.5869,  0.7110, -1.6743, -0.1877,  0.3217,  5.2936,
        -1.8039, -0.5432, -2.5317, -0.3452, -4.2546, -6.6764,  0.8793, -2.5999,
        -0.2186, -2.5449, -1.5178, -4.4072], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9837, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2713, -1.4850,  1.4955, -3.3214, -0.0612, -1.7381, -3.3271,  1.4362,
         3.0526, -2.4348,  0.5022, -2.0127, -1.8317, -4.4143, -2.3005,  3.1563,
        -1.8791,  1.0881, -2.5270, -1.1963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2506,  -6.3872, -10.3749,  -5.7066,  -4.8611,  -8.3092,  -6.0178,
         -6.8099,  -0.4392, -13.8963,   2.9661,  -6.0016,  -2.0082,  -2.9462,
         -0.5148,  -3.3177,   3.5037,  -3.8964,   0.1062,  -3.8780],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4954, -0.5187, -1.3612,  5.0406, -7.2194, -6.5032, -0.7875, -4.6584,
        -3.5617, -3.9958,  2.5621, -1.8253, -2.1631,  1.5382, -1.8059, -1.3473,
        -3.6137,  5.1458, -2.7235, -0.3479], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6321, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 7.7139e-03, -2.3588e+00, -4.8903e-01, -9.4936e-01, -1.7579e+00,
         2.6870e+00,  3.9109e+00, -1.5042e+00,  1.4247e+00, -2.4092e+00,
        -1.9751e+01, -4.5135e-01,  8.9451e-01, -3.8076e+00, -1.7514e+00,
        -6.2889e+00, -2.2274e+00,  3.5520e-01,  2.1312e+00, -2.8207e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7578, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8158, -2.9529, -2.6945, -2.6549,  0.9957,  3.0004, -1.6098, -1.0045,
        -1.0872, -0.4947, -7.0879,  1.0236,  2.4193, -3.9278, -0.2668, -2.0465,
         0.5748, -4.1391,  2.7677,  3.2273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3224, -3.8646, -1.5642, -2.3099, -0.6347, -4.4525,  3.1875, -7.5911,
        -3.1514, -2.7348, -2.1940, -5.5358, -1.1997,  1.5150, -2.9094, -5.4040,
        -2.0251, -0.8314, -4.5130,  2.2658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.7055e+00,  3.3026e+00, -2.8907e+00, -8.9529e+00, -2.1004e+00,
        -1.4454e+00, -5.3985e+00,  9.9306e-01, -1.2584e-03, -3.9556e+00,
         5.9290e-01, -5.7973e+00,  4.2041e-02, -4.6027e+00,  2.3204e+00,
         1.1790e+00, -2.6186e+00,  9.1667e-02, -9.6102e+00, -3.8462e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9996, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.5406,  -1.8661,  -1.5370,  -1.9716,  -1.6139,  -3.0177,   3.5107,
         -0.8799,   0.3438,  -2.1292, -37.7276,  -3.7390, -17.8117,  -4.7097,
        -12.9983,  -5.5886,  -7.1128,  -6.6193,  -4.8377,  -1.6079],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4609,   0.6129,  -1.1835,   3.0824,  -5.1634,  -4.4325,  -9.9576,
         -6.0565,  -3.0291,  -1.6655,  -2.7612,   0.6664,   4.2664, -12.4604,
         -2.4047,  -1.8442,  -7.0869,   1.6015,   1.4185,  -3.6246],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0484, -1.7003,  1.4861, -3.7447, -1.9480, -3.7079, -7.0988, -3.8393,
        -9.4370, -1.5188, -0.6639, -1.7909, -3.5524, -3.8174, -4.7005, -3.8057,
        -1.9728, -2.6252, -4.0893, -2.3885], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9819,  0.5204, -6.8539,  0.7596, -1.0444, -6.5514, -1.2182, -1.6944,
        -4.8843,  0.5785,  1.9837, -2.4354, -0.0163, -8.4007, -4.1984, -7.5633,
        -0.1449, -1.9894,  4.0167, -2.7606], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8769, -0.7985, -4.8390,  1.0836, -3.1718, -6.7450, -6.2181, -8.2475,
        -6.6173, -1.5417, -4.9586, -3.0222, -6.9161, -5.8257, -4.9840, -4.4459,
        -1.0067,  0.0135,  5.3991, -4.7898], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6391,  -3.0184,  -3.1274,   0.8765,   3.2149,  -1.3761,   0.8071,
         -1.0830,  -0.3885,  -2.2740,   4.6521,  -6.0231, -13.1116,  -4.1280,
         -2.3093,  -4.2604,  -0.6363,   3.1896,  -1.2818,   0.8332],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.0078,  -2.7836,  -0.8329,  -2.7555,  -1.8668,  -5.1967,   1.1784,
          3.1840,  -2.0645,  -1.1533,  -1.4990,   0.8590,  -3.6620,   3.1306,
         -2.0759,  -0.8824,  -5.1546, -37.1275,  -2.0272,  -8.7069],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-29.8389,  -7.0162,  -4.5857,  -5.7905,  -0.5247,  -0.7813,  -9.3339,
         -4.0220,   0.4802,  -2.2773,  -3.5386,   1.2932,   3.0093,  -2.2024,
         -3.4286, -10.9743,  -6.1211,  -6.9270,  -6.1390,  -6.4485],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2584, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.5568,  -5.8836,  -4.7106,  -4.9039,  -6.4546,   0.1944, -23.0638,
          0.2863,  -3.6109,  -6.3376,  -1.5150,  -4.6122,  -6.0354,  -4.5828,
          2.4064,  -5.1332,   0.5735,  -3.0312,  -6.8917,   0.4309],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5216, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4913, -0.6627,  1.8082,  2.8795, -4.0972, -3.2116, -2.9070, -0.7193,
        -4.3222,  1.6115,  3.5403, -2.4098, -0.2388, -1.6644,  0.3305, -6.5188,
         5.5086, -2.0088, -0.9868, -2.5856], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9073, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8759,  2.2487, -2.2844, -2.5089,  1.8320, -2.5082, -1.4971, -4.7368,
        -0.1812,  1.6535, -2.2635, -0.5609, -2.3646, -5.2808, -0.8701,  4.2835,
        -2.9094,  0.0557, -1.1380, -4.2439], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4575, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2592, -4.5443, -0.8974, -4.3055,  2.7872, -1.6994,  0.0400, -2.2523,
        -2.4606,  0.7547,  3.3181, -2.1584, -1.3498, -7.7198, -8.7859, -3.5404,
        -4.3745, -0.0665, -1.2054,  3.3031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7708, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1650, -1.7329, -1.7239, -3.1096,  2.1873,  4.0307, -1.8135,  1.7680,
        -2.0993, -1.6150,  2.2490,  2.8600, -1.7221, -1.2834, -3.3278, -3.4394,
         0.5517,  1.8186, -2.4318, -0.0418], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0924,  0.0341, -4.6812,  1.8928,  0.5915, -3.0220, -2.7124, -2.4601,
        -1.2025,  1.6475,  4.5485, -7.0550,  0.2244, -3.4337, -3.9499,  0.2298,
         3.4737, -5.3521, -1.9278, -1.2660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7258, -12.8488,   4.9236,  -8.2034,  -1.1199,  -1.7662,  -0.7248,
         -2.8098,   4.5002,  -0.5601,  -1.3965, -13.3532,  -2.5224,  -8.6783,
         -4.4807,  -9.2180, -24.5710,  -5.8180,  -2.7705,  -1.3893],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7266, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.9661, -2.3570,  0.5212, -4.5389, -4.4073, -4.1816, -2.1716,  1.3446,
        -5.2751, -1.6748, -1.5313, -8.0033,  0.8925,  0.7796, -2.0044, -2.1846,
        -4.2324, -2.9409, -6.6754, -1.1581], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.2916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1244, -3.6145, -0.6602, -2.6853,  4.2994, -5.2606, -1.1496, -4.4502,
        -1.6524, -0.2869,  3.1440, -1.5900,  0.5270, -4.7732, -1.4992, -4.1520,
        -1.8152, -1.0616, -6.3063, -0.2553], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.0610,   2.6155,  -2.2329,   0.2868,  -1.8805,  -0.4027,   0.4284,
          4.7089,  -5.2094,  -2.0370, -22.8395,  -2.6186,  -8.7837,  -2.5554,
          2.6994,  -3.5723,  -2.0508,   0.2270,  -1.9468,  -0.9898],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3188,  0.9905,  3.0551, -1.7323,  1.5502, -1.6579, -3.6143,  1.5667,
        -0.8824, -3.5110, -0.8257, -2.1353,  0.3474,  1.2281,  4.5128, -3.6971,
        -1.1271, -3.4422, -0.8244, -3.2317], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1781,  -3.7492,  -2.6458,  -7.4676,  -4.3769, -11.5648,  -0.2289,
         -2.6051,   3.8485,  -7.5348,  -2.3143,  -4.0407,  -4.3263,  -2.0061,
          2.0459,  -1.7478,  -1.8699, -10.0126,  -6.8289,  -4.5384],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6571, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7377, -2.9546, -3.8496,  2.0668,  2.7113, -2.9725,  0.1742, -4.2441,
        -2.1645, -6.3100, -3.7292, -1.4835, -2.8037, -1.2591, -0.3048, -0.0993,
         1.3973,  5.0964, -2.4211, -4.8900], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1355, -1.7025,  0.4827, -3.3034, -0.7084, -4.7286,  3.9248, -2.1336,
         0.7024, -1.2462, -0.1870, -3.3163,  2.8575, -1.4695, -3.9617,  0.4609,
        -2.6392, -2.1586, -6.4163,  3.3629], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3486,  -2.4709,  -1.1645,   4.4397,  -6.2928,  -4.1344, -27.3489,
         -5.3133,  -6.0576,  -7.5054,  -4.7733,  -2.7634, -10.2314,   4.5735,
        -23.2002,  -2.2139,  -3.8442,  -7.5765,   0.7303,  -2.1368],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1460e+01, -8.3970e+00, -1.0025e+01, -3.9259e+00, -4.3681e+00,
        -8.4549e-01,  2.0296e+00,  4.4570e+00, -3.7772e+00, -6.9427e-02,
        -3.6924e+00, -3.0828e-03, -4.5692e+00,  2.0785e+00,  2.8148e+00,
        -1.9779e+00, -1.0133e-01, -5.5124e-01, -2.0776e+00,  2.5585e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0951, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.5183,  -3.6933,  -7.4704,  -7.9405,  -0.9027,  -8.5637,  -4.9785,
         -5.0279,  -2.0253,  -1.2521,   1.7657,   3.3550,  -4.0840,  -2.3407,
         -4.7717, -12.0972,  -1.7774,  -4.9696,  -1.3128,   0.3805],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.2614,  3.3801, -1.5726, -0.0880, -1.9565, -1.1409,  2.0427,  3.5330,
        -0.7225, -2.6060, -0.5068, -0.6142, -5.0457, -1.2882,  1.1256, -3.3306,
         0.4002, -3.9477,  0.1436, -7.3341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3820, -3.2930,  0.6014,  0.1356, -1.2152,  0.8081, -1.4589, -4.6234,
         0.3447,  2.5703, -1.5776,  0.5643, -2.9784, -2.2086,  2.4089,  4.0451,
        -2.6998,  0.5104, -2.6694, -2.3333], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2029,  0.0536, -4.0105,  0.7998, -0.8802, -1.2545,  0.7663,  3.7705,
        -2.1855,  1.4630, -2.0145, -0.8738, -0.7391,  4.5991, -2.5389,  1.2313,
        -2.7009, -0.3110, -3.9701, -0.0749], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4334, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0995, -2.1767, -2.3806,  1.1987,  3.8432, -2.1140, -0.3197, -3.6768,
        -0.4857, -4.7797,  0.2972, -0.9145, -3.8415,  0.8220, -3.2749, -3.9039,
         1.2088,  1.1269, -2.3617,  0.7996], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7757, -1.8453, -2.6156, -3.8494,  2.7143, -4.1928, -0.4165, -1.6314,
        -4.0328,  2.2529,  3.7761, -4.2850, -0.0538, -1.2614, -0.4232,  1.3309,
         5.5885, -1.0526,  1.5605, -2.2700], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6741, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7305,  2.5077, -2.9048, -1.5102,  0.0897, -2.7759, -1.0306, -5.7252,
        -1.0758,  2.7477, -5.4183, -0.9177, -3.1098, -2.6753, -3.6018,  3.2128,
        -0.2019, -2.8615,  2.3006, -2.4069], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8464, -2.5697, -3.0611, -4.8344, -1.2548, -3.4173, -7.5334, -6.1032,
        -2.7658, -1.1525, -4.4495,  1.9475,  2.5691, -2.8141,  0.7534, -3.1159,
        -3.2966,  1.6097,  3.6661, -2.2860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5590,   4.1380,  -0.8169,  -0.9398, -31.7029,  -7.7967,  -1.5600,
         -0.6733,  -1.5098,  -7.1469,  -3.9854, -11.2866,  -1.7223,  -2.9444,
          4.9043,  -3.5422,  -2.4949,  -4.6285,  -3.6383,  -5.1744],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4721,  -2.0890,  -3.4393,   0.0947,   3.1878,  -3.6043,   0.1914,
         -2.0270,  -1.4971,   1.6938,   1.5052,  -4.3008,   0.2535, -18.6437,
         -2.2684,  -5.8270,  -1.4369,   1.7530,   4.8089,  -9.8318],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6689,  -2.3579, -22.5988,  -6.5028,  -3.9484,  -3.5197,  -0.6511,
          1.6215,   2.5714,  -5.5201,  -0.4657,  -0.7217,  -2.3607,   0.6850,
          2.5465,  -1.7408,  -2.0513,  -4.5066, -13.4171,  -2.3335],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3970, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7804, -0.0926, -3.7541,  2.8257,  1.1112, -2.8828,  1.1061, -1.4147,
        -1.0971,  3.2434,  3.7032, -2.3322, -2.1321, -9.3735, -5.8412, -3.6473,
        -3.7116, -2.0520,  0.5087,  5.5744], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0519, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.5880,  -5.3061,  -7.0801,  -4.7166,  -3.4596,   0.6676,  -6.1023,
          4.7180,  -8.7910,   1.1272,  -2.6848,  -2.3034,  -2.7574,   1.2162,
          3.4521,  -3.4327,   0.0296,  -2.7121,  -0.4886,  -5.6654],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0832,  2.4003, -1.0451, -3.2422,  2.0873,  3.4295, -4.8383, -3.5996,
        -0.4726, -2.6832,  0.4251,  0.3052, -2.9036, -1.3453, -2.3873, -5.1390,
         1.7644,  1.6082, -3.6412, -2.6932], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5656,  -2.9471,  -3.2913,   2.7335, -10.8144,  -5.3679,  -0.2079,
         -3.1651,   1.0536,  -4.7448,   2.7958,   2.3649,  -2.6847,   0.7446,
         -3.1140,   0.6290,  -3.7406,   2.5287,   3.6141,  -2.4827],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4331, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3122,  -4.6932,  -0.9171,   2.2019,  -6.3957,   0.1889,  -9.6932,
         -0.3773,   0.5358,   5.9650,  -0.9110, -10.4057,  -3.0090,  -3.1867,
         -6.2385,   0.1209,  -1.8046,  -6.9751,  -2.6972,  -2.9148],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0293, -1.2855, -2.6311, -0.6170, -5.1175,  2.1364,  3.1801, -3.0295,
         0.2981, -2.2187,  0.5561, -3.4022,  2.4612,  2.8929, -2.5678,  1.8504,
        -2.3085, -6.5218, -3.7251, -1.3705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1725, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3946,  6.1515, -2.7266, -2.2579, -3.0158, -0.6132, -4.4486,  2.8029,
        -0.0226, -2.1192,  2.0431, -1.6202, -0.1577, -1.4074,  5.1625, -3.4948,
        -2.7395, -3.4273, -0.3866, -1.2889], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6041,  -1.5037,   0.5553,   4.0422,  -2.4791,  -0.5387,  -1.7168,
         -1.6995,   1.9862,   3.9944,  -1.1743,   0.7248,  -1.8090,   0.9337,
         -1.4541, -61.2321,  -5.4063, -13.9894,  -2.7641,  -5.8286],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4982, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1922, -1.4699, -1.4876, -0.6195, -2.4673,  0.0810,  2.5448,  1.0865,
        -2.8576,  1.3421, -2.7235, -0.1223, -3.2937, -0.2364,  1.7044, -2.0103,
        -0.5233, -2.1253, -1.8467,  2.6022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5615, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4237, -0.6272,  1.4130, -0.0405, -0.1341, -2.3578,  4.7604, -5.1155,
        -0.0706, -2.6760, -0.3470, -2.6404,  0.8845,  1.0218, -6.4082,  0.8196,
        -2.5433,  1.1086, -5.1092,  2.1204], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7259, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3405,  0.1617, -1.2747, -3.3857,  2.8076,  2.7956, -3.5771,  1.0836,
        -2.2902, -1.9334,  0.8626,  5.3433, -2.5732,  1.7145, -1.7461, -0.3230,
        -6.5194,  0.2845, -0.9795, -3.0618], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6976, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4103,  0.1453, -2.8516,  1.0847, -4.8181,  1.9175,  2.5188, -4.0075,
         0.0184, -1.4388,  0.6522, -2.6560,  0.9867, -4.0840, -3.8485,  0.8595,
        -1.7570, -1.9662,  2.1053,  4.9476], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7301, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8256,  2.1128,  2.1203, -2.9508, -0.0211, -2.4579, -0.2875, -6.2087,
         1.8323,  1.7597, -3.0660,  0.4974, -1.3710, -0.8364, -4.4897, -2.3301,
         3.4027, -3.2858, -1.5753, -4.1508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1748, -0.4941, -0.9236,  5.4139, -1.5896,  1.2345, -8.6893, -5.8809,
        -2.3582, -2.6341, -0.8670,  0.9125, -6.3554, -4.5295, -0.9646, -2.0144,
        -6.5289,  1.9642,  1.6879, -3.3589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8575, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9890, -7.7566,  0.6047,  2.4335, -2.3838,  0.4883, -2.6081, -2.3903,
         1.1428,  4.0371, -1.9189, -1.3951, -1.0159,  0.6614,  1.3190, -4.1979,
        -4.4077, -0.1858, -5.3390, -1.8164], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.6042,  -2.5086,   0.2825,  -1.7240,  -3.0838,   3.1524,   2.8703,
         -5.0066,  -5.5730, -12.5195,  -8.3323, -11.6358,  -7.7650,  -2.9427,
         -0.8712,   1.5782,  -0.7640,  -6.6253,  -1.5123,  -1.1043],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.3020e+00, -5.2916e+00, -4.4325e+00, -5.5316e-01,  2.5209e+00,
        -5.1980e+00,  2.6305e-01, -1.3375e+00, -5.5801e-01, -3.1791e+00,
         4.6597e+00, -1.9451e-02, -6.5161e+00, -1.7740e+00, -2.0433e+01,
        -4.9398e+00, -4.2590e+00, -5.0447e+00, -1.7667e+00, -2.1497e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2188, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.8227, -1.9520,  0.5263, -2.8184,  0.9197, -2.6653,  5.3968, -2.8706,
         1.0231, -2.2415, -0.0181, -4.8264,  1.6603,  2.0979, -2.4128, -0.9432,
        -4.5667,  0.4915, -4.9430,  1.3581], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.7956,  -2.3518,  -4.7753,  -0.6471,   2.0711,   5.5701,  -6.7769,
          0.2397,  -1.7703,   0.2184,  -5.2753,   2.3714,   0.1743,  -2.3652,
          0.4703,  -3.7923,   0.6278,  -3.0141,   3.2185,   3.3839],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.8890,  -3.4616,  -3.8283, -13.2422,  -6.5907,  -2.7303,  -3.8777,
         -0.0407,   1.5642,   3.0897,  -2.3027,   0.5139,  -1.6052,  -3.2223,
          2.4848,   3.6725,  -4.0626,  -2.2654,  -2.6796,  -0.8293],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7262, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6262, -7.2350, -5.7088, -6.2871, -1.7658, -1.9398,  3.9648, -3.8256,
        -0.9329, -3.6608, -3.3432, -2.0814,  3.4143, -2.9894, -1.4483, -2.6987,
        -1.3891,  0.3036,  3.9507, -2.1302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0214, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1918, -0.4937,  1.4980, -8.0657, -0.6322, -1.5506, -1.9196,  1.4628,
         3.5551, -2.1976,  1.4324, -2.5569, -1.3859, -5.2293,  1.8105,  1.6588,
        -9.4244,  0.0904, -1.6164, -1.5394], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4648, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2730,  -0.4824,  -1.0051,  -2.6301,   2.5503,  -3.5608,  -2.9814,
        -15.1857,  -6.2345,  -4.0662,  -3.7222,   0.3096,  -1.0324,   0.1901,
         -4.0465,  -0.2794,  -2.1156,  -1.0176,  -1.7058,   0.8825],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3203, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7558, -4.8549, -7.6940, -4.6342, -4.8040, -6.3123, -3.2999, -3.2072,
        -4.7129, -4.4759, -4.5231, -5.9770, -6.4860, -6.4340, -4.7160, -6.4433,
        -2.2420, -2.6043,  3.9390, -5.9430], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.4590, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0968, -11.3176,  -0.5861,  -3.2676,  -2.2296,   0.5110,  -4.0047,
          0.4083,  -4.2090,   2.5968,   4.3176,  -2.9175,   0.4961,  -2.9057,
         -1.3991,  -8.3441,   3.6062,  -6.7032,  -3.7548,  -0.8715],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0239, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3154,   5.0804,  -3.1173,  -4.9881, -15.8451,  -3.9735,  -4.6809,
         -0.4874,   0.1196,   5.2893,  -5.5301,   0.1396,  -1.7767,  -1.0540,
         -4.8068,   2.7605,   1.9694,  -2.6153,   0.5285,  -2.3764],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8691,  -0.6785,  -4.0582,  -2.8002, -18.6423,  -4.9046,  -5.7992,
         -4.2715,  -3.3279,  -0.8739,   0.0417,   2.0885,  -5.2459,  -0.9036,
         -2.0266,  -3.7633,   1.9153,   1.8985,  -2.1358,  -0.6173],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4976,  -1.7808,  -0.3233,  -3.5229,   3.5255,  -2.4296,  -5.2706,
         -0.6933,  -0.8475,  -1.6113,   1.4192,   4.1594,  -2.7064,  -1.1000,
         -3.9939, -20.7193,  -1.4662,  -4.8760,  -0.7105,   0.9069],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.4181,  -2.6501,  -1.3935,  -0.9960,   4.4369, -11.1584,  -3.2869,
        -11.5788,  -2.6651,  -7.9801,  -1.1050,   1.5684,  -4.9215,  -0.7924,
         -1.6631,  -1.4341,  -5.2868,   3.4913,  -5.9946,   1.2797],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8274, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5777, -10.5555,  -9.0227,  -8.7108,  -5.8874,   0.3332,  -1.9322,
          4.3726,  -3.9367,  -0.0407,  -1.4969,  -0.1279,  -3.6626,   3.0501,
          0.2562,  -2.9582,   0.3672,  -3.4779,  -0.5403,  -3.6283],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6088, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3243,  -0.8841,  -3.3110,   2.7248,   3.3657,  -4.0800,   0.3071,
         -2.7958,   0.7032, -35.7226,  -1.9591,  -0.6013,  -4.3360,  -0.2509,
         -0.5604,  -2.2119,   1.2280,   3.1438,  -2.5106,  -1.3121],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3651,  -1.1888,  -3.4672,  -3.0041, -13.2270,  -6.8189, -11.9132,
         -6.5679,  -4.1156,  -2.6962,  -1.6032,   5.1160, -15.8783,  -0.8930,
         -1.8102,  -8.6928,  -6.3143,  -1.7340,  -5.0781,  -2.8050],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0587,   2.1727,  -4.2988,  -0.8537,  -1.8919,  -3.0178,   1.1221,
          2.3826,  -3.5801,   0.1935, -13.2348,  -3.8873,  -1.9430,  -3.7115,
         -6.9789,  -0.6376,  -2.5192,  -1.2232,   1.8471,   4.6298],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7686, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7259, -1.1751,  1.0069,  2.5589, -2.7573,  0.7907, -3.6191,  0.6132,
        -4.0028,  2.0533,  2.1667, -2.1120, -0.0989, -0.8633, -1.2442,  2.3230,
         5.0494, -2.9444, -0.9351, -3.1472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.9265,  -4.0758,  -1.7794,  -1.0592,  -2.2100,  -6.9334,  -0.6426,
         -0.3155,  -0.7558,  -0.1840,  -1.2796,  -3.4296,   2.1896,   4.1384,
         -2.1402,   0.0466, -14.5707,  -5.7683,  -4.0553,  -3.0990],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1999, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2520, -10.4794,  -4.9800,  -9.4393,  -5.5022, -20.8046,   0.6005,
         -3.5206,  -0.0554,   0.1812,  -5.9905,  -2.3459, -13.9678,  -7.3494,
         -5.1586, -15.1720,  -3.4428,  -0.4129,  -0.2008,   6.0253],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.6681e-04, -6.1019e+00,  2.3040e+00,  9.1978e-01, -2.3231e+00,
         1.1802e+00, -1.2849e+00, -4.1170e+00,  1.8565e+00,  2.1429e-01,
        -4.0197e+00,  7.7995e-01, -2.2669e+00, -3.4388e+00,  1.1019e+00,
        -9.7796e-01, -1.0300e+00,  7.8209e-01, -1.9919e-01, -5.1512e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.8568, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8300,   3.8674,  -0.7028,   0.8223,  -4.2951,  -4.2609,   2.3437,
          0.1009,  -5.1992,  -1.8713,  -9.5695,  -4.3453,  -4.2310,  -4.6758,
         -0.5944,   2.4499, -14.9791,  -3.0767,   0.4785,  -2.8869],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4398, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6726, -3.7259, -1.2126,  0.6998, -3.3008, -0.1629, -3.8603,  0.7631,
        -3.2191,  3.0433,  2.5048, -2.8666,  0.2468, -3.5605, -2.4349, -5.3353,
         0.1800, -0.9688, -0.9303, -4.5313], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7951,  -2.6385,   0.8419,   1.3668,  -0.8004,   0.4105,  -0.2414,
         -3.6119,   2.2500,   4.3228,  -1.7136,  -5.0650, -10.7387,  -4.0051,
         -4.3881,  -0.0438,  -7.7585,   2.5891,   3.7072, -10.5278],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8420, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.7419,  -6.3253, -13.4536,  -5.7962,  -8.3373,  -6.9650,  -5.4128,
          0.1964,  -2.7657,   2.7721,  -2.3476,  -3.3788,  -0.1061,  -3.4353,
         -5.5675,  -1.2035,   2.6348,  -5.1604,  -3.3273,  -1.9719],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7346, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4698, -3.0066,  1.5216, -6.1687,  2.6311,  3.5181, -2.8722,  0.8404,
        -2.8123, -1.9440, -4.1355,  3.3038,  3.7148, -2.8147,  0.1174, -3.0012,
        -0.2945, -4.7381,  0.2685,  0.7178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1516, -20.2899,  -7.6189,  -2.4536,  -3.3164,  -1.3525,   1.1765,
         -3.2068,  -2.8655,   0.8069,  -1.3187,   1.3046,  -2.8432,   4.8492,
         -7.6875,   0.6372,  -2.3011,   0.3037,  -4.7321,   2.1362],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5462, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4997, -0.1481, -2.0599, -3.7940,  0.1918,  3.1181, -1.5177, -0.0306,
        -4.7939, -2.4799, -4.8191, -1.5167, -5.7494, -5.3849, -1.2522, -2.3123,
         0.5495,  1.6838, -4.0312, -2.3873], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9198,  0.3086, -1.9580,  0.1389, -3.4625,  2.2047,  4.5571, -4.9566,
        -0.0101, -2.5779,  0.5285, -4.0351,  0.7060, -2.7926, -2.4411, -0.4685,
        -3.9112, -1.5234, -2.8617,  3.6153], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2954, -10.6172, -41.5961,  -8.8744, -12.4471,  -7.5903,  -7.2565,
         -5.8355,   0.0911,  -0.4650,   5.1643,  -3.6427,  -0.3833,  -3.5461,
         -0.3964,  -4.7993,   1.0176,   3.5300,  -2.6653,  -2.6108],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.9341, -5.2661,  0.3006, -4.1213,  0.1177, -4.1205,  3.0724,  2.0803,
        -2.1710, -0.6495, -1.3826,  0.3489, -2.3157,  5.2235, -3.3272,  1.1412,
        -2.5792, -0.7690, -4.0911,  2.6573], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6459, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3521, -7.0888,  0.3030, -1.9117, -1.9483,  1.3823,  3.2352, -0.9723,
         0.1167, -6.7614, -4.2146, -8.7023, -7.6972, -3.0646,  0.5053, -1.5422,
        -7.5486, -1.1246, -3.4400,  0.2749], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0177, -4.0933, -1.6684, -3.9941, -3.4459,  3.1256, -6.2394, -0.4969,
        -1.8680, -0.0174, -2.5136,  4.3642, -5.0953,  1.6764, -2.7095, -0.6348,
        -2.7253,  1.9260,  2.6154, -2.3112], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5032,  -2.9944,  -0.5234,  -0.5592,   4.3786,  -2.1852,  -4.3388,
         -1.0162,  -2.1223,  -2.4096,   4.5823,  -2.4963,   0.5206,  -1.5185,
          0.0383,  -1.2706,   3.3543,  -2.5517,  -4.9292, -27.3375],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1941, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1880, -1.8788, -1.8133, -1.7974, -0.6403, -0.2290, -3.8625, -2.9331,
        -0.6776, -4.4669, -3.4375,  1.0885,  3.3037, -2.2792,  0.6589, -2.2113,
        -0.1904, -4.0063,  2.5690,  3.3249], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8145, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6274, -2.1211, -0.4945, -1.9282,  0.7311, -3.8419,  2.8711,  3.9580,
        -1.8248, -5.9504, -2.1058, -2.8729, -1.7117,  3.3967, -4.9635,  0.2388,
        -3.2427, -2.0424,  1.1588,  5.1402], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0438, -0.8279,  0.2823,  2.1161,  3.1707, -4.0854, -1.7038, -7.6203,
        -8.8932, -2.2462, -5.9089, -0.4348,  1.3467, -7.0879, -8.1249, -1.2965,
        -1.5305, -5.3660,  2.0470,  2.6591], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2975,  2.7380,  2.9065, -6.4080, -0.2098, -2.9005, -1.6584, -3.1563,
         2.1101,  2.9723, -2.5810,  1.8567, -2.9525, -1.3362, -3.3162,  4.3265,
        -1.2113, -0.3514, -1.4565,  0.6567], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7634, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0549,  -2.9267,   1.7347,   2.0171,  -5.8362,  -4.6644,  -7.0894,
         -8.2228,  -6.6328, -14.7009,  -7.2411,  -1.4606, -15.6195,   5.8363,
        -17.1854,  -2.7419,  -1.8538,  -1.2689,  -2.9704,   2.7937],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5044, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2690, -3.5862,  2.9365,  0.7005, -2.0820,  0.8775, -1.1043, -1.2024,
         0.7841,  5.0248, -2.1019, -0.2434, -2.3294,  0.6200, -5.1219,  1.0191,
         2.0846, -3.5867,  1.7244, -3.6596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6189,  -1.4832,  -3.2769,  -2.3077,  -4.1794,  -1.0510,   3.1987,
          2.8158,  -1.6070,   2.1414,  -1.3341,  -0.7797,   2.1170,   4.9892,
         -0.9098,  -3.1361,  -3.1269,   0.3791, -24.0656,   1.8861],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5675, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6079, -1.5088, -3.5730,  0.8591,  4.5887, -4.8400,  0.6290, -1.9905,
        -4.2508,  0.2513, -4.2212, -3.0485, -0.4211, -4.2066, -2.1734, -5.6839,
         0.9758,  0.7594, -6.7548, -0.3481], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9844,  3.1137,  0.2802, -1.9939,  0.9616, -0.7190, -0.2310,  2.9450,
         3.6136, -4.1848, -1.8558, -4.5569, -0.4278, -3.9697, -0.9551, -4.2511,
        -3.5574,  0.9349, -2.5352,  0.2091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7817, -1.7487, -1.2079, -2.4579, -1.4870, -5.4444,  0.1508, -0.7917,
        -1.3860,  0.3584, -0.1591, -0.3690,  2.0458,  4.7769, -2.1818, -0.8213,
        -3.5705, -4.1868, -4.9627, -1.1662], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1414, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8511,  -3.1123,   3.2044,  -1.0568,  -0.5779,  -2.1795,  -1.2640,
         -5.3588,   0.8239,   0.0390,  -2.6894,   0.1624,  -2.9460,   0.3157,
        -14.4701,   3.0182,   1.2081,  -9.9315,  -1.0666,  -1.8224],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8170, -2.5684, -5.6778,  0.3318, -7.9812,  2.7190,  3.8874, -7.2903,
        -2.7149, -3.4778, -0.9461,  1.5301,  4.3509, -1.6045,  1.0597, -2.7445,
         0.1508, -1.8576,  4.7799, -6.9367], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8518, -1.7079, -3.5246,  0.6470,  1.8147, -1.5272,  1.1395, -1.9396,
        -0.6234,  0.9804,  5.2248, -1.2776, -1.5905, -1.4586, -3.2934,  0.7342,
         3.6681, -0.9248, -1.8817, -9.5528], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8473, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4721,  3.1243, -2.2323, -2.1765,  1.1317, -3.5295, -0.2002, -6.0623,
         1.5420,  0.6428, -2.7438, -1.6707, -2.6611, -0.1485, -6.4377, -0.8173,
         1.7643, -8.9914, -1.2510, -2.0798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8134, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0342e+01, -5.7280e+00, -7.6434e+00, -1.8610e+00, -6.7492e-01,
        -3.2243e+01, -5.1132e+00, -2.5083e+01, -1.2306e+01, -3.0933e+00,
        -1.1222e+01,  1.1088e+00,  2.9205e+00, -7.8196e+00,  1.0613e-02,
        -2.3977e+00, -2.8351e+00,  1.9665e+00,  2.6085e+00, -1.6339e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5691, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7468, -2.7413,  0.5381, -3.5259, -1.7800, -1.6576, -3.8778,  2.0886,
         1.8796, -5.6762, -2.9655, -5.8221, -5.4704, -7.9920, -1.3087, -0.7762,
        -0.3867,  0.1481, -1.8571, -0.6922], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1811, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3180, -1.3153,  3.6584, -1.8604, -0.2344, -0.3871, -6.0000,  2.5807,
         3.5756, -1.9983, -2.9598, -1.6893, -5.7330, -3.0914, -1.8440,  1.3516,
        -4.1261,  0.4478, -0.6085,  0.6670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4013,  -3.3480,  -0.5458,   0.1114,  -4.4308,  -0.1931,  -1.8214,
          0.3166,  -3.3951,   2.8066,  -2.3871,   0.1811,  -1.7142,   0.4682,
         -5.1081,   3.4727, -11.9026,  -0.5062,  -4.7316,  -9.7085],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5129,   0.1334,  -4.1411,   0.7801,  -3.1125,  -0.9751,  -1.8693,
         -4.8505,   0.5402,   1.1555,  -5.4743,  -2.5551, -13.6160,  -6.7834,
         -5.8761,  -4.1793,  -1.3604,  -1.2424,   3.0015,  -7.6472],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1292, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3757,   1.1589,  -2.7404,   1.2660,  -3.6681,  -2.2149,  -3.0922,
          0.7632,  -2.1184,  -6.2930,   0.1042,  -2.9726,  -6.7432,  -2.1587,
          2.5579,  -6.3590,  -3.3871, -16.2189,  -6.8562,  -3.0986],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0348, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7474, -1.4016, -0.9497,  1.5688, -3.4504, -2.4112, -6.1157, -6.1161,
        -2.9873, -6.9010, -0.2215, -4.0670,  5.0921, -2.7797, -0.9147, -2.2267,
        -4.1789,  0.6901,  0.9883, -4.8992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1014, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2806,  -5.5778, -14.3083, -10.2533,  -7.8955,  -6.3586, -27.1440,
         -7.5721,  -6.0135,  -0.8869,  -2.6444,   4.8577,  -3.4884,  -4.3276,
         -2.9726,  -4.4571,   0.4952,   1.3318,  -3.5394,  -3.7199],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.5878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7996,   3.3463,  -1.4783,  -1.4282,  -2.7520,   0.4182,  -3.3737,
          2.7220,   2.5641,  -3.5914,   0.9581,  -2.0428,  -0.0345,  -5.4000,
          2.6491,  -9.1728,  -7.1024,   0.1425, -21.1778,  -0.5175],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2236, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.6984, -1.0729, -0.8237, -1.1681,  1.0594,  0.6294,  5.4410, -3.2826,
         0.2514, -1.9688, -2.3065,  0.9906,  1.7398, -4.1151,  0.4671, -9.1106,
        -3.5727, -4.0852, -2.9146, -4.2861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1715, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5201, -2.6520, -2.8512, -4.2110, -0.4595, -7.6635, -0.3151,  0.2910,
        -1.3879,  0.0780, -0.7992, -0.4556,  1.8470,  2.2944, -2.6010,  0.4263,
        -2.6327, -3.5844, -2.6845,  2.8193], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6186,  -6.0364,   2.4071, -15.6141,  -0.4683,  -2.3785,  -3.9676,
         -1.2792,   2.7301, -10.7892,  -3.1551,  -2.5188,  -0.5787,  -4.4890,
          0.9308,   2.3938,  -0.4526,  -0.7149,  -0.9502,  -3.9631],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8666,  -4.3959,  -8.4058,  -4.1418, -46.3206, -10.0288,  -6.8753,
         -8.8363,  -9.3027,  -3.5891,  -7.1301,  -1.2673,   0.3431,  -0.1016,
         -2.5902,  -1.1968,  -0.4825, -14.4076,   1.8438,   1.9328],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8200,  -5.3372,  -1.9145, -11.7509,  -6.6222,  -3.4886,  -3.8644,
          0.6242,  -3.0245,   2.6004,  -9.3740,  -0.8304,  -0.5180,   0.3105,
         -3.2897,   3.0615,  -5.8290,  -0.5265,  -6.9354,  -8.2381],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3807, -5.4623, -2.0082,  2.4492, -3.7707, -1.7358, -2.1324,  0.0556,
        -2.2674,  3.3754, -8.2941, -2.1590, -6.2512, -5.8818, -2.4740, -0.8771,
        -4.5420, -2.7700, -2.0309, -1.9079], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8967,   0.3026,  -3.6244,   5.1260, -10.4846,  -4.3718,  -0.5047,
         -2.3930,  -2.7012,  -3.3927,   1.1937,   0.6274,  -3.2775,  -1.4379,
         -3.9625,  -0.4216,  -3.7259,   3.4460,   1.5654,  -3.3770],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3261, -2.4800, -0.9005,  1.0350,  3.9108, -4.0846,  0.5609, -1.6306,
        -3.8824,  1.4923, -2.5750, -2.8831,  0.1196, -3.5442, -0.3121, -4.6124,
         1.2055,  1.7633, -2.0107,  0.4368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9033, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6926,  -1.2136,  -2.2452, -37.2174,   1.4243,   1.8442,  -5.3144,
         -7.2325, -41.5387,  -8.9877, -10.0883,  -9.6341, -10.1969,  -2.6831,
         -1.0696,  -0.3354,   1.5480,  -5.6026,   0.2846,  -1.7118],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.2831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3545,  -1.9207,  -0.6696,  -5.1492,   2.8581,   0.8578,  -5.4692,
         -1.8603,  -1.2486,  -2.8486,  -6.2010,   2.1337,   1.1320,  -5.1573,
         -0.6459,  -1.8710,  -0.9620,  -7.4912,   5.2425, -19.2236],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3570, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4136e-01, -3.0285e+00,  1.3247e-02, -2.1418e+01,  2.7404e-01,
         2.4448e-01, -1.4780e+00,  1.2968e+00, -4.3433e-01, -1.9316e+00,
         2.0711e+00,  4.8034e+00, -1.3458e+00,  4.9069e-01, -2.0351e+00,
         1.0516e+00, -6.0316e+00,  7.9411e-01, -5.7566e-02, -2.3482e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2095, -11.9083,   4.5908,  -3.1581,  -0.9745,  -2.2441,  -2.8844,
          1.2583,   2.6960,  -6.1233,  -2.1356, -29.8446,  -7.1110,  -7.0886,
        -18.0945,  -3.9494,  -1.4885,  -0.4283,   5.5585,  -3.7949],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3457, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0466,   1.8125,  -8.1951,  -6.0593, -22.5277,  -6.6322, -17.7751,
         -3.9895,  -2.8180,   0.3177,  -2.9508,   1.8556,  -2.4130,  -5.9076,
          0.8449,  -3.4550,  -0.6288,  -3.7215,   2.1003,   3.0921],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8548, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3157,  -3.7565,  -1.3541,  -7.4058,   0.4083,  -0.6706,  -3.9308,
         -2.1447,  -2.2413,  -2.7187,   2.0632,   4.5155,  -3.6043,  -6.1020,
        -11.7980, -17.3781,  -3.8623, -19.7737,  -5.1492,   0.1746],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-14.9588,   2.4924,  -5.0097,   0.4143,  -4.2695,  -3.0707,  -3.9222,
          1.9283,  -3.0909,  -1.9765,  -2.9218,   0.3235, -11.2693,  -3.2272,
          1.8775,  -4.0779,  -1.7610,  -2.4831,  -0.0231,  -2.3425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8684, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9898,  -3.9198,  -0.1156,  -1.9687,  -4.7089,  -3.2376,   1.8374,
          2.0691,  -2.3373,   0.7865,  -1.1122,  -5.6704,   2.7291,   0.4379,
         -4.0365,  -2.2510,  -5.6270,  -5.8250,  -6.4324, -19.7953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9094, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.3377,  2.7731, -2.4109, -0.0083, -1.6495, -3.2165, -4.3375,  2.9229,
        -1.3907, -4.9715, -0.2416, -4.3299, -2.0336, -3.9023,  1.5275,  2.1699,
        -2.6373,  1.4725, -0.4951, -4.0672], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0744, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8266,  -2.0446,  -2.4899,   1.9417,   2.5826,  -4.5099,  -0.9221,
         -1.0627,  -0.1575,   0.2493,   5.2478, -10.8513,  -2.4445,  -3.1583,
         -3.8156,  -1.0590,  -1.5711,  -3.6949,  -4.5582, -20.9863],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7566, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4287, -2.4645,  0.9604,  3.1058, -1.9334, -2.0056, -1.2075,  0.3599,
        -6.4308,  2.5745,  2.1832, -3.4438,  1.5853, -1.8962,  0.2697, -3.7456,
         3.3336,  1.5364, -5.1199, -1.1755], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2459,  -6.2079,  -1.1860,  -5.0135,  -0.5775,  -5.1849,  -9.0417,
         -3.4543,  -4.0950,   0.1666,  -0.8573,   4.3691,  -3.1833,  -1.7538,
         -3.5099, -10.6171,   0.8367,   0.0662,  -2.6732,   1.2627],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2436,  3.7241, -4.7821, -0.7896, -3.0384, -4.0375, -0.0818,  4.7156,
        -2.9209,  0.1861, -0.6136, -0.6559, -1.7679,  3.2555, -1.3796,  0.3166,
        -1.9217, -0.9330, -0.9746,  3.2034], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5370, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.9412,  -6.4561,   3.0996,   3.0964,  -3.4615,   0.9499,  -3.5603,
         -0.2883,  -2.5800,   1.7356,   2.5978,  -3.5280,   0.3784,  -2.9626,
         -3.2770,   1.2763,   2.2460,  -4.4511,  -1.7246, -16.4959],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.6232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9903, -0.3492, -1.8824,  0.8252,  4.7689, -3.4935, -0.1912, -3.3188,
        -1.7293, -5.4266,  0.3741,  0.3291, -3.0855, -2.4780, -4.4298, -1.9427,
         1.1596,  5.0773, -1.5914, -5.7432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2559, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8971,  -5.2713,  -0.7269, -10.1733,  -0.7650,  -7.9372,  -4.8359,
         -6.6586,  -2.1728,  -4.8668,  -4.3113,  -1.6814,   0.7373,  -2.7485,
         -0.8192,  -6.5641,  -2.1716,   1.6680,   2.2019,  -1.6901],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3978,  -2.6353,   2.0942,   0.6598,  -1.4453,  -0.5530,  -0.4306,
         -3.2944,   1.8088,   1.3691,  -3.7203,  -2.2894, -11.0076,  -6.2283,
         -6.4184,  -3.1232,  -0.0566,  -1.2591,   4.3986,  -1.5491],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7039, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2757,  -4.0611,  -3.0175,  -6.5705, -11.4618,  -5.4671,  -4.3427,
         -4.0630,  -1.2726,  -6.8099,  -6.3403,  -5.3311,  -6.0331,  -2.4145,
         -4.3616,   3.9553,  -5.3958,  -2.2774,  -3.6128,  -5.2940],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5424, -36.4411,   5.4312,  -7.8852,  -9.3204,  -7.5372, -11.0555,
         -5.7035,  -5.4681,  -0.4924,   2.3932,   1.8281,  -9.6051,  -3.0032,
         -2.1388, -15.6356, -10.0203,  -2.5317, -13.2403,  -4.3410],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1241,   2.6020,  -6.8071,  -2.6585,  -0.3512,  -2.2575,  -1.1244,
          1.3950,  -5.0394,   0.1318,  -2.8492,  -4.9618,   2.2103,   3.5792,
         -2.7584,   1.2815, -17.4686,  -4.3508,  -4.1260,  -4.9137],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4102,   4.7398,  -3.6528,  -6.7823,  -9.2672,  -5.7416,  -5.8159,
         -6.9202,  -5.3163, -11.6413,  -4.1511,  -3.3595,   2.7607, -10.2864,
         -5.1042,  -3.5667, -13.4058, -25.1526,  -7.2292,  -7.6774],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.6373,   2.0744,  -7.1320,  -4.5324, -12.0993,  -4.7041,  -5.5762,
         -5.1462,  -1.6862,  -4.3221,  -1.5630,  -0.7676,  -7.3480,   1.7478,
          1.1655,  -2.1879,  -0.7467,  -3.9334,   0.1082,  -5.4036],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0208, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1248e-01, -1.3563e+01,  4.7366e+00, -2.1922e+00, -1.0386e-02,
        -3.6631e+00, -5.1475e+00, -6.9551e-01,  1.9804e+00, -1.3515e+00,
         1.5898e+00, -1.3060e+00, -1.9870e+00,  1.2275e+00,  4.7089e+00,
        -2.4214e+00,  1.3366e+00, -2.4017e+00, -6.5331e-01, -3.9359e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2180, -2.5481, -0.8022,  1.8244,  4.4348, -3.2600, -0.3700, -2.1864,
        -2.0094, -3.8811, -0.5679,  1.1749, -1.9598, -0.7330, -1.1192, -0.3450,
        -4.5843, -3.8610, -3.5766,  1.0432], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1772, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3127,  0.7199, -2.5347, -3.2844,  2.2596,  3.0257, -2.8625,  0.6393,
        -9.5398, -2.4587, -5.9998, -2.5177, -1.9491, -6.0514, -4.1159, -0.0815,
        -5.8660,  1.7999,  3.7667, -2.0713], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0217, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9512,  0.5115, -3.3230,  5.8978, -7.8343, -3.9341, -3.9438, -0.5929,
        -3.2248,  0.5438,  4.7417, -2.2600, -0.5784, -2.6378,  0.3346, -3.6074,
         0.9279,  1.2171, -8.0305, -1.5294], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9522, -7.9879, -5.5210, -5.5725, -1.9875, -4.1316,  3.7975, -4.1690,
        -2.1712, -5.0788, -5.1617, -2.6467, -2.9883, -3.1885, -2.5874, -9.0882,
        -3.5241, -5.8278, -1.8687, -1.8477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.7752, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2738e+00,  4.5376e-01, -1.0090e+00, -7.0293e-03, -7.0614e+00,
         6.1740e+00, -1.4454e+00,  8.9050e-02, -2.2892e+00, -2.1261e+00,
        -7.8280e+00,  1.8245e+00, -8.3121e-01, -2.5489e+00, -1.1875e-01,
        -1.9660e-01,  1.0487e+00,  7.5003e-01,  4.3557e+00, -3.7722e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.8406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3350,  -3.1563,  -5.5177,  -4.0681,  -5.3810, -24.5815,  -7.6249,
         -6.9301,  -5.5564,  -6.5493,  -4.2765,  -9.3952,  -7.0713,  -4.3802,
         -4.4964,   0.5621,  -3.8598,  -4.8539,  -5.2267, -11.4277],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.6546,  -3.1874,  -0.7092, -19.8461,  -2.1657,  -9.4234,  -0.2263,
         -3.7992,   5.9305,  -6.1594,  -0.2594,  -4.7843,  -2.9003,  -5.6327,
          1.8624,   1.4157,  -3.5506,  -0.9592,  -2.5865,  -0.3650],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2118,  1.6464,  3.1624, -2.5769,  1.1641, -1.4307, -4.6169, -3.5333,
         4.5387, -0.7263, -0.5693, -3.2512,  1.0139, -7.0851,  2.1353,  2.6390,
        -5.6821, -0.1046, -2.1846,  0.3375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2210,  -4.3152,  -6.7753,  -1.5886,  -2.3435,  -0.4805,  -4.5016,
         -1.5924,  -0.3885,  -1.3722,  -1.2523,  -0.0256, -11.5226,  -0.5092,
         -3.3922,  -0.8124,  -8.8843,  -0.8508,  -1.1374,  -2.4630],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5038,  0.1472, -2.6473,  1.8977, -1.3947, -1.1805, -1.2767, -1.2092,
        -5.9230,  1.0239,  1.2531, -1.9070,  0.4965, -0.0591, -3.8891,  2.3226,
         0.5449, -3.7215,  0.4215, -2.7379], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9671, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7367, -6.0585,  0.3293, -0.7937, -2.6882, -0.6544,  0.0313, -1.6268,
        -0.1794, -8.3456, -7.0827, -2.7391, -3.2997, -0.5743,  0.2100,  3.5413,
        -3.8219, -0.4601, -1.7000, -4.2598], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9218, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.8856,  -3.1480,  -6.3734,  -4.1800,  -3.5356,  -3.9188,  -7.8476,
         -4.2132,  -4.6236,  -3.6756,  -3.5796, -15.5933,  -3.0541,  -6.4671,
         -7.9780,  -4.0774,  -5.2604,  -4.1622,  -2.9725, -10.3629],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7455, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2949,  1.9516, -1.8732,  1.0539, -1.9155, -3.4314, -0.6789,  3.5300,
        -1.4372, -1.3665, -0.9618, -0.4802, -2.8052,  5.4126, -0.5730, -0.2837,
        -3.1841,  0.6255, -3.7916,  0.1933], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3860, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2374, -2.2627, -2.3741, -2.2222,  5.2927, -3.0388,  1.4477, -0.9022,
        -1.8521,  2.4579,  4.6889, -2.1247,  0.8403, -3.0554, -2.7041, -7.4784,
        -4.2588,  0.4793, -3.9835,  0.8027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0742, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1403,   1.7266,   1.5830,  -3.7993,   0.1047,  -1.7093,  -9.0631,
          1.9552,   3.2336,  -2.0323,  -0.8774,  -7.3684,  -5.9271,  -3.0656,
         -3.9673,   0.3388,   0.6732,  -0.6362, -10.2963,  -1.8850],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2576, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0048, -3.9293,  2.6524,  3.2441, -2.7048, -0.0227, -2.5538,  0.4041,
        -5.1603,  3.5340,  1.9022, -1.9310,  0.8156, -0.6381, -1.3493, -6.9786,
         0.8803,  0.9438, -2.4737,  0.4419], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6020, -5.5211,  0.8930,  3.3345, -5.4102, -1.5960, -4.3226, -4.8671,
         0.7600,  3.8526, -1.6268,  0.2404, -2.4731, -1.8390, -0.8195,  2.6961,
        -3.8336, -1.4934, -1.7372, -4.1192], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5242, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.4131,  -4.8879,  -5.3268,  -5.5523, -13.4383,  -1.9870,  -4.4956,
         -1.8302,  -3.3272,  -2.2562,  -5.8963,  -0.3839,  -3.6506,  -2.5566,
         -4.9045, -13.3145,  -4.2095,  -3.6847,  -2.9558,  -0.8641],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8072, -0.8477, -4.4581, -1.5292,  0.1126, -1.4442, -3.3551, -0.2999,
        -3.7957,  3.5376,  1.9789, -3.1788, -0.5890, -2.9353, -0.7609, -5.4000,
         3.4641, -1.0987, -2.1037,  0.0710], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4220, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5444,   1.1110,   0.6739,  -4.5644,  -3.4302, -10.9394,  -5.9022,
         -3.6865,  -4.8756,  -1.7933,  -6.0597,   2.3175,   4.3496,  -2.9851,
          0.8513,  -2.6968,  -1.3360,  -3.7046,   3.6218,   2.9619],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1316, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0103, -0.0726, -2.6169,  0.7282, -4.1010,  2.7247,  2.0000, -2.3128,
         0.3383, -2.4160, -0.4386, -0.9087,  5.2309, -6.8623, -0.5959, -1.1135,
        -2.9355,  2.3106,  1.9117, -4.3783], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8759, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1412, -23.1597,  -5.4964,  -5.0129,  -3.2838,  -1.2143, -16.4965,
          4.8395,  -6.7387,  -0.8514,  -4.3623,   0.1000,  -1.4208,   1.9920,
          4.5712,  -4.3423,  -2.2570,  -4.0095,   0.2175,  -0.3053],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7147, -12.5342,  -3.6652,  -2.9135,  -3.7596,  -2.8537,  -2.5514,
         -2.0902,  -5.9167,   1.3527,   0.9647,  -2.6021,  -4.2595,  -4.4055,
          0.2209,  -6.6755,  -0.0196,   0.6331,  -1.4252,  -0.2198],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2254, -2.6478, -4.5315, -0.8360,  2.2907, -0.8492, -2.1080, -4.2956,
        -1.4335, -3.3821, -3.6185, -1.0438, -6.1892, -4.1114, -8.9323, -5.7927,
        -3.9907, -4.4216, -2.8327, -1.0315], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.5493,  -1.4684, -12.6435,  -0.7744,   2.5196,   5.1697,  -2.9293,
         -0.1906,  -1.3619,  -4.0804,   0.7695,   1.8233,  -2.9981,   0.6313,
         -3.1611,  -0.1670,  -3.2396,   0.7862,   2.5667,  -8.7307],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9378,   3.3551,   4.2541,  -5.8458,  -1.8007, -16.7893,  -2.4949,
         -0.0440,  -1.6100,  -5.8015,  -4.0286,  -2.4270,  -5.3953,   2.7162,
          2.6461,  -4.9150,   0.0599,  -6.2048,  -4.7624,  -6.0601],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9543, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.7303,  -6.0787,   0.0627,  -2.6544,  -1.9298,  -2.7010,   0.7854,
         -6.3542,  -3.4512,  -0.7576,  -2.2494,   0.9884, -24.2109,  -2.1725,
         -1.6687,  -3.3217,   0.7742,  -3.2087,  -5.8716,   1.8666],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8712, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0637,   0.7338,  -1.5961,  -6.3048,  -4.3512, -11.9357,  -3.3607,
         -6.0774,  -8.0841,  -5.9606,  -4.7248,  -2.5948,  -5.3705,  -2.6459,
         -0.6979,  -1.7119,  -0.2938,  -5.2070,   3.0350,   0.7094],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5251, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8514,  0.3234, -2.1481, -0.2708, -6.0956, -9.7281, -4.1140, -6.3050,
        -2.6812, -2.5018,  3.3801, -3.4426, -4.2714, -1.7694, -0.1543, -4.8449,
        -0.9211, -5.7647, -4.2945, -1.2605], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3753, -1.7123, -3.0185,  2.6889,  4.1472, -1.1208, -1.3573, -1.1750,
        -0.3417,  2.4445,  5.5187, -2.9254,  0.7688, -0.4007, -2.0860,  2.9318,
         4.8923, -0.6545,  1.3389, -1.1422], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.5086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9147, -4.5187,  0.5643,  3.1855, -4.7797, -1.1254, -1.3464, -0.3584,
         0.8296,  3.3172, -1.4488, -5.8741, -2.5244, -0.5176, -4.0132,  3.3499,
         1.6897, -1.6350,  0.3235, -3.0518], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0924, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.1871, -2.3301, -0.7924, -3.0974, -0.8602, -3.6283, -1.0840,  1.3399,
        -3.0588, -0.0636, -0.6588, -1.3442,  3.1836,  4.4622, -2.6902,  1.8262,
        -2.2751, -0.6388,  0.4494, -1.5307], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4802, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.9931, -3.7650, -0.9210, -2.4461, -0.0283, -5.5603,  3.9334, -7.6852,
        -4.1670, -2.8139, -2.5811, -2.2091,  2.2824,  3.5615, -3.5096,  0.4124,
        -1.9601, -0.8813, -0.6372,  3.6382], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.7173, -4.5092,  1.1273, -3.7519, -0.7930, -2.6232,  1.6066,  2.1344,
        -3.6590,  1.3287, -2.2620, -1.7257, -2.4465,  2.9855, -2.4879, -0.5765,
        -4.6759, -3.3744, -7.2719,  0.5750], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4401, -4.5854,  0.7756, -0.5465,  5.5893, -3.3264, -0.0793, -3.8818,
        -1.7707, -2.9541,  4.5144, -1.1017,  0.1055, -4.6065, -1.1762, -7.4201,
         0.8513,  0.3104, -5.9990,  0.0612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9096, -5.8011,  1.7914,  4.2799, -4.5449, -0.5235, -1.0300, -2.5618,
         2.3193,  4.1715, -2.5108,  1.5997, -0.6971, -2.9428,  2.2859,  2.4298,
        -2.2825,  1.9107, -0.9504, -2.0350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5791,   0.7107, -10.2932,  -0.5669,  -1.6086,  -6.1837,  -0.8265,
         -0.5487,  -6.1528,   1.8044,   2.9616,  -2.5784,  -4.7038,  -8.9003,
        -10.5421,  -5.0825,  -5.8918, -11.1364, -11.5312,  -5.1623],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3396, -2.0432, -0.5723, -1.2776,  5.0316, -2.2887,  0.8081, -2.0244,
        -0.4123, -3.1915,  1.9467,  2.3575, -1.6451,  1.2016, -3.8652,  1.2863,
        -5.6300,  3.2469,  2.2235, -2.4404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5166,  -5.2538,  -4.7460,  -2.0091,  -2.6316,  -1.9425,  -6.7161,
         -5.5381,  -3.8127,  -6.1590,  -1.7675,  -1.0288,   4.1071,  -2.8920,
         -3.0075,  -3.2809,  -4.6406,  -1.9478,  -1.5697, -10.7134],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5033, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1734, -1.3505, -3.8509, -2.6047, -3.5171,  0.8990,  0.8068, -2.6104,
         1.0162, -3.8040, -0.8470, -0.2092,  3.6905, -1.8928,  0.5354, -2.0311,
        -0.0062, -4.2962,  2.9069,  0.8506], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0244, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-27.3144,  -5.1158,  -3.6314,  -1.8746,  -7.6926,  -7.4887,  -3.2928,
         -2.8982,  -0.7901,   0.3292,   5.0050,  -6.0772,  -0.5572,  -3.5444,
         -1.2196,  -5.5095,  -0.1000,   1.3948,  -2.4046,  -0.3919],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6587, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5459, -0.2810, -3.7394,  3.5621,  2.9941, -2.1636, -0.5827, -0.4429,
        -0.8699,  0.6059,  0.9329, -4.1878, -1.3285, -7.6233, -5.6006, -4.2016,
        -2.8325,  0.2428,  1.5381,  5.4018], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9332,  -8.2059, -11.7021,  -0.6010,  -4.8997,  -1.9765,  -9.6721,
         -5.5239,  -6.4019,  -5.3464,  -5.4424,  -1.8546,  -3.5467,   1.3460,
         -3.4044,  -2.7950,  -3.7243,  -6.1605,  -3.5082,  -4.1120],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7232, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0247,  0.3561,  0.4149,  4.8851, -3.2110, -2.6750, -2.3383, -2.5364,
         1.7503,  3.9597, -1.7223,  1.3665, -2.5550, -4.6329,  1.5750,  2.4364,
        -3.9708,  1.0934, -0.0570,  0.2377], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3001,  -1.3852,   0.3963,   1.2270,   4.8269,  -3.1935,  -4.5292,
         -5.3911,  -3.1583,  -5.4685,  -0.6417,   0.7467, -13.6467,  -1.0753,
         -0.7566,   1.0680,  -3.4976,   1.0235,  -1.4618,  -1.7223],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7670, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9233,   0.9907,  -1.0257,  -0.8190, -10.6448,   2.1272,  -2.6636,
         -0.7212,  -1.3000,  -0.8695,  -3.1993,   2.8732,   1.7597,  -2.3852,
          0.0883,  -2.4371,   0.5342,  -5.0778,   2.0028,   1.8872],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6959e+00,  3.9105e+00, -1.3709e+00, -3.4131e+00, -4.0727e+00,
        -2.8607e+00, -2.4058e+00,  1.5527e-01,  2.8644e+00, -1.6585e+00,
         1.9512e-03, -2.8141e+00, -2.0837e+00, -4.1890e+00,  2.3722e+00,
         2.3427e+00, -2.6399e+00,  9.8991e-01, -4.5655e+00, -1.0443e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1089, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6288, -4.3802, -5.6530,  1.3452, -2.2672, -0.3390, -2.4899, -1.1284,
         0.2241,  4.9117, -2.8605, -2.5155, -5.6762,  0.3865, -7.8595,  2.8123,
         3.6871, -6.9850, -2.6370, -3.4626], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9758, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.6362,  -4.2254,   2.0718,  -1.1625,  -1.8867, -10.7274,   4.2689,
         -1.4182,   0.4409,  -2.5096,  -2.6744,   1.4266,   3.7701,  -4.9175,
         -2.5349,  -2.0676,  -1.5224,  -1.8445,   2.2713,  -3.9383],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2772, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8418, -1.1601, -0.5193,  0.9743,  4.8491, -1.8164, -2.8660, -1.5592,
        -1.2782, -3.5527, -1.2896,  1.7780, -2.2384, -0.7662, -0.7639, -0.3057,
         2.5462,  5.5127, -1.7204, -1.7024], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3360, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1735, -3.3720, -2.7517, -1.9864,  0.7408,  4.5862, -3.0862, -0.5043,
        -2.8248, -0.7313, -4.5385,  0.5206,  1.0061, -3.1646, -2.4275,  0.3890,
        -2.5442,  2.4615, -2.8357, -3.5007], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4369, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.8431,  -9.3763,  -8.4442,  -3.3752,  -1.6676,  -5.8546,   0.4219,
         -0.1377,  -2.1167,   0.2566,  -3.8562,  -3.2393, -10.2420,   2.2218,
          1.5496,  -3.5872,  -1.2494,  -1.5254,  -2.0999,   3.9082],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4236, -0.2322, -2.5031,  0.0368, -4.5129,  2.7046,  2.9915, -1.8416,
         0.9503, -3.4704, -0.7344, -3.5764,  1.8458,  1.4979, -2.4470,  1.1088,
        -2.2534, -2.0598,  2.7119,  5.1916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3008, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.3189,  -4.3780,  -2.6914,  -6.3557,  -6.9775,  -5.2608, -10.7579,
         -7.0411, -14.6360,  -6.8075,  -6.3776,  -3.9029,  -4.9604,  -7.5538,
         -3.5147,  -9.1418,  -4.1330,  -9.7087,  -2.2707,  -5.2227],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9772, -2.2313, -2.5494, -2.3987,  0.5288,  2.4969, -5.1290, -1.3227,
        -1.7264, -1.3854, -3.1042,  2.2635, -1.9752, -2.8291, -5.7309, -2.4384,
        -4.5485, -1.5645, -3.0329, -7.7730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1081,  -1.3409,  -4.6476,   0.3964,   1.4229,  -3.9080,  -0.1813,
         -1.4563,  -3.2032,  -1.7045,   1.1417,  -5.0481,  -2.0824, -35.3787,
         -6.2160, -12.0629,  -6.1580,  -1.7248,  -0.0878,   0.2050],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2071, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0215,   1.3276,   2.8068,  -1.4073,   0.8850,  -1.4065,  -3.5978,
          0.3506,   4.5794,  -1.9965,   2.1187,  -9.7517,  -5.9059,  -2.8553,
         -2.7898,  -0.7132,  -0.0423,   5.4669, -12.1509,  -0.0800],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4092, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.8738,  -1.7175,  -0.2583,  -4.1775,   3.3286,   3.1368,  -3.7519,
         -1.0215,  -0.7379,  -2.0120,   1.6374,   4.0799,  -3.7054,   0.1671,
        -15.6958,  -4.3165,  -4.6627,  -5.2255,   0.1525,  -0.4128],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7160, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1239,  1.9429, -2.6622, -1.1416, -1.5096,  2.1856,  2.8817, -2.2986,
         0.8097, -3.1757,  0.5571, -0.2041,  4.0976, -2.2515,  0.6473, -1.0418,
        -2.6052,  2.8248,  4.5420, -2.5588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1042, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.7839,  -3.0642,  -6.4095,  -4.6156,   1.6043,  -5.6424, -13.6795,
         -2.4298,  -2.4174,  -1.3728,  -1.5093,   1.5699,  -1.4985,  -0.5682,
         -2.6003,  -4.8110,  -1.3393,   3.4158,  -2.0152,  -0.2330],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9526, -0.1698, -3.9336, -0.0133, -4.4296, -0.2650,  2.3206, -2.7606,
         0.0421, -2.0276, -3.9289, -3.3879, -1.9048,  3.4688, -1.9159, -0.1353,
        -2.2162, -2.4714,  0.5110,  2.3616], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.3736,  -1.1943,   0.4637,  -2.5249,  -0.3200,  -0.4349,   4.0020,
        -14.4895,  -0.8975,  -1.5905,  -4.8421,   1.3504,   2.9627,  -1.0770,
         -1.2282,  -8.1808,  -6.1508,  -3.3554,  -3.4871,  -1.9547],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9788, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2630, -7.7671, -0.3018, -0.5697,  4.8755, -7.8854, -2.2194, -1.1504,
        -1.4342, -4.4910,  2.8879,  4.2973, -4.4944,  0.1702, -0.7820, -1.2146,
         2.1179,  2.7788, -5.5809, -1.1595], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2593, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4926,  0.2114, -7.0144,  2.5821,  1.4919, -4.7564, -1.5539, -3.4488,
        -4.1726,  0.2760,  2.1989, -3.4994, -2.5045, -1.8423, -0.1068,  1.3335,
         5.3137, -2.1391,  0.6258, -1.9963], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0747, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2613e+00, -2.8489e+00,  6.1855e-01, -8.2801e-01, -1.1707e+00,
         2.4539e-04,  4.7893e+00, -2.7320e+00,  3.8862e-01, -2.8282e+00,
        -1.8386e+00, -4.1148e+00,  1.1055e+00,  2.0708e+00, -2.0399e+00,
        -2.7339e-01, -1.3121e+00, -8.2374e-01,  2.1264e+00,  5.5436e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.0953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8584,  0.8512, -1.2690, -3.5624,  0.7907,  4.7078, -3.5438,  2.4387,
        -2.3712, -1.1449, -2.8026,  5.0767, -3.6654, -5.0111, -4.0921, -0.7699,
        -5.3103,  2.2423,  0.9052, -1.9274], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0658, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4152,  -7.3905,   2.9179, -10.0613,  -6.3697,  -3.8638,  -0.7805,
         -2.9197,   2.2834,   2.5593,  -3.5029,  -3.8423, -16.5699,  -2.9604,
         -5.5518,   0.1288,  -1.4366,   4.4960,  -3.9539,  -4.0771],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1971, -0.6278,  1.3085,  4.6292, -1.3534, -0.8953, -1.1470, -0.2714,
        -3.5482,  5.4060, -2.8039,  0.2605, -6.3598, -4.0906, -5.8531,  2.9724,
         0.2929, -1.7923, -0.7649, -2.6543], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.7137e-01, -2.9677e+00,  5.2720e-01, -2.6192e+00, -8.8851e+00,
        -1.8315e+00, -4.9658e+00, -5.2370e+00,  3.0835e-02, -3.9612e+00,
        -1.5544e-03,  4.6633e+00, -6.1488e+00, -1.5853e+00, -2.4380e+00,
        -8.9793e-01, -5.4546e+00,  1.4343e+00,  2.6446e+00, -3.9883e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1127, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4617,  0.4943, -3.3114,  0.0803, -4.6618,  1.3694,  2.4894, -1.5940,
         0.6871, -0.6821, -2.3297,  3.1160,  3.0947, -2.9132, -0.3126, -3.4722,
        -1.0296, -4.2716,  2.4022, -5.0743], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3449, -2.0993, -0.5391, -5.5753,  2.7161,  2.5663, -1.9651, -1.2221,
        -1.4183, -0.5030,  2.4332,  2.6935, -3.1060, -1.2651, -6.5339, -6.4592,
        -4.6036, -5.8399, -4.6790, -0.5197], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9632, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.4213,  -3.3103,  -2.0636,  -2.8740,  -7.0806,  -3.6911,  -2.7285,
         -2.5318,  -5.8414,  -3.9229,  -3.6432, -31.2150,  -0.1607,  -5.1100,
         -5.1006,  -4.4402,  -2.5638,  -2.3379,  -5.8404,   0.2067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4914, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  5.7469,  -4.7132,  -0.2882,  -1.8576,  -0.9069,  -4.3601,   3.5186,
          3.4660,  -2.7038,  -1.1333,  -1.2248,  -2.3433,  -0.3885,   4.1388,
         -2.3715,  -1.6338, -12.4218,  -3.3460,  -6.7792,  -0.4576],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5030, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5449,  -5.1313,  -5.2036,  -9.4655,  -8.8934, -12.0573,  -5.5479,
         -4.4439,  -7.9099,  -3.9215,  -6.3382,  -4.3224,  -4.6107,  -4.0881,
         -3.8543,  -4.6986,  -5.5751,  -6.4214,  -8.6203,  -4.1553],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3012, -15.1640,  -6.6161,  -3.6428,  -2.3693,  -1.1561,   2.5765,
         -0.6347,  -7.4677,  -1.1528,  -0.0978,   0.8279,  -7.4364,   3.5866,
          1.5954,  -2.5823,   1.5861,  -1.5489,   0.5631,  -3.0751],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3255, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2003,  0.5222,  0.7730, -1.9810, -0.8785, -2.0272, -0.8164,  2.1763,
         5.4944, -0.9217, -8.4812, -3.3999, -2.0103, -5.6693, -3.0859,  0.3455,
        -0.9647, -0.6625, -0.6700, -4.3723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8470,   3.6101,  -2.3995,   1.4811,  -1.3517,  -0.9011,  -4.3809,
          1.9681,   1.5743,  -2.8591,  -0.3412,  -2.8964,  -4.0586,   0.9850,
          1.0628,  -3.1557,  -2.5585, -15.0093,  -3.0963,  -5.2541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5592,  -5.5481, -12.4730,  -3.6758,   4.1260,  -9.5738,  -1.5439,
         -1.1138,  -7.4241,  -1.8038,  -3.6572,  -4.5215,  -1.6039, -16.0935,
         -3.8345,  -7.0072,  -9.2926, -10.8554,  -2.8580,  -0.1954],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1754, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8757,  0.0790, -1.8636, -2.8580, -3.4065,  1.1998,  1.4606, -2.8625,
         2.1419, -1.0395, -2.0035,  2.6619,  3.1763, -2.7761, -2.7870, -4.9353,
        -6.1750, -3.7094, -5.9655, -4.8433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2944, -2.8185, -0.5887, -3.8675,  0.8877,  2.4910, -2.9556,  1.0797,
        -3.0716, -3.0450,  1.4221,  2.5682, -1.4394,  2.1626, -0.4669,  0.4357,
         0.0513,  3.9717, -3.0586,  0.2754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1414,  -6.1433,   1.1229,   2.9062,  -2.7544,  -1.3186, -16.3101,
         -6.0784,  -5.6895, -18.5688,  -3.5942,  -1.6464,  -1.3248,   1.3695,
         -5.9657,  -0.6292,  -1.9216,  -4.8107,   1.9179,   2.9534],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4314, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7575,  -3.4035,   0.5943,  -0.8352,  -3.9843,  -2.1512,  -1.2655,
         -5.3941,  -2.1790, -16.9444,  -6.3603,  -7.7447,  -4.2486,  -0.7534,
         -0.6372,  -2.7657,  -4.5283,   0.7863,  -1.4075,  -4.1148],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4983,  -4.8695,  -9.1712,  -3.1482,  -3.7955,  -0.5161,   1.7221,
          4.4984,  -4.6041,   0.7649,  -0.8127,  -1.4563,   0.6906,   0.8879,
         -2.3425,   1.9708, -20.2307,  -1.8007,  -8.1758,  -0.2180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4114, -3.5674, -4.3280, -3.3865,  2.1412, -2.0538, -1.0796, -0.4051,
        -3.2693,  2.1385,  4.3827, -3.5310,  0.5323, -4.4219, -0.7119,  0.2991,
         3.9049, -1.9525,  1.5014, -2.3495], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6614, -3.6440, -0.4033, -1.6956, -0.3787,  1.5727,  4.8480, -1.6483,
         1.9137, -2.6839, -2.0344, -4.1944, -0.5667,  0.7051, -3.3851,  0.2330,
        -0.8886, -9.3900,  2.4499,  1.8589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5007, -1.2629, -1.4853, -5.7705,  2.1653,  2.7770, -3.0722,  0.2982,
        -2.4946, -0.2427, -4.5035,  3.6506, -3.7060, -1.7755, -0.6782, -5.8925,
        -0.8858, -1.3173, -3.7966, -3.6138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8053, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0361,  5.1131, -2.5971, -2.4797, -3.0614, -0.9427, -2.8367, -0.1251,
         2.4237, -3.0042, -0.5798, -2.4862, -3.7195,  2.4115,  0.7744, -2.1231,
        -0.8241, -7.1986, -6.8666, -4.7186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5402, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5426,   3.7228,  -2.1323,  -0.5648, -10.6601,  -3.8203,  -7.2625,
         -0.8777,  -2.3186,   5.6088,  -3.5713, -11.2331,  -6.1203,  -6.2265,
         -2.8855,  -2.9051,   0.1800,  -1.1651,  -8.1034,  -4.1929],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7583,   2.8239,  -2.6293,   0.7292,  -3.1592,   0.3879,  -1.7065,
          2.6329,   3.6809,  -3.5855,   0.3907,  -6.8383,  -5.7659,  -2.0775,
         -2.2690,  -5.7970,  -3.2381,  -1.7853,  -0.9506, -12.4436],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.9800,   0.6252,  -2.9823,   0.6238,  -4.2511,   2.8607,   3.0843,
         -1.4332,   0.0988,  -1.0878,  -2.4852,   3.0309,   4.9337,  -2.7530,
         -2.8151,  -8.8759, -34.5192,  -2.5569,  -5.8784,  -0.5398],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1450, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0232e-01, -2.8059e+00,  6.4973e-01, -3.9492e+00,  1.8741e+00,
         2.2650e+00, -1.7609e+00,  7.1231e-01, -7.4802e-01,  3.3766e-01,
        -5.1363e+00,  5.0689e+00, -2.4677e+00,  3.1343e-03, -3.0191e+00,
        -6.1498e-01, -4.8076e+00,  4.1114e-01, -3.2450e-02, -2.7162e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.8419, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7958,  0.3313, -3.4395,  2.1122,  0.0243, -2.9285, -0.5776, -0.8153,
         0.1214, -6.3312,  6.8414, -3.6040, -0.4161, -0.7001, -2.1728,  1.6376,
         3.9621, -2.3912,  0.4387, -6.7428], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8723, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1739,  -1.3432,   3.5842, -11.4184,  -1.6198,  -2.7373,  -1.0026,
         -4.3363,   2.1248,  -0.9476,  -2.8122,  -0.7973,  -3.2544,  -3.2107,
          1.1944,   3.3872,  -3.2086,   1.9010,  -0.8824,  -1.2903],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8642, -1.8727, -0.7120, -5.5588,  1.7912,  1.3482, -2.1067,  0.9265,
         0.3383, -0.3074,  2.2394,  4.6147, -2.2997,  1.1237, -3.2720, -3.0122,
        -0.9862,  0.5513, -5.4978, -0.7782], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2397, -19.3311,  -8.5279,  -3.1644,  -9.0779,  -2.1317, -16.3859,
         -7.5679,  -3.8950,  -8.4740,  -4.2325,  -7.4432,  -1.9434,  -6.8471,
          3.4869, -10.9541,  -6.4330,  -4.6504,  -1.8178,  -6.7466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7822,  1.1056, -3.1468, -1.0419, -0.1775,  4.5543, -2.4375,  1.3550,
        -2.7908, -1.7555,  0.2978,  3.7954, -2.5364, -0.3502, -3.5217, -1.3116,
         1.1259,  4.5603, -1.7413,  0.1167], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4341, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.6649,   1.4444,  -3.9598,  -3.5122,  -1.3531,  -7.8548,  -3.7204,
        -13.3872,  -4.3614, -10.4061,   4.8416,  -3.2251,  -5.3899,   0.7146,
         -5.0878,  -2.9447,  -6.0850,   0.8263,  -0.5187,  -2.2935],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4720, -4.2231, -0.5443,  2.4174, -4.0201, -0.7161, -1.6132, -0.3493,
         0.8406,  4.5708, -3.3589, -1.8346, -1.7576, -1.7651, -1.7597, -1.9051,
        -0.8204, -2.7975, -0.6688, -3.9351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7354, -8.1651,  0.3476, -4.4928, -0.7546, -9.8871,  4.3805, -2.0860,
         0.5894, -1.6663,  0.8453, -5.2639,  2.7937,  1.1913, -3.8738, -1.5414,
        -2.2392, -2.1696, -1.9199,  2.3812], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8298,  -4.5628,   0.7001,  -4.1309,   3.2948,   3.1251,  -2.9458,
         -0.4130,  -0.6033,  -1.6573,  -1.7758,   1.7741,  -7.0879,  -3.3724,
        -52.1624,  -5.9255,  -6.8151,  -6.1282,  -4.2044,  -1.9662],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8343, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9701, -10.4024,  -1.1285,  -4.6183,  -4.9710,  -4.7807,  -7.3731,
         -4.9035,  -4.2657,  -6.4682, -10.9152,  -4.2472,  -7.1918,  -4.2019,
        -12.1606,  -4.0078,  -5.9779,  -3.4237,  -3.2118,  -3.0969],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.7158, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6214, -3.3185,  1.0378,  3.2361, -0.9881, -1.4953, -0.5160, -7.6617,
         1.2041,  1.6903, -3.4558, -3.3081, -9.2812, -4.3574, -3.9334, -3.3056,
        -0.6766, -1.1071,  3.1165, -4.1942], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0468, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1250, -4.0402, -5.5312, -7.1519, -5.2496, -4.2430, -5.6569, -4.2601,
        -4.6056, -9.4551, -7.3718, -5.4534, -4.4794, -5.1031, -7.8731, -3.7081,
        -4.9685, -1.8169, -5.0669, -0.7508], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.1955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9440,   0.8521,   3.2219,  -2.1961,   0.5439,  -0.5151,   0.1965,
         -3.8741,   4.3525,  -1.7942,  -0.0950,  -3.6218,  -4.6326, -10.8552,
          2.1330, -10.9086,  -4.8256,  -4.9429,  -5.8888,  -3.4621],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5128, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5975,  0.1805,  1.9772, -2.0998, -0.0586, -0.9358, -0.8784,  3.0409,
         4.2136, -3.7032, -0.4833, -1.9916, -1.4005, -4.1899,  2.1828,  0.6562,
        -1.7889,  2.3041, -0.9816,  0.6350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6644,  -4.3010,  -3.0098,  -5.0942,  -0.4631,   0.1122,   4.2897,
         -3.3430,  -0.4741,  -2.5928,  -5.1182,  -0.0572,   2.0750,  -2.2414,
         -1.0591,  -3.6698, -11.1284,  -2.3571,  -6.1288,  -1.6115],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6918, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4217, -1.7902, -1.2925, -1.1622, -3.5221,  2.9978,  1.2815, -6.4174,
        -0.7550, -4.3863, -0.3510, -7.2292,  2.8523, -3.5535, -0.5244, -4.0603,
        -0.6358, -4.6003,  0.7738, -3.6138], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6783, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7480,  1.2142,  3.5209, -3.1590,  0.8675, -3.3818, -3.6722, -4.9948,
        -1.6411,  1.6423, -4.7544, -0.5319, -1.1567, -1.8753, -3.5458,  1.3519,
         3.6876, -3.4830, -0.6141, -3.7117], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9289, -0.6086,  0.2532, -2.8654, -1.4435, -8.1737, -6.2960, -3.0842,
        -3.1965, -0.2246, -3.6747,  5.2559, -5.7390,  1.4370, -1.3529, -0.3084,
         1.1006,  0.6178, -1.5595, -0.1511], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8684, -6.9926, -3.7249, -1.9559, -4.4442, -1.3890, -5.3262, -8.9267,
        -3.7657, -5.9421, -2.8596, -3.9567, -1.9391, -3.7565, -2.5194, -4.6284,
        -1.8884, -2.6743,  1.2125, -4.2396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3973,  -4.8204,  -1.0555,   3.5438,  -2.2002,   0.3598,  -2.0306,
         -3.4181,   2.3850,   2.6097,  -2.4368,  -0.2212, -10.4616,  -4.2885,
         -7.3950,  -7.0005,  -6.1795,   3.1305,   3.6689,  -7.8689],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3538, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1317, -1.6562, -0.4835, -2.1085,  2.4932,  2.8997, -2.3061,  1.9440,
        -3.2722, -2.5585,  0.2761,  0.5662, -2.1089, -0.5405, -2.6054, -0.7571,
         2.8494,  3.7518, -2.2824, -0.9269], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6249, -4.2163,  3.4442,  2.7786, -1.9704,  0.1013, -1.2372,  0.3436,
        -1.0797,  5.3057, -2.0677,  0.5988, -2.7494, -6.6906,  2.2442,  1.7210,
        -6.3909, -2.4725, -3.0414, -2.3044], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3202,  -0.1541,  -3.1074,   1.2465,   2.3200,  -4.0264,   0.0970,
         -1.2114,  -3.9369,   1.8697,   0.6305,  -2.8899,   0.2344,  -9.9213,
         -2.8816,  -5.0431,   0.9741,  -2.0859,   4.7418, -12.6296],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0047, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6519,  0.4621, -5.4841, -5.7610, -3.5901, -4.2858,  0.0666,  0.5914,
         4.3478, -2.3348, -2.9362, -1.6127, -1.7370,  0.8575, -7.8551, -4.8508,
        -1.0754, -1.3662,  0.3189, -3.3807], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4703,  2.9503, -1.9261,  1.3122, -1.9696, -2.9139,  1.3149,  4.2174,
        -4.5232,  0.7553, -1.1352, -0.9467,  1.1543,  4.1679, -3.1156,  0.6793,
        -2.1162,  0.4164, -6.0902,  1.3498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4944, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5073e+00,  7.7579e-03, -5.7831e-01, -2.7552e+00,  1.8045e+00,
         1.4206e+00, -1.5829e+00, -2.3329e-01, -9.2127e+00, -3.4004e+00,
         2.1991e+00,  3.4951e+00, -8.8477e+00, -2.4106e+00, -1.8922e+01,
        -2.2494e+00, -4.0664e+00, -1.4037e+00,  3.1559e+00,  5.2905e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0898, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6140, -2.8730,  0.5736,  1.1243, -1.5771, -0.2883, -1.6505, -1.5111,
         1.4986, -0.4068, -5.5495, -0.2812, -4.4595, -1.3293, -1.1269,  3.8549,
        -3.9990,  0.4836, -0.9535, -2.4774], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0781, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6437, -1.7877, -4.2772, -2.4895, -3.6537, -1.4385,  1.5809, -2.4444,
        -3.5170, -2.1619, -0.5559, -3.4163,  2.8904,  1.9610, -2.6261,  0.1338,
        -1.6558, -1.4585,  1.5300,  0.2490], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8760,  -1.0218,  -5.0273,   1.5487,   0.0642,  -3.9144,  -1.3403,
        -36.5998,  -2.9018,  -6.6085,  -0.5226,   0.4527,   4.9760,  -3.2366,
          0.4859,  -1.5145,   0.0922,  -3.9813,   3.7917,   0.2285],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9229, -4.4481,  1.9457,  4.8055, -1.8308, -1.9400, -1.9122, -2.0407,
        -7.6006,  1.8784, -9.8032, -4.0813,  0.1209, -2.0332, -0.3881, -8.7497,
         0.8710, -1.1812, -0.4174, -2.7749], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1751, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3589,  4.2243, -0.2703,  1.7264, -1.8865,  0.1877, -2.3599,  4.7036,
        -2.9360, -4.6612, -2.3381, -6.0923, -2.3522,  1.1562,  3.3148, -3.1382,
         0.7573, -3.8191, -1.7551, -7.2665], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0762,  -3.3017,   3.9574,   1.3074, -12.1882,  -2.5542,  -2.3648,
         -2.3609,  -3.4141,  -0.4164,   2.5426,  -4.7537,  -0.6831,  -3.5110,
         -1.5936,  -2.0706,   3.6267,   2.2170,  -1.6470,   0.2451],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1649,  0.1060, -1.6455,  0.8613, -1.4824, -2.5292,  0.2808, -1.3797,
        -2.3467, -1.1038, -6.4116, -6.9253, -4.3064, -3.9619, -1.8923, -0.7818,
         1.7248, -4.0623, -1.3940, -2.3249], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9205, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8132,  0.3142, -1.7262, -1.8190,  0.5615,  5.5812, -3.1232, -2.3886,
        -3.1303, -1.6492, -4.5453, -0.2224,  0.2589, -2.6236, -1.9484, -1.0489,
        -1.9698, -3.8268,  4.9896, -1.4477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0789, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9281,  -2.4181, -18.0788,  -0.8868, -24.2123,  -3.9477,  -2.1576,
        -36.3437,  -2.6629,  -7.1666,  -0.2679,   0.8151,   5.7577,  -2.8613,
         -0.4168,  -2.5642,  -5.7712,   1.2728,   2.4658,  -3.2230],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1798, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5233,  0.0564, -2.2729, -2.7075,  2.7267,  2.8242, -4.1775, -0.3542,
        -4.0166, -3.0727, -1.6696, -0.1400, -1.0112, -2.1525, -1.5974, -1.4969,
         2.1556,  2.7837, -5.4517, -0.7662], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3189, -3.7444, -1.5986, -3.5135,  3.9347, -2.4435, -0.5802, -3.1250,
        -2.4498, -5.4837,  0.0277,  0.1486, -5.3758, -0.6863, -2.1481, -1.5148,
         3.0704,  5.3470, -2.1501,  0.5005], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2552, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0689, -0.7657, -1.2152, -2.4024, -0.7943, -4.2454, -5.1416,  3.5169,
        -3.4434, -0.2559, -3.3632,  0.8825, -2.9653,  1.3868,  3.7102, -5.8070,
        -1.8115, -3.5088, -0.6513, -2.3313], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3569, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.4042, -0.8441,  2.2883, -1.8175,  1.6177, -1.2908,  0.2435,  0.4827,
         3.6226, -2.7629,  0.9441, -2.4983, -0.2795, -4.4142,  4.0308, -1.0284,
        -3.6756, -1.8411, -3.8026,  0.0754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.6068,   3.8074,  -6.9847, -13.4481, -13.9590, -43.7305,  -4.5672,
        -13.0194,  -5.4975,  -6.4715,  -1.5795,  -6.7168,   5.3848,  -1.8458,
         -0.4544,  -1.7807,   0.4589,  -4.7544, -42.9210,  -0.0575],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9872, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8271, -0.8112,  3.6310, -2.2355, -1.0150, -3.0703, -1.7183, -3.7890,
         0.5853,  2.1082, -3.3521, -1.0376, -1.0091, -7.0717,  1.3163,  2.5926,
        -6.0062,  1.2044, -3.6136, -3.6646], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9304, -0.5527, -9.1603, -6.6240, -1.9386, -4.9232, -0.5077,  0.3794,
         3.2852, -3.5617, -0.2735, -2.2452, -3.8561,  1.8744,  2.3095, -3.7579,
         1.2699, -9.0122, -9.2223, -9.0480], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9248, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1380,  -3.9097,  -1.2039,  -5.2668, -11.4888,  -3.4776,  -4.4283,
         -0.9913,  -3.9587,  -2.5044,  -7.0951,  -2.7022,  -3.5307,  -4.9937,
         -0.9269,  -0.1473,  -3.6031,  -0.3596,  -4.4472,  -5.8501],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4874, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.2527,  -2.9425,  -3.2268, -22.9562,  -3.4861,  -6.1020,  -1.9645,
         -4.1036,   0.3514,   4.6405,  -5.7928,  -0.3908,  -1.4306,  -1.1634,
         -3.4061,   3.5303,  -0.8576,  -0.7500, -23.7129,  -5.2361],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7874, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6460, -2.5386, -0.0835, -1.1104, -2.8595,  1.7672,  4.8712, -1.6433,
        -0.1761, -1.5485,  0.5563,  0.8647,  0.2894, -4.9708,  1.7164, -2.2862,
        -3.5648, -6.4206,  0.6504,  0.4954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7173, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8415,  1.4817, -0.4044, -1.2997,  1.3464,  3.0114, -2.0958,  0.4917,
        -9.6915, -2.2522, -5.4043, -0.1159, -1.5927,  5.4859, -5.3467, -2.0022,
        -3.9119, -1.0591, -3.4847, -0.2308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1398, -0.1694, -1.3377, -3.4369,  0.2691,  3.6835, -1.7532,  0.3103,
        -3.1704, -2.4296,  0.4123,  3.6774, -3.8563,  0.9393, -2.1694, -2.8081,
        -5.2846,  0.3749,  1.2806, -3.0028], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7616, -0.3868,  1.1382, -1.9151, -0.3110, -8.4571, -4.1298, -4.4036,
         0.2199, -0.4556,  4.7081, -7.4696, -0.4729, -2.0618, -4.8514,  1.1556,
         0.6169, -2.4878, -2.2938, -9.7230], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2339,  -0.8543,  -3.5263,  -6.4375,   2.0978,   2.3452,  -3.3650,
         -2.8680,  -0.1929,  -0.3972,   1.8395,   4.1166,  -2.2994,  -0.1602,
         -2.4585,  -1.1376, -15.5382,   4.5708,  -0.9873,  -0.7797],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0667, -3.7675,  0.0209, -0.7156, -1.3157, -6.3594,  3.1095,  1.3992,
        -2.8412, -1.1624, -2.7713,  1.0742, -1.9789,  4.7393, -1.8800,  0.0466,
        -1.6200, -0.1975, -5.1133,  3.3784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5348,  2.3684,  4.3767, -2.6516, -0.7516, -1.9800, -1.0949, -4.9082,
         1.7082, -0.7196, -5.0866, -1.7074, -5.3766, -3.8572, -4.3346, -0.4538,
        -0.1947, -2.8796,  0.7199, -0.4202], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4389, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5920, -1.9506, -0.5556, -2.8439, -1.1499, -4.1020, -1.6344,  2.7327,
        -1.9233,  0.8922, -2.4666, -4.8572,  1.2739,  3.1871, -3.5534,  1.1968,
        -1.6198,  0.3581, -0.6900,  4.9122], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6101, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.8771,  -4.1002,   0.6383,  -1.9484,  -1.7440,   0.5955,   3.6295,
         -1.5809,  -5.8012,  -2.9772,  -2.3455,  -4.1778, -19.5880,  -5.1365,
         -2.4180,  -4.5383,   0.3186,  -2.9972,  -7.4845,  -2.3757],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.9977,  -3.6364, -11.1194,  -4.9587,  -1.6859, -11.0139,  -3.7126,
         -6.3807,  -1.5162,  -3.5341,  -1.0623,  -3.4404,   1.3867,   2.2241,
         -3.8976,  -1.1168,  -0.8506,  -1.0062,  -4.5090,   4.1863],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0821, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.1735e+00,  2.2759e+00,  3.7838e+00, -3.3357e+00, -3.9468e-01,
         3.1220e-03, -4.7086e+00,  1.5396e+00,  2.0769e+00, -4.5714e+00,
        -2.7568e+00, -5.5804e+00, -7.3792e+00, -2.6741e+01, -4.0149e+00,
        -2.5719e+00, -4.7112e+00, -1.0836e+00, -5.3445e-01, -7.2863e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.7465,   2.0411,   5.4462,  -1.2124,  -0.2401,  -1.5799,  -0.6627,
         -0.9242,   4.1519,  -2.0782,  -1.2014, -14.4088,  -4.8231,  -6.8876,
         -0.2391,  -0.2394,   2.8378,  -5.5867,   1.0453,  -1.4039],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2377,  1.6326,  1.0714, -2.5331,  0.3979, -6.4070, -0.4280, -7.9799,
         3.6879,  0.8229, -2.4178,  1.3682, -0.6878, -0.0504, -1.7762, -0.4373,
        -8.8232, -0.6549, -1.1769,  0.9675], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3831, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.8624e+00, -3.2244e+00, -3.8840e-01,  3.2927e+00, -3.6948e+00,
        -3.3152e+00, -1.0142e+01, -7.4192e+00, -3.8891e+00, -2.6955e+00,
        -1.4021e+00,  1.3351e+00, -3.7554e+00, -4.3703e+00, -2.9147e-05,
        -1.5094e+00, -3.5763e+00,  2.2846e+00,  3.1209e+00, -3.2853e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2748, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7392, -0.9442, -2.9794, -0.5656, -0.2468,  4.6068, -0.8401,  0.3549,
        -1.0287, -1.1694,  1.8979,  3.1948, -2.4456,  0.5277, -3.0186, -0.1228,
        -3.3452,  2.8592, -0.2292, -1.9607], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.2722, -1.8552,  0.6493, -0.9646, -0.2884,  1.9724,  5.3130, -2.6994,
        -4.0167, -3.2254, -2.2755, -2.7197, -1.1228,  2.5088, -5.6309,  0.7535,
        -1.6992, -0.2296, -9.4320,  4.0359], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8827, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1911,  3.4272, -2.7991,  1.3078, -1.2499, -1.1833,  2.8747,  2.3433,
        -3.2807,  0.5984, -2.6932,  0.1036, -3.9839,  2.0678,  2.2814, -1.2700,
        -1.2714, -0.5756, -2.2467,  1.9148], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0722, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8564, -1.1742,  2.3994,  0.2692, -3.1737,  0.7030, -3.2548,  1.3655,
        -5.1744,  3.5331,  2.6102, -1.9209,  0.7646, -2.2016,  0.8538, -1.7941,
         5.9121, -2.7407,  1.2320, -2.6452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2647, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5394,  0.5157, -0.1093,  5.6211, -1.4425, -0.5607, -1.5322, -0.8431,
        -5.5386,  3.2517,  1.7996, -2.4769, -0.9473, -0.8480,  1.0658, -1.3290,
         6.2540, -4.1786, -0.0742, -1.2383], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0571,  -5.6769,  -0.7194,   1.3173,  -1.0315, -10.9307,  -0.1598,
         -2.2202,  -2.3876,   1.8512,   3.1222,  -2.4871,   0.7677,  -7.0080,
         -5.2366,  -5.5445, -26.5943,  -4.7655,  -1.5267,  -1.3533],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6820, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2295e+00, -2.7543e+00, -9.7481e+00, -6.0781e+00, -4.7038e+00,
        -7.1087e+00, -1.1313e+01, -5.1986e+00,  5.1521e-01, -6.2103e+00,
         1.6099e+00, -3.2555e+00, -1.7802e+00, -1.9679e+00,  5.4833e-03,
        -4.1228e+00,  2.9334e+00,  5.6695e-01, -2.6549e+00, -1.0401e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1799, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2971,  -2.6480,   2.8396,  -4.3478,   0.9523,  -1.7075,   1.4897,
         -4.3416,   1.2327,   4.1928,  -8.3841, -10.3001,  -4.6881,  -4.8015,
         -3.7913,   0.6428,   0.6968,  -5.2136,  -0.7994,  -4.6329],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1656, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8190, -3.3840,  2.1493,  3.9020, -4.1482,  1.2136, -1.9206, -0.8128,
        -0.0972,  6.0750, -1.4162, -0.0555, -2.5789, -4.7098,  0.8670, -1.0102,
        -5.1726, -2.5482, -1.0378, -1.7349], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9120, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4093,  -7.7246,   2.6958, -15.5783,  -2.4971,  -8.4652,  -9.7204,
         -5.9979,  -6.2285,  -5.4737,  -3.8670,  -1.3803,  -1.6668,   4.3596,
         -3.6333,  -1.2383,  -4.0219,   1.1181,  -4.9872,   2.7425],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6987, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3225, -3.3228,  1.4423, -2.6322, -1.7106,  1.7385,  4.1627, -2.9505,
         0.2201, -2.5849, -2.5147, -5.6063,  1.4481,  0.6488, -2.3684, -2.1711,
        -2.8990, -0.6678,  1.8575,  4.9317], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5328, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.7066, -12.2128,  -7.4825,  -6.9136,  -4.1567,  -2.3559,   2.2309,
          4.8019,  -2.1731,  -0.7083,  -2.9369,  -0.6544,   1.4133,   2.1571,
         -3.2466,   1.5588, -12.3328,  -2.8692,  -6.3025,  -1.8122],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3351, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.0514,  0.8892, -3.0106, -0.5816, -4.3543,  1.6921,  2.0949, -1.5280,
         0.7871, -2.9904, -1.2807, -8.3203,  2.4162,  3.3967, -4.3576, -0.4557,
        -3.1667, -0.7450, -4.3618,  1.1812], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.3068, -1.2150,  1.4915, -0.8629, -2.3271,  2.3867,  3.6637, -2.5223,
        -0.4647, -0.9870, -3.7809,  2.9446,  2.4654, -4.3478,  0.2949, -1.2601,
        -0.5295,  2.3572,  4.8902, -2.0926], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.1706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1999, -0.4576, -2.3979, -0.6357, -4.2766,  1.4265,  1.4926, -3.0853,
         1.4046, -2.1353, -4.0902,  0.4494, -1.9616, -3.2241,  0.8122, -1.2267,
        -0.7448, -4.5697, -2.6107, -0.8032], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3917, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.9911,  -2.6974,   1.9194,   2.0618,  -5.7763,  -4.4901,  -6.9792,
         -8.4116,  -6.6046, -14.6786,  -7.2925,  -1.2379, -15.8841,   5.9117,
        -18.0548,  -2.7492,  -1.9607,  -1.4814,  -2.8716,   3.0207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5124, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2422, -3.4574,  3.1628,  1.5320, -2.0131,  0.9433, -1.0583, -1.0494,
         0.8475,  4.9905, -2.0668, -0.1481, -2.6146,  0.6663, -4.9698,  1.5062,
         2.7818, -3.6220,  1.8167, -3.5549], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4757, -2.3251, -0.9972, -5.5518,  1.5946,  0.1105, -1.4410,  1.6741,
        -3.3746, -0.3946, -3.2374,  2.4912,  4.0947, -4.0226, -3.2840, -3.9024,
        -0.4400, -2.1809,  1.8136,  3.4234], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8213, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1468,   2.3098,  -3.7310,  -6.8061, -13.9599,  -5.3998,  -5.3891,
        -15.0817,  -5.5543,  -2.2099,  -0.5364,   5.1034,  -3.4241,  -0.8649,
         -1.5207,  -5.5006,   1.9934,   2.8991,  -3.3596,  -2.6051],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1088,  -4.6561,  -0.9238,   1.6527,  -1.4153,   1.4692,  -2.3060,
         -2.5092,  -3.3502,   5.2060, -12.5275,   0.2161,  -1.7149,  -1.7751,
         -1.6623,   5.2768,  -2.0697,   0.0540,  -3.5888,  -1.6965],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9801, -0.0337, -2.5724, -0.8757, -4.3475,  2.6141, -3.4589, -2.1728,
        -1.9520, -2.9611,  3.4401,  1.0224, -1.8766, -4.5544, -2.5643, -1.4226,
        -1.9035,  0.2406,  2.2615, -1.4025], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2015,   2.2789,  -1.2223, -12.2199,  -9.9771, -11.2007,  -1.6786,
         -5.8404,  -3.1225,   1.9322, -15.2733,  -6.6577,  -2.7899,  -3.3365,
         -2.2447,  -4.9667,   3.6408,  -2.2368,   1.0923,  -2.6397],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9832, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6103,   1.1411, -11.9437,  -7.1351,  -3.7834,  -3.3184,  -0.0907,
          1.0446,   2.0307,  -3.4222,   1.1702,  -1.4924,  -3.0585,  -7.7220,
         -0.6715,  -9.0365,  -1.4511,  -2.9579,  -1.0720,  -5.9246],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2481,  3.5412, -2.5565,  0.5741, -2.0164, -0.5773, -6.7766, -0.2740,
        -0.0082, -7.1557, -1.9571,  0.0548, -3.0454,  1.7121,  3.9260, -1.3736,
         0.8583, -1.1243,  0.0957, -3.9698], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2175,  3.1199,  1.5774, -5.0089, -0.4710, -1.2742, -4.1429,  2.1833,
         1.8604, -3.0118, -1.7509, -2.5827, -1.0629, -5.6186, -0.4098,  2.0224,
        -6.6288, -1.0473, -3.3791, -0.7061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5775, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6748,   2.7820,   0.8401,  -6.3448,  -0.8852, -10.8185,  -8.1680,
          3.6960,  -5.8790,  -2.8613,  -5.4161, -22.9342,  -2.3933,  -5.9416,
         -0.3276,   1.7576,   4.9617,  -5.7445,  -1.0535,  -3.2386],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0387, -1.6331,  0.0849, -4.0137,  2.1173,  3.0983, -1.9353, -0.7062,
        -2.5376, -1.9474, -6.6194,  1.2639,  1.1224, -1.2835,  1.0953, -2.0715,
         1.2472, -7.8657,  3.4887,  0.2003], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7928, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4385, -15.7721,  -4.4476,  -7.0668,  -2.1550,  -6.3517,   2.5272,
         -0.8484,  -3.5835,   0.0963,  -1.6553,  -0.2000,  -4.3988,   3.3052,
          2.7674,  -3.1434,  -0.7913,  -1.2001,  -0.9639,   2.6870],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5737,  -4.8901,   0.1646,   2.2344,   1.0790,  -2.6403,  -1.1214,
         -3.3832,  -2.1664,  -0.0533,   2.6230,  -7.3086,  -2.0344, -12.5169,
         -2.4032,  -4.3333,  -1.2510,  -5.1794,   3.3306,  -3.9055],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4274,  0.7727,  0.5773, -1.2620, -0.9695,  0.5221, -1.8849,  1.6933,
         3.0141, -1.5621,  1.5724, -0.9999, -1.1460, -3.4667,  1.2507,  1.8077,
        -2.1549, -0.0200, -1.2920, -1.6512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4813, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2055,  1.8040,  3.4707, -2.9430,  1.2431, -0.9018, -6.7110, -3.6044,
        -4.3500,  3.0290, -0.7600,  0.3101, -2.4064,  1.0319, -4.2902,  1.9389,
         3.6504, -0.8068,  1.8012, -0.2741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5487, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.9376,  -2.9210,  -1.7149,  -2.1707,  -0.9990,  -6.4060,   1.9723,
          1.9100,  -1.3540,  -1.5312,  -6.6162, -20.4172,  -6.0617, -12.1124,
         -1.6072, -11.0832,  -2.5981, -14.7367,  -3.4567,  -4.1564],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.0693,  -2.3906,  -0.5778,  -3.2004,  -2.2418,  -0.0697,   3.5933,
         -3.9437,  -1.8809,  -4.0259, -36.7760,  -4.5021, -35.9360,  -4.1347,
         -5.4043,   0.8353,  -3.4478,   1.8970,  -7.0462,  -0.9326],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.4799,  -3.4424,  -2.2666,  -0.7422,   1.9665,  -6.6847,  -3.5883,
         -2.7541,  -3.0761,  -9.2105,   2.3131,   2.2892,  -2.0022,  -7.6615,
        -23.7944,  -4.7816,  -7.9270,  -0.2382,  -1.0229, -14.5133],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8309, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-29.7042,  -5.5774,  -2.5322,   1.5945,   4.6315,  -8.7620,  -1.1179,
         -1.6169,  -2.4493,  -2.3542,   2.3284,   3.8247,  -2.3545,   0.8311,
         -1.1158,  -0.1273,   2.6177,   2.2347,  -1.3910,  -0.7657],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0903, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.1106, -13.6751,  -0.4902,  -8.1712,   4.8627,  -6.2436,  -1.3725,
         -2.8547,  -2.0383,  -3.5380,  -0.6965,  -3.7018,  -1.3166,  -3.5494,
        -13.3938, -14.1916,  -5.9304,  -0.6604,  -0.9674,  -0.5569],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3611,  -3.6832,   4.2237,  -2.4908,  -2.3405,  -0.7285,  -2.9578,
          2.0741,   3.3662,  -1.5107,   1.2767, -12.4173,  -5.7249,  -6.0654,
        -22.6712,  -3.6597,  -2.5771,   1.1920,   0.2312,  -4.0362],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9069, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8683, -2.1154, -1.6461, -2.0443, -4.4076,  1.9962,  2.6884, -2.2134,
        -0.6205, -4.1775,  0.0488, -4.0166,  0.6881,  2.5647, -2.8733,  0.2957,
        -1.8845, -1.1089, -0.0233,  3.5448], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8921, -0.9648, -0.4260,  2.3853, -3.3220,  0.2482, -2.6313, -0.0863,
        -3.9425,  6.3478, -4.0865,  0.9547, -4.2212, -1.1823, -8.6276,  4.1494,
        -2.4341,  1.0059, -2.3478, -2.0183], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.7089, -12.3153,  -4.1353,   0.3770,  -0.7504,   2.8435,  -3.9189,
         -0.0562,  -2.2509,  -0.8518,   1.9877,   2.3929,  -2.6492,  -0.8800,
         -1.2702,  -2.9615,  -4.7213,  -2.2785,   3.6281,  -1.7096],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8114, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5321,  -1.1869,  -5.0399,   1.1236,   1.8134,  -3.5999,  -1.9198,
        -11.3124,  -2.5507,  -6.7066,  -1.5467,   0.7521,   4.8907, -16.3270,
         -1.1521,  -2.6755, -15.3835,  -6.4328, -21.2174,  -1.7316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.5367, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2208,  -2.3260,  -2.6472,  -0.3457,   4.6530,  -1.6229,  -1.6062,
         -3.6106,  -1.7866,  -2.3944,   3.0120,  -4.7438,  -2.4302,  -2.4039,
        -15.3955,  -3.5516, -20.9512,  -4.8582,  -3.3688,  -2.6407],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4620, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4905,  1.8121, -1.1303, -3.2483, -2.8928, -0.1825,  2.3699,  5.3134,
        -1.8587, -8.0897, -1.5526, -4.1965, -5.2895, -1.3445, -1.4291, -2.3625,
        -0.7376, -2.2055, -0.2428,  2.9964], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4322, -5.4014, -2.1513, -3.1418, -0.2568,  2.3521,  6.0587, -4.0928,
        -0.3218, -2.2552, -1.9999,  2.2904,  5.1567, -1.0101, -1.8651, -4.0723,
        -0.2773, -3.9966, -1.4535,  3.1273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.2063, -2.8068,  1.3828, -1.4112, -2.7085,  1.7798,  3.6186, -2.6490,
         2.2873, -1.1472, -0.6010, -6.2774,  3.1764,  1.2090, -2.7897,  0.5777,
        -0.8026, -2.8338,  1.2076,  0.7370], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1922, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5830,  2.1018, -0.2183, -2.1769,  2.3818, -2.0142, -2.7015, -2.0855,
         2.1113, -1.4533, -1.2318, -0.6879, -2.2955,  1.6851,  3.0014, -3.2715,
        -0.6290, -2.1151, -1.5040, -4.4692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9078, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0986,  -3.2138,  -4.7720, -30.5251,  -7.9974, -26.5118,  -6.2444,
         -9.5537,  -8.8457,  -9.1912,  -5.5874,  -6.2624,  -4.3760,  -2.3217,
         -1.1550,   3.7051,  -2.4210,  -0.5832,  -2.7705,  -1.9870],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.6356, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4755, -5.3609, -4.6892,  1.3504,  0.3263, -1.1708, -0.9185, -1.1744,
        -0.0281,  0.3341,  6.4971, -1.5906, -2.2709, -9.4221, -5.0351, -3.3306,
        -3.1447,  0.1170,  1.5326, -3.1741], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6814, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.2634,  -2.7968,  -0.3434,  -2.9086,   4.5700,  -5.8151,  -6.3333,
         -0.3630,  -2.7850,  -1.0992,  -7.8064,   6.7141, -13.1602,  -3.8105,
         -3.9016,  -4.4110,   1.5379,   4.2448,  -5.4519,  -0.9559],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.9822, -1.7268, -0.8844, -4.9099,  1.2211,  0.8685, -2.7145,  1.6587,
        -0.7123, -0.5791,  2.4004,  1.2048, -2.7439, -0.0437, -0.7919, -1.6922,
         0.2098,  5.1410, -2.5621, -0.4580], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.3239, -2.6280,  0.6721, -1.2197,  0.8600,  1.2139,  2.6707, -2.5914,
        -0.4233, -0.5335, -1.4598, -0.7608,  4.7271, -1.0793,  1.4472, -5.0335,
        -3.9607, -4.6478, -1.0009, -0.8838], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5654, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.8637, -0.5092, -3.3348,  2.6253,  1.1454, -3.7562,  0.6818, -2.6145,
         0.5141, -2.9610, -1.9019,  1.9533, -2.4856,  1.5867, -3.5498, -2.8507,
        -0.3380,  3.1394, -3.0458, -0.8799], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7859, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6036, -0.6179, -5.1879,  2.9689,  3.2565, -6.4162, -0.1301, -2.9815,
        -1.6486, -3.0458,  2.5300,  3.0890, -2.6208,  2.0538, -2.9251, -1.1282,
        -3.3746,  4.6109, -1.0744, -0.2276], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8237, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.6205,  -0.6752,  -3.7463,  -4.1992,  -6.0439,  -1.3921,   2.4590,
        -13.6284, -10.6032,  -6.9688,  -5.5619,  -3.8471,  -6.5061,  -3.3090,
         -2.6946,   0.7817,  -2.3854,  -3.4934,   1.9818,  -0.8131],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0133, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1591, -6.5369, -2.0113, -3.6879, -5.3781, -1.6742,  1.8186, -4.0552,
        -2.3602, -1.7077, -7.3424,  1.4033,  3.6330, -2.3942,  0.7075, -1.7722,
        -2.3175,  1.4321,  3.6682, -1.3757], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5055, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.4336,   1.4683,  -7.9504,  -3.1384,  -0.1955,  -3.3787,   0.1822,
          2.2200,  -5.6387,  -4.9011,  -4.6707,  -2.1630,  -4.4008, -10.0923,
         -5.6608,  -0.5843, -48.9078,   2.6994,  -2.5897,  -4.8644],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3362,   3.7895,  -1.7352,   0.0684,  -3.7967,  -2.0871,  -0.4652,
          1.1186,  -1.2604,   0.1464,  -9.6309,  -3.8631,  -6.8281,  -1.3298,
        -10.6068,   0.1873,   0.2928,  -4.0919,  -0.2148,  -3.1180],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.9504, -2.9737, -0.4524, -3.7773, -1.4871,  0.7806,  3.6158, -3.1676,
        -0.5814, -2.7041, -3.9570,  1.6642,  0.5756, -2.0688, -0.2906, -2.3485,
        -0.3517, -4.9123,  0.8470, -2.1380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9889, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4293, -2.7943, -2.8160, -4.4539, -4.5451, -4.3881, -6.1123, -3.7631,
        -1.3205, -3.4480, -4.5238, -1.6556, -5.4583, -3.8864, -2.4984, -2.7194,
        -3.4251, -5.2655, -3.0771, -2.3599], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.6470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.9478,  -3.2001,   0.2465,  -3.5905,  -1.9421,   1.2281,   1.3807,
         -4.0865,  -4.2653, -43.6282,  -7.3301,  -5.2717,  -5.0088,  -0.3750,
         -0.8489,  -1.3417, -12.3155,  -2.4409,  -2.2264,  -7.9662],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9517, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.6634,  -5.3328,   0.5139,  -3.0782,  -5.8885,   2.0570,   4.1902,
         -3.4628,  -0.5055, -19.3408,  -3.6387, -32.6741,  -8.1662,  -0.4220,
          1.3908,   4.2068,  -6.7301,  -1.5307,  -3.7058,  -0.4879],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8971, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7966e+00, -7.3347e+00, -5.2654e+00, -5.7414e+00, -6.7475e+00,
        -3.4179e+00, -4.2152e+00, -3.2993e+00, -6.9914e+00, -2.0193e+00,
        -5.5789e+00, -5.3601e+00, -3.7004e+00, -1.3817e+00, -4.8124e+00,
        -2.6841e+00, -4.5727e+00, -3.9586e+00, -4.1663e+00, -3.3886e-03],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3024, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5692, -3.4167,  2.3648,  2.9089, -3.1382, -0.8017, -5.3133, -9.5557,
        -4.5278, -3.4888, -4.1630, -4.1897, -4.2887,  1.1426, -6.2941,  2.2537,
         2.1119, -3.2021, -0.2710, -4.3351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3387, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2874,  0.7691, -4.5134, -4.6953, -3.9813, -0.9707,  1.3779, -5.5486,
        -1.6227, -1.3338, -7.8433,  0.8893,  0.5231, -1.9870, -2.1981, -4.1787,
        -3.0185, -6.4566, -1.1328, -2.5633], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0909,   0.0727,   3.0161,  -2.3675,  -0.1648,  -1.6820,  -3.1843,
          1.7311,   3.2347,  -6.5202,  -1.2070, -43.4918,  -3.3518, -23.3883,
         -3.4883,  -2.9603,   2.8225,   0.6106,  -4.0235,  -1.1304],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3846, -0.6344, -8.9002, -2.0274, -1.4878, -2.8613, -3.9550,  2.7397,
         1.6803, -2.7089,  1.0599, -3.2925,  0.2755, -4.7290,  2.7441,  2.3152,
        -3.4759, -0.4504, -2.7092,  0.7350], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4034, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -9.6143,  -1.9119,  -5.5017, -11.0981, -12.2010,  -4.3867,  -7.9420,
         -5.7542,  -9.5864,  -7.2970,  -7.4252,  -4.2566,   0.1214,  -1.5280,
         -0.8235,  -4.5415,  -0.9153,  -1.7313,  -2.3737,   1.0337],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8867, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1107, -0.3207, -7.9262,  0.6899,  0.3938, -2.4456,  1.7273, -0.8280,
        -1.8871,  2.9550,  3.0575, -1.8732, -0.1562, -4.0812, -2.3305, -4.8656,
         0.5566,  0.7574, -1.9249, -1.1992], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1906, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.8154,  -6.2548,  -5.3132, -10.6494, -12.8278,  -2.2359,  -5.9305,
         -0.2355,   2.0487,   1.8716,  -5.0622,   0.8416,  -0.7827,  -5.5042,
          1.4527,   5.1781,  -3.5509,   0.1335,  -1.5781,  -4.3980],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4491, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2739, -0.9176, -1.2095,  2.2701, -0.0283, -3.1000,  0.5431, -1.4961,
        -2.3661, -4.6833, -4.3172,  1.4668, -2.0308,  0.1330, -0.2344, -0.7088,
         2.4411,  5.1939, -1.4926, -0.5365], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4900, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.2406,   0.1438,  -2.1712,  -0.6218,  -3.6710,   0.0258,  -1.6033,
         -4.7000,  -2.4814, -15.6036,  -2.3132, -16.5312,  -0.0724,  -1.7547,
          5.0127,  -6.8271,  -0.5324,  -3.9894,  -0.9109,  -2.9532],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3398, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3627,  0.8912, -1.6521,  0.9912, -3.4009, -1.0872, -0.4231,  2.8917,
        -3.9676,  1.9725, -1.9543, -0.0997, -7.2694,  1.5782,  0.7527, -1.9212,
         0.6794, -2.3132, -3.8651,  1.4931], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9533, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4346,  -1.1206,   4.0871,  -4.9529,  -2.1567,  -3.5511,  -6.8608,
         -4.4056,  -2.2944,  -3.5017,  -0.3303, -12.9313,  -3.1430,  -9.6055,
         -0.1291,  -0.4096,   4.7142,  -7.3400,  -2.0987,  -3.7316],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0598, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7152, -0.0563, -1.1246,  2.4113,  4.2264, -5.3361, -0.4106, -1.0507,
        -1.0195,  3.2366,  0.1651, -1.7199,  1.2833, -1.8116,  1.3500, -7.0729,
         1.2775, -0.2521, -4.2358,  0.9950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7658, -1.1858, -2.8675,  3.0679,  4.8290, -3.0343,  0.4704, -0.8901,
         0.4705,  2.0905,  5.2084, -2.3923, -0.1963, -4.0109,  0.9616, -6.1310,
         3.6458,  2.8675, -2.1201,  1.9518], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.0485, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5726,  -2.3950,  -0.5820,   0.6287,   0.7825,  -4.1234,  -2.3602,
         -1.3003,  -0.7749,   0.4206,   2.7499, -11.8413,  -3.9185, -14.4707,
         -2.6342,  -5.5803,  -1.7851,   2.4559,   4.6421,  -3.5792],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.6788,  -1.8200,  -7.3515,  -1.0946,   0.2914,   6.4784, -13.4752,
         -1.6371,  -2.6890,  -0.6698,  -3.4876,   2.3685,  -1.0353,  -2.2322,
          0.0484,  -5.5382,  -2.2429,   1.3713,   3.3360,  -1.0827],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.6245, -2.8668,  1.7617, -0.3355, -0.6243, -4.3256,  2.3639, -3.3779,
        -8.2533, -4.1880, -7.3469, -0.9032, -1.3229,  2.0673, -4.2819, -1.5499,
        -3.3939, -7.2331, -0.8331,  0.2458], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0386, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3557,  -0.0259, -12.6681,  -2.4548, -17.9757,  -6.5769,  -4.3264,
          2.5100,   1.6275,  -3.8626,  -0.4109,  -5.0413,  -0.4230,  -1.7702,
        -17.1615,  -2.1061,   0.2923,  -7.0667,  -1.8079,  -2.1271],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1366, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1666,  -3.8021,  -1.9530,  -5.1322,   1.2946,   2.9779,  -2.2929,
         -3.9760,  -3.1592,  -2.8805,   0.9574,   1.4607,  -4.7412,  -1.3846,
        -17.7690,  -3.7898,  -8.7922,  -0.2324,  -5.6759,   2.7288],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8664, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7567, -4.0302,  0.9148,  2.5483, -3.2824, -3.3850, -8.9576, -7.4868,
        -5.0328, -7.1758, -6.8726, -3.8790,  0.1533, -0.7593, -5.7707, -3.1569,
         1.1431, -1.5132, -1.4547,  1.4534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3307,  -3.2550, -42.0141,  -7.6974,  -0.0706,  -2.9767,  -4.0820,
         -1.9910,  -2.4653,   1.1266,   2.5737,  -3.3349,   0.5696,  -1.0810,
         -0.3639,   2.6342,   3.1714,  -1.6846,   1.9743,  -2.5561],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0927, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2482, -6.0030, -4.0482,  1.0869, -1.3658,  1.4352, -0.2635, -1.2277,
         3.4706,  3.7281, -3.5069, -0.2513, -3.4444, -0.2817, -8.7503, -0.9287,
        -1.5737, -9.1192, -3.4778, -1.2235], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  5.3624,  -3.0843,  -0.1510,  -4.6995,  -5.4204,  -4.7265, -10.5208,
          0.0363, -24.0503,   4.7256,  -7.4979,  -2.1706,  -2.0726,  -2.3104,
         -9.2362,  -5.7834,   0.6582,  -2.1605,  -1.9288,  -2.5546],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.8793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3295,  -0.8100,  -1.8602,   1.2232,  -0.9096,  -3.6179,   0.8647,
         -2.8612,  -4.7343,  -0.2725,   3.2700, -13.2869,  -2.0512,  -1.1816,
         -0.3471, -10.8357,  -3.5825,  -1.9361,  -5.6213,  -2.0541],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9437,  1.0995,  1.7379, -4.5019, -1.8977, -4.0473,  0.5730, -6.7994,
         1.9970,  2.7938, -4.3152, -0.5937, -2.0019, -2.0837, -4.2763,  3.5228,
         2.8159, -2.0333,  0.6618, -0.5868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0440, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.7778,  -8.2765,  -6.7706,  -5.2723,  -4.6422,  -1.5074,  -5.4370,
          1.4134,   4.3205,  -2.3183,   0.5245,  -2.4823,  -2.5305,  -1.3293,
          4.3151,  -1.5388,   0.1299,  -1.0915,   1.2696,  -3.3222],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.7324,  4.2841, -3.5414, -0.8350, -1.9146, -2.6263,  2.6550,  3.4453,
        -3.0666,  0.7908, -4.3154, -3.7878, -6.8733, -6.5796, -0.9893, -0.1082,
         0.5208, -8.4283, -2.5769, -2.8004], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7507, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5422,  -1.8441,  -3.6616,  -3.2323, -12.3737,  -4.2688,  -1.7448,
         -6.8544,  -6.0809,  -2.1645,  -2.6137,  -2.4126,   0.6071,   4.2842,
         -3.2248,  -0.9181,  -3.1339,  -0.2269,  -4.3177,   2.0573],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.5600, -3.7488, -4.1487, -1.7213, -3.4720, -0.5485, -5.2276, -0.4212,
        -1.1403, -0.8289, -2.2279,  0.9626, -3.7324, -0.7943, -6.3231, -7.5743,
        -3.3753, -5.6900, -0.8596, -1.2411], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6099, -7.9698, -3.7482, -6.5406, -1.7918, -4.6404,  3.5728,  3.9667,
        -3.4928, -0.7127, -3.7878, -2.7432, -0.8884,  3.0356, -3.0741, -0.1782,
        -5.1003, -3.2619, -4.0683, -5.0319], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4033, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5736,  -5.7066,  -3.4380, -12.8533,  -5.9953,  -6.6029,  -9.2662,
         -3.7475,  -0.1772,  -2.5074,   0.0835,  -3.1829,  -0.3679,  -3.0842,
        -13.9485,  -6.1413,  -0.5631,  -2.5592,  -1.3833, -16.0506],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4544, -0.0681,  3.8082, -6.1202, -1.6801, -2.8645,  0.1469, -6.5429,
        -1.2139,  2.6129, -2.3338, -0.7692, -2.0022, -2.0688,  2.0103,  3.7855,
        -9.9386,  0.3547, -4.1365, -3.1311], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6803, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7139, -0.6037,  4.6303, -3.0015,  0.2122, -1.5410, -2.5656,  0.5792,
         4.8486, -2.7194,  0.1485, -4.4388, -2.0173, -1.9139,  1.4977,  2.5960,
        -1.5507,  1.0303, -2.0224, -0.5582], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9675, -2.5468, -3.4024,  1.2980,  3.7926, -2.1510,  0.0742, -2.5129,
        -0.5332, -2.2295,  4.6981, -1.2292,  0.6604, -2.0606, -2.4630, -1.1766,
         3.7622, -1.4731, -1.8110, -1.1207], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2721,  -0.2091,  -4.6555,   2.4317,   3.0822, -12.5956,  -2.9269,
        -11.0713,  -5.8486,  -7.9022,  -4.6133,  -3.0836,   0.0161,  -2.1362,
          3.2643, -10.7478,  -1.8562,  -2.8860,  -3.8224,   1.0258],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7608, -2.1900, -1.2280, -0.4458, -1.8715, -0.5077, -2.7045, -3.7148,
        -4.6152, -6.5404, -3.5993, -1.9456, -6.8469, -4.4546, -1.4878, -5.6912,
        -1.6743,  2.9745, -6.0785, -2.8251], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5112,  -2.1203,  -4.0824,   0.7280,   3.1677,  -9.6791,  -3.1420,
        -17.5061,  -4.9650,  -2.9216,  -4.7625,  -1.5869,  -2.9525,   2.0550,
          3.8916,  -3.0931,   1.7479,  -2.5807,  -0.7450,  -2.6290],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6344, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3563, -4.3348,  1.9468,  3.6197, -3.4426, -4.9193, -4.3802, -0.4093,
        -3.5192,  0.1809,  2.0065, -1.1154,  1.0047, -1.5304, -6.5821,  2.5556,
         2.3241, -1.8807, -1.4172, -9.2035], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3870, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5356, -0.1179, -0.2769,  0.2412,  0.8041,  5.3424, -3.7222, -3.1823,
        -5.7184, -3.9795,  0.2744,  2.1108, -4.6819, -3.8548, -9.1628, -7.0448,
        -4.6754,  0.3371,  1.4764, -4.8420], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8972,   5.0646,  -5.1116,   0.5653,  -1.5900,  -0.1335,  -3.1225,
          1.1126,   3.3175,  -1.9613,   0.0578,  -2.5951,  -2.3228,  -3.7940,
         -2.1197,   1.5913, -11.2738,  -0.2247,  -2.5103,  -1.3372],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1745, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2719,  0.9214,  5.3412, -2.8424, -2.5571, -3.5476,  0.6233, -4.1701,
         1.3484, -4.6890,  0.8950, -1.7977, -6.8395, -4.8020, -1.0273,  3.3066,
        -3.6513,  1.6843, -2.7717,  0.5008], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2173, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3547, -17.1720,  -8.8628,  -8.9142, -10.2576,  -6.0024,  -5.0269,
          0.0575,  -3.6969,   1.3118,   2.1143,  -2.5101,   1.1651,  -1.3691,
         -3.4372,   1.5255,  -0.2700,  -3.3039,  -0.6037,  -2.3702],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3255, -1.6126, -3.8250,  1.3594, -1.9070, -5.8395, -5.3425, -1.4136,
        -0.6752, -7.6789, -0.5279,  1.1433, -2.5078, -1.2059, -0.3839, -1.5080,
         1.6441,  2.4345, -6.5265, -2.9321], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7990, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.9984,  -2.8527,   0.5889,  -2.7311,  -0.7970,  -4.3620,   4.5776,
         -2.2848,  -1.3970,  -0.7642,  -5.4096,   2.5687,  -4.7484,  -1.5675,
         -4.8463, -12.7650,  -5.4163,  -4.8966, -12.7540,  -6.0380],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8316e+01,  8.8388e-01,  2.4737e+00, -3.9262e+00,  3.7914e-01,
        -4.1064e+00, -1.9826e-02, -2.9130e+00,  5.2834e+00, -5.4411e+00,
        -1.4017e+00, -2.2526e+00, -7.4010e+00,  2.1138e+00, -4.3555e+00,
        -9.2234e+00, -2.9211e+00, -1.3967e+01, -2.5100e+01, -1.7826e+01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.4019, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5586,  2.4807, -1.1611,  0.8183, -1.8670, -3.0759,  2.8332,  4.7828,
        -2.3989, -1.0922, -2.5332, -4.6663,  1.0801, -6.5269, -2.5570,  0.8074,
        -0.6529, -1.6110,  3.4207,  4.1793], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2591, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4399,   0.4825,  -1.8105,  -1.2121,  -0.4195,  -4.3638,   0.5559,
          1.5057,  -2.7882,  -1.7109, -10.8951,  -6.1327,  -3.9232,  -4.7544,
          0.5918,  -0.3971,   4.7746,  -3.0643,   1.9500,  -3.0035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8027, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4707, -2.2605, -3.5870,  1.2472,  4.2104, -1.4534,  1.8146, -1.1169,
         0.7056,  1.8604, -1.8577, -4.2114,  0.0055, -2.6573, -0.2477, -3.1677,
         0.1527,  3.2658, -1.9237,  0.5409], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3605, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1290, -1.9250,  0.5205, -4.1316,  2.2421,  3.7883, -5.2845, -0.1774,
        -1.8137, -2.5768, -0.6517,  3.4259, -2.0091,  0.2201, -5.9337, -4.9478,
         0.0358,  2.5205, -4.1201, -2.7487], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3247, -0.3362,  4.2252, -4.3215, -0.5634, -3.6099, -2.2633, -2.2407,
         2.7346, -3.7748,  1.0196, -2.0514, -1.2839,  0.5046,  4.3429, -1.5765,
        -0.8156, -2.2628, -4.1257,  0.5968], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0063, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0962,  2.1014, -2.7410, -0.7771, -3.4581, -0.7062, -4.6161,  0.2027,
         1.0438, -4.2220, -0.5536, -2.7569, -3.5577,  1.2676,  2.6385, -2.4955,
        -0.3562, -4.7783, -1.9942, -1.9144], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3289, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2847, -0.3084, -1.3875,  0.0289,  2.6346, -1.5748,  0.1486, -1.7901,
        -1.9248,  2.7621,  3.4965, -2.0892,  0.2334, -2.7334,  0.3319, -4.4764,
         4.3219, -2.4463, -0.4763, -2.3986], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3182, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.1476,   2.4792,   2.2241,  -3.8640,  -3.7234, -25.2083,  -4.9953,
         -9.2734,  -4.5731,  -6.9535,  -0.3702, -12.4444,   4.1749, -15.6631,
         -2.4948,  -1.9361,  -0.5918,  -4.3786,  -0.9559,   0.5239],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4100,  1.8821,  4.6868, -1.6673,  1.1831, -1.2120, -1.1713,  2.7122,
         4.6623, -2.5602, -0.5686, -3.9685,  0.1371, -7.1445,  2.8217,  3.1607,
        -4.3432,  0.7938, -3.9108, -0.5117], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3714, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5267,   4.2945,  -6.8284,  -6.8687, -48.1720,  -4.0515, -19.5012,
         -7.2057,  -8.4788,  -6.2112,  -0.3997,   2.1504, -10.3353,  -6.5355,
          0.5582,  -1.3190,  -4.9508,   2.9020,   2.7628,  -1.6422],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9153, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5974,  2.0026,  3.3380, -2.3556, -0.7448, -3.1825, -0.6276, -2.7988,
         3.6084,  2.3239, -1.9917,  1.6571, -0.9887, -0.9341,  0.8587,  5.1021,
        -2.6178, -0.1555, -0.9551, -3.1771], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.2477,  -5.5567,  -4.8812,  -5.5041,  -4.4095,  -5.1981,  -4.2223,
         -8.4571,  -3.5318,  -3.8596,  -4.1065,  -5.6254,  -6.1338, -12.8873,
         -4.3396,  -6.1962,  -7.7548,  -7.0110,  -5.7654,  -5.0152],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.8352, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3437, -2.7212, -3.3166, -4.4758, -2.6337, -1.2287, -0.4297, -1.3467,
         4.1000, -2.9158,  1.3241, -2.8999, -5.9228, -2.7290,  0.9269,  3.3963,
        -2.1975,  0.4547, -1.3292, -1.3341], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8491,   2.6120,  -1.1751,   1.5690,  -3.6165,  -1.1558,   1.3250,
          2.0139,  -3.1731,   1.6083,  -1.1386,  -2.1026,   2.4568,   2.3821,
         -2.5750,   1.3742, -11.0043,  -4.3553,  -7.5202,   0.1949],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.9716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1457e+00, -1.6780e+00, -2.1515e+00,  6.3355e-01, -5.0871e-03,
         4.3772e+00, -5.2580e+00, -1.8915e-01, -3.3142e+00, -2.0477e+00,
        -4.0770e+00,  7.4003e-02,  1.4656e+00, -4.7621e+00,  5.5457e-01,
        -2.0921e+00, -1.8120e+00,  3.0259e+00,  4.8145e+00, -5.0243e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.9806, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.7887,   0.4788,   3.6177,  -7.6357,  -3.2299, -11.7503,  -3.6308,
         -4.3869,  -1.7559,  -1.3218,   4.1878,  -8.5450,  -3.6507,  -2.0708,
         -2.2354,  -4.7393,  -3.4903,  -5.9517,  -6.5820,  -1.6595],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4070, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6556,   5.3667,  -1.5511,  -0.6266,  -2.3205,  -2.4006,  -6.9970,
          1.0007,   0.5901,  -2.4018,   0.7773,  -3.0098,  -0.4941,  -2.0787,
          4.4613,  -5.4174,  -0.5048,  -0.3033, -15.3357,   2.0586],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5921, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1093,   0.0407,  -1.6023,  -1.3626,  -0.6527,   5.2669,  -0.9823,
         -2.0995,  -1.5106,   0.0202, -13.6338,   1.8683,   1.9827,  -5.3874,
         -0.5252,  -1.4018,  -4.0096,   2.8804,   1.0714,  -3.4417],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4258,  -1.8745,   1.5770,   3.7080,  -2.6559,   0.1286,  -9.3324,
         -6.1047,  -4.2917,  -3.1193,  -2.3553,   0.4136,  -1.8308,  -2.5661,
         -2.5353, -22.0886,  -4.1757,  -9.4980,   0.6640,  -0.5487],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.0969,  -0.5787,   2.3806,  -3.9501,  -0.9797,  -3.4001,  -2.5985,
         -4.1419, -56.9823,   1.8470,  -1.9434,  -1.6945,  -1.5221,  -7.8812,
          2.2273,   3.0208,  -4.7772,  -0.9323,  -4.0309,   0.8207],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4607, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4890,  -3.5718,  -3.0162,   1.0492,   2.5779,  -0.7607,   0.2923,
         -0.9854,   0.0692,   2.1160,   4.3988,  -3.8170,  -1.0062,  -0.4813,
         -2.7062,  -1.4346,   2.4975,  -1.6377,   0.8138, -10.6633],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.9377, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9665,  2.9386, -0.4587, -0.9791, -8.3631, -5.2516, -2.9784, -4.2317,
        -2.2164,  0.3515,  4.6731, -8.9567, -0.6699, -3.4766, -5.9247,  0.4028,
        -1.1697, -2.4400,  0.0129, -9.1404], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4422, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0844, -3.5625, -4.7736, -2.9912, -4.4870, -3.3582, -6.3230, -9.6507,
        -4.2144, -0.6914, -4.1414, -4.4859, -1.6011, -3.3864, -6.4108, -2.6958,
        -0.0465, -4.6195, -4.1581, -2.9614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0822, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7301,  -1.0162,  -3.7563,   1.2883,   2.8722,  -4.3890,  -3.0934,
        -13.5698,  -7.5565,  -5.6475,  -5.8119,  -1.2528, -11.7161,   1.2333,
         -1.8970,  -5.7705,  -1.1418,  -3.4211,  -3.1024,   1.1216],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4845, -7.9347, -4.3098, -2.3908,  0.9434,  0.8847,  3.1060, -6.5173,
         0.9063, -2.0342,  0.1729, -3.6048,  4.1416,  1.4069, -2.5454,  0.1440,
        -3.7223, -3.3633,  1.8151,  2.4472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0969, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3692, -3.6081,  5.1033, -5.8585, -1.0848, -3.8764, -1.1052, -5.5918,
         0.7883,  3.6139, -0.8841, -0.3037, -3.0846, -0.4189, -3.8537,  2.7829,
         3.7667, -2.0368,  2.2015, -1.4228], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8121, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8855, -1.6263, -4.6193, -3.9624, -1.1254, -2.5580, -3.0614, -0.2525,
        -4.6569,  2.7082,  3.7855, -2.7922,  0.8037, -3.3526,  0.3642, -4.2766,
         2.8777,  3.3862, -2.5298,  1.1162], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2329, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0379,  -4.1493,   0.0473,   3.6579,  -4.3353,  -1.7818,  -2.5429,
         -0.2930, -27.3129,  -6.3009,   0.7408,  -2.1457,  -2.9678,  -2.6679,
         -0.5994,  -2.6794,   6.4433,  -3.1897,   0.6355,  -2.3515],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7415, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.2091, -4.1194, -0.5428, -6.7168, -5.7713, -3.1892, -2.5763, -0.9452,
        -1.8553,  3.5247, -5.5959,  0.3172, -3.2430, -1.5840, -3.6329,  2.5321,
         3.6497, -2.8152, -1.7905, -2.9453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6045, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1109, -2.0980, -0.2014,  3.6976,  4.8955, -2.1032, -5.5411, -1.4057,
        -2.5890, -2.9207,  4.5402, -2.3537,  0.0628, -1.7926, -2.8527,  2.5241,
         2.3417, -2.0076,  0.7778, -2.0382], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4477, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.0456, -4.5445,  0.5825, -1.5071,  0.3870, -2.5865,  3.1399,  0.4926,
        -4.4818, -0.1296, -3.3843, -2.3827, -4.8278,  0.6406, -0.6204, -2.2247,
         1.4983, -0.6808, -0.4505, -1.2264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0630, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1830, -1.7713,  1.9730, -1.2731, -1.3403,  1.5353,  5.1909, -2.2873,
         1.1574, -1.7092, -0.5164,  0.0857,  4.3956, -1.6439,  1.0210, -0.3730,
        -1.0026,  3.4514,  4.8454, -3.8271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.4547, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4232,  2.3192, -2.9936, -0.0258, -0.8096, -2.9735,  2.9655,  3.6083,
        -2.5552, -1.4261, -2.6082, -1.1944, -5.7852,  6.8193, -1.7268,  0.9782,
        -2.3015, -0.6736, -3.8613,  3.2368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0966, -0.6549, -0.7691,  2.1708,  4.3905, -3.4083, -0.4944, -2.7606,
        -0.6321, -2.9236,  3.7963,  1.6892, -1.8607,  1.6402, -1.0131,  0.5668,
        -0.9392,  5.4396, -3.2137,  1.4562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.0192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1771,  -3.6766, -10.8769,  -3.1730, -11.2005,   0.3512,  -3.6789,
          4.7720,  -2.4049,  -7.9168,  -1.7909,  -3.7690,  -6.5002,   1.7378,
          1.9263,  -2.5742,  -5.0387,  -2.8809,  -2.1150,  -2.4259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1706, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2878, -1.0563,  0.8863, -4.0540,  3.0919, -0.9813, -4.2791, -9.1308,
        -3.2087, -0.0718, -3.0943,  0.0259,  3.7144, -1.5804,  1.1747, -1.3461,
        -1.0091,  3.4496,  4.0389, -2.1396], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7141, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5934e-01, -3.2868e+00, -2.8057e+00, -3.3344e+00, -1.2144e+01,
        -2.2647e+01, -9.6643e-01, -3.9327e+00, -4.4751e+00,  1.6990e+00,
         2.5249e+00, -6.0351e+00, -2.9927e+00, -1.4002e+01, -6.2404e+00,
        -2.4055e+00, -5.0957e+00,  1.5794e-02, -1.5262e+01,  2.2774e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9847, -3.1695, -4.0684, -1.3314, -7.6024, -1.2673, -0.0173, -2.4711,
        -0.3663, -3.4530, -0.9923,  1.1178,  2.6720, -2.7858, -1.0540, -3.0151,
        -1.2848, -6.8596,  2.1775, -5.6433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5633e-01,  8.0647e-01,  5.7636e+00, -2.4648e+00, -1.8549e+00,
        -4.6804e+00,  7.5741e-02, -5.3239e+00,  2.1434e+00,  1.4453e-02,
        -7.3945e+00, -1.0593e+01, -1.5956e+01, -2.3658e+00, -6.6927e+00,
        -2.7720e+00,  3.3367e+00, -2.0288e+01, -9.8355e+00, -9.9646e-01],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.9410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5356,   0.8403,  -2.3017,  -2.0271,  -0.8018,   5.7514,  -2.3931,
         -1.8313, -12.2619,  -5.4372,  -2.7891,  -3.6735,  -1.1463,  -5.4215,
          0.1744,   4.0728,  -3.5817,  -0.6168,  -2.8011,  -0.3827],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9082, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.4362,   0.2365,  -2.0559,  -0.0936,  -4.3739,   3.0809,   0.7061,
         -2.9379,  -1.3195, -12.1796,  -6.5227,  -2.7376,  -2.2559,  -1.3248,
          0.8171,  -4.2105,  -4.4706,  -0.3077,  -0.9826,  -1.7437],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.7387, -2.5819, -6.9991, -3.5345, -4.2706, -2.3939,  1.2183,  2.9117,
        -4.0424, -3.8307, -3.3658, -4.4157, -1.6872, -0.9070, -4.0051, -2.6231,
        -7.4274, -5.9077, -3.8349, -4.5705], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.3003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2606,  2.3314, -3.0764, -0.9981, -2.3348, -1.7264,  2.5689,  2.1796,
        -2.4262, -1.0028, -1.5643, -0.8586, -4.3273,  2.3972,  3.1855, -1.0493,
         1.6266, -2.8780, -1.5304, -3.4078], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3667,  1.0549, -1.6474,  5.4899, -4.9161,  1.0571, -2.7452,  0.8509,
        -3.8195,  3.0130,  4.1058, -3.3605,  0.4815, -2.6035, -1.1726, -0.3672,
         4.2539, -1.6865,  1.1243, -3.8194], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4537, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.6761,   0.6564,  -2.6316,  -1.0673,  -3.5859,   3.7790,   3.7522,
         -2.1393,  -1.1349,  -1.3146,  -0.6804, -15.0976,  -3.1932,   1.2721,
         -2.2732,   0.7219,  -1.6371,   0.6035,  -6.0702,   5.7996],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3458, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.7996, -4.5538, -0.6355, -5.5035, -8.1374, -8.6498, -2.1626, -1.7260,
        -3.1754, -3.6172, -1.6514, -0.9860, -0.7422,  1.3967,  5.3678, -1.8745,
        -0.0787, -1.6118,  1.2159, -0.6471], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5986, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0086,   1.8017,   3.6190,  -3.4136,  -1.4439, -10.1528,  -4.8805,
         -6.2845,  -0.8007,  -6.8486,   2.2552,  -3.4524,  -3.5592,  -1.6148,
         -4.3969,  -2.5546,   0.2461,   5.0516,  -4.3118,  -2.3087],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.8731, -1.3489,  3.7578, -2.9148,  0.2309, -1.3039, -2.2139, -3.8099,
         2.3251,  2.8793, -2.7726, -0.3717, -4.1088,  0.1015, -4.5492,  0.2265,
         2.5696, -2.8927,  0.0480, -2.0378], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0853,  -1.2884,   0.9386,  -9.1277,   2.5491,  -6.7044,  -4.8854,
        -12.0630,  -3.4089,  -4.5406,  -6.1837,   2.8661,   1.5825,  -4.4695,
          0.0521,  -0.8179,  -1.1598,   1.7308,   4.0195,  -1.3632],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1180, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 6.4090, -2.3800, -2.9469, -3.9914, -3.2471, -6.2793, -5.5449, -0.7215,
        -4.2390, -2.4436, -1.5662, -4.0948,  1.0902,  3.2770, -3.4903, -1.3642,
        -2.1540, -6.0129, -0.5176,  2.6748], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8771, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9406, -2.9509,  1.7944,  1.8973, -1.5060,  0.7382, -0.6525, -1.0410,
        -4.4063,  0.2372, -1.2827, -2.5578, -1.5193, -2.2265, -0.4283,  5.3796,
        -1.3444, -7.0513, -3.4651, -1.2493], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7075,  1.3761,  3.1978, -2.2882, -0.8501, -9.3927, -6.1374, -4.5319,
        -9.7043, -6.5601, -5.4793, -0.6374, -6.1949,  5.7141, -5.6527, -1.8099,
        -2.1941, -4.9750, -1.8608, -0.1775], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.1433, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.7889, -11.8449,  -2.8632,  -3.6504,  -0.7617,   0.2206,  -0.0762,
        -17.5208,  -0.6865,  -3.1932,  -5.2426,  -1.4834,   2.7863,  -4.1594,
         -5.3193, -31.4183,  -6.9984, -10.4793,  -3.0994,  -1.6279],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.3315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0821e-01,  1.0090e-01,  1.4114e+00,  4.7318e+00, -1.9388e+00,
         2.3638e+00, -1.6784e+00,  1.2996e+00, -9.3765e+00,  7.4523e+00,
        -3.5862e+00, -1.8676e+00, -1.4731e+01, -1.1709e+00, -5.6923e+00,
         1.3657e-01,  8.1340e-02,  6.5099e+00, -7.1918e+00, -1.2834e-02],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1933, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9642e-01, -3.2399e-01,  5.6348e+00, -2.1324e+01, -1.5382e+00,
        -4.7161e+00, -9.7522e+00, -7.8016e+00, -8.6402e-02, -6.2001e+00,
        -1.5918e+01, -7.6390e+00, -3.6501e+00, -5.0012e+00, -5.7100e+00,
        -4.1757e+00, -3.6647e+00, -3.1129e+00, -9.4037e+01, -3.0873e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.6150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7130, -0.8421,  0.0892,  4.2788, -6.1315, -1.4392, -3.1735,  0.3311,
        -7.2147,  2.0204,  1.5764, -3.0524, -1.2489, -2.1812, -1.3042, -1.9889,
         3.3558, -1.8813, -0.0927, -3.5594], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2586, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2385, -3.3609,  0.8161, -2.7059, -3.1811,  2.3217,  3.1165, -2.8373,
         0.5670, -8.1895, -2.6324, -5.8925, -1.7948, -1.1975, -6.3509, -3.8533,
         0.1982, -5.5864,  1.8823,  3.8263], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8046, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3705, -4.2080,  0.3337,  3.2210, -1.3944,  1.4465, -2.1414, -0.1340,
        -9.4501,  6.4025, -1.3946,  0.5248, -3.6437,  1.0291, -3.8077,  2.0187,
         2.6452, -4.0354,  1.3231, -2.8761], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7756, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6597,  0.5453, -4.0179, -1.8091, -4.0780,  3.7124,  1.5501, -2.1070,
        -0.8857, -0.6540, -0.0959, -0.1672,  5.5185, -2.4436, -4.0131, -2.2922,
        -0.0866, -2.4324,  3.2179, -9.0875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.2224, -14.5923,  -6.6335,  -3.3323,  -2.2123,  -1.2049,   2.5805,
         -1.1686,  -7.5433,  -1.1139,  -0.3704,   0.9324,  -7.5660,   3.6170,
          2.2624,  -2.5737,   1.7333,  -1.5980,   0.5266,  -2.8688],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2674, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2817,  0.7624,  1.4279, -2.0744, -0.7646, -2.4865,  0.1075,  2.4189,
         5.8426, -0.9455, -7.0588, -3.3892, -2.0341, -5.6920, -2.9182,  1.1062,
        -1.2288, -0.8431, -1.0639, -3.9658], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1064,  0.6844, -5.3452,  0.5280, -2.3173,  0.5706, -1.1369,  6.5187,
        -1.1728, -0.5593, -7.5369, -8.9419, -4.2850, -6.3642, -4.0750, -1.2169,
        -1.2967,  4.8851, -5.9402,  0.0859], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2508, -1.6421, -2.7909, -4.9237, -0.8217,  0.7120, -2.2996,  0.5164,
        -3.0441, -0.6795, -4.7030,  3.2289,  2.3418, -5.0038,  0.6984, -1.5766,
        -5.9593,  2.6065, -4.3644, -3.8395], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7897, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-16.4079,   0.9916,  -3.8084, -41.7832,  -4.1621, -13.0640,  -8.2971,
         -3.8534,  -2.8254,  -1.3149,  -8.4235,  -5.1849,  -5.0306,  -4.7542,
         -2.7465,  -1.5479,   0.2567,   5.0913,  -2.6871,   1.3278],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.9112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.4560,  -8.2925,  -3.6712,   0.8341,  -4.3788, -11.2375, -43.3590,
         -4.6924, -11.8541,  -8.7768, -18.4495,  -1.2293,  -6.5764,  -4.2420,
          2.1480,   4.1942,  -9.6388,   0.4885,  -2.4817,  -4.9953],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.9833, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4341, -6.1869, -4.0014, -5.4271, -1.7037, -0.1838,  5.4979, -8.8260,
        -1.7135, -2.1584, -4.8958,  2.0775, -2.4591, -5.5839, -2.1159, -8.3976,
        -5.0073, -3.0922, -3.7596, -0.7981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.2585, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2107,  1.5965,  3.2358, -2.8583, -0.2826, -4.1109,  0.4409, -2.9563,
         2.8044,  3.8569, -2.5088,  0.9522, -2.4297,  0.1306, -4.1891,  1.5760,
         3.2309, -2.0198,  0.9508, -2.2649], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.6428,  -3.3662,   3.5668,   4.5577,  -1.4052,  -1.5962,   0.2933,
         -0.7421,   2.2250,   1.7817,  -2.9397,   1.4038,  -3.8085,   0.9664,
         -4.6857,  -0.0363,   4.2403, -10.9099,   0.2319,  -6.2039],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.8535, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.8389,   2.0490,   2.8718,  -5.2435,  -3.7294, -16.8548,  -2.4276,
         -5.2810,   0.0796,   3.1668,   2.3842, -16.7792,  -1.5438, -32.4160,
         -3.2685,  -1.5259,  -3.9557,  -1.5882, -10.6642,  -3.7113],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.2638, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.5460,  -6.9875,  -5.3100,  -4.8549,  -6.6770,  -7.4128,  -4.8803,
         -5.4031,  -2.0618,  -3.2705,   5.3929,  -4.0188,  -2.0688,  -2.9516,
        -11.3223,  -5.1223,  -1.6687,  -3.3141,  -2.8097,  -5.5297],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2408, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.8004, -3.0500, -0.0466, -2.2943,  0.2152, -3.3915,  2.7359, -1.5728,
        -1.4482, -3.7114, -3.5827,  0.5101, -3.9412,  2.2727,  3.6695, -5.5029,
        -1.1757, -1.4916, -0.1305, -4.8789], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.4295,  -2.1199,  -2.1309,  -3.6916,   1.8222,   3.2489,  -2.6698,
          1.3855,  -0.0465,  -5.8598,   3.5952,   1.6191,  -3.7192,   2.3280,
        -13.9359,  -3.7730,   0.6913,   0.4072,  -6.5270,   1.3525],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4727, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.3796, -3.2272,  2.3697, -2.4501, -1.0904, -0.4846, -3.3034,  2.1727,
         3.7254, -3.5408,  0.6658, -4.5356, -0.6825,  0.4377,  4.5438, -2.1317,
         1.7461, -2.5111, -2.8732,  3.4290], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6060, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3486,   0.0420,   4.8387,  -2.6199,   0.1103,  -3.7147,  -1.0832,
         -3.2069,  -1.7960,   3.3403,  -2.6907,  -0.8685,  -8.4543,  -5.3852,
         -2.9037,  -2.2262,  -0.9389,   1.8625, -17.3941,  -4.0035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8271, -0.0521, -6.8411, -3.3062, -3.4036, -5.2630, -0.5468,  1.7389,
        -3.6262,  0.7806, -2.3188, -2.1224, -2.6561,  2.7939, -1.3990, -0.2057,
        -3.7029, -2.9567, -6.1467,  3.8488], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8606, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.4454,  0.2798, -2.9208,  3.6379, -3.9523, -2.9123,  0.8720, -0.8037,
        -1.3646, -1.1646,  4.5644, -1.5122,  1.5413, -2.8088, -0.4532, -1.3217,
         5.5522, -7.6353,  0.1607, -1.8814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6784, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2064, -6.1582,  2.3276, -1.2919, -2.0588,  0.7206, -2.2744,  0.1274,
        -5.8291,  0.9932,  1.4343, -3.9355, -0.2664, -2.2209,  0.7678, -6.5628,
         5.6397, -3.0319, -2.1161, -3.7091], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6253, -9.9705, -3.8052, -6.2700, -5.0561, -4.1900, -3.9776, -6.0946,
        -4.9063, -5.8228, -3.6915, -6.0278, -1.9822, -4.5366, -9.7164, -5.6372,
        -5.5433, -4.6615, -6.0200, -4.9175], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.4726, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-19.6269,  -4.3639,  -5.7727,  -3.7098,  -4.1930,  -6.2829,   3.7946,
          3.6117,  -2.8499,   2.1956,  -1.4169,  -0.6009,   0.7219,   2.5471,
         -8.9425,   0.2328, -23.2280,  -5.4954,   0.0465,  -2.1885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.7761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.8597, -2.6944,  0.5679, -0.4165, -3.8552,  2.4772,  0.6348, -2.8774,
         1.3290, -1.7185,  0.3569, -4.3410,  3.4646,  3.1095, -2.6502,  0.9438,
        -1.5718, -0.3624,  1.7977,  5.4017], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.0728, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1492e+00,  1.3621e+00, -4.0112e+00, -2.6086e+00, -3.8303e-01,
        -4.0162e+00, -2.8830e-03, -8.0704e-01, -5.6102e+00, -3.5332e+00,
        -4.1880e+00, -2.4701e+00,  2.9235e-01,  1.5441e+00, -4.6456e+00,
        -2.6904e+00, -1.7719e+00, -2.9056e+00,  2.4741e+00,  4.8606e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3981, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.0930, -2.1384,  6.1647, -1.6716, -0.9227, -3.2005, -3.8253, -4.2320,
        -1.7511,  1.5133, -3.3061, -2.6852, -1.4701, -3.6286, -6.3863,  0.6938,
         3.0272, -1.9429,  1.5039, -0.6214], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0653,  -4.3218,  -4.6954,  -1.4171,  -4.3727,  -4.6160,  -4.5349,
         -5.4265,  -8.7885,  -4.5270,  -5.9645,  -3.9463,  -5.1822, -10.2710,
         -2.8595,  -1.1868,  -3.6784,  -3.6729,  -5.6018,  -6.8117],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.8470, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8903, -6.7120, -3.8776, -1.9672, -6.6281,  5.2711, -4.0934, -2.9159,
        -4.1671, -0.3117, -3.7978,  0.8024,  0.3847, -2.0405,  0.9744, -3.1151,
        -1.6461,  2.1897,  3.1811, -0.8485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9482, -1.2612, -2.5421, -2.9166, -4.0063,  3.5770, -1.2409, -1.0628,
        -2.1012, -1.8450,  1.9743,  4.2434, -3.1540,  0.5226, -4.7031, -0.7307,
         0.8475,  5.4788, -3.3918, -1.4361], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8848, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5513, -2.5160,  0.7912, -1.7282, -2.7498,  2.5400, -0.2747, -4.4725,
         1.6939, -1.4034, -0.6560, -8.2123,  0.5720,  0.4200, -4.7603, -1.8026,
        -8.7532, -3.0143, -3.6793, -6.2956], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.7389, -2.1000, -0.4373, -3.4419, -0.1884, -3.2749,  1.8063,  3.5497,
        -0.3696, -0.2838, -0.6868, -1.6002,  1.0117,  2.8865, -1.8294, -1.4235,
        -4.4062, -0.6105, -6.2192, -1.3152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7097, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3853,  0.3912, -4.9078,  0.3528, -1.2846, -0.3443, -4.5230,  3.1248,
         1.5286, -1.7305,  1.9549, -1.5196, -1.0738, -4.7170,  2.7515,  2.9071,
        -2.5593,  1.7469, -1.8820,  0.3542], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4522, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.4111,  3.6089, -2.8259, -1.9930, -3.1678, -0.9967, -4.3084,  3.1001,
         3.9878, -2.2474,  0.3671, -6.7278, -6.0315, -3.6723, -3.1412, -0.5369,
         2.0289,  0.1410, -3.4208,  1.7478], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0838, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 8.3496e-01, -3.7849e+00, -2.5096e+00, -7.0242e+00, -9.2217e+00,
        -4.5674e+00, -3.7038e+00,  7.5780e-02, -6.8694e+00,  3.9158e+00,
        -2.5319e+00, -2.7551e+00, -5.7014e+00, -3.4477e+00, -2.9399e+00,
         7.0629e-03,  1.8247e+00, -2.2805e+00, -1.6850e+00, -2.1971e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7281, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4655,  1.2368,  3.5663, -1.6671,  0.1472, -2.9599,  1.0846, -4.4296,
         2.3209,  3.9945, -1.5238,  1.7665, -2.1436,  0.9478, -0.4671,  6.0325,
        -9.9203,  0.5512, -2.6670, -1.4053], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.6369,  2.4461,  1.4297, -2.6957,  0.6001, -1.8793, -4.0790, -1.3466,
         3.1405, -4.5274,  1.7301, -2.5307, -2.5385,  1.4368,  4.6466, -2.0355,
         0.7382, -4.8483, -2.4556, -3.7389], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2907,  2.4519, -5.9588,  0.2610, -1.6206, -0.1541, -4.7574,  2.9350,
         0.4361, -2.5127,  1.1295, -2.5795, -0.0347, -3.0841,  3.5141,  2.6925,
        -3.0811,  0.6043, -1.3460, -0.6111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6503, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.1680,  2.0353, -2.3673,  1.7267, -2.3052, -0.9641, -3.5151,  2.9827,
         3.5094, -1.9785,  2.0515, -1.9425, -2.4764,  1.3982,  4.8211, -3.5007,
         0.0557, -2.5174, -2.4925,  1.9762], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1840, -1.1060, -9.7161,  3.6290,  2.7725, -1.9937, -0.5534, -2.0829,
        -0.5443, -4.7027,  2.6410,  2.6407, -2.6882,  0.4434, -3.2675,  0.2341,
        -7.2840,  0.7249,  0.9576, -6.8552], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4467, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-12.5850,  -6.6900,  -9.3055,  -6.3978,  -6.4214,   0.0685, -13.1297,
          1.9423,  -3.0148,  -0.6626,  -3.4696,  -1.0261,  -3.1475,   1.5130,
          4.0578,  -0.9529,   1.3818,  -1.4320,  -2.5955,   1.9098],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4590,  -2.6023,   4.3402, -13.6277,  -0.2394,  -4.1326,  -3.1530,
         -1.8560,   2.2028,  -3.3398,  -2.3765,  -3.4154,  -2.7672,  -4.3276,
         -0.4064,   0.3415,  -4.8167, -11.7640,  -1.7122, -15.7624],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5937, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7748,  -6.2406,  -0.6610,  -0.9847,  -4.7232,   3.3644,   3.1097,
         -5.2832,  -1.9873,   0.3901, -43.0683,  -6.5218,  -4.0171,  -3.5387,
         -7.8819, -34.6075,  -7.1938,  -7.8412,  -7.1473,  -3.3238],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.0466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2939, -0.8594,  0.2864, -3.7087, -3.7466,  1.4715,  3.9277, -2.9958,
        -3.6525, -1.0830, -1.9339, -0.5044,  0.7456, -2.2675, -0.5809, -2.6201,
        -3.0731, -4.1313, -0.0605,  2.8720], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9810, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4272, -0.8882, -4.3516, -0.3368, -2.9887,  1.0083,  2.8053, -1.4372,
         1.0816, -3.0557, -0.9870, -2.7853,  1.9958,  4.0299, -1.5533,  1.2705,
        -2.3369,  0.8768, -3.1303,  4.6466], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1465, -2.8226,  3.3357,  1.9271, -2.8041,  0.4544, -2.7948, -1.4453,
        -1.4819,  3.1201, -1.9233, -2.0452, -1.5419,  0.1099, -0.7729,  6.3105,
        -1.1527,  0.7944, -1.2300,  0.9826], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1563, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.6597,  -0.9521,  -1.4764,  -7.0595,  -8.5978,  -5.9304, -12.1737,
         -6.3533,   0.2279, -13.9236,   2.5487,  -6.2568,  -3.1356,  -4.5250,
         -0.8906,  -5.4384,   2.7634,   0.0365,  -2.1261,  -0.3967],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5000, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4168,  3.7967, -0.5880,  0.2789, -0.3991, -1.5302,  3.0052,  5.2828,
        -3.6224,  0.9425, -2.6587, -0.8005, -3.2261,  0.1630,  1.6811, -7.2666,
         0.7058, -0.3801, -2.0508, -4.7253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5904, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.3973, -3.6318, -3.3030, -4.4987,  0.6073,  2.0306, -1.8621,  0.9971,
        -1.3128, -3.2666,  2.8208,  1.8749, -2.8056,  0.3907, -3.6302, -1.7314,
        -2.5454,  3.0369,  3.4248, -2.2555], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8029, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  -1.9262,   -3.7088,    4.1482,   -9.8500,   -1.8290,   -3.3747,
          -1.3404,   -3.3562,    3.4775,  -13.2090,   -5.1708,   -7.2664,
        -137.1438,  -18.2502,  -10.4642,   -9.4530,   -9.2665,   -2.0822,
          -1.6127,    0.5336], device='cuda:0', grad_fn=<SumBackward1>) tensor(-11.5572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0298,  2.8269,  1.3593, -3.2346, -0.5295, -1.0083, -0.2834,  2.4853,
         3.6812, -1.6118,  0.5998, -2.0032, -1.5229, -4.1688,  1.3941,  1.8796,
        -1.7748,  1.6656, -0.7313,  0.2715], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.5332,  -2.8224,  -0.4533, -13.8741,  -6.2967,  -7.9154,  -3.3656,
          0.7163,  -0.3683,   1.0986, -10.3440,  -1.4266,  -2.7930,  -4.7303,
          2.0701,  -8.8603,  -5.4218,  -8.4907,  -4.9744, -19.4015],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.1841, -0.2542, -7.9697,  0.6518,  0.4724, -2.4133,  1.9546, -0.8474,
        -1.9630,  2.9747,  3.0613, -1.8464, -0.0612, -4.1422, -2.2601, -4.8688,
         0.5451,  0.7952, -1.9952, -1.2486], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1800, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.5898,  -6.2330,  -5.2162,  -9.5203, -12.6295,  -2.2273,  -5.9807,
         -0.0361,   2.1572,   2.8018,  -5.0988,   1.1060,  -0.8255,  -5.5210,
          1.4269,   5.0859,  -3.5852,   0.0645,  -1.7069,  -4.1821],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3265, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9049, -12.4466,  -1.3574,  -3.7725,  -2.7297, -15.9019,   2.6509,
         -2.3005,  -0.2190,  -2.9494,  -1.2462,  -3.4771,   0.7052,  -0.2929,
         -3.7012,  -0.5627,  -0.6828,  -0.4249,   2.0695,   2.6769],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.6434, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1209, -4.0588, -0.2313, -2.1276, -0.4410, -4.1513, -0.8954, -1.0158,
        -3.3643,  0.3886, -3.0703, -1.9572, -0.5698,  4.6492, -2.0924, -6.1755,
        -2.4751, -4.3911,  0.3329,  2.5950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2465, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7769, -2.7265,  0.5183, -2.9971, -1.6019,  2.0318, -2.4710,  1.5987,
        -3.6435, -2.6878, -0.3139,  3.3721, -3.0310, -0.7318, -5.6685, -0.9983,
         2.7912,  2.7448, -2.2923, -1.3241], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8327, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.2294, -1.6208,  0.1190, -5.0687,  2.9452,  1.7720, -2.9572,  0.3798,
        -0.8777, -1.5381,  1.8755,  2.9328, -8.9616,  0.5584, -4.5747, -1.1066,
        -3.8162,  3.5612, -1.0291, -0.3032], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2586, -1.3188, -0.6173, -1.9369,  3.3967,  3.2573, -2.5315,  1.8486,
        -3.4045, -2.4004,  1.0399,  4.0480, -3.4577, -1.3050, -0.5135, -3.5397,
        -1.1120,  2.2692, -4.1475, -0.8554], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8269, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4450, -6.6320,  1.7231,  2.6735, -2.6058,  1.8098, -2.7730, -0.5930,
        -2.2501,  5.5323, -2.8800, -4.1284, -2.9785, -5.0577, -4.7539,  0.2042,
         1.4930, -2.4618,  1.4634, -1.5379], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3099, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1822,  -0.0799,  -2.6821,  -5.5466,  -3.9713,   0.2763,   3.1745,
        -10.5031,  -0.1212,  -2.4449,  -9.4338,   0.0845, -14.5261,  -7.4590,
         -0.2608,  -4.4908,  -5.3916, -12.0287,  -3.5114,  -3.7136],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.2406, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.5333,   2.8692,  -5.6555,  -4.2462, -10.7060,  -2.3287,  -5.1473,
         -3.5108,  -6.4846,   2.2934,   3.8676,  -6.0299,  -0.0468,  -3.1091,
         -3.7412,  -3.9904,   4.0588,  -5.5599,  -1.4140,  -3.2284],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5288, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2725,  -1.6222,  -4.3851,   0.7552,   3.2076,  -2.2971,  -0.2825,
         -5.2425,  -4.2981,  -0.7296,   3.5965,  -2.9057,  -2.4740, -15.0950,
         -3.8764,  -7.5331,  -1.7138,  -3.3265,   1.8551,   4.1587],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7553,  -1.7006,   3.4035,   0.6786,  -5.2424,  -1.5166,  -2.5571,
          0.6696,  -0.0496,   5.5117,  -6.9120,   0.6091,  -2.2729,  -1.9250,
         -4.0499,   3.4285,   0.5737, -12.8548, -21.9660,  -9.8376],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.3308,  0.2618, -3.9941,  0.7072, -4.2374, -0.7116, -0.6344,  0.6472,
        -4.2453,  4.2678, -4.9098,  1.1071, -1.8838, -1.1138, -3.7786,  1.9946,
         2.3230, -0.4302,  1.7068, -0.7021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1398,  -9.5191,  -0.3507,  -0.4690,   5.0311,  -7.3020,  -2.1435,
         -3.8087, -10.4330,  -4.1854,  -2.7352,  -4.6778,  -0.4791,  -3.5257,
         -4.0326,   0.4805,   0.5892,  -5.6012,  -2.3224,  -8.0930],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3359, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-27.9976,  -1.9261,   0.2302,  -1.8233,  -1.6910,   1.8835,   4.5275,
         -1.6996,   0.3378,  -1.9356,   0.1317,  -4.2511,   3.8972,   2.6162,
         -1.7861,   0.8785,  -0.7648,  -3.2676,   1.3532,   2.7233],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4282, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.9045,  4.3203, -4.3761,  0.2461, -3.5244, -1.0582, -0.5680,  1.9479,
        -4.8405, -4.1244, -3.4357, -5.1492,  3.1055,  0.2952, -2.6144, -1.0820,
        -3.5443, -5.7569, -2.9469,  2.9498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.3231,   3.3118,  -5.0013,  -3.2746,  -4.6201,  -7.7019,  -6.7239,
         -7.2591,  -9.8910,  -5.8434,  -7.9321,  -2.5734,  -0.4970,  -7.0969,
          2.1882,   1.3135,  -6.6387,  -4.3065, -25.4332,  -4.7315],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3286, -4.6774, -3.6867, -2.5987, -4.6425, -2.5245,  0.4354, -3.2938,
        -2.7496, -3.7164, -7.7234, -4.7333, -2.5151, -3.3461, -4.4748, -7.0438,
        -7.1174, -3.9392, -5.0050, -1.5921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9137, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0326, -0.2039,  2.1849,  2.0862, -2.6863,  1.4964, -2.2788, -1.1750,
         0.5988,  4.7805, -2.1770,  1.5218, -3.1107, -2.2713, -2.6466,  2.1038,
         2.2288, -3.2685,  0.3275, -1.9110], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.2506e+01, -3.8415e+00, -1.1001e+01, -7.7079e+01, -5.8615e+00,
        -7.1155e+00, -5.5699e+00, -8.6326e+00, -3.4385e+00, -1.4760e+00,
         4.3051e-02, -1.0421e+00, -4.6713e+00,  1.1491e+00, -1.6199e+00,
        -2.1879e+00, -6.6920e-01,  3.5490e+00, -3.1360e+00, -3.5289e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.4318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.0175,   1.4636,  -3.0771,  -1.4722,  -0.4206,  -3.7592,   1.9187,
          3.1589,  -1.3799,  -0.8332,  -0.9653,  -2.6599,   0.8408,   5.4633,
         -3.0224,  -6.2233, -14.9774,  -4.9866,  -3.5776,  -5.0319],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9762, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1596,  -4.2268,  -2.2610,  -3.4166,  -2.3959,  -3.3738,  -2.1076,
          5.3211, -14.0378,  -1.3858,  -0.3535,  -4.9185,   2.2835,   0.7772,
         -3.3612,  -1.8792,  -4.3103,  -1.5573,  -5.2518,   0.7803],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2258, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.1127,  -4.4368,   3.9020,   2.6924,  -4.8545,  -1.1100, -24.7720,
         -7.0907,  -2.8392,  -3.7156,  -0.2788,  -2.0481,   4.3794,  -4.2672,
          1.8875,  -1.0151,   0.8257,  -4.0191,   6.4519,  -2.7952],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5297,  -3.6135,   2.8441,   2.0304,  -2.2278,   0.3596,  -3.4997,
         -2.0056,  -3.9205,   6.6654,  -2.3208,  -1.6407,  -3.2466,  -0.1497,
         -6.4375,  -3.8572,  -5.5369, -18.3416,  -0.3635,  -2.8749],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5333, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3910, -2.9822, -1.5155, -0.7988,  4.7559, -2.7096,  1.1485, -0.9947,
        -1.0752, -3.5745,  4.7601, -2.4760,  0.7886, -1.7290, -0.8504, -6.2534,
         5.6797, -7.0306, -7.4300,  2.2435], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9826, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3301, -12.1125,  -1.5663,  -5.1054,  -0.1299,   0.8038,   2.1717,
         -1.3404,   0.4230,  -3.4926,  -8.6885,   1.0143,  -5.0493,  -6.5942,
         -2.6523,  -7.5389,  -6.9629,  -6.5646,  -3.3008,  -5.4021],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1808, -2.1127, -3.0903,  0.8546,  2.2612, -5.3054,  1.4908, -2.4999,
         0.2182, -3.6517,  2.0102,  3.7193, -2.5656,  1.4436, -2.2626, -1.6450,
         2.0856,  3.5121, -1.7023,  0.5918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5614,  0.1701,  1.7372, -2.4941,  0.4576, -1.5164, -0.8824, -2.3379,
         6.0996, -0.4601, -1.1650, -5.0196, -9.0545, -5.0292, -1.2998, -4.8036,
        -4.9718, -1.6128, -0.6033, -0.5171], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8191,   1.5486,  -1.5668,   0.6148,  -1.0305,   3.3417,  -1.8998,
         -0.3785, -12.4945,  -3.8571,  -4.9361,  -0.7130,   0.5480,   5.5486,
         -5.4896,  -0.7913,  -5.7547,  -4.0323,  -4.9526,   1.2052],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.6473, -1.9774,  0.5708, -2.3987,  0.3465, -4.6919,  2.8559,  3.2489,
        -7.8397, -0.3477, -3.5247, -3.8744, -0.7166,  3.4855, -2.2961,  1.3200,
        -2.6044, -3.3253,  0.9444,  1.4500], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8864, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2610,  -4.5192,  -5.2335,  -0.4509,   2.5946,  -3.0934,  -0.3582,
         -0.9413,  -2.4042,   2.0311,   3.3505,  -2.3037,  -1.1904, -12.8029,
         -2.5298,  -6.8581,   0.9520,   0.5089,   5.1223,  -4.6085],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7498, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.2439,  -7.7565,  -7.6586,  -5.1570,  -6.9771,  -6.4768, -21.5976,
         -5.0554,  -7.1243, -15.9590,  -4.9638, -13.0805,  -2.2726,  -5.7338,
          0.1328,   2.0615,   4.0112,  -2.8213,  -1.7109,  -1.9411],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6662, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.0678,  -5.8667,  -1.3329,  -7.0345,   3.6799,  -3.3361,  -1.6765,
         -7.6208,  -2.0680, -11.5806,   2.4522,   2.6446,  -5.9122,  -0.3864,
         -3.7906,   1.2916,  -4.9914,   2.7316,   3.9954,  -2.9175],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1393, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0347, -2.5704, -3.0900,  1.4217,  2.4063, -1.8359,  0.4460, -4.9447,
        -0.4804, -3.8358,  2.0480,  3.6322, -5.9445, -1.6203, -3.0919, -0.6905,
        -0.1541,  3.1545, -9.1236, -4.1385], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4223, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7695,  -0.6832,  -2.0132,  -1.2498,   1.9517,   4.2576,  -3.2578,
          1.0850, -10.2246,  -3.6503,  -7.4217,  -8.5074,  -2.3271,   2.5511,
        -10.5842,  -4.5380,  -2.4676,  -4.0552,  -7.0880,  -1.4619],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1774,  0.0485,  5.2120, -8.1642,  1.2363, -2.8907, -0.3780, -2.2074,
         3.2006, -1.9431,  0.8073, -3.3342,  1.0156, -3.0098,  2.8374,  3.9132,
        -3.6431,  0.7193, -3.4928,  0.1054], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4895, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0921e+00, -5.4921e+00,  5.4669e-01, -3.9421e-01,  5.8940e+00,
        -1.6343e+01, -1.4380e+00, -1.3472e+00, -5.4889e+00,  2.3268e+00,
         2.7380e+00, -1.8676e+00, -1.4838e+00, -3.4067e+00,  1.3655e-02,
        -2.4250e+00,  4.5828e+00, -1.3825e+00, -8.1429e-01, -1.5649e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4223, -7.9738, -9.4647, -2.9771, -4.6038, -1.3821, -8.3521, -4.4965,
        -7.6826, -1.0111, -4.5749,  3.3336, -5.7756, -2.0144, -5.8564, -5.2788,
        -3.4565,  1.2951, -2.8366, -2.3676], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.9949, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8084,  0.0160,  1.8982,  4.5036, -3.4689,  2.0493, -1.7990, -3.9965,
        -3.6445,  2.9854,  2.0872, -2.4283,  1.5149, -0.4439, -4.2630,  2.4235,
         1.9825, -3.7395, -0.0752, -7.6657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.0021,  -2.1502,   0.8017,  -6.8259,  -0.7006,  -0.4892,   3.0735,
         -2.7748,  -6.2406,  -0.6610,  -0.9847,  -4.7232,   3.3644,   3.1097,
         -5.2832,  -1.9873,   0.3901, -43.0683,  -6.5218,  -4.0171],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6843, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.7421, -2.5090,  0.9967,  0.0109, -1.1155,  2.9702, -3.5229, -3.5989,
        -0.6677, -1.4809,  1.1726, -9.7663,  2.2505,  1.3558, -5.1000,  0.0399,
        -3.8626, -1.6671, -0.6420, -2.3498], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1372, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.2544, -3.3105,  3.2093,  1.7777, -2.7064,  0.2628, -2.9738, -7.5078,
        -2.1866, -1.2125, -3.9973, -3.6655, -5.8346, -3.1248, -5.4776, -0.2605,
         0.4088, -1.6795, -6.9821, -4.8163], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.4911, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.6279, -4.0412, -1.8701, -1.8171,  6.1929, -3.9795,  0.9002, -1.2981,
        -1.3327,  2.8127,  3.6580, -1.3398,  0.2710, -2.1176, -2.2706,  3.5005,
         4.2583, -3.4650, -0.1763, -3.9820], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3362, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0337,  1.0176, -0.0882, -4.6062,  0.7000, -1.0877, -3.5461, -1.0036,
        -8.2576, -6.0737, -3.1618, -4.8092, -0.5394, -4.2455,  6.6885, -3.6477,
        -0.4464, -1.4656,  0.5289, -2.4580], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0768, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2712,  0.8297, -2.4675, -0.8681, -7.7866,  6.1079, -1.3805,  1.9805,
        -2.6377, -0.3113, -4.9881,  2.3764,  1.5075, -1.9341, -2.3607, -2.6923,
        -1.4879, -4.2746,  6.5927, -8.3638], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0439, -3.1693, -0.3463, -4.3779, -0.3038, -1.2312, -2.7645,  1.7990,
        -2.0093, -0.3719, -4.1954,  3.5832,  2.5373, -1.8148,  1.3920, -1.3686,
        -0.3848, -2.0261,  2.7499, -5.9624], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9154, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9579,  -8.6653,  -5.4248,  -3.2196,  -1.6647,  -0.9445,   1.0476,
          2.7700, -21.8060,  -2.4477,  -3.0952,  -1.5081,  -3.2878, -17.1593,
        -21.1942,  -6.9179, -12.4542,  -7.4005,  -3.7735,  -5.3440],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0145,  -0.8142,  -0.3599,  -5.6476,   1.2373,   3.2510,  -2.8620,
         -1.4353,  -4.4356,  -1.3336,  -2.7044,  -1.4855,  -3.8777, -11.9989,
         -0.5419, -25.3281,  -5.4670, -14.0665,  -5.2008, -10.0379],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7561, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7656,  0.3183, -2.6588, -1.7141, -2.5613,  0.5444,  3.3585,  4.8094,
        -2.9443, -3.8103, -2.8840, -3.1664, -4.6269, -0.0235,  4.0269, -1.9760,
         0.4344,  0.1451, -2.1134,  3.7503], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0308, -1.5035, -8.0722, -3.8969, -3.9274, -5.0632, -1.8564, -4.1986,
         1.7259,  1.2558, -3.1211, -0.3067, -1.7730, -1.7778,  0.4215,  6.0986,
        -1.8022,  1.0331, -2.6308, -0.8921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6159, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2070, -3.3355,  2.8722,  4.8924, -1.2502,  0.1618, -0.8946,  0.3941,
         0.8198,  4.5473, -4.6189, -1.6082, -2.4041, -1.3817, -3.0917,  2.8037,
         4.4115, -1.7752, -0.7318, -3.3905], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0151, -3.0166,  2.4602,  3.5199, -1.2744,  1.8423, -2.8137, -0.2358,
         1.1076,  5.7894, -1.8789,  1.1721, -1.5353, -0.5363,  1.4553,  4.2578,
        -1.5756,  1.5382, -2.4885, -0.2898], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.3241, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6465, -1.2476, -0.4379, -5.0099,  3.7281,  1.8771, -2.2757,  0.3712,
        -0.8128, -2.4215,  3.0853,  5.0104, -2.7357,  0.9881, -2.0759, -1.3214,
         1.5245,  5.0226, -2.2487,  1.0824], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.1375, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4714, -4.5637, -5.2294, -3.2632, -1.0571, -0.3168,  3.5310, -1.9414,
        -1.0828, -1.1319, -0.9548,  1.2823,  1.7553, -5.7144, -1.4558, -8.9928,
        -9.5178, -5.6635, -4.6671, -2.9219], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8189, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7868, -0.3754, -4.1502,  0.0271, -4.5557, -0.0299,  2.7428, -2.8821,
        -0.0433, -2.0697, -4.2998, -3.4280, -1.6196,  3.9705, -1.9605, -0.5445,
        -2.4119, -2.3224,  0.9501,  3.4853], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.1157e+00, -1.0973e+00,  3.4276e-01, -2.8807e+00, -1.4331e-02,
        -2.9566e-01,  4.9126e+00, -1.4730e+01, -8.6345e-01, -2.1726e+00,
        -5.0946e+00,  1.6746e+00,  3.4003e+00, -1.0320e+00, -7.0013e-01,
        -7.6603e+00, -6.0753e+00, -3.2593e+00, -3.3160e+00, -2.0338e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8390, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.2497,  -3.2109,   4.7194,  -4.4572,  -5.5284,   0.7275,  -2.9810,
         -0.6932,  -2.8608,   1.4198,  -3.5339,  -9.1396,  -1.2147,  -5.4418,
         -1.3997,  -6.5822,   2.8321,   2.0998, -10.1513,  -0.3254],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.4454,  0.3471, -2.9575,  2.0602,  3.7119, -1.6388, -0.0094, -2.7199,
        -4.4245,  0.5116,  1.3889, -2.9281, -2.1426, -2.3626, -0.9018, -5.6464,
         1.1333,  1.9009, -1.0937, -0.6016], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0409, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.9778, -10.8160,  -3.2336,  -1.4926,  -3.2037,   0.7480,  -1.1482,
         -2.5424,   3.8939,   1.7519,  -1.9641,   0.0421,  -2.6367,  -1.6857,
         -0.1969,   4.4643,  -2.5847,   1.0481,  -2.1409,   0.3449],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5165, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4542,  -0.2391, -11.8182,  -4.6780, -13.8419,  -0.4111,  -4.6278,
        -42.5395,  -3.2817,  -4.8323,  -5.5646,  -3.1889,  -4.5601,  -1.4053,
         -0.4896,  -0.9553,   1.2356,  -2.0488,  -0.4086,   2.4043],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.1853, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2458, -3.7150,  3.2890, -1.6274,  1.5821, -2.2375, -0.2984, -1.8392,
         3.5439, -4.2748, -2.9115, -3.5102, -1.4651, -2.6592,  1.2167,  4.2112,
        -2.6902, -0.4498, -2.1410, -0.4143], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8318, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.8396,  -5.7293,  -5.2084,  -6.6469,  -8.5774,  -5.7822,  -5.3714,
         -4.5810,  -4.8631,  -6.1579,  -4.5462,  -4.7656,  -6.7768,  -5.4155,
         -6.8699, -10.4688,  -8.1431,  -4.0192,  -4.7074,  -5.8340],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.0152, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6359, -6.2065, -3.5759, -4.9381, -4.9777, -5.2761,  3.5881, -2.1985,
        -3.8673, -4.2681, -5.1240, -5.3088, -4.6878, -5.4829, -3.5402, -2.7785,
        -1.6606, -4.4363, -5.0680, -4.2979], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0870, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1253, -4.0446, -0.6591, -3.0335,  3.7395,  2.3193, -2.1058, -0.0213,
        -2.7952, -0.3318, -4.2677,  3.9408,  2.4761, -4.1623,  0.8965, -1.6383,
        -1.4141,  0.8449,  3.7365, -1.6235], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4009, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.6600, -4.0450,  0.8943, -3.3042,  0.2086, -2.6369,  0.6823,  3.7819,
        -3.2760,  0.2665, -2.9213, -1.6601, -2.5252,  4.0108,  2.3081, -2.1845,
         1.3814, -2.4252,  0.0864, -6.4178], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7558, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7706,  1.5267, -3.4476, -0.9966, -2.9345,  2.3016,  3.6468, -2.5973,
        -3.7474, -3.0071, -0.8832, -6.5600,  1.8178,  1.3553, -2.4135,  1.4146,
        -1.3878, -0.6657,  2.1508,  5.1961], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0120, -0.8462, -3.3653, -0.7236, -5.1727, -4.3377, -0.2930, -4.6006,
        -4.8508, -0.4332,  3.2818, -3.6895, -3.6067, -2.8281, -1.9048, -0.1841,
         1.5422, -2.2456, -0.3879, -4.6195], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3639, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.0237,  2.2605,  5.4134, -2.8699, -0.4316, -2.8907,  0.2772, -3.6640,
         3.4430,  3.4901, -3.1403, -1.0356, -2.1368, -2.6545, -6.1175,  1.2871,
         1.7609, -2.0638,  0.8773, -2.1969], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5708, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4404,  -0.3059,  -0.7990,   3.9279,  -8.6134,   1.0702,  -2.0921,
         -1.8929,  -3.5472,   3.3623,   3.3326,  -1.9858,   1.1971,  -1.0673,
          1.4740,  -0.0144,   7.0759,  -4.5117,  -2.8484, -10.1692],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.9424, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2821, -3.6378,  3.1761,  2.5457, -2.6867,  0.4832, -1.2233, -0.1115,
         0.2717,  5.3759, -5.5657,  1.1254, -1.0095, -0.9357,  1.8175,  4.9038,
        -1.2692,  0.6243, -3.1973, -0.8845], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0240, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3543,  1.8931,  1.9270, -2.6604,  1.0326, -4.1406, -2.3647,  1.1180,
         3.5509, -3.0847,  1.2146, -0.3229, -1.0689, -1.3372,  5.4967, -5.5611,
         0.5125, -3.4990,  0.2480, -1.6614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6031, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-15.1658,  -1.1365,  -7.7946,   0.8852,  -3.8412,  -2.0273,  -3.4776,
         -4.8911,  -4.3377,  -5.8953,  -4.1654,  -7.4855,  -0.5704,  -7.0716,
         -2.7914,  -3.6852,  -0.0640,  -3.5220,   3.0407,   2.6074],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5695, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.5730,   1.4807,  -3.3075,   0.4562,  -3.8737,   0.9176, -11.6008,
          3.8695,   3.1247,  -7.5453,  -2.2757,  -6.5250, -10.2624,  -2.9545,
         -6.8701,  -6.2746,  -5.3805, -15.3799,  -1.3662,  -7.1702],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0755, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9220, -10.7948,  -1.2652,  -5.4704,  -0.6449,   3.3243,   4.0925,
        -10.0089,   1.3759,  -1.5915,  -0.7892,  -2.4896,  -0.8536,  -1.4532,
          0.0884,  -2.1828,  -0.5290,  -6.4903,   2.5359,   1.6686],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7200, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.5690,  -4.1745,  -6.3114,   0.6622,  -0.6591,  -6.4403,  -1.6217,
         -2.2593,  -2.4934,  -0.4406,   2.2157,  -3.9613,  -2.5904,  -6.4311,
        -17.6589,  -1.9364,  -5.2750,  -0.3250,   0.3778,   5.5807],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9036, -3.3460, -2.0972,  0.8498, -5.1917, -3.3297, -3.8578, -2.4265,
        -2.1875,  1.7556, -3.5598, -2.0046, -4.7913, -3.4753, -3.6030,  0.5066,
         1.1042, -2.3308, -2.7571, -3.1560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3901, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.8740, -11.8855,  -1.5046,  -4.3715,  -0.4576,   1.6681,  -0.2335,
         -3.1528,  -1.3478,  -2.5541,  -1.7936, -14.7556,   2.4900,   2.0004,
         -4.1421,   0.6462,  -0.3926,  -0.2019,  -5.6736,   4.0066],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2765, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6832, -8.8085, -1.6436,  0.6886, -3.5491, -0.7652, -4.0969, -0.5673,
        -8.7839, -1.5726, -0.0324, -6.4634, -1.6257, -1.2780, -0.3847,  3.1557,
         5.0777, -2.0928,  1.7091, -2.7209], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7718, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.6863,   6.2023,  -2.7184,  -5.9134,  -4.3588,  -3.6574,  -3.9101,
         -2.7549,  -1.6390,  -4.9359,  -3.2257,  -0.7899,  -3.3710,   2.4300,
         -1.0884,  -0.3691, -10.1269, -21.4256,  -3.9076,  -6.5198],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.0383, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6068, -0.7268, -3.4539, -7.1727,  3.1171, -5.8693,  0.2316, -1.4574,
         0.9556, -3.0095,  1.2644, -4.4528, -9.3672, -2.0558, -3.8475, -1.0997,
        -3.3311,  1.8890,  3.1326, -8.6905], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.3276, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.8339,  4.4098, -3.4002,  0.4354, -3.1080, -2.4903, -2.6590,  1.8407,
         3.1532, -0.5130,  0.3696, -0.7477, -0.1831,  1.3736,  5.2661, -1.5113,
        -3.9038, -3.0284, -0.6888, -3.7916], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-10.9120,  -7.7795, -27.1691,  -2.0538,  -1.1880,  -4.1614,  -1.2902,
        -21.2163,  -7.4162,  -4.4953,  -5.7249,  -4.6977,  -2.7744,  -1.5377,
         -7.5363,   3.9949,   1.0339,  -2.5670,  -0.2636,  -5.7383],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.6746, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.7734,   3.0800,  -3.2992,   0.2233, -12.3512,  -3.5333, -17.3165,
         -1.9465,  -5.5728, -44.4310,  -8.0393,  -1.1753,  -4.8316,  -0.1364,
         -2.4869,   1.4090,   4.7535,  -1.8991,   1.6499,  -2.6515],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.7891, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.5947, -2.7889,  2.7734,  2.8903, -1.2273, -1.4592, -0.0961, -0.5382,
         2.3379,  5.6401, -1.7614,  1.3532, -1.6445, -3.6738,  2.6395,  3.9585,
        -2.6598, -0.6214, -1.3719,  0.6021], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.1879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.0169, -7.7934,  2.3750,  2.9197, -2.3438,  1.0027, -2.7811, -1.3577,
        -5.5405, -0.2460,  3.0211, -3.7948, -0.5970, -3.7238, -0.8151, -2.9688,
         2.0903,  0.0528, -2.8889,  0.0516], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7488,  1.9595,  2.9403, -3.2391, -0.9133, -4.3089, -0.4811, -7.2490,
         1.1808,  3.6818, -5.9566, -0.7773, -2.9824,  0.0385, -4.5480,  3.0333,
         3.4217, -3.5473,  0.9616, -0.4039], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2469, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2432,  -1.0274,   1.5553,   5.3789,  -1.7278,  -2.4471,  -4.6539,
         -3.5853,   2.8453,   2.9162,  -3.0738,   0.9337,  -0.2203,  -2.6765,
          1.4521,   1.8584,  -2.7491,   1.0588, -24.0230,  -2.5188],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5474, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9054, -1.1304, -1.9536, -1.1447,  3.8648,  4.4603, -3.3331, -0.4835,
        -3.1679, -0.3467, -4.2054,  4.0177,  2.8298, -2.0353,  1.3528, -1.3006,
        -0.5522,  1.7208,  4.2293, -2.4423], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1763, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.8464, -0.3449,  2.3654, -2.2379, -4.4050, -0.7640, -3.3146, -1.3150,
        -1.2944, -0.4913,  3.6880, -3.9641, -1.5178, -2.8857, -4.1340, -3.4964,
        -0.0325,  3.9514, -3.1182,  0.8472], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.5237,  -1.9103,  -3.5140,  -0.0542,  -3.7925,  -0.7273, -15.7057,
         -1.0265,   1.0080,  -2.3123,   0.1827,  -0.6701,  -3.3778,   3.1580,
          3.3264,  -4.6585,   0.9425,  -2.2386,  -4.0082,   2.5170],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7693, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.0184, -4.8338, -7.5004, -5.0939, -4.4395, -3.2131, -4.8645,  3.5753,
        -4.2937, -1.4036, -4.1541, -5.4701, -4.9981,  1.8124, -5.6298, -0.4876,
        -5.0467, -6.4465, -2.5518,  2.8631], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5597, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.5505,  -5.2536,  -5.0474,  -1.9434,   1.0735,  -7.1415,  -2.0006,
         -2.3136,  -0.7526,  -4.4929,   2.7415,   1.6571, -10.6695,  -2.1403,
         -2.6412,  -3.8472,  -2.5010,   2.2797,   3.4204,  -2.2365],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3134,  -0.7374,  -0.5685,   4.5773,  -2.1260,  -2.5558,  -3.5798,
        -19.4135,  -8.5047,  -5.2154,  -4.9120,  -0.6977, -14.4498,  -2.4691,
         -5.6088,  -1.7501,  -3.5327,   0.9523,   3.9791,  -2.9910],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.6959, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6121, -6.5818, -2.2985, -1.0956,  2.4664, -2.1313, -0.3042, -2.3102,
        -0.9852, -6.5568,  2.8519,  2.6235, -1.7742,  0.7509, -3.9947, -1.3861,
         1.0072,  5.3829, -2.3332, -0.5279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.9266,  -0.0673,  -2.0388,   2.7061,   3.9865,  -3.3500,   1.6799,
        -15.5941,  -3.1180,  -7.6614,  -0.7003,  -0.0196,   1.1916, -12.9768,
         -0.8126,  -1.6440,  -2.3568,  -3.4241,   3.7518,   0.2891],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2043, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.4860, -4.4800,  0.4659, -3.5105,  0.2729, -3.8205,  1.7169,  4.7280,
        -2.9201, -3.9993, -3.9602, -2.4089, -2.2652, -2.2057,  0.2935, -5.7916,
        -0.9479, -1.5692, -2.8398,  2.9757], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9933, -3.8573, -0.3618,  3.1525, -2.4976, -0.2240, -0.7878, -0.0670,
        -2.8744, -2.4203, -1.9991, -3.7452, -5.8322, -3.1748, -1.4195, -2.1961,
         3.4727, -2.7888,  0.1051, -1.6986], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6104, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2596,  -6.1318,  -1.2282, -19.5927,  -5.6674, -15.0722,  -6.2462,
         -4.7142,  -1.8002,   0.1616,   5.9413,  -6.4365,  -1.9888,  -3.1943,
          1.3613,  -1.9616,   6.1332,  -1.7216,  -3.9703,  -3.0125],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.7871, -1.3548, -5.7462,  2.4300,  4.5564, -6.0351, -0.4216, -1.3718,
        -2.5861,  2.7158,  4.6816, -5.1324,  1.7447, -1.4578, -0.3544, -0.3001,
        -2.0676, -6.2840, -0.8144, -5.1036], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.9303,  3.3924, -2.7400,  0.2405, -5.0691, -1.2475, -7.2031, -2.4735,
        -0.9605, -2.6017, -2.1765, -1.4913, -2.1420, -2.1307,  3.2636, -6.0596,
        -1.8811, -3.2305, -1.3465, -2.9513], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4227, -2.6556,  2.9443,  3.4316, -2.9739,  0.2336, -2.4484, -0.4943,
        -4.7121,  0.6387,  2.0239, -2.4868,  2.4090, -1.1420,  0.4231, -0.1375,
         2.7056, -6.9211, -1.9156, -4.8790], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.3466,   3.2251,  -0.8731,  -2.2502,  -3.3989,  -5.3785, -40.9795,
         -2.3780, -16.6343,  -3.8159,  -1.4441,  -7.9286,  -0.1000,   3.3853,
         -2.4207,   1.6800,  -3.7221,  -1.8370,  -3.7843,   5.5608],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.3720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.3811,  4.8985, -1.0256,  1.0747, -2.5366, -2.0170,  1.0826,  3.5035,
        -3.8796, -0.6481, -2.0709, -0.3298, -4.7069,  0.6860,  0.0893,  0.8439,
        -0.5009, -3.1487,  0.9233,  4.1657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0775, -5.2319, -4.5896,  2.0279,  4.6262, -1.9858, -1.0325, -1.9174,
        -1.8210, -7.9671,  2.3490, -8.7245, -4.0946,  0.1135, -2.3393, -0.1321,
        -8.3770, -4.6953, -1.2024, -0.1930], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5132, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.0339,  3.2631, -5.1775, -1.4140, -2.1376, -4.1922,  1.7699,  2.1389,
        -1.7934,  0.3992, -4.3121,  0.2398, -2.3974,  4.7960, -0.8048, -2.4250,
        -2.1780, -2.1693,  0.8435,  4.1758], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4171, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1026,   0.9139,   3.1149,  -4.9660,   1.2098,  -1.9000,  -1.2469,
          3.6241,   2.2889,  -2.5239,   1.3805,  -2.5550,  -4.8943,   1.5051,
         -4.3971,  -4.8887,  -0.6542,  -4.5275,  -1.7822, -36.6214],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.4250, -2.8792, -0.4374, -4.3342,  2.3847,  1.3909, -1.4121,  0.4541,
        -1.9978,  0.2536, -1.9242,  3.0482, -4.3164,  1.1796, -1.8549, -0.7458,
        -2.4595,  1.8358, -1.5304, -0.6630], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6791, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6878,  3.6259, -0.9693, -0.4165, -2.8436, -0.0541, -5.8178,  0.1493,
         0.6919, -1.5744,  1.6097, -1.5317, -1.5191,  3.2296,  3.2612, -3.4562,
        -0.5722, -4.9313, -4.1643, -1.8473], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4108,  -3.3563,   2.8796,   3.0552,  -9.4790,  -3.0142,  -3.0342,
        -13.6660,  -8.4587,  -4.2523,  -2.3731, -54.2743,  -1.8598,  -3.0845,
         -5.1546,  -5.9537,  -4.0324, -12.8318,   1.7370,  -3.1084],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.5336, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3043,  -2.6658,   3.3589,   0.3877,  -2.5922,  -0.7637,  -1.1367,
         -4.5628,   1.0989,   1.1359,  -2.4293,  -1.6633, -11.1820,  -4.4888,
         -6.5738,  -3.5374,  -0.0429,  -6.1144,   3.0656,   3.3770],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.8935,  -1.8712,  -3.1066,  -3.0471,   0.2931,  -6.2067,  -0.2044,
          1.7080,  -2.8632,   0.4548,  -1.3114,  -6.0080,  -2.9205,   1.2674,
          4.1012,  -4.0155,   0.3449, -11.2421,  -2.3821,  -7.1607],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1138, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.9316,  0.0630, -2.0670,  1.3698, -4.4541, -3.3919,  0.7616, -2.7872,
        -1.3487, -5.7513,  2.0601,  1.8285, -2.7041, -0.4895, -2.5292, -1.1002,
         2.6573,  3.7491, -3.4147, -0.2881], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3719,  0.6971, -2.2003, -0.0729, -3.3748,  2.4923,  4.8065, -4.9433,
        -0.0929, -2.8361,  0.4346, -4.0592,  0.5738, -2.5022, -2.3841, -0.7263,
        -4.0877, -1.6924, -2.7455,  4.2526], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3416, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.2437e+00, -9.3101e+00, -3.7521e+01, -9.6518e+00, -1.2769e+01,
        -7.6527e+00, -7.5282e+00, -5.7156e+00, -9.2524e-03, -4.5987e-01,
         4.7800e+00, -3.7318e+00, -3.8357e-01, -3.7213e+00, -7.9729e-01,
        -4.8090e+00,  1.7410e+00,  3.9167e+00, -2.7077e+00, -1.9592e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2824,  0.6644, -2.0459, -2.4736,  3.7045,  3.6282, -4.5733, -0.4480,
        -2.2278, -0.5459, -4.6345,  2.5142,  1.0841, -1.3468,  0.9178, -1.4967,
        -3.7732,  1.9651,  3.7063, -1.7304], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0366,  -1.0099,  -7.1837,  -4.1790,  -1.7469,  -2.5132,  -3.6952,
         -1.1453,  -3.1920,  -1.0495, -10.7871,   3.6431,   1.8748, -10.1877,
         -1.1150,  -2.4219,  -4.5256,   2.1656,   2.4086,  -3.1330],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.4915, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.4625,  2.9539, -3.7007,  0.1160, -5.1738, -6.0945, -0.9985, -1.3936,
        -1.4915, -0.4598, -0.6511, -5.1324,  2.1063,  3.2855, -1.5665,  2.1729,
        -8.3384, -3.6105, -9.8797,  0.4056], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7994, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5561, -0.0803, -2.1027,  1.7819,  4.7777, -2.2219,  1.2091, -1.5484,
        -0.3022, -3.2146,  2.5390,  4.1418, -2.8866,  1.3663, -1.7584, -1.0069,
         2.2291,  4.7452, -1.8275, -1.4198], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0068, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.5360, -2.2647, -4.0044,  0.2019,  3.9610, -1.3530, -5.3793, -3.0297,
        -2.6775, -7.0236, -4.4928,  1.9811, -5.0591,  0.4298, -1.9327, -3.8033,
         3.1285,  1.6809, -3.0739,  1.4839], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6381, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.1926,  1.9684, -3.6207, -2.2103, -3.1857, -2.3697,  0.4929,  5.2976,
        -8.4965, -3.5992, -4.2395, -0.8900, -2.4191,  3.0768,  4.7566, -2.8586,
        -1.1718, -3.7325, -1.4769, -4.6615], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4766, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -8.1537,  -1.6205,  -3.1426,   2.2253,  -2.6045, -10.9304,  -1.1505,
         -4.0695,  -2.1994,  -3.5107,   1.7365,   2.4846,  -6.2142,   0.3122,
         -3.1995,  -0.8521,  -3.2139,   1.6895,   3.2134,  -2.3646],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0782, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.1259, -4.2873, -6.1052, -3.9583, -2.2898, -4.9280, -1.2783,  1.8773,
        -5.4003, -0.0625, -3.5731, -1.1299,  2.9903,  3.7275, -2.9295,  0.6382,
        -1.3076, -2.3008,  1.5598,  3.7879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3048, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.8661,  -3.2883,   2.0861, -12.5947,  -6.4411,  -2.8214,  -3.5034,
         -1.6397,   2.9123,   1.2091,  -2.4635,   0.6492,  -1.2851,  -0.3085,
         -3.3554,   2.3148,   2.1281,  -8.0838,  -1.5819,  -2.2007],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7201, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3727,   0.2082,  -0.6641,  -1.8897,   3.6525,   4.2924,  -2.0018,
          1.0011,  -3.7882,  -3.6020,   2.0953,   3.8698,  -4.9130,  -3.4589,
        -41.1976, -10.1917,  -3.4077,  -3.9008,  -0.2976,   1.8035],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2881, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-11.2461,  -1.7936,  -7.2589,  -0.1061,   0.6151,   6.5353, -13.5688,
         -1.3355,  -2.8395,  -1.2923,  -3.5255,   2.2261,  -0.9095,  -2.2591,
         -0.1219,  -5.6655,  -2.1191,   1.5113,   3.2575,  -1.1033],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0500, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.1542,   2.8726,  -3.3290,  -2.3405, -11.6164,  -4.8119,  -8.6015,
         -7.9379, -11.6285,  -7.3583,   0.4344,   1.6238,   2.5886,  -5.8097,
          0.3933,  -2.2960,  -0.6883,  -3.1494,  -1.8632,   1.4144],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9975, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.3859, -10.4426,   3.6115,   2.9978,  -6.3670,   0.3598,  -2.4393,
         -1.0042,  -3.5466,   2.5743,   3.1004,  -2.1991,   0.3404,  -2.0962,
          0.1732,   0.8989,   3.8188,  -2.0060,  -2.3567, -10.8672],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2032, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.3571e+00,  5.0649e+00, -1.7641e+00,  1.0487e-03, -3.0074e+00,
        -1.0059e+00, -3.6550e+00,  1.8272e+00,  3.1316e+00, -1.0217e+00,
         1.2745e+00, -7.9201e-01, -5.4874e+00,  2.9091e+00,  2.7244e+00,
        -2.7436e+00, -1.4151e+00, -1.4512e+01, -2.9837e+00, -3.8139e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1956, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.0495,  -4.4632,  -5.1937,   0.0613, -15.3360,   0.9202,  -8.7491,
         -2.0184,  -1.4124,  -1.8625,  -1.1100,  -1.3670,  -1.7031,  -0.8408,
         -4.4795,  -2.2507,  -4.6745,  -1.8675,  -0.9115,  -1.4896],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1899, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.8281,   2.0467,   5.0194,  -3.4290,  -2.6827, -10.8160,  -4.0319,
         -5.4679,  -3.3884,  -7.3774,  -0.6018,   2.9228,  -2.8214,   0.2353,
         -3.1316,   0.4110,  -7.2781,   2.8703,   3.1286,  -6.2287],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.1224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.5519, -0.9807, -1.9985, -1.5349,  1.4155,  4.2655, -3.2822,  0.1092,
        -2.7653, -2.8010,  1.4228,  1.6401, -2.0833, -2.4255, -3.3767, -1.1960,
        -6.4318, -2.2337, -0.2371, -3.6149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5830, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3866,  1.1374,  1.9148, -2.0069,  0.8727, -1.9628, -4.1814,  3.1585,
         3.7285, -4.0351, -1.4531, -2.4888, -3.5815,  3.3871,  3.5719, -2.0908,
         1.4231, -1.5942, -1.9999,  1.4247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4081, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.2668, -5.3253, -9.2723, -2.4198, -4.4220, -4.4004, -7.4819, -5.4360,
        -8.8877, -4.1875, -4.7751, -7.4176, -3.8091, -6.0934, -1.3037, -6.1072,
         1.1793, -5.5473, -3.7270, -5.8334], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.9767, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.9304,  -0.0184,  -0.8714,   4.2123,  -3.2797,   0.7436,  -4.1398,
         -5.2857,   1.6258,   2.7541,  -7.1667,  -3.7124, -16.6084,  -8.4732,
         -6.9598,  -4.5731,  -0.3346,  -2.1043,   5.7915,  -8.6629],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9997, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5246, -1.9367,  0.9713, -0.2845,  5.1807, -0.8665,  0.4758, -2.0952,
        -1.7655,  2.4790,  4.6126, -1.6498, -0.2540, -1.2674, -3.2141,  2.1280,
         3.1605, -4.3985,  0.6913, -2.0731], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(0.0209, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3692,   0.7633,  -3.2171,  -2.8432,   2.5718,   4.3198,  -5.0263,
         -1.7942,  -2.8978,  -9.6589,   1.0197,   0.7966,  -4.3813,  -3.1431,
        -19.7082,  -7.4638,  -5.2451,  -3.1507,  -0.4440,  -0.4908],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.5886, -3.8873,  0.8835,  2.7103, -3.4212, -1.0139, -0.9487, -7.1904,
         1.4272,  2.5876, -5.7122,  1.1471, -3.8111, -3.7231,  1.3567, -2.6162,
        -2.0767,  0.7906, -0.6086, -4.7425], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.5719, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.5029,  -2.9709,  -2.7109,  -2.4925,   4.1825,  -1.0245,   1.0317,
         -1.0988,  -0.3534,   2.1970,   5.5705,  -3.4761,  -5.2880,  -2.5973,
          0.8284, -11.0608,   1.4400,   1.7823,  -3.5548,   0.4578],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.1320, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6625, -4.3544,  0.7052,  3.1840, -3.8262, -0.4763, -1.9707, -1.0538,
        -6.8762,  1.4650,  1.6985, -1.8954, -1.3484, -1.4605, -2.2969,  2.1489,
         3.5860, -2.5300,  0.4580, -4.0863], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0296, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.7336,  -7.5593,  -2.7402,  -5.1669,   1.5841,   5.1656, -12.3312,
          0.6036,  -2.2898,  -1.7920,  -6.6828,   2.2527,   2.7931,  -2.2138,
         -0.4401,  -1.4774,  -1.4607,   2.6147,   4.7099,  -1.4469],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.4305, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.1556, -4.1481, -5.5679, -1.9450, -5.4148, -5.0630,  1.2743, -4.6558,
        -6.5107, -2.9883, -0.1418, -4.8575,  4.1721, -1.9788, -2.4254, -0.6776,
        -5.7158, -0.3423, -3.5191,  2.1701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.5245, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0195,  -3.8943,   0.8134,   1.8063,  -3.1942,  -1.7500, -11.1046,
         -2.7087,  -5.5390,  -4.7744, -40.5291,   0.4552,  -8.6003, -15.3207,
        -10.3175,  -7.6971,  -2.6921,  -8.5121,  -2.0120,   0.2588],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.2146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.6139, -2.1397, -2.3525, -2.8578,  3.2951,  0.2550, -8.5510, -1.2560,
        -2.3404, -3.8002,  2.8912,  1.6793, -3.0047,  1.3068, -4.0639, -1.2619,
        -2.8113,  3.0371,  3.7913, -1.6696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0734, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.2301, -7.3956, -4.3571, -5.1143, -3.9096, -4.6089,  1.0350, -3.8026,
        -4.6683, -4.7108, -3.0235, -5.6823, -1.3650,  1.1880, -3.5362, -2.7502,
        -2.9580, -4.2283, -2.5609, -2.5730], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.5126, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9285, -1.7144, -5.8886, -3.8976, -4.0458, -2.3070, -3.5098, -2.0044,
        -4.9783, -2.9369, -3.5172,  4.3009, -2.9000, -3.4395, -6.0590, -7.6928,
        -3.6333, -5.4225, -2.9703, -1.8375], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-3.4191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1000e+00, -7.3032e+00, -3.1318e+00, -2.0865e-03, -3.7196e+00,
        -3.5863e+00, -5.6393e+00, -4.3584e+00, -2.7131e+00, -3.7774e+00,
         2.3756e+00, -2.8755e+00, -2.3485e+00, -5.1472e+00, -4.5870e+00,
        -1.8029e+00, -3.7717e+00,  1.7272e+00, -5.1593e+00, -6.0706e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2496, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.0482, -10.1362,  -3.2328, -12.9420,  -4.8404,  -0.1674,  -5.0338,
          0.1832,  -5.7883,  -3.3211,  -3.6575, -19.3701,  -0.3077,  -2.6896,
         -7.5676,  -3.9531, -21.0307,  -9.8558,  -5.0409,  -4.0787],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.4439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3240, -1.3810, -6.4951,  6.4211, -5.8045,  0.9035, -1.2549, -4.6637,
        -3.4942,  4.3951, -5.4068, -0.7614, -1.4459, -0.2179, -4.7965,  5.9763,
        -7.0735, -0.3907, -3.0656, -1.5368], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6708, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.1604,   0.6940,  -2.8235,  -0.8095,  -4.4507,   3.9092,   3.9114,
         -2.1431,  -0.7159,  -0.9184,  -1.3664,   1.9540,   1.6546,  -2.4968,
          0.0348,  -3.4841,  -4.3060,  -1.1045,   4.0125, -11.1670],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.0888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.1195, -1.7868, -0.2237,  5.3266, -1.4329, -0.7130, -2.2170, -0.2826,
        -1.5254,  1.6836, -2.7201, -2.5842, -9.4722, -7.5582, -3.1581, -4.9748,
         0.0623, -0.6084,  4.7496, -4.1560], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.2507,  -4.2791, -11.6688,  -1.6667,  -2.6584,  -3.7398,   2.3687,
         -6.4200,  -6.5670,  -6.8821,  -4.5000,  -9.7490,  -5.3534,  -6.8354,
         -5.4443,  -3.6198,  -2.0096, -10.5967,  -7.5059,  -2.9344],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-5.0156, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9355,  0.3176, -6.3240,  3.1338,  1.5649, -2.7609, -0.0100, -3.4668,
        -0.9128, -3.3055,  2.7652,  1.1496, -3.5536,  0.4103, -1.6194, -6.6911,
         1.6054,  1.1957, -4.0351, -0.4438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.8748,  -2.1167,   3.3192,   4.6534,  -3.4898,  -0.2685,  -1.2094,
         -0.1946,   0.3640, -10.7293,  -6.2822,  -2.2926,  -3.3371,  -1.7677,
         -0.1858,   5.4066,  -3.2478,   0.2033,  -3.8111,   1.4067],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.2227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.7129, -0.1966, -4.9411,  0.3391, -2.7122,  3.6345,  4.3964, -3.4137,
         0.0728, -2.4836, -1.9084, -5.5735,  3.0507,  2.7921, -2.0859, -1.6759,
        -2.0004, -1.2348,  2.3046,  2.9485], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6700, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.1872,  -6.7311,   1.8639,  -2.2274,  -5.3122,  -3.1905,  -2.4397,
         -3.0337,  -8.2308,   2.3634,  -0.4513,  -1.6258,  -1.1089,  -4.1709,
        -11.7692,   3.7908,  -2.4116,  -1.7239,  -2.7573,  -0.9734],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5663, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4888,  2.8910,  4.6511, -1.4614,  0.6523, -2.3489, -3.1636,  1.9207,
         4.0224, -3.9892, -0.4064, -3.2558, -1.5982, -3.1555, -3.1037,  1.9423,
        -3.8521,  1.9868, -1.5944, -0.0278], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.7503, -1.9868, -1.8867, -1.6078,  1.5819, -2.8095, -0.5414, -4.1181,
        -0.6821, -2.7074,  1.2078,  4.5298, -3.1869, -2.9898, -7.4993, -4.8825,
        -4.6053, -3.6595, -0.8523,  0.6739], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.8886, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0481,  -2.1203,  -0.4235,  -4.2717,   3.2150,   3.0683,  -2.6921,
          1.3772,  -2.4009,   0.5609, -16.8719,   2.0981,   2.4774,  -2.9909,
         -0.3767,  -4.6077,   0.5089, -32.0953,  -1.9228,   2.7897],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.7363, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.0551,  0.7289, -1.4437, -0.7443,  0.3136,  3.3328, -2.5673, -0.9103,
        -3.7387, -0.2325, -3.8067, -3.5032,  3.2172, -0.6938,  0.7891, -1.1304,
        -1.3870,  2.8330,  3.4702, -2.0161], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6272, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4912, -5.4852, -4.0276, -4.8620, -7.9746, -4.5796, -6.7044, -4.3761,
        -8.7937, -3.3207, -8.3832, -3.5445, -6.6242, -5.1691, -7.9290,  0.0568,
        -4.4310, -1.1737, -4.5022, -5.4203], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-5.2368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1437,  -2.3917,  -3.6962,   2.0557,   2.8085,  -5.7918,  -3.1401,
        -11.7961,  -3.9458,  -5.2547,  -0.9932,  -0.7009,   2.1975,  -7.6263,
          0.7874,  -1.4054,  -2.0100,  -3.8787,   2.7530,  -1.0227],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9625,  3.3143,  2.7575, -2.2955,  1.5498, -1.5634, -0.0932,  1.9149,
         5.6047, -2.0945, -7.1352, -1.0818, -2.2409, -3.4473,  1.0044,  1.9130,
        -2.1481, -2.7992, -1.3853, -0.3848], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6287, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.0904,   5.0578,  -8.4531,  -2.7618,  -5.1929,  -1.6090,  -1.7470,
          2.0832,   1.7402,  -1.4429,  -5.7553, -24.0615,  -4.9190, -19.1040,
         -7.9346,  -2.1052,   3.9971,  -2.3836,  -9.6629,  -4.9658],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.4655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-33.5089, -28.2467, -34.0247, -17.7134, -13.4565,  -2.3313,  -7.6851,
         -3.6642,  -3.8819,  -5.6444,  -3.5680,  -3.1852,  -3.5854,  -4.2149,
         -3.0908,  -2.6041,  -5.2557,  -5.6566,  -3.9494,  -5.4487],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-9.5358, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3714, -0.2430, -6.7413, -5.7193, -3.2933, -2.6452, -1.4391,  2.2303,
         3.4633, -1.8889,  0.3735, -0.8325, -3.1319,  2.5535,  1.9695, -1.5269,
        -0.4439, -9.9180, -2.5691, -5.2688], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.9221, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.4139e+00, -2.7728e-04,  3.2427e+00, -8.8188e+00, -7.5653e-01,
        -3.1768e+00, -2.7623e+00,  1.1743e+00,  2.3340e+00, -1.7586e+00,
        -6.4752e-01, -6.6424e+00, -4.8612e+00, -3.3457e+00, -3.9738e+00,
        -4.9822e-01, -4.6688e+00,  4.5788e+00, -3.7054e+00, -1.5909e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9856,  -8.0812, -11.3509,  -0.3663,  -4.9608,  -2.1441,  -9.0537,
         -5.4872,  -6.4034,  -5.3388,  -5.5178,  -2.2886,  -3.4079,   2.0548,
         -3.4368,  -2.8712,  -3.9061,  -5.8810,  -3.3456,  -3.4451],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.6109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3390,  0.3884,  0.5978,  5.5142, -3.5783, -2.4272, -2.5860, -2.7221,
         1.9162,  5.2154, -1.5534,  1.6217, -2.7075, -4.0355,  1.9259,  3.0008,
        -3.6386,  1.1894, -0.1455,  0.1950], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1584, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1545, -2.0440,  0.1548, -3.5611,  5.8366, -2.5546, -0.2658, -3.6999,
        -0.4685, -2.1119,  2.1448,  1.9624, -2.6367,  0.2259, -2.1301, -4.2357,
        -2.9137,  4.2352, -4.9766, -2.0289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.1605,  -4.3374,  -1.8077,  -5.9752,   3.1285,  -5.9644,  -2.3365,
         -2.8397,  -2.4250,   2.3445,   3.3417,  -1.8115,  -0.9991, -11.0562,
         -1.9419,  -3.9813,  -0.6868,   1.5483,  -0.8280,  -4.5475],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.2168, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.9938, -5.5607, -1.8159, -5.2605, -1.3655, -0.9099, -2.9213,  0.0306,
        -1.6618, -1.7902,  2.3950,  4.5389, -3.3628,  0.3551, -1.6838,  0.4124,
        -4.5699,  5.8993, -5.1888,  1.0798], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2187, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2424, -3.3754, -0.0256,  2.0962, -2.1334,  1.3378, -3.4051, -1.3479,
        -2.8219,  2.7382,  4.5018, -2.0220, -0.3238, -9.9834, -3.8142, -6.9300,
        -0.0135, -2.4077,  5.5121, -3.3918], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.4026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.0246,  -3.9106,   0.1758,  -2.0470,  -5.3037,  -3.0476,   2.0144,
          2.3660,  -2.3549,   0.9062,  -1.2692,  -5.2247,   3.1139,   0.6192,
         -3.7361,  -1.9335,  -5.4713,  -5.8046,  -6.4082, -19.7294],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8010, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 3.8729,  3.3362, -2.3946, -0.0182, -1.7765, -3.6167, -4.1422,  4.1557,
        -0.9248, -4.8366,  0.1425, -4.3924, -2.1542, -3.7458,  1.9739,  2.5622,
        -2.5534,  1.6877, -0.3612, -3.4614], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8323, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.8256,   3.6561,  -8.2413,  -1.5719,  -1.5147,  -4.9514,   0.0543,
         -2.0425,  -8.8510,  -1.4851, -15.5846,  -8.0182,  -3.2785,  -2.4532,
         -1.0800,  -4.8053,   3.1116,   1.1877,  -2.6845,  -9.7337],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.5556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 5.2703, -2.7260, -2.6373, -2.0902, -0.9726, -1.0653,  2.4838, -2.9291,
         0.6442, -1.6235, -0.8182, -4.3644,  2.8915,  0.5200, -2.7456,  0.6076,
        -2.3229, -0.5096, -6.9972,  2.0295], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8677, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2006,  1.7801, -5.6394,  0.0792, -2.1266,  1.2454, -3.2125,  2.7263,
         4.1043, -1.7609, -0.9120, -3.4001, -4.5058, -2.0472,  0.4753,  1.0725,
        -9.8600, -0.7232, -1.7178, -2.8701], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2546, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.7387, -0.6595,  2.0340,  5.7818, -1.9457, -1.3634, -2.4916, -4.3608,
        -0.1644, -1.2596, -5.8063, -0.3849, -2.3735,  0.3867, -0.6368,  4.5082,
        -4.6568,  0.1975, -1.2206, -1.6868], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.8421, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.6938, -7.3322, -6.6348, -0.4616, -3.8790, -1.7986, -4.1512, -5.0311,
        -1.4779,  0.8090, -3.0746, -2.8671, -2.3987, -2.9508, -1.4560,  2.1232,
        -4.0964, -0.4363, -4.0548, -5.0692], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.9466, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4424, -4.0603,  1.0637, -2.1079, -4.1609, -3.1731, -1.2141, -0.3544,
         1.8306, -3.3508, -1.4662, -4.6725, -1.9758, -1.7575,  2.8445, -3.6128,
        -1.8832, -1.6238, -1.1151, -5.1169], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.1175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9814, -10.2726,  -5.2503,  -2.7532,  -7.1488,  -6.5980,  -1.7342,
         -4.0511,  -0.8421,  -8.9383,  -9.2977,  -3.3241,  -1.9758,  -1.3982,
          2.0528,   4.3224,  -2.8722,  -0.4306,  -2.0090,  -4.9466],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4224, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.2740, -2.8722, -0.1797, -0.7831, -0.4363, -1.4591,  5.6751, -1.7226,
        -0.5640, -1.8873, -3.5538,  2.8876,  0.8530, -3.6426,  1.1213, -3.1084,
        -2.9502, -4.2897,  1.4075,  1.4097], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4018, -2.5414, -0.6705, -3.8307, -2.2924, -0.5475, -3.0658, -0.7902,
        -3.3480,  1.0544, -1.9646, -2.4731, -1.4050, -4.2270,  0.9453,  4.1556,
        -2.8762,  0.3323, -3.1829, -2.1070], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6118, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.3828, -1.3734, -2.3088, -3.8446, -0.7408, -1.5040,  5.5379, -1.2085,
        -0.2219, -2.5209,  0.7577, -3.3774,  3.6143,  3.0975, -1.9287, -0.6124,
        -3.0153, -0.4137, -4.0200,  2.2513], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3724, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.1533, -4.4681, -4.3732, -3.4140,  1.4666,  2.2988, -1.8372, -1.5819,
        -1.2168, -1.7557, -5.1308,  3.6573,  0.2795, -2.7756, -0.9559, -4.4268,
         0.6255, -2.1790,  2.7612, -2.9133], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.2893, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.5375,  2.1894,  2.9570, -1.5479, -1.9750, -7.6805, -7.0284, -6.5069,
        -5.1018, -3.2241, -8.9444,  2.3994, -9.4273, -3.6246, -1.2062, -4.9328,
         1.9512,  2.8088, -3.9355,  0.4954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.8436, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.4419,   1.8578,  -1.9390,  -5.2620,  -5.6777,  -4.3657,  -4.5651,
         -1.7545,   1.7789,  -6.0199,  -4.3368, -14.6539,  -6.5513,  -3.1858,
         -1.9217,  -1.3552,   2.8567,  -0.7562,  -7.1405,  -0.9553],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2194, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.8967,   5.9461,  -1.3615,   0.1545,  -3.2140,   0.6143,  -3.8686,
          3.8063,   2.4016,  -2.3493,   0.7222,  -2.7936,   1.7655, -24.3026,
          1.3912,   1.8073,  -6.0684,   0.4028,  -1.9112,  -2.4763],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3219, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0497, -0.8530, -0.3003,  5.0726, -2.4356,  1.4532, -1.0059, -3.0364,
        -3.5769,  1.9812,  2.5037, -2.6725,  0.1084, -1.4375,  1.1155, -6.6029,
         0.0541,  1.8440, -4.9147,  0.7842], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.0101,  3.2845,  1.9620, -2.8046,  0.3885, -2.3578,  0.6586,  0.1874,
         5.6061, -1.9126,  0.9912, -1.7162, -3.2439,  1.7984,  5.2169, -1.8293,
         2.3619, -1.3981, -1.4790, -2.9190], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.0608, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-5.8229,  0.5281,  2.5558,  3.1054, -4.5754, -0.4321, -1.3423, -2.5459,
         0.3243,  0.4419, -3.1700, -2.6865, -3.6561, -2.4279, -4.1281,  2.7301,
         3.0252, -2.3911,  1.6750, -0.8988], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9846, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3266, -0.8504, -1.8523,  1.6427,  1.8818, -1.9825,  0.6071, -0.9018,
        -0.1336, -5.6101,  2.5467,  1.5349, -4.1722, -0.3282, -4.6411, -3.7841,
         0.3032,  2.9134, -5.5766,  0.2589], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.7483,  -2.6644,   0.5003, -10.4779,   2.9435,   2.5334,  -7.4332,
          0.4242,  -2.5299,   0.2053,  -4.6427,   2.0925,   2.6817,  -2.0672,
          0.6366,  -2.7297,  -1.0871,  -2.8317,   2.7849,  -3.9583],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.3242,   1.8418,  -3.9604,  -3.4854,  -0.2664,  -1.4124,  -0.1972,
          1.5290,   1.8411,  -6.3852,  -2.6598, -14.9563,  -7.1461,  -4.8636,
        -24.4552,  -3.2743,   0.6493,  -0.8430,   3.8975,  -4.5719],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.4521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6078, -2.3559, -1.8412, -3.1609,  2.0768,  4.1926, -2.0537, -3.6769,
        -4.2979,  0.6191, -3.9793,  3.0460,  2.9205, -2.6457,  0.9192, -2.1317,
        -1.7231,  1.1912,  4.5125, -1.6588], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4720, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9973, -3.7138, -2.0741, -4.4811,  4.6010, -1.9139,  0.7952, -3.8171,
        -2.2449, -3.6046,  2.9293,  2.9256, -1.7920,  1.3359, -3.2135, -2.7996,
         1.1347,  5.2118, -2.6180, -0.5509], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7444, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.2422, -2.1372,  3.5427, -1.3052, -1.5174, -1.3985,  0.0454, -1.3548,
         4.8481, -2.5689,  1.4182, -1.8003, -0.1043, -2.2970,  5.9760, -2.3847,
         0.4492, -1.0738, -2.2878,  1.1123], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.1540, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.5646, -1.2502, -2.5988, -0.2357, -2.2714,  0.2157, -4.6139,  4.5417,
        -0.9971,  1.9046, -1.6619, -2.9102,  1.7253,  4.3424, -1.9039, -0.5644,
        -4.4430, -2.5913, -5.9708,  1.4437], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5561,   1.3228,  -4.3541,  -0.7437,  -2.4268,  -1.9406,  -1.8199,
          4.9826,  -2.0678,  -3.1588, -16.3135,  -6.2950,  -2.6318,  -3.0727,
          0.0906,   1.3383,  -1.2196,  -5.6007,  -5.6208,  -2.6128],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.5794, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.0321, -0.8668, -2.2680, -3.2459,  1.5988,  4.9537, -1.9619,  1.4981,
        -1.8925, -2.5086, -6.5472,  0.2252,  1.0084, -2.5234,  0.0457, -1.3029,
        -5.4378, -4.3125,  3.1437, -1.5324], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1979, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 2.0761,  5.4641, -3.7917,  0.7416, -1.7757, -2.5511, -3.9136,  2.4355,
         2.0325, -1.7555,  0.6628, -2.0862,  0.4969, -3.9424,  2.9032,  2.7343,
        -2.1159,  0.1765, -2.8478, -1.9866], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3521, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2403, -2.7841, -2.7239, -3.0902, -2.1994, -2.5568, -0.4540, -0.9740,
         4.2756, -2.2552,  0.0097, -4.2735, -3.8928, -0.4713,  3.3977, -4.1782,
         1.2773, -2.3341, -2.1036, -0.1875], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3879, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -0.9978,   0.8601,  -2.0275,  -0.6551, -13.6402,   3.4452,  -0.2524,
         -2.5269,  -0.4162,  -1.4859,  -0.6738,   2.2064,   5.0610,  -2.6632,
          0.7607,  -6.1422,  -2.9498, -33.5338,  -3.3630,  -2.0894],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.3742,  -2.2483,   2.3471,   4.7922,  -3.7546,  -5.6687,  -9.6032,
        -17.8092,  -3.9731, -18.9582,  -5.0026,  -0.1272,  -0.8617,   5.7483,
         -4.2268,  -1.5571,  -1.4888,  -3.3268,  -0.1472,   3.9705],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.2135, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-17.9005,  -1.4870,  -8.1329,   3.4921, -18.0911,  -2.0306,  -6.2824,
        -17.1435,   1.4093,   3.1957,  -1.9283,  -4.6178,  -3.8944,  -2.5702,
         -3.5034,  -3.6855,   2.8906,  -2.8938,  -0.7295,   0.4328],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1735, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  4.1489,   0.8745,  -4.7332,  -1.1799,  -1.5963,  -1.1016,  -2.0586,
          1.6501,  -1.9627,  -2.0172,  -7.3455,  -9.1924,  -4.4361, -10.5407,
         -3.9595,  -1.7714,  -1.4741,   4.4036,  -6.2696,   0.5752],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.3993, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.5995, -2.1600, -3.5550, -0.8592, -4.0623, -1.4971,  1.8114,  3.4841,
        -2.8418, -3.1889, -0.1031, -6.9376,  3.3378,  0.6206, -2.9436, -1.0964,
        -0.8379, -2.2199,  3.7032,  4.3149], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6716, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.7047, -3.8241, -1.1604,  1.2677, -4.3458, -2.2674, -3.1490, -1.2137,
        -4.3801,  1.2171,  2.6340, -2.5481,  1.2131, -1.2195, -2.9958,  2.2062,
         1.5426, -3.7329,  0.7602, -9.7562], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.6228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.6070,  -2.4829,  -1.6779,  -3.4165,  -3.3187,  -3.5408,  -5.0262,
         -4.5205,  -2.4898,  -1.6822,  -2.4029,  -5.2219,   2.6180,   1.9682,
        -10.7375,  -7.0197, -20.9866, -11.2149,  -3.5801, -10.5689],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.9454, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  3.4533,   4.3284,  -1.2823,   0.9430,  -2.6364,  -1.6089, -11.5787,
          2.9444,  -2.2362,  -1.3877,  -4.1976,  -1.8479,  -5.2762,  -0.2466,
          0.9149,  -1.6069,  -0.4760,  -0.5524,  -1.0241,   3.9766],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-0.9699, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -5.3956,  -6.3792, -17.0994,  -5.3736,  -6.8309,  -5.0170,  -4.5589,
         -3.7651,  -4.2654,  -6.2696,  -2.8854,  -3.8620,  -3.9991, -19.6492,
         -5.3101,  -4.3577,  -5.1480,  -5.7001,  -5.8839,  -5.5605],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.3655, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.2307,  -5.3381,   0.1267,  -1.9301,   1.3345,  -2.5024,   0.8962,
         -7.0265,  -8.4412,  -6.5421,   2.9217,   0.2071,  -2.6680,   0.9551,
         -1.4000,   0.7204, -16.4852,  -2.1925,  -3.7398,  -7.8849],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.0610, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.4648, -2.2006,  4.3993, -1.2247,  0.4379, -3.2337, -1.9844,  0.8326,
         3.6653, -1.5060, -1.9321, -0.5820, -2.0941,  3.0363, -0.3932, -3.0486,
         0.8189, -1.9856,  0.7874, -3.6286], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.5150, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -4.8709,  -1.7278,  -4.2327,  -2.7324, -16.7708,  -4.8986,  -5.6562,
         -4.3145,  -3.1042,  -1.0025,   0.2872,   1.8557,  -5.3419,  -0.7394,
         -2.2681,  -3.6026,   2.3030,   2.0948,  -2.0923,  -0.5469],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8681, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  0.5099,  -2.2165,  -0.8060,  -6.5402,   6.0299,  -2.7184,   0.1743,
         -3.5364,  -0.6829,  -6.1104,   0.9709,   2.0210,  -1.2850,  -0.4405,
         -0.3331, -17.8109,   2.2039,   3.9534,  -5.1734,  -3.4177],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.7604, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-91.4062,  -6.3251,  -1.8373,  -1.6986,  -8.3801,  -4.3168,  -2.6650,
         -6.0335,  -3.7937,  -0.9446,  -3.5805,  -0.5792,  -1.6720,  -1.7255,
          0.5596,   2.8470,  -3.7511,   1.1192,  -2.4170,  -0.6731],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.8637, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.5102,  0.7128, -5.3952, -5.6880, -3.7607, -4.0504,  0.4703,  0.9426,
         4.4960, -2.4253, -2.7090, -1.9433, -1.9019,  0.9382, -6.8229, -4.8080,
        -1.0418, -1.4415,  0.4434, -3.3186], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-2.0407, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.3461,  3.0624, -2.0837,  1.3976, -2.1561, -2.8999,  1.5753,  3.4465,
        -4.8145,  0.8768, -1.0987, -0.7908,  1.2803,  4.7967, -3.0741,  0.7926,
        -2.1632,  0.4079, -6.1028,  1.3869], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4753, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.9155, -5.9569,  0.0670,  6.3207, -3.7121, -1.3439, -2.7295, -1.1141,
        -0.9761,  4.9855, -1.6484,  1.6863, -0.5739, -0.7517,  0.2165,  4.5965,
        -2.9512,  1.2338, -1.0593, -3.0861], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.3856, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.4278, -3.6151, -1.7006, -6.3277, -5.3795, -4.3969, -6.8922, -5.6056,
        -7.1674, -3.8224, -2.7891,  2.8798, -3.2465, -2.3440, -3.4958, -8.6375,
        -7.4074, -1.6244, -3.6564, -3.2663], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-4.0961, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-4.8299e-01, -3.6050e+00,  2.7562e+00,  5.1915e+00, -6.0224e+00,
        -1.4474e+00, -3.2988e+01, -7.1032e+00, -6.5485e+00, -3.3890e+00,
        -4.5587e-01,  6.0691e-01, -1.0331e-02, -4.7775e+00,  6.4719e-01,
        -1.4011e+00,  6.8793e-01, -1.7129e+00,  3.5426e+00, -2.9228e+00],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.9717, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.6229,  0.9705, -3.0571, -0.2374, -2.8002,  1.6460,  1.5825, -1.4466,
         0.7676, -1.2870, -0.1215,  0.5501,  5.5053, -7.3278,  0.6269, -2.7377,
        -1.7146, -3.1101,  2.6717,  2.8754], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.4633, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.2312,  2.4578,  3.1072, -1.6529,  0.5612, -5.4266, -2.4552, -4.3733,
        -0.5115, -2.4065, -4.2444, -1.8896, -1.0957, -4.7676,  2.6250, -4.4759,
        -5.7348, -0.0472, -3.1424, -0.2277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7966, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 4.9986, -3.5526, -0.6314, -4.3065, -0.3344, -4.6820,  3.0622,  3.5281,
        -1.7852, -2.3253, -3.1213,  0.2818, -3.4306,  3.6941,  3.1949, -1.4590,
         0.5396, -1.2572, -6.9266, -4.0546], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9284, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.0550,   1.9726,   2.9076,  -2.2936,  -2.1205, -10.7625,  -5.6718,
         -4.3362,  -4.6274,  -0.5900,   1.0430,   4.1139,  -5.1875,  -3.1279,
         -2.1518,  -5.1679,  -4.5747,  -9.1975,  -4.9145,  -4.8849],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.3313, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  2.9982,   2.5865,  -3.5024,   1.5705,  -1.9179,   0.5663,  -3.5470,
          3.9833,   1.9837,  -5.3658,  -1.0936,  -3.3654,   0.9327,  -4.9118,
          2.7822,   3.4619,  -2.6608,  -0.0195,  -6.2844, -15.0308],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.3417, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.0914,  -0.6430,  -3.6125,   1.1311,   2.5451,  -1.5122,   0.6370,
         -2.8983,  -0.5867,  -4.5088,   1.8525,   2.8572,  -6.5348,  -0.4233,
        -18.1954,  -8.1128,  -5.2194,  -3.2377,  -0.4415, -12.6841],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-3.1339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([  1.1378,   3.9554,  -4.7029,  -1.6869,  -1.4258,  -0.2690,  -1.1146,
          5.3744,  -7.0790,  -2.5228, -11.2388, -30.3436,  -8.7762,  -5.7993,
         -5.4656,  -5.3259,  -9.0912,   4.6854,  -3.4061,  -0.1479],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-4.1621, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.3519, -2.6961, -0.8846, -7.2854,  1.6980,  0.7335, -2.0757, -0.4139,
        -1.8214, -4.3569,  1.5388,  2.0358, -2.6219, -1.1911, -0.7887, -0.2155,
         1.6906,  5.4738, -3.0897, -0.2738], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.7096, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-0.8929,  3.6314, -4.0363, -2.8148, -2.8165, -0.3102, -5.0529,  2.0326,
         2.9783, -2.0719, -0.1996, -2.0695, -0.4412,  1.8122,  4.2634, -1.3410,
         1.3515, -0.8897, -0.8874,  1.8284], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.2963, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-2.3766, -6.5070,  1.6746,  2.8034, -2.7152,  1.7503, -2.7700, -0.5368,
        -2.2647,  4.7661, -3.0764, -3.9739, -3.1163, -4.9667, -4.7218,  0.2638,
         1.5279, -2.4412,  1.4407, -1.4954], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.3368, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 1.1162, -2.5291, -1.6423, -1.8348, -1.4583,  1.8876,  3.9392, -3.8554,
        -0.3894, -2.5342, -1.4886,  0.0635,  2.6792, -1.6824, -1.6641, -3.2191,
        -6.0548,  2.4789,  1.0306, -4.3977], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.9778, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.9289,  0.3696, -2.8846,  3.0232,  4.3401, -1.8834,  0.3212, -4.0695,
         0.8871, -4.8491,  3.4690,  2.3382, -5.9141,  0.5651, -2.1809,  0.2154,
        -3.7121,  4.5719, -1.7950, -1.9410], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-0.6529, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-1.3584, -3.5873,  3.9806, -4.5804, -0.8594, -1.5037, -5.1316,  0.9658,
         4.6677, -2.8502,  1.1361, -4.8697, -1.1929, -3.8031,  1.9385,  2.5806,
        -2.0561,  1.3347, -2.7397, -3.0696], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0499, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -3.3937, -10.6198,  -5.9768,  -5.8148,  -5.5026,  -2.8225,  -1.0373,
         -6.8653,   4.0640,  -3.0831,  -4.4107,   0.4171,  -2.9439,  -1.4699,
         -8.2896,   5.5133,  -1.3336,   0.1707,  -2.2247,  -2.2458],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.8934, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.5611, -2.1936, -2.3211,  1.4541, -0.0877, -3.9560, -1.1264, -2.9095,
        -2.2952, -4.9219,  3.4546,  1.6420, -2.4069,  0.2176, -1.7491, -3.4139,
         2.7424,  2.1764, -4.1904, -3.5512], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.1437, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ 0.6652,  2.4553, -3.9850,  0.0822, -1.9601, -2.5880, -3.5331,  2.5879,
         3.3480, -2.5851, -2.4027, -1.6452, -0.3346, -0.4073, -1.6773, -4.1545,
        -2.7654, -1.9865, -2.1597,  1.3657], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.0840, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3377,  -8.4546, -12.3334,  -0.1209,  -1.2329,   1.5896,   2.5642,
         -1.7169,  -0.8948,  -4.0213,  -1.2290,  -2.6536,   3.8009,  -1.8851,
          0.4299,  -1.7377,  -0.8928,  -3.6450,   3.9835,  -0.3259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.5057, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.3218,  -3.6829,  -6.1508,  -2.6746,  -3.2036,  -4.3008,  -5.9306,
         -1.6034,  -3.6961,  -5.6878,  -0.5495,  -1.8342, -10.7673,  -4.9731,
        -12.6104,  -6.0618, -24.2748,  -5.5288,  -6.2082, -11.9522],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-6.1506, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.0260,  -2.5957,   0.7040,   4.3138,  -0.9822,  -0.2807,  -0.6542,
         -3.4842,   2.6019,   5.1310,  -3.6352,  -9.7818, -10.0276,  -6.4495,
         -2.2484,  -4.8608,   0.2332,   0.2197,   0.0148,  -4.0707],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.8939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-3.6434,  0.6785, -2.8511, -0.1385, -1.3660,  4.6311, -2.0777, -0.2072,
        -9.8839, -2.7397, -7.1748, -0.6603, -0.3126,  5.8693, -6.7945,  0.0388,
        -2.8531, -1.8365, -3.2703, -0.3644], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-1.7478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -1.7556,   0.9492,   3.4919,  -2.6295,   0.6282,  -4.8311, -16.9759,
         -2.5549,  -4.1293,  -2.1811,   0.6032,   3.0737,  -4.4049,   0.0510,
         -3.6313,   0.9930,  -4.1266,   3.7544,   0.4519,  -5.7993],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-1.9511, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -2.4406,   2.8827,  -1.9728,   0.0446,  -2.4280,  -1.4403,  -4.1434,
        -17.6732,  -2.0628,   0.0540,  -3.3239,  -0.8369,  -4.7576,   3.7969,
          2.8068,  -2.7513,   1.2485,  -1.5922,  -0.7015,  -4.8259],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-2.0058, device='cuda:0', grad_fn=<MeanBackward0>)
