tensor([-8.0271, -7.8931, -7.2244, -7.6460, -8.1173, -7.3953, -7.8808, -7.9231,
        -7.7705, -8.6433, -7.7838, -7.4392, -8.1601, -8.3562, -8.3326, -7.2677,
        -7.8028, -6.4608, -8.2641, -8.7752], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6525, -7.1186, -7.3082, -7.4223, -8.0713, -8.1492, -7.1412, -7.6958,
        -6.9042, -7.9215, -8.3400, -8.4171, -8.2512, -6.9690, -8.0442, -7.3607,
        -7.3280, -8.2662, -8.4396, -8.0582], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6370, -8.5752, -9.5999, -8.8993, -7.5096, -8.5501, -6.9385, -8.7070,
        -9.1812, -7.6161, -7.5722, -6.4020, -7.0818, -6.2148, -6.9508, -6.9197,
        -8.3741, -8.9220, -9.2338, -8.9729], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0429, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3442, -9.1962, -7.9066, -8.2738, -7.8369, -6.3153, -8.5165, -9.0263,
        -7.6280, -7.5531, -7.7318, -6.8914, -6.9914, -7.7369, -7.9541, -8.0614,
        -7.8162, -8.0416, -7.7662, -8.5670], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9077, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2257, -8.8942, -7.6096, -7.4320, -7.7631, -7.8397, -7.2945, -7.7925,
        -7.9251, -8.2876, -7.8882, -7.8918, -8.0064, -7.3169, -8.2294, -8.1483,
        -8.2832, -7.0290, -7.8016, -6.8007], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1797, -7.7728, -8.5957, -7.9220, -7.2570, -7.9757, -6.4776, -8.6416,
        -7.4435, -7.3349, -7.0983, -8.1358, -7.1753, -7.0076, -7.1161, -9.1351,
        -8.3720, -8.7116, -7.9951, -7.1061], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7227, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7654, -8.9306, -7.1020, -8.0325, -7.7662, -8.2970, -8.4786, -8.3716,
        -8.5182, -8.8492, -7.8879, -7.1328, -7.7184, -6.8147, -8.2925, -8.1162,
        -8.0337, -7.8148, -8.3477, -6.9913], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9631, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6453, -7.9367, -7.3601, -9.2405, -7.4898, -6.9737, -7.5281, -8.9873,
        -8.3545, -8.9530, -8.0884, -7.1463, -7.6115, -7.1818, -7.6897, -8.0641,
        -7.9275, -6.8228, -7.2970, -6.6077], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8716, -7.5888, -8.1276, -8.6525, -7.7068, -7.2248, -7.8202, -6.8479,
        -8.4515, -8.3747, -7.7013, -7.4027, -7.6665, -8.1154, -7.4386, -7.8232,
        -7.8467, -7.9654, -8.3435, -7.9864], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8478, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1717, -8.0902, -8.6185, -7.7876, -7.8005, -7.8358, -7.7988, -7.4525,
        -8.0923, -8.2565, -7.9759, -7.5908, -7.9072, -6.4305, -8.4231, -9.1832,
        -7.7269, -7.4066, -7.0960, -7.2612], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7953, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2694, -7.4193, -7.3132, -7.1117, -8.0392, -8.4182, -9.5924, -9.5516,
        -6.9819, -8.3491, -7.4898, -8.3155, -8.3878, -7.7339, -8.5179, -6.7926,
        -7.9655, -6.6705, -7.3548, -8.0248], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8650, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9833, -7.6472, -9.6616, -9.1598, -7.5477, -8.5663, -7.3186, -8.4686,
        -8.5800, -7.5529, -7.4933, -6.9073, -8.8123, -7.4464, -8.1383, -8.5489,
        -8.4776, -7.8718, -7.6527, -8.2862], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.1560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8925, -7.0926, -8.5785, -8.1128, -8.1447, -8.6711, -8.0289, -6.9833,
        -7.6389, -7.8565, -7.2324, -7.4701, -7.7922, -8.2939, -8.5035, -7.6640,
        -8.8717, -9.0280, -8.2717, -7.3022], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0215, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1539, -8.5237, -7.7725, -8.0079, -8.4535, -7.6894, -7.1431, -7.3929,
        -7.6993, -6.9117, -6.4075, -6.7447, -8.7581, -9.4424, -8.1672, -7.2713,
        -8.3885, -7.0907, -7.5047, -8.1273], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7325, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1584, -7.7651, -7.3736, -7.2995, -8.2646, -8.3163, -8.1436, -7.0586,
        -7.8632, -6.8992, -7.4957, -8.2095, -8.2342, -8.3911, -7.0302, -7.9039,
        -6.6051, -8.3105, -8.6633, -7.9759], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8347, -7.1944, -7.4089, -7.8734, -8.4901, -7.7358, -7.4760, -7.6802,
        -6.5929, -8.4565, -9.1889, -7.7408, -7.1997, -7.1392, -7.3173, -7.1995,
        -7.5417, -8.2517, -8.0485, -8.3031], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7337, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7051, -7.1929, -7.8284, -6.4266, -8.1545, -8.4936, -8.1618, -7.0757,
        -7.3739, -6.7856, -6.7905, -7.0377, -7.9016, -8.5955, -8.1702, -7.4700,
        -7.7643, -6.6004, -8.4014, -8.7456], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1178, -7.4636, -7.7940, -7.1932, -7.6811, -7.4259, -7.2280, -7.3726,
        -7.3087, -7.3049, -7.7903, -7.8732, -9.0528, -8.8758, -7.2948, -8.4186,
        -7.5245, -8.2202, -7.9390, -7.3393], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7109, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3688, -7.7720, -7.7323, -7.2366, -8.3823, -8.8870, -9.1104, -9.6471,
        -7.1567, -8.1938, -7.0475, -8.1063, -8.3640, -8.5263, -7.6552, -7.1645,
        -7.0086, -6.4949, -8.1173, -8.8345], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9403, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2589, -8.4586, -8.0579, -7.1217, -7.8100, -6.4856, -8.0441, -8.3657,
        -7.5693, -7.3823, -7.6669, -6.9043, -8.3669, -8.4019, -7.7205, -7.3282,
        -7.7498, -6.9282, -7.9388, -8.4249], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7492, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4111, -7.9605, -7.6675, -7.7867, -8.1786, -8.6499, -7.0491, -7.7829,
        -7.9570, -7.5917, -8.2367, -7.6748, -8.2330, -8.9981, -7.5542, -7.2491,
        -7.7111, -7.8445, -7.3755, -6.7702], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.2300,  -7.6471,  -7.2473,  -8.2578,  -9.0679,  -7.4308,  -7.5545,
         -6.9566,  -8.2822,  -7.2110,  -6.6857,  -8.3122, -10.0901,  -9.7432,
         -9.0272,  -7.3977,  -8.6843,  -7.6840,  -8.6570,  -8.7420],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6521, -7.3116, -7.1450, -7.8651, -8.4622, -8.6350, -7.8192, -8.6294,
        -8.1746, -8.5867, -8.2675, -7.1892, -7.6862, -7.0805, -7.7062, -8.3057,
        -7.6126, -7.1202, -8.1768, -6.4247], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1619, -6.1835, -5.9425, -7.5258, -8.4617, -8.4699, -7.3639, -6.9038,
        -7.8905, -7.1529, -7.5759, -6.7187, -7.8669, -8.6057, -9.2161, -8.1532,
        -7.2685, -8.0556, -7.7072, -7.1282], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5173, -7.7072, -7.4005, -8.1528, -8.6159, -7.8667, -7.2686, -7.8077,
        -7.1265, -8.0464, -8.3320, -7.9441, -7.3591, -8.3648, -6.6887, -8.3459,
        -8.1335, -7.6241, -7.3759, -9.4872], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8582, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5903, -8.5876, -7.7566, -7.8121, -8.1379, -8.7766, -7.7307, -7.3182,
        -7.9041, -7.3241, -7.6838, -8.3224, -8.0447, -8.0530, -7.1611, -7.8866,
        -6.6947, -7.3182, -7.9233, -7.8571], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7942, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.4141, -7.2303, -8.3521, -6.8215, -8.5535, -8.0965, -7.2536, -7.3740,
        -9.6136, -7.0746, -7.2281, -7.0038, -7.5368, -7.8304, -8.3884, -8.6826,
        -8.0603, -7.2895, -8.1767, -7.3791], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8680, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2013, -7.4368, -7.3573, -8.0209, -8.0480, -8.2488, -8.3815, -8.8299,
        -8.8111, -8.9345, -7.7192, -7.3992, -6.9358, -7.5598, -7.2267, -7.5377,
        -8.0233, -7.9815, -8.7578, -7.8277], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9119, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7395, -7.7263, -7.0784, -7.7544, -7.1242, -7.6156, -8.2835, -8.3623,
        -8.1523, -7.1219, -7.8027, -6.6182, -8.4164, -9.0240, -8.0499, -7.4558,
        -7.4946, -6.8003, -7.2050, -8.8302], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7828, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.6106, -7.7784, -8.9060, -8.2331, -9.0953, -8.1323, -7.2231, -7.8397,
        -7.0949, -6.8215, -7.6066, -7.9771, -8.2087, -8.1397, -7.9604, -7.7057,
        -8.6213, -7.9064, -7.5274, -7.3187], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9353, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7739, -7.3823, -7.7938, -7.2195, -8.5167, -8.8863, -7.6008, -7.7284,
        -6.8908, -7.3668, -7.8120, -8.2543, -7.9050, -7.0817, -7.6607, -7.4022,
        -7.2353, -7.9409, -8.3435, -7.8279], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7311, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1327, -8.1565, -6.4697, -7.9519, -8.0054, -7.9204, -6.9231, -7.8338,
        -7.5522, -7.7181, -8.1249, -8.0695, -7.5685, -7.7685, -8.4533, -7.9018,
        -8.6312, -7.8000, -7.1522, -7.6772], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7405, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5664, -7.1673, -7.2869, -7.8713, -8.2848, -7.9005, -7.1971, -7.8307,
        -7.5078, -7.3921, -8.2064, -8.0250, -7.6459, -7.0355, -7.8119, -6.8165,
        -8.1875, -8.4130, -7.6752, -7.2340], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6528, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1063, -8.0894, -7.0341, -7.2704, -8.0026, -8.6585, -7.8835, -7.1903,
        -7.8822, -7.1988, -7.1451, -8.0347, -7.6468, -7.0101, -7.9343, -7.2263,
        -6.9921, -7.9567, -8.2390, -8.1030], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6302, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.7398, -7.9637, -7.7290, -8.0537, -8.7940, -7.9200, -8.8261, -8.4865,
        -8.7298, -7.6964, -7.0603, -7.6105, -6.9883, -7.5542, -8.0995, -8.1091,
        -7.8703, -7.6246, -8.9758, -7.5360], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.6646,  -7.3875,  -7.6468,  -8.4329,  -7.5937,  -8.4700,  -9.0014,
         -8.2477, -10.0255,  -8.2345,  -7.0914,  -7.6781,  -6.4040,  -8.4857,
         -8.9766,  -7.7615,  -7.5490,  -6.8927,  -7.1362,  -7.2885],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.8984, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2408, -8.2219, -8.0599, -8.8051, -9.1077, -7.6591, -7.5017, -6.9389,
        -8.3737, -7.1760, -6.8409, -8.0794, -9.7651, -9.1462, -8.8040, -7.3462,
        -8.2893, -7.0711, -8.0973, -8.0620], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1580, -7.1650, -7.8695, -8.1014, -7.8857, -7.6395, -7.8322, -7.2270,
        -7.8425, -8.6530, -7.9141, -7.3817, -7.9144, -8.0598, -7.4952, -8.3151,
        -8.1545, -8.0792, -8.3679, -7.8271], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3520, -8.5563, -7.6336, -7.2769, -7.6881, -6.9129, -8.2276, -8.6528,
        -7.6380, -7.1118, -7.6088, -7.0398, -6.9648, -7.6128, -8.0149, -8.1228,
        -8.6088, -7.8224, -8.8952, -8.3433], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8542, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5343, -7.9788, -8.4008, -8.6512, -7.7503, -8.9910, -7.7249, -8.6086,
        -7.9886, -7.1948, -7.8071, -7.8872, -7.8599, -8.2876, -7.7389, -6.8534,
        -8.0254, -7.1711, -7.3016, -7.8308], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8793, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5936, -7.2658, -7.2204, -6.8029, -7.7071, -8.2477, -9.0980, -9.0116,
        -8.7829, -8.8427, -7.9223, -7.2599, -8.0255, -7.7846, -7.4990, -8.0897,
        -7.6409, -7.2281, -8.2403, -6.6147], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8777, -8.1014, -7.9370, -6.9665, -7.9307, -6.3916, -8.6777, -8.8610,
        -7.6833, -7.7080, -6.9381, -7.3521, -7.0857, -6.5721, -7.8196, -9.5048,
        -9.9272, -8.5812, -7.1335, -8.3342], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9192, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0793, -7.4102, -7.8820, -8.2075, -8.2203, -8.0236, -7.1364, -7.5684,
        -7.7960, -7.8011, -7.4122, -7.8612, -7.1595, -8.3056, -8.2378, -7.7428,
        -7.5189, -6.9748, -8.3402, -7.4660], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9851, -8.7410, -8.9232, -8.8051, -7.4755, -7.5009, -7.1406, -8.6284,
        -7.7003, -7.1505, -8.2466, -9.1300, -8.8013, -8.7339, -7.9140, -7.0780,
        -7.9893, -6.9663, -8.2589, -8.6636], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6933, -7.9524, -8.1473, -8.6771, -8.7164, -7.1664, -8.4679, -8.1621,
        -8.1722, -7.0433, -7.8497, -6.6773, -8.1860, -8.3660, -7.6809, -6.9801,
        -7.9976, -7.2194, -7.0392, -7.9915], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8093, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4097, -8.0066, -7.2746, -7.9140, -7.4149, -7.3686, -8.2128, -8.2963,
        -8.0143, -7.0154, -7.7681, -6.6934, -8.1779, -8.4144, -7.4587, -7.5646,
        -7.9175, -8.1507, -7.9247, -8.6227], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8310, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9261, -7.5931, -7.2049, -7.6500, -7.9055, -6.9680, -7.8683, -8.2063,
        -7.8648, -7.9094, -7.5829, -7.6994, -8.1589, -8.4671, -8.0652, -7.1655,
        -7.7173, -8.2038, -7.5584, -7.9827], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1035, -9.5722, -9.2313, -8.4993, -6.7328, -8.5415, -9.0198, -7.1190,
        -7.0137, -7.7371, -8.2452, -8.7956, -7.9220, -6.2750, -7.3404, -6.7332,
        -7.9133, -7.9660, -7.2437, -7.1486], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8577, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1067, -8.5323, -8.2196, -7.2662, -7.9498, -6.5692, -8.1804, -8.3671,
        -7.7557, -6.8093, -7.6410, -7.4329, -7.4891, -8.2333, -8.5223, -8.1228,
        -7.0449, -7.7760, -6.8042, -7.2795], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0814, -9.0253, -7.7495, -7.3750, -6.9245, -7.6976, -7.3435, -7.8481,
        -8.0683, -8.1290, -8.6102, -7.8727, -8.6121, -7.6258, -8.1998, -7.8502,
        -7.0797, -7.6358, -7.4685, -8.0065], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8602, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1968, -7.8730, -7.7172, -7.2764, -8.0974, -7.9228, -6.9888, -7.8335,
        -7.0695, -7.5149, -8.0675, -8.0677, -7.4278, -7.8744, -8.3578, -7.1027,
        -7.9394, -8.8019, -7.8149, -8.4524], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7698, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5893, -8.0646, -7.9478, -8.7402, -7.8367, -8.7218, -7.4090, -8.0741,
        -7.9881, -7.1729, -7.5361, -7.2909, -7.5872, -8.2682, -7.6409, -7.1610,
        -8.2851, -6.9737, -7.5966, -8.1977], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8041, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7647, -6.4999, -7.7668, -7.9915, -8.4992, -7.8335, -7.1730, -7.9008,
        -6.7050, -8.5118, -9.0811, -7.7732, -7.2458, -7.6251, -7.0869, -7.1750,
        -7.5158, -8.1067, -7.8632, -8.3401], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7230, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5956, -7.1803, -7.2368, -7.7803, -8.0513, -7.9474, -7.3893, -8.0280,
        -6.7553, -8.5021, -8.8800, -7.6266, -7.5733, -7.0205, -7.3799, -7.3753,
        -7.8691, -7.7621, -7.0691, -7.8584], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6441, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9760, -8.1088, -7.2676, -7.0579, -7.3795, -8.5440, -8.0558, -9.0580,
        -7.8768, -7.0448, -7.9706, -6.6972, -7.9769, -7.7957, -7.7596, -7.1523,
        -8.2627, -7.3229, -7.5896, -8.1253], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1017, -8.0165, -6.5449, -8.6892, -9.0264, -7.6527, -7.7321, -6.9813,
        -8.1520, -7.3110, -7.2780, -8.1526, -9.1339, -7.9440, -6.7199, -8.0598,
        -6.6396, -6.5777, -7.3894, -7.9699], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6536, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9602, -7.7926, -7.7634, -7.9245, -8.4714, -8.0740, -7.0370, -7.7904,
        -8.0115, -7.3930, -7.9339, -8.0814, -8.0019, -7.9466, -7.9609, -7.4398,
        -7.7897, -8.5193, -7.9374, -7.0548], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8442, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9433, -8.0515, -8.4074, -7.3857, -8.3812, -7.6570, -7.0717, -8.1475,
        -6.9095, -8.3907, -9.0836, -7.6325, -7.6199, -7.8784, -7.1161, -8.1828,
        -8.4971, -7.7781, -7.3753, -7.6337], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8071, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.1659, -6.8947, -7.8329, -7.3885, -8.5140, -7.8941, -6.9862, -7.1934,
        -7.7033, -6.2547, -7.4400, -7.4358, -8.4251, -7.3455, -7.1788, -6.9578,
        -6.3763, -6.6560, -7.3700, -7.1876], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2600, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2620, -8.1951, -6.5066, -8.0831, -8.3397, -8.6527, -7.6185, -7.1782,
        -8.1121, -6.8853, -8.2820, -8.5903, -7.7752, -7.5036, -7.8509, -6.6658,
        -8.2191, -8.3313, -8.5154, -7.9087], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8238, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4412, -7.5942, -6.8298, -6.9234, -7.6490, -8.1587, -8.0550, -7.3940,
        -7.7900, -7.6960, -7.9929, -8.4587, -8.2307, -7.0134, -7.7665, -8.4676,
        -7.2897, -8.0317, -7.6297, -8.3477], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7380, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3374, -7.4861, -7.8821, -8.2444, -8.2391, -8.1273, -7.7306, -8.6209,
        -8.9875, -7.7940, -7.1710, -6.9776, -8.0786, -7.1896, -6.6920, -7.6465,
        -9.2876, -9.1914, -8.7387, -7.1734], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.5448, -9.2987, -7.3955, -8.4598, -7.0325, -8.2312, -8.5633, -7.6386,
        -7.2984, -7.5505, -8.3446, -7.5586, -6.8292, -7.1444, -8.3519, -8.1800,
        -9.4516, -8.9786, -7.3516, -8.3677], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3125, -8.6650, -7.7129, -7.7767, -8.3908, -7.4300, -8.3469, -8.1145,
        -8.2301, -7.0753, -7.8245, -6.5801, -8.0472, -8.3088, -7.6722, -7.1486,
        -7.4637, -7.5252, -7.0172, -6.9919], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7317, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3893, -8.4551, -8.1868, -7.0134, -7.8338, -6.2782, -8.7284, -8.8730,
        -7.6423, -7.4287, -7.5518, -6.6035, -6.9823, -7.5106, -8.4011, -8.0071,
        -7.6925, -8.4306, -7.8866, -7.1332], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7514, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.5511, -7.1551, -8.4500, -8.5305, -6.8439, -8.3492, -9.1931, -8.1321,
        -7.2667, -7.8306, -6.3161, -8.1535, -8.6129, -7.7962, -7.3078, -7.5156,
        -8.3762, -7.7095, -6.8922, -8.0529], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8518, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4690, -8.2592, -8.4945, -8.0926, -7.2328, -7.8318, -6.5046, -8.1305,
        -8.6701, -7.6366, -7.3773, -7.7580, -7.4276, -7.2380, -7.7473, -7.7072,
        -8.0062, -8.4019, -7.8619, -7.5624], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7705, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1470, -8.5557, -7.9892, -7.1725, -7.8480, -7.2874, -7.2472, -7.8952,
        -8.2173, -8.0709, -7.8006, -7.5843, -7.4882, -7.7200, -8.5620, -7.8845,
        -7.2875, -7.7976, -7.5552, -7.6426], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7877, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2927, -7.7807, -6.8218, -7.6525, -8.1479, -8.3406, -8.2381, -7.1291,
        -7.9746, -6.5943, -8.7448, -8.7325, -7.6076, -7.6361, -6.9172, -7.5963,
        -7.3863, -7.7303, -8.2762, -9.1463], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7873, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7778, -7.7130, -8.7208, -7.0094, -7.7562, -8.2961, -8.3941, -7.2535,
        -7.8305, -9.1361, -7.0374, -6.9168, -7.7924, -8.6500, -7.9960, -9.0758,
        -8.6025, -7.4321, -8.5651, -6.9894], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6014, -7.7054, -8.9177, -7.6286, -6.6825, -7.4215, -8.4496, -8.7587,
        -9.8763, -8.7262, -7.1993, -8.8147, -8.1930, -7.9264, -8.3499, -8.6515,
        -7.5308, -7.1427, -7.4389, -6.6338], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9824, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2372, -7.5563, -6.7295, -7.9306, -8.0999, -8.5810, -8.0214, -6.8066,
        -7.7847, -7.2294, -7.7590, -8.3607, -7.8404, -7.0239, -7.5298, -7.7763,
        -7.2575, -8.0118, -8.7588, -9.4037], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4287, -7.8874, -6.8371, -7.2039, -7.7480, -8.3806, -7.8178, -8.3829,
        -7.9177, -7.6208, -8.3222, -8.8499, -7.4265, -7.3920, -7.9009, -7.0510,
        -8.1788, -8.3199, -7.6457, -7.2475], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7780, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2287, -8.1331, -8.3332, -7.9494, -7.5510, -7.8915, -6.7676, -8.0109,
        -8.4205, -7.6157, -7.0881, -7.6856, -7.1523, -7.1324, -7.8493, -8.1139,
        -8.0045, -8.0613, -7.9322, -8.3152], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7618, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3440, -7.8710, -8.0893, -7.8598, -6.9579, -7.5782, -7.2592, -8.3247,
        -8.5791, -7.6786, -7.4454, -7.8718, -8.2147, -7.7304, -7.4322, -7.6834,
        -8.1604, -8.6814, -7.6317, -8.7346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8564, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -6.9190,  -8.2452,  -9.1320,  -7.7817,  -7.6376,  -7.0188,  -7.1813,
         -7.2226,  -6.5307,  -8.0731, -10.0010,  -9.4948,  -9.2669,  -6.9913,
         -7.5455,  -8.2378,  -8.0199,  -8.4331,  -8.0959,  -7.3616],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-7.9595, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3787, -7.8275, -8.2087, -7.8762, -7.2813, -7.5062, -7.2760, -8.2617,
        -8.5616, -7.6687, -7.1509, -7.5873, -6.7278, -8.0493, -8.3488, -7.6267,
        -7.2134, -7.5222, -7.0396, -7.6443], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6878, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1649, -7.7959, -7.3006, -7.9899, -7.6537, -7.5436, -8.2041, -7.9988,
        -6.9617, -7.6620, -7.9135, -7.1346, -7.7620, -7.8673, -7.8929, -7.1693,
        -7.9874, -7.1237, -8.0029, -8.4720], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7300, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1165, -7.8736, -7.3769, -7.3873, -8.0520, -7.8234, -7.0465, -8.4705,
        -6.3913, -8.2421, -8.7752, -7.7352, -7.4988, -7.6488, -7.1261, -7.8482,
        -8.0730, -8.3365, -7.1331, -7.1659], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6560, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5366, -8.0398, -8.7617, -7.9192, -7.0754, -7.5036, -7.1619, -7.4412,
        -7.7497, -8.2482, -8.0183, -7.2574, -7.7360, -7.5178, -7.6376, -8.4197,
        -7.9735, -6.9343, -7.7311, -7.5105], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7087, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8558, -8.0720, -7.9692, -7.1590, -7.7987, -6.9161, -8.2513, -8.7057,
        -7.8173, -7.1021, -7.6065, -7.4669, -7.2457, -6.8083, -8.2119, -9.8547,
        -9.7770, -9.0675, -7.0538, -8.3723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9556, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8678, -7.8708, -6.6614, -8.2661, -8.3277, -8.4710, -7.7914, -7.2316,
        -7.7883, -6.7132, -7.8830, -8.0439, -8.4734, -8.0422, -6.9739, -7.8785,
        -7.3489, -7.4950, -8.3624, -8.4879], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7489, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0617, -6.3682, -7.8702, -9.6808, -9.9404, -8.4828, -7.2097, -8.6895,
        -7.6187, -8.5345, -8.9772, -7.7217, -7.3425, -7.0339, -7.8500, -7.0226,
        -6.4530, -7.2909, -9.1471, -9.6711], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9983, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([ -7.4713,  -7.2132,  -6.7572,  -8.8700, -10.6480,  -9.9885,  -9.6559,
         -7.3079,  -8.2298,  -8.1063,  -8.0671,  -8.8750,  -8.0166,  -7.4417,
         -7.8844,  -7.2775,  -7.6849,  -8.0732,  -7.9000,  -7.0443],
       device='cuda:0', grad_fn=<SumBackward1>) tensor(-8.1256, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0719, -8.2295, -8.2244, -7.1851, -7.9368, -7.5373, -7.3411, -8.0124,
        -8.3118, -7.9425, -7.4625, -7.7573, -7.2032, -7.4915, -8.2524, -8.0090,
        -7.1670, -8.4442, -6.9575, -7.3909], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7464, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2675, -8.0480, -7.6747, -6.9187, -7.6108, -6.9824, -8.3283, -8.8027,
        -7.7004, -7.4560, -7.6821, -7.2433, -7.4919, -8.0642, -8.2608, -8.1052,
        -7.4464, -7.8215, -6.8777, -6.7713], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6277, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0904, -7.3722, -8.1440, -7.2627, -8.1969, -8.4913, -8.1461, -7.7178,
        -7.8742, -6.5426, -7.4329, -7.9530, -8.3628, -7.9720, -7.3799, -7.8200,
        -6.4922, -8.5179, -9.2310, -7.6818], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7841, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5088, -7.5545, -8.1645, -7.6048, -6.9842, -7.7119, -7.3981, -7.3010,
        -8.3400, -8.0790, -8.2252, -6.9565, -7.9737, -6.5132, -9.0741, -9.2865,
        -7.5032, -7.6039, -6.9973, -6.6128], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6697, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8100, -7.1445, -8.0425, -6.4166, -8.0123, -8.5385, -8.4486, -7.9001,
        -7.1545, -7.8144, -6.7616, -7.5035, -7.8934, -8.0130, -7.7710, -9.2451,
        -7.3669, -8.1632, -9.0517, -7.7724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8412, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1874, -8.3078, -7.1880, -8.3477, -7.7193, -7.1907, -7.3674, -7.9135,
        -8.9245, -9.1066, -7.4917, -7.8304, -6.9959, -6.7890, -7.3981, -7.9569,
        -8.1959, -7.8113, -7.3875, -7.9450], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8527, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6705, -6.7452, -7.5242, -7.7902, -7.0208, -7.5744, -8.6394, -9.1185,
        -8.5792, -7.1664, -8.4985, -7.0361, -6.9166, -8.0859, -8.5043, -7.5919,
        -6.9680, -8.4198, -7.1117, -6.9111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6936, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6077, -7.8925, -7.2604, -7.8603, -8.0877, -7.8866, -8.2591, -7.7837,
        -8.0610, -7.0642, -7.9294, -8.1713, -8.1070, -7.2539, -7.5997, -6.8593,
        -8.0636, -8.2592, -7.8180, -6.8519], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7338, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0438, -7.7714, -8.4118, -7.7064, -8.1940, -7.8316, -7.7465, -8.4255,
        -8.7144, -7.6127, -7.4668, -7.9701, -7.6540, -7.4850, -8.1388, -8.2732,
        -8.1043, -7.2769, -7.7428, -6.3205], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7945, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0018, -7.8970, -7.0590, -7.9440, -8.1339, -7.0627, -7.5405, -8.2541,
        -8.2603, -7.8151, -9.0685, -7.3196, -8.5998, -9.1038, -8.1550, -7.4537,
        -7.7506, -6.2171, -7.5822, -7.9264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8572, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.8337, -8.7423, -9.4351, -7.0938, -7.7607, -6.4346, -7.3426, -6.9501,
        -7.6993, -8.2083, -6.7810, -7.9632, -7.8271, -8.5163, -9.2150, -7.5606,
        -7.3506, -6.9200, -7.5688, -6.9318], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7567, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5274, -6.6393, -7.3747, -7.8151, -8.6773, -7.7872, -7.6380, -8.3113,
        -6.9743, -7.8646, -8.6405, -7.6638, -7.4469, -7.1162, -6.7168, -6.6415,
        -7.4216, -8.1417, -8.5788, -7.8438], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6410, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-9.1068, -8.9741, -9.3930, -7.2559, -7.4075, -7.4205, -7.6639, -8.3359,
        -8.2559, -7.6379, -7.1467, -7.7577, -7.1470, -7.8031, -8.1031, -7.5983,
        -7.0486, -7.8164, -6.6639, -8.4576], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8497, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1578, -7.5943, -7.1522, -7.2100, -7.8551, -8.2406, -7.8528, -8.0116,
        -7.9639, -6.6457, -8.1353, -8.4552, -7.7126, -7.2467, -8.1461, -6.8304,
        -8.4581, -8.7127, -7.5422, -7.3723], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7148, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4445, -8.4269, -9.1038, -7.5432, -7.5203, -7.0313, -7.2080, -7.3537,
        -7.8187, -7.7892, -7.2291, -8.2317, -7.3618, -7.0311, -7.9591, -8.0372,
        -7.2350, -7.6565, -7.6522, -8.2261], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7430, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9778, -6.9928, -7.1107, -7.9880, -8.5628, -8.3316, -7.3634, -7.6817,
        -7.4532, -7.3602, -8.3087, -8.2470, -8.3252, -7.0537, -7.8823, -6.6398,
        -7.3581, -8.0032, -8.5346, -7.9306], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7553, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6379, -8.1068, -7.3080, -8.2498, -8.3071, -8.4806, -7.7279, -7.6683,
        -6.5495, -8.2645, -9.0032, -7.7633, -7.3859, -7.6174, -6.5279, -7.6824,
        -7.8608, -8.4021, -7.4855, -7.6217], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7329, -7.3185, -7.2317, -7.9377, -8.4857, -7.9594, -7.6308, -7.8464,
        -6.5787, -7.8990, -8.2031, -8.1074, -7.7560, -7.0700, -7.8985, -6.7684,
        -8.1061, -8.2108, -7.7364, -7.1064], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6792, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5562, -7.7417, -7.2630, -7.6652, -7.7311, -7.5911, -7.3118, -8.2241,
        -7.9383, -7.8217, -7.9063, -7.4055, -7.8781, -8.0572, -7.1051, -7.5858,
        -6.8498, -7.2473, -7.6466, -7.5264], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8635, -8.4958, -7.4379, -7.9846, -7.8223, -8.0027, -8.6819, -7.7238,
        -8.4607, -7.6825, -8.2057, -8.1499, -7.0320, -7.7138, -7.5365, -7.5245,
        -8.2090, -8.3643, -8.3025, -7.0380], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9116, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5532, -7.6056, -6.9061, -7.7243, -7.8798, -8.2387, -8.2943, -8.8636,
        -7.9628, -8.5171, -8.0025, -6.8959, -7.7930, -6.8985, -7.9247, -8.2932,
        -8.2423, -7.7303, -7.0583, -7.9452], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8165, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0207, -8.0063, -8.8504, -7.7258, -7.2475, -7.7972, -6.8007, -8.0362,
        -8.3751, -7.8226, -7.3016, -7.6549, -6.7936, -7.3713, -7.7368, -8.3349,
        -8.0418, -7.7788, -7.7442, -8.4232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7432, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0108, -8.5151, -7.6735, -7.9023, -6.8394, -7.9778, -6.6744, -7.9473,
        -8.1968, -7.9456, -7.2067, -8.1503, -6.7723, -8.5095, -8.2090, -7.5370,
        -7.2976, -7.5694, -7.8418, -7.5041], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7140, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7895, -7.8639, -9.0462, -9.4089, -8.3589, -7.1453, -8.5254, -7.1107,
        -8.6401, -8.5701, -7.4708, -7.4370, -7.4801, -7.7495, -7.5486, -7.9089,
        -7.9465, -7.7896, -7.1936, -7.7927], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8888, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4039, -6.8697, -7.1095, -7.4248, -7.1032, -6.7656, -8.1030, -8.1319,
        -7.5860, -7.5449, -7.7426, -7.8263, -8.0002, -7.9280, -6.6265, -7.1409,
        -7.5816, -7.8403, -7.8718, -8.0289], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5315, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9565, -6.7405, -6.5130, -8.6012, -7.7885, -8.7736, -8.9968, -7.3617,
        -8.0965, -6.7200, -8.2647, -8.2265, -7.8415, -7.0618, -7.6353, -8.6765,
        -7.8607, -7.7685, -7.8761, -8.0596], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4901, -7.3204, -9.0994, -9.4024, -8.2169, -7.1794, -8.1724, -7.1073,
        -7.5626, -7.8916, -8.2902, -7.8786, -7.3759, -7.6832, -7.3945, -7.1491,
        -7.8737, -8.5012, -7.8622, -7.2801], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7866, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5387, -6.9623, -7.7226, -7.3772, -8.1354, -7.7788, -7.1427, -7.6912,
        -7.7292, -7.1592, -7.9976, -7.9078, -7.1027, -8.3395, -6.6435, -8.2261,
        -8.8379, -7.6792, -6.7588, -7.1451], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5938, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8682, -8.7283, -8.6656, -8.4541, -7.0998, -8.5054, -7.3761, -6.8156,
        -7.2459, -7.2926, -7.3118, -7.3836, -7.2891, -6.9740, -6.3562, -8.0970,
        -9.2046, -9.2632, -8.6730, -7.0307], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7817, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4275, -7.4261, -7.6830, -6.6971, -7.2189, -7.3293, -7.8885, -7.9181,
        -8.1873, -8.5441, -8.2782, -7.9188, -8.8635, -7.4110, -7.8523, -7.1437,
        -6.7595, -6.7203, -7.2226, -7.8896], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6190, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.4231, -7.7529, -7.1730, -7.9625, -7.2816, -7.3097, -8.0483, -8.2761,
        -7.9505, -7.9505, -7.8938, -7.1359, -7.4046, -8.1541, -8.4271, -8.1383,
        -7.1659, -7.7242, -7.4827, -6.9931], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7324, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.9507, -7.2179, -8.3323, -8.4684, -7.5309, -7.3745, -7.2771, -8.6293,
        -7.5435, -7.5809, -8.7366, -8.5763, -7.8980, -7.5298, -8.5975, -8.7711,
        -7.5546, -7.4088, -7.0093, -7.1724], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9080, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2405, -7.8568, -6.9836, -7.5605, -8.2125, -8.2738, -8.2368, -7.0249,
        -7.7580, -6.5853, -7.8868, -8.1265, -8.0121, -7.4701, -8.6291, -6.7210,
        -8.2746, -8.4212, -8.6961, -7.6534], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7812, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5149, -7.8111, -8.1260, -7.0364, -8.0609, -7.9943, -7.9644, -7.1572,
        -7.8758, -6.4847, -8.4763, -8.9629, -7.5608, -7.4283, -7.5988, -6.7509,
        -7.5353, -8.2938, -8.5369, -7.7740], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7472, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0451, -7.3641, -7.6510, -8.1654, -8.3805, -7.7699, -8.7299, -7.1406,
        -8.0287, -7.9453, -8.1259, -7.5280, -7.7235, -6.7008, -8.5065, -8.4137,
        -7.6778, -7.6083, -6.8606, -8.0229], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7694, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1520, -7.3376, -8.4396, -8.3747, -8.8262, -8.1535, -9.1631, -7.5966,
        -7.1486, -8.1585, -6.5107, -7.9333, -8.1280, -7.8688, -7.2450, -7.8257,
        -8.6645, -7.0532, -7.3939, -9.9814], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9977, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5644, -8.3629, -8.7751, -7.9709, -7.0194, -8.0675, -7.2757, -8.2234,
        -7.9448, -7.3377, -7.4385, -6.8433, -7.8359, -7.6307, -7.6708, -8.0207,
        -8.9972, -8.7087, -7.3273, -8.5569], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8786, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7580, -8.2935, -7.8611, -7.1044, -7.5872, -6.9422, -8.2086, -7.8159,
        -7.2191, -7.5477, -8.2676, -7.2443, -7.3072, -8.0039, -8.0016, -8.0881,
        -8.4513, -7.5844, -7.6502, -8.5849], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7761, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0363, -7.5924, -8.1382, -8.1884, -8.3037, -7.9676, -8.5806, -8.2093,
        -8.9289, -7.7831, -7.5950, -6.9182, -7.9409, -6.6636, -8.5294, -8.6623,
        -7.5226, -7.1243, -7.6289, -6.7168], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8015, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8485, -8.0139, -9.0030, -7.8443, -7.2311, -7.0007, -7.4392, -7.1114,
        -6.6452, -6.9483, -8.8980, -9.3759, -8.2339, -7.1159, -8.5105, -7.0654,
        -8.2362, -8.6181, -8.1077, -7.4626], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8355, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9945, -8.0478, -6.7194, -8.2919, -8.9654, -7.6708, -7.4616, -7.6776,
        -7.2812, -7.0920, -7.6976, -8.4126, -8.0505, -8.2561, -7.9990, -7.5515,
        -8.1860, -8.5990, -7.7411, -6.9548], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7825, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.6738, -7.0627, -8.3969, -8.1589, -9.6357, -8.6843, -7.2053, -8.5984,
        -7.8168, -7.7293, -8.3462, -7.7302, -7.8150, -6.8463, -7.9484, -7.3552,
        -7.4955, -7.9542, -7.9824, -7.9564], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8696, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9863, -7.6489, -8.4823, -8.9340, -7.4989, -7.4207, -7.6398, -7.3826,
        -7.3796, -8.0709, -8.4388, -8.0411, -7.1483, -7.8362, -6.4868, -8.6227,
        -8.9130, -7.7521, -7.9743, -6.5027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8080, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6599, -7.4149, -7.8831, -7.1318, -7.1073, -7.6802, -7.9194, -7.6649,
        -8.1056, -8.2916, -7.1393, -8.1802, -8.2855, -7.9122, -7.4373, -7.9495,
        -6.7379, -8.3492, -9.2090, -7.8777], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7968, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4536, -7.1473, -7.9924, -8.1262, -8.0231, -6.8011, -7.4486, -6.2726,
        -7.2261, -7.8048, -7.9179, -7.5265, -6.8565, -7.9182, -6.6837, -7.9713,
        -8.1408, -7.5702, -7.0066, -7.3697], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4129, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1103, -8.1108, -7.3631, -7.0436, -7.3127, -7.7222, -7.2985, -6.8772,
        -6.5788, -8.2245, -7.7951, -9.2182, -8.2647, -6.9898, -8.2771, -7.1382,
        -7.7641, -8.0289, -7.7687, -7.9453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6916, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8055, -8.1506, -8.0338, -6.7662, -7.6958, -6.6156, -7.8517, -8.5268,
        -7.2105, -7.2663, -6.8309, -7.3038, -7.5109, -7.8616, -7.8964, -6.5864,
        -7.3958, -7.7273, -6.7522, -7.1925], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4490, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.0818, -7.1353, -6.7981, -7.2266, -7.5849, -7.1965, -7.1093, -7.3816,
        -7.8037, -7.1577, -6.7577, -8.1411, -9.0152, -9.2271, -8.0890, -7.1282,
        -7.6900, -7.4285, -7.9541, -7.9897], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5948, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9015, -7.4546, -6.1459, -7.8447, -7.9446, -7.9717, -7.4559, -7.4627,
        -6.1925, -7.8782, -7.9433, -8.3592, -7.7576, -6.7813, -6.5359, -6.7493,
        -6.6112, -7.2735, -8.0567, -7.9587], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3640, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4462, -6.8084, -6.5083, -6.3639, -7.7704, -7.8180, -7.5143, -7.3465,
        -7.2286, -6.7724, -6.7989, -7.2381, -7.7356, -7.8040, -7.2640, -7.6738,
        -7.0160, -7.6289, -8.5982, -7.4331], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.2884, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.1624, -6.6323, -6.6732, -7.3495, -7.8277, -7.8153, -7.8196, -7.6822,
        -7.2433, -8.4514, -8.5164, -7.4645, -7.2315, -6.9158, -7.1260, -6.8814,
        -7.4020, -8.0421, -7.8973, -6.8162], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4475, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.9086, -7.4467, -6.5347, -7.5418, -7.9066, -7.5214, -7.0692, -7.2133,
        -6.7284, -6.9531, -7.3006, -7.5522, -7.8313, -7.9757, -7.2065, -7.9849,
        -8.3384, -7.7589, -7.1238, -7.4860], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4191, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4305, -6.2045, -6.8336, -6.7117, -7.6896, -8.4986, -8.3648, -7.5777,
        -8.7419, -7.5050, -7.6187, -8.0012, -6.9909, -7.3383, -7.5102, -7.1014,
        -7.9949, -7.8604, -8.3277, -6.7169], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5509, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7825, -9.3646, -9.7605, -8.9339, -7.0196, -8.2705, -7.7280, -8.2972,
        -8.3611, -8.4610, -7.4186, -6.9770, -7.2155, -6.8378, -6.4914, -6.5833,
        -8.4368, -9.3337, -9.5369, -9.0691], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-8.0939, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6755, -8.0018, -7.8344, -6.7243, -7.5820, -6.5797, -7.2659, -7.7415,
        -7.5172, -7.3099, -7.9143, -6.3658, -7.9317, -7.8526, -7.8086, -7.8009,
        -7.8606, -6.5329, -6.9495, -7.5403], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4395, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3751, -7.6432, -6.6683, -8.1216, -7.9318, -7.4856, -7.3988, -7.1175,
        -6.9561, -6.9924, -7.8030, -8.1356, -7.6701, -7.0561, -7.5900, -6.5517,
        -7.4662, -7.8761, -7.8827, -7.9107], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4816, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.9655, -6.9775, -7.3283, -7.6261, -7.3157, -7.8425, -7.4913, -6.7765,
        -7.2147, -6.7325, -7.6737, -8.2971, -7.6158, -7.2311, -6.9629, -6.5883,
        -6.3810, -6.7957, -7.3267, -8.0292], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3086, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5488, -7.2931, -6.7605, -7.7503, -7.0516, -6.9983, -7.5670, -8.1117,
        -8.0674, -7.4264, -8.6703, -8.4225, -8.3592, -7.6071, -6.9323, -7.8764,
        -8.0341, -7.2111, -7.7800, -7.5182], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6493, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8996, -6.8389, -7.5768, -7.4550, -6.6005, -7.4729, -8.2144, -8.1058,
        -7.5660, -8.1229, -8.0249, -8.3175, -7.9347, -6.8356, -7.9671, -7.6289,
        -8.0332, -8.0034, -8.0998, -6.8303], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6764, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8884, -7.1264, -7.8082, -7.4795, -7.2473, -7.5186, -6.3204, -7.9844,
        -8.4239, -7.5982, -7.4678, -7.0134, -6.4783, -7.5306, -7.5390, -8.2075,
        -7.4289, -6.9179, -7.3273, -6.1921], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3249, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4775, -8.3564, -9.6932, -9.8264, -9.1810, -7.0021, -8.2136, -7.9548,
        -8.7713, -7.2129, -7.3672, -7.3100, -6.7622, -7.6668, -6.9388, -6.4196,
        -7.7680, -8.9368, -9.7240, -8.4083], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.9995, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.2624, -6.9425, -7.6582, -7.7190, -7.8960, -6.9342, -7.7648, -6.6267,
        -7.4164, -7.8729, -7.3843, -6.9016, -7.3245, -6.6527, -7.5943, -7.8694,
        -7.7948, -6.9657, -7.4847, -8.2453], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2124, -8.0429, -7.1534, -8.2564, -7.4130, -7.8418, -6.6729, -7.7310,
        -6.4309, -7.1098, -7.6062, -8.1727, -7.8579, -6.5779, -7.7328, -6.1905,
        -7.8313, -8.2984, -7.6790, -7.0096], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4910, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8689, -6.8752, -6.7296, -7.4420, -8.5889, -8.7090, -8.2133, -7.3297,
        -8.3753, -6.7526, -7.7858, -8.1573, -8.0553, -7.5327, -6.5921, -7.5122,
        -7.6082, -7.6409, -8.2052, -7.4981], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6736, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.3151, -7.8123, -7.7071, -8.2349, -7.9469, -7.7964, -8.1766, -7.6800,
        -6.8470, -7.3671, -6.7698, -6.8042, -6.4806, -8.1491, -9.4098, -9.7688,
        -8.7122, -7.4059, -8.3187, -7.5111], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8107, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.4959, -8.2362, -8.5088, -7.7031, -7.4303, -6.8291, -6.9560, -7.0479,
        -7.5888, -8.1985, -8.1337, -7.1019, -7.6482, -6.8523, -6.6386, -7.5832,
        -7.6065, -7.9195, -8.0485, -8.4346], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5481, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.0274, -8.0268, -7.1430, -7.5962, -6.7751, -7.9584, -8.0622, -7.7408,
        -7.1441, -7.7071, -8.2249, -7.2753, -6.7214, -8.6045, -9.5360, -9.2220,
        -8.4461, -7.2647, -7.9409, -7.1784], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.8298, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.3651, -7.3920, -7.3780, -6.8842, -7.7936, -6.9739, -6.5570, -7.5602,
        -8.1429, -8.3904, -7.6176, -7.9111, -7.8543, -7.9752, -7.7187, -7.9678,
        -6.7875, -7.5725, -6.2198, -7.6223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5342, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.2488, -7.6921, -6.8732, -7.4967, -6.7067, -6.9394, -7.6701, -8.0297,
        -8.0039, -6.7121, -7.3144, -6.7021, -7.2854, -7.8537, -8.0758, -7.4185,
        -6.8287, -7.8549, -6.4770, -7.6088], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3896, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.8389, -7.1464, -7.8849, -7.4355, -6.7400, -7.6355, -7.2545, -6.7557,
        -7.4035, -7.7136, -7.7165, -7.7947, -7.7901, -7.5915, -7.2339, -7.8698,
        -7.8134, -6.6002, -7.2265, -7.4973], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4471, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.7156, -7.2732, -7.4869, -6.5984, -7.7614, -8.5475, -7.7228, -7.1720,
        -6.6700, -7.0977, -6.7849, -8.0058, -8.1730, -9.1005, -9.2888, -7.0542,
        -7.9435, -7.2460, -8.3783, -8.3232], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.7172, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.4886, -7.3302, -7.9911, -7.6013, -6.6032, -7.2136, -6.4229, -7.8978,
        -8.2429, -7.3212, -7.2349, -7.4979, -7.1493, -7.1267, -7.6377, -8.0585,
        -7.8232, -6.8923, -7.5783, -7.1223], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7365, -7.4673, -6.9664, -7.1107, -7.7018, -7.4239, -6.8365, -7.2443,
        -7.0376, -7.3398, -7.6597, -7.9249, -7.2148, -7.2898, -8.0834, -6.8102,
        -7.6727, -7.8762, -7.9432, -6.8846], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3612, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.5972, -7.8009, -7.1371, -7.5373, -8.0377, -7.5379, -6.6351, -7.1387,
        -7.2256, -6.8340, -7.6163, -7.9642, -7.7911, -7.2846, -7.5864, -7.1136,
        -6.8309, -7.6287, -7.7746, -6.7858], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3929, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7121, -6.8431, -7.2038, -7.6025, -7.8772, -7.6319, -7.3872, -7.7481,
        -7.0017, -7.7305, -7.9865, -6.9039, -7.3184, -7.4759, -6.8947, -7.6423,
        -7.7653, -7.7620, -6.7868, -7.5351], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3905, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1171, -7.4899, -7.1963, -7.5472, -7.4416, -6.8786, -7.6338, -7.8719,
        -7.7865, -7.4961, -7.7177, -7.0919, -7.8697, -8.1791, -7.6382, -7.1106,
        -7.7866, -7.6619, -7.8615, -8.3027], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.6339, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-8.1627, -7.6936, -6.8674, -7.6344, -6.8628, -7.5737, -7.9701, -7.8253,
        -6.8429, -7.3228, -7.2505, -7.4662, -8.0008, -7.7388, -6.9305, -7.9508,
        -7.4306, -7.1017, -7.8416, -7.4920], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4980, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.7851, -7.5247, -6.8729, -6.8373, -7.3552, -8.0718, -7.8444, -7.6633,
        -7.5502, -6.5160, -8.1305, -8.5527, -7.5981, -7.1794, -7.3652, -6.8224,
        -6.9633, -7.1115, -7.9588, -8.3067], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.4505, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-7.6657, -7.0243, -7.4966, -7.4023, -7.0686, -7.7240, -8.0959, -7.9462,
        -6.9638, -7.6479, -7.5399, -6.8768, -7.1438, -7.6323, -8.0478, -8.0610,
        -7.7540, -7.2568, -7.9498, -8.2006], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.5749, device='cuda:0', grad_fn=<MeanBackward0>)
tensor([-6.8326, -7.1430, -7.0825, -6.9148, -7.6626, -8.0032, -7.9253, -6.5430,
        -7.6679, -6.7516, -7.7106, -7.9965, -7.5987, -6.9362, -7.3902, -6.7775,
        -7.8245, -7.9014, -7.2640, -7.1993], device='cuda:0',
       grad_fn=<SumBackward1>) tensor(-7.3563, device='cuda:0', grad_fn=<MeanBackward0>)
