{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"../../\")\n",
    "sys.path.append(\"../../models/episodic_transformer_memory_ppo\")\n",
    "\n",
    "from environments.Minigrid_Memory.env.env_minigrid import Minigrid  \n",
    "from models.episodic_transformer_memory_ppo.model import ActorCriticModel\n",
    "import os \n",
    "\n",
    "import numpy as np\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "import torch\n",
    "import yaml\n",
    "import time\n",
    "from moviepy.editor import ImageSequenceClip, VideoFileClip\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_transformer_memory(trxl_conf, max_episode_steps, device):\n",
    "    \"\"\"Create the empty episodic memory plus the mask and window indices used to read it.\n",
    "\n",
    "    Arguments:\n",
    "        trxl_conf {dict} -- Transformer configuration; the keys \"memory_length\", \"num_blocks\" and \"embed_dim\" are read\n",
    "        max_episode_steps {int} -- Maximum number of steps per episode\n",
    "        device {torch.device} -- Device the memory tensor is moved to (mask and indices are created without an explicit device)\n",
    "\n",
    "    Returns:\n",
    "        memory {torch.Tensor} -- Zeros of shape (1, max_episode_steps, num_blocks, embed_dim)\n",
    "        memory_mask {torch.Tensor} -- Strictly lower-triangular matrix of ones, shape (memory_length, memory_length)\n",
    "        memory_indices {torch.Tensor} -- Long tensor of shape (max_episode_steps, memory_length) when max_episode_steps >= memory_length\n",
    "    \"\"\"\n",
    "    window = trxl_conf[\"memory_length\"]\n",
    "\n",
    "    # Episodic memory tensor: one slot per episode step, per block\n",
    "    memory_shape = (1, max_episode_steps, trxl_conf[\"num_blocks\"], trxl_conf[\"embed_dim\"])\n",
    "    memory = torch.zeros(memory_shape).to(device)\n",
    "\n",
    "    # Mask used in attention: row t has ones only in the columns before t\n",
    "    memory_mask = torch.tril(torch.ones((window, window)), diagonal=-1)\n",
    "\n",
    "    # The first (window - 1) rows all address the fixed window [0, window)\n",
    "    warmup_rows = torch.arange(0, window).unsqueeze(0).repeat_interleave(window - 1, dim=0).long()\n",
    "    # Every following row is the same window shifted forward by one more step\n",
    "    window_starts = range(max_episode_steps - window + 1)\n",
    "    sliding_rows = torch.stack([torch.arange(start, start + window) for start in window_starts]).long()\n",
    "    memory_indices = torch.cat((warmup_rows, sliding_rows))\n",
    "\n",
    "    return memory, memory_mask, memory_indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation environment: project wrapper around the 'MiniGrid-MemoryS13-v0' task.\n",
    "# NOTE(review): the meaning of the second positional argument (31) is not visible in this notebook -- confirm against env_minigrid.Minigrid.\n",
    "env = Minigrid('MiniGrid-MemoryS13-v0', 31)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training configuration that belongs to the checkpoint under evaluation\n",
    "# (swap in the commented path below for the long-term variant).\n",
    "config_path = '/opt/Memory-RL-Codebase/configs/GTRXL_configs/MinigridMemory/Static/MinigridMemory_SHORT_TERM.yaml'\n",
    "#config_path = '/opt/Memory-RL-Codebase/configs/GTRXL_configs/MinigridMemory/Static/MinigridMemory_LONG_TERM.yaml'\n",
    "\n",
    "# Prefer the second GPU (the device this notebook was written for), but fall back\n",
    "# so the notebook still runs on single-GPU and CPU-only machines.\n",
    "if torch.cuda.device_count() > 1:\n",
    "    device = torch.device('cuda:1')\n",
    "elif torch.cuda.is_available():\n",
    "    device = torch.device('cuda:0')\n",
    "else:\n",
    "    device = torch.device('cpu')\n",
    "\n",
    "# safe_load only builds plain Python objects from the YAML file\n",
    "with open(config_path, 'r') as file:\n",
    "    config = yaml.safe_load(file)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the actor-critic network from the YAML config and the env's observation/action spaces, then move it to `device`.\n",
    "# The action space is passed as a one-element tuple holding `env.action_space.n`.\n",
    "agent = ActorCriticModel(config, env.observation_space, (env.action_space.n,), env.max_episode_steps).to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "checkpoint_path = '/opt/Memory-RL-Codebase/autorun/checkpoints copy/MinigridMemory/MinigridMemory/GTXL/GTXL_MinigridMemory_SHORT_TERM_static/2024_09_29-00_10_39.pt' #'/opt/Memory-RL-Codebase/autorun/checkpoints/MinigridMemory/MinigridMemory/GTXL/GTXL_MinigridMemory_SHORT_TERM/2024_09_28-21_03_04.pt'\n",
    "#checkpoint_path = '/opt/Memory-RL-Codebase/autorun/checkpoints/MinigridMemory/MinigridMemory/GTXL/GTXL_MinigridMemory_LONG_TERM_static/best_model/2024_09_29-00_10_37.pt'\n",
    "\n",
    "# map_location remaps the stored tensors onto `device`, so the checkpoint also loads on\n",
    "# machines that lack the GPU it was saved from.\n",
    "# torch.load unpickles the file -- only load checkpoints from trusted sources.\n",
    "checkpoint = torch.load(checkpoint_path, map_location=device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ActorCriticModel(\n",
       "  (conv1): Conv2d(3, 32, kernel_size=(8, 8), stride=(4, 4))\n",
       "  (conv2): Conv2d(32, 64, kernel_size=(4, 4), stride=(2, 2))\n",
       "  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))\n",
       "  (lin_hidden): Linear(in_features=3136, out_features=128, bias=True)\n",
       "  (transformer): Transformer(\n",
       "    (activation): ReLU()\n",
       "    (linear_embedding): Linear(in_features=128, out_features=128, bias=True)\n",
       "    (pos_embedding): SinusoidalPosition()\n",
       "    (transformer_blocks): ModuleList(\n",
       "      (0-5): 6 x TransformerBlock(\n",
       "        (attention): MultiHeadAttention(\n",
       "          (values): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (keys): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (queries): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (fc_out): Linear(in_features=128, out_features=128, bias=True)\n",
       "        )\n",
       "        (gate1): GRUGate(\n",
       "          (Wr): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Ur): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Wz): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Uz): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Wg): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Ug): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (sigmoid): Sigmoid()\n",
       "          (tanh): Tanh()\n",
       "        )\n",
       "        (gate2): GRUGate(\n",
       "          (Wr): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Ur): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Wz): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Uz): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Wg): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (Ug): Linear(in_features=128, out_features=128, bias=False)\n",
       "          (sigmoid): Sigmoid()\n",
       "          (tanh): Tanh()\n",
       "        )\n",
       "        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "        (norm_kv): LayerNorm((128,), eps=1e-05, elementwise_affine=True)\n",
       "        (fc): Sequential(\n",
       "          (0): Linear(in_features=128, out_features=128, bias=True)\n",
       "          (1): ReLU()\n",
       "        )\n",
       "      )\n",
       "    )\n",
       "  )\n",
       "  (lin_policy): Linear(in_features=128, out_features=128, bias=True)\n",
       "  (lin_value): Linear(in_features=128, out_features=128, bias=True)\n",
       "  (policy_branches): ModuleList(\n",
       "    (0): Linear(in_features=128, out_features=3, bias=True)\n",
       "  )\n",
       "  (value): Linear(in_features=128, out_features=1, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Restore the trained weights stored under \"model_state_dict\" in the checkpoint.\n",
    "agent.load_state_dict(checkpoint[\"model_state_dict\"])\n",
    "# Switch the module to evaluation mode for inference.\n",
    "agent.eval()\n",
    "# Last expression of the cell, so the notebook displays the module repr that .to() returns.\n",
    "agent.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): redundant -- `agent` was already moved to `device` when it was built and again after the weights were loaded.\n",
    "agent = agent.to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make `device` the default for newly created tensors, so tensors built without an explicit device\n",
    "# (e.g. the mask and index tensors in init_transformer_memory) are allocated there as well.\n",
    "torch.set_default_device(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from itertools import permutations\n",
    "\n",
    "def generate_permutations(nums):\n",
    "    \"\"\"Return every ordering of `nums` as an integer.\n",
    "\n",
    "    Each ordering is turned into a number by concatenating the string form of its\n",
    "    items, e.g. [1, 2, 3] -> [123, 132, 213, 231, 312, 321]. Results follow the\n",
    "    order in which itertools.permutations yields the orderings.\n",
    "    \"\"\"\n",
    "    numbers = []\n",
    "    for ordering in permutations(nums):\n",
    "        digits = ''.join(str(item) for item in ordering)\n",
    "        numbers.append(int(digits))\n",
    "    return numbers\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'GTXL_MinigridMemory_SHORT_TERM_static'"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Name of the folder that contains the checkpoint file; the evaluation cell uses the same expression as `run_type`.\n",
    "checkpoint_path.split('/')[-2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode: 0, seed: 12345 Reward: 0, Steps: 56 Mean reward: 0.0, Mean steps: 0.4666666666666667\n",
      "Episode: 1, seed: 12354 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.007862919132149902, Mean steps: 0.9083333333333333\n",
      "Episode: 2, seed: 12435 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.015725838264299803, Mean steps: 1.35\n",
      "Episode: 3, seed: 12453 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.023597633136094674, Mean steps: 1.7833333333333334\n",
      "Episode: 4, seed: 12534 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.031460552268244575, Mean steps: 2.225\n",
      "Episode: 5, seed: 12543 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.03929684418145956, Mean steps: 2.691666666666667\n",
      "Episode: 6, seed: 13245 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.04714201183431953, Mean steps: 3.15\n",
      "Episode: 7, seed: 13254 Reward: 0.947810650887574, Steps: 49 Mean reward: 0.05504043392504931, Mean steps: 3.558333333333333\n",
      "Episode: 8, seed: 13425 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.06291222879684417, Mean steps: 3.9916666666666667\n",
      "Episode: 9, seed: 13452 Reward: 0.9797633136094674, Steps: 19 Mean reward: 0.07107692307692307, Mean steps: 4.15\n",
      "Episode: 10, seed: 13524 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.07893984220907298, Mean steps: 4.591666666666667\n",
      "Episode: 11, seed: 13542 Reward: 0.9392899408284023, Steps: 57 Mean reward: 0.086767258382643, Mean steps: 5.066666666666666\n",
      "Episode: 12, seed: 14235 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.09463905325443787, Mean steps: 5.5\n",
      "Episode: 13, seed: 14253 Reward: 0.9392899408284023, Steps: 57 Mean reward: 0.1024664694280079, Mean steps: 5.975\n",
      "Episode: 14, seed: 14325 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.11033826429980277, Mean steps: 6.408333333333333\n",
      "Episode: 15, seed: 14352 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.1182189349112426, Mean steps: 6.833333333333333\n",
      "Episode: 16, seed: 14523 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.1260552268244576, Mean steps: 7.3\n",
      "Episode: 17, seed: 14532 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.13390927021696253, Mean steps: 7.75\n",
      "Episode: 18, seed: 15234 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.14176331360946745, Mean steps: 8.2\n",
      "Episode: 19, seed: 15243 Reward: 0.9382248520710059, Steps: 58 Mean reward: 0.1495818540433925, Mean steps: 8.683333333333334\n",
      "Episode: 20, seed: 15324 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.15744477317554242, Mean steps: 9.125\n",
      "Episode: 21, seed: 15342 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.16528994082840237, Mean steps: 9.583333333333334\n",
      "Episode: 22, seed: 15423 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.17317061143984222, Mean steps: 10.008333333333333\n",
      "Episode: 23, seed: 15432 Reward: 0.9371597633136095, Steps: 59 Mean reward: 0.1809802761341223, Mean steps: 10.5\n",
      "Episode: 24, seed: 21345 Reward: 0.947810650887574, Steps: 49 Mean reward: 0.1888786982248521, Mean steps: 10.908333333333333\n",
      "Episode: 25, seed: 21354 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.19673274161735704, Mean steps: 11.358333333333333\n",
      "Episode: 26, seed: 21435 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.204577909270217, Mean steps: 11.816666666666666\n",
      "Episode: 27, seed: 21453 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.21244082840236692, Mean steps: 12.258333333333333\n",
      "Episode: 28, seed: 21534 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.2202771203155819, Mean steps: 12.725\n",
      "Episode: 29, seed: 21543 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.22812228796844186, Mean steps: 13.183333333333334\n",
      "Episode: 30, seed: 23145 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.23597633136094678, Mean steps: 13.633333333333333\n",
      "Episode: 31, seed: 23154 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.24383037475345173, Mean steps: 14.083333333333334\n",
      "Episode: 32, seed: 23415 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.25168441814595666, Mean steps: 14.533333333333333\n",
      "Episode: 33, seed: 23451 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.25955621301775156, Mean steps: 14.966666666666667\n",
      "Episode: 34, seed: 23514 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.26741025641025645, Mean steps: 15.416666666666666\n",
      "Episode: 35, seed: 23541 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.2752820512820513, Mean steps: 15.85\n",
      "Episode: 36, seed: 24135 Reward: 0.9328994082840236, Steps: 63 Mean reward: 0.28305621301775147, Mean steps: 16.375\n",
      "Episode: 37, seed: 24153 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.2909102564102564, Mean steps: 16.825\n",
      "Episode: 38, seed: 24315 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.29878205128205126, Mean steps: 17.258333333333333\n",
      "Episode: 39, seed: 24351 Reward: 0.947810650887574, Steps: 49 Mean reward: 0.30668047337278104, Mean steps: 17.666666666666668\n",
      "Episode: 40, seed: 24513 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.31454339250493096, Mean steps: 18.108333333333334\n",
      "Episode: 41, seed: 24531 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.3224151873767258, Mean steps: 18.541666666666668\n",
      "Episode: 42, seed: 25134 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.3302692307692307, Mean steps: 18.991666666666667\n",
      "Episode: 43, seed: 25143 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.33814102564102555, Mean steps: 19.425\n",
      "Episode: 44, seed: 25314 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.3460216962524654, Mean steps: 19.85\n",
      "Episode: 45, seed: 25341 Reward: 0.9392899408284023, Steps: 57 Mean reward: 0.3538491124260354, Mean steps: 20.325\n",
      "Episode: 46, seed: 25413 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.36170315581854035, Mean steps: 20.775\n",
      "Episode: 47, seed: 25431 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.3695660749506902, Mean steps: 21.216666666666665\n",
      "Episode: 48, seed: 31245 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.37742899408284014, Mean steps: 21.658333333333335\n",
      "Episode: 49, seed: 31254 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.38530966469428, Mean steps: 22.083333333333332\n",
      "Episode: 50, seed: 31425 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.39314595660749496, Mean steps: 22.55\n",
      "Episode: 51, seed: 31452 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.40099999999999986, Mean steps: 23.0\n",
      "Episode: 52, seed: 31524 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.4088629191321498, Mean steps: 23.441666666666666\n",
      "Episode: 53, seed: 31542 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.4167169625246547, Mean steps: 23.891666666666666\n",
      "Episode: 54, seed: 32145 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.4245532544378696, Mean steps: 24.358333333333334\n",
      "Episode: 55, seed: 32154 Reward: 0.9467455621301775, Steps: 50 Mean reward: 0.4324428007889544, Mean steps: 24.775\n",
      "Episode: 56, seed: 32415 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.4402879684418144, Mean steps: 25.233333333333334\n",
      "Episode: 57, seed: 32451 Reward: 0.9776331360946745, Steps: 21 Mean reward: 0.4484349112426033, Mean steps: 25.408333333333335\n",
      "Episode: 58, seed: 32514 Reward: 0.9371597633136095, Steps: 59 Mean reward: 0.45624457593688345, Mean steps: 25.9\n",
      "Episode: 59, seed: 32541 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.4641163708086783, Mean steps: 26.333333333333332\n",
      "Episode: 60, seed: 34125 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.4719615384615382, Mean steps: 26.791666666666668\n",
      "Episode: 61, seed: 34152 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.47982445759368814, Mean steps: 27.233333333333334\n",
      "Episode: 62, seed: 34215 Reward: 0.9467455621301775, Steps: 50 Mean reward: 0.48771400394477293, Mean steps: 27.65\n",
      "Episode: 63, seed: 34251 Reward: 0.9382248520710059, Steps: 58 Mean reward: 0.49553254437869804, Mean steps: 28.133333333333333\n",
      "Episode: 64, seed: 34512 Reward: 0.9382248520710059, Steps: 58 Mean reward: 0.5033510848126231, Mean steps: 28.616666666666667\n",
      "Episode: 65, seed: 34521 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.511205128205128, Mean steps: 29.066666666666666\n",
      "Episode: 66, seed: 35124 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.519050295857988, Mean steps: 29.525\n",
      "Episode: 67, seed: 35142 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.5269132149901379, Mean steps: 29.966666666666665\n",
      "Episode: 68, seed: 35214 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.5347672583826427, Mean steps: 30.416666666666668\n",
      "Episode: 69, seed: 35241 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.5426213017751477, Mean steps: 30.866666666666667\n",
      "Episode: 70, seed: 35412 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.5504753451676526, Mean steps: 31.316666666666666\n",
      "Episode: 71, seed: 35421 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.5583560157790923, Mean steps: 31.741666666666667\n",
      "Episode: 72, seed: 41235 Reward: 0.9382248520710059, Steps: 58 Mean reward: 0.5661745562130174, Mean steps: 32.225\n",
      "Episode: 73, seed: 41253 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.5740285996055223, Mean steps: 32.675\n",
      "Episode: 74, seed: 41325 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.5818915187376722, Mean steps: 33.11666666666667\n",
      "Episode: 75, seed: 41352 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.5897544378698221, Mean steps: 33.55833333333333\n",
      "Episode: 76, seed: 41523 Reward: 0, Steps: 19 Mean reward: 0.5897544378698221, Mean steps: 33.71666666666667\n",
      "Episode: 77, seed: 41532 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.597608481262327, Mean steps: 34.166666666666664\n",
      "Episode: 78, seed: 42135 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.6054891518737668, Mean steps: 34.59166666666667\n",
      "Episode: 79, seed: 42153 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.6133520710059167, Mean steps: 35.03333333333333\n",
      "Episode: 80, seed: 42315 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.6211972386587766, Mean steps: 35.49166666666667\n",
      "Episode: 81, seed: 42351 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.6290690335305715, Mean steps: 35.925\n",
      "Episode: 82, seed: 42513 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.6369230769230765, Mean steps: 36.375\n",
      "Episode: 83, seed: 42531 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.6447859960552264, Mean steps: 36.81666666666667\n",
      "Episode: 84, seed: 43125 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.6526666666666662, Mean steps: 37.24166666666667\n",
      "Episode: 85, seed: 43152 Reward: 0.945680473372781, Steps: 51 Mean reward: 0.6605473372781059, Mean steps: 37.666666666666664\n",
      "Episode: 86, seed: 43215 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.6684191321499008, Mean steps: 38.1\n",
      "Episode: 87, seed: 43251 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.6762554240631159, Mean steps: 38.56666666666667\n",
      "Episode: 88, seed: 43512 Reward: 0.9382248520710059, Steps: 58 Mean reward: 0.6840739644970408, Mean steps: 39.05\n",
      "Episode: 89, seed: 43521 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.6919368836291908, Mean steps: 39.49166666666667\n",
      "Episode: 90, seed: 45123 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.6997820512820507, Mean steps: 39.95\n",
      "Episode: 91, seed: 45132 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.7076538461538456, Mean steps: 40.38333333333333\n",
      "Episode: 92, seed: 45213 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.7155256410256405, Mean steps: 40.81666666666667\n",
      "Episode: 93, seed: 45231 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.7233708086785005, Mean steps: 41.275\n",
      "Episode: 94, seed: 45312 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.7312159763313603, Mean steps: 41.733333333333334\n",
      "Episode: 95, seed: 45321 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.7390611439842202, Mean steps: 42.19166666666667\n",
      "Episode: 96, seed: 51234 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.7469240631163702, Mean steps: 42.63333333333333\n",
      "Episode: 97, seed: 51243 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.7547958579881651, Mean steps: 43.06666666666667\n",
      "Episode: 98, seed: 51324 Reward: 0.9467455621301775, Steps: 50 Mean reward: 0.7626854043392498, Mean steps: 43.483333333333334\n",
      "Episode: 99, seed: 51342 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.7705394477317548, Mean steps: 43.93333333333333\n",
      "Episode: 100, seed: 51423 Reward: 0.9403550295857989, Steps: 56 Mean reward: 0.7783757396449698, Mean steps: 44.4\n",
      "Episode: 101, seed: 51432 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.7862475345167647, Mean steps: 44.833333333333336\n",
      "Episode: 102, seed: 52134 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.7941104536489145, Mean steps: 45.275\n",
      "Episode: 103, seed: 52143 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.8019822485207094, Mean steps: 45.708333333333336\n",
      "Episode: 104, seed: 52314 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.8098362919132144, Mean steps: 46.15833333333333\n",
      "Episode: 105, seed: 52341 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.8176814595660743, Mean steps: 46.61666666666667\n",
      "Episode: 106, seed: 52413 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.8255532544378692, Mean steps: 47.05\n",
      "Episode: 107, seed: 52431 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.8334161735700191, Mean steps: 47.49166666666667\n",
      "Episode: 108, seed: 53124 Reward: 0.9467455621301775, Steps: 50 Mean reward: 0.8413057199211039, Mean steps: 47.90833333333333\n",
      "Episode: 109, seed: 53142 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.8491597633136088, Mean steps: 48.358333333333334\n",
      "Episode: 110, seed: 53214 Reward: 0, Steps: 39 Mean reward: 0.8491597633136088, Mean steps: 48.68333333333333\n",
      "Episode: 111, seed: 53241 Reward: 0.9392899408284023, Steps: 57 Mean reward: 0.8569871794871788, Mean steps: 49.15833333333333\n",
      "Episode: 112, seed: 53412 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.8648500986193287, Mean steps: 49.6\n",
      "Episode: 113, seed: 53421 Reward: 0.9392899408284023, Steps: 57 Mean reward: 0.8726775147928988, Mean steps: 50.075\n",
      "Episode: 114, seed: 54123 Reward: 0.9414201183431953, Steps: 55 Mean reward: 0.8805226824457587, Mean steps: 50.53333333333333\n",
      "Episode: 115, seed: 54132 Reward: 0.9371597633136095, Steps: 59 Mean reward: 0.8883323471400388, Mean steps: 51.025\n",
      "Episode: 116, seed: 54213 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.8962041420118337, Mean steps: 51.458333333333336\n",
      "Episode: 117, seed: 54231 Reward: 0.9424852071005917, Steps: 54 Mean reward: 0.9040581854043386, Mean steps: 51.90833333333333\n",
      "Episode: 118, seed: 54312 Reward: 0.9435502958579881, Steps: 53 Mean reward: 0.9119211045364886, Mean steps: 52.35\n",
      "Episode: 119, seed: 54321 Reward: 0.9446153846153846, Steps: 52 Mean reward: 0.9197928994082835, Mean steps: 52.78333333333333\n",
      "Total num episodes: 120 Success rate: 0.0, Mean reward: 0.9197928994082835, Mean steps: 52.78333333333333\n"
     ]
    }
   ],
   "source": [
    "\n",
    "### Evaluate the loaded agent: one episode per seed\n",
    "\n",
    "videos_dir = '/opt/Memory-RL-Codebase/eval/Minigrid_Memory/GTRXL'\n",
    "\n",
    "# Seeds are all digit permutations of `nums` (12345, 12354, ...)\n",
    "nums = [1, 2, 3, 4, 5]\n",
    "eval_seeds = generate_permutations(nums)\n",
    "\n",
    "videos_limit = len(eval_seeds) + 1  # only episodes with index < videos_limit are recorded\n",
    "n_episode = len(eval_seeds)\n",
    "\n",
    "render = False  # set to True to write one .mp4 per episode below `videos_dir`\n",
    "\n",
    "memory_length = config[\"transformer\"][\"memory_length\"]\n",
    "\n",
    "total_reward = 0\n",
    "num_successes = 0\n",
    "total_steps = 0\n",
    "\n",
    "for i in range(n_episode):\n",
    "    curr_seed = eval_seeds[i]\n",
    "    record = render and i < videos_limit\n",
    "    frames = []\n",
    "\n",
    "    # Fresh episodic memory for every episode\n",
    "    memory, memory_mask, memory_indices = init_transformer_memory(config[\"transformer\"], env.max_episode_steps, device)\n",
    "    memory = memory.to(device)\n",
    "    memory_mask = memory_mask.to(device)\n",
    "    memory_indices = memory_indices.to(device)\n",
    "\n",
    "    done = False\n",
    "    t = 0\n",
    "    ep_reward = 0\n",
    "    obs = env.reset(curr_seed)\n",
    "\n",
    "    if record:\n",
    "        frames.append(env.render())\n",
    "        # NOTE(review): the sleeps only slow recording down; presumably left over from interactive viewing -- confirm before removing\n",
    "        time.sleep(0.5)\n",
    "\n",
    "    # Inference only: without no_grad, autograd would track every write of `new_memory` into `memory`\n",
    "    with torch.no_grad():\n",
    "        while not done:\n",
    "            # Prepare observation and memory (`obs` itself stays the raw env observation)\n",
    "            obs_tensor = torch.tensor(np.expand_dims(obs, 0), dtype=torch.float32, device=device)\n",
    "            indices = memory_indices[t].unsqueeze(0)\n",
    "            in_memory = memory[0, indices]\n",
    "            t_ = max(0, min(t, memory_length - 1))\n",
    "            mask = memory_mask[t_].unsqueeze(0)\n",
    "            # Forward model\n",
    "            policy, value, new_memory = agent(obs_tensor, in_memory, mask, indices)\n",
    "            memory[:, t] = new_memory\n",
    "            # Sample one action per policy branch\n",
    "            action = [action_branch.sample().item() for action_branch in policy]\n",
    "            # Step environment\n",
    "            obs, reward, done, info = env.step(action)\n",
    "\n",
    "            # Accumulate before reporting so the termination message includes the final reward\n",
    "            ep_reward += reward\n",
    "            t += 1\n",
    "\n",
    "            if record:\n",
    "                frames.append(env.render())\n",
    "                if done:\n",
    "                    print(f\"Episode terminated. Episode reward: {ep_reward}\")\n",
    "                time.sleep(0.5)\n",
    "\n",
    "    # NOTE(review): the stored run of this cell printed \"Success rate: 0.0\" although most episodes earned a\n",
    "    # positive reward, so `info` apparently carries no \"is_success\" flag for this env. When the flag is missing,\n",
    "    # a positive episode return is counted as success -- confirm this matches the env's reward scheme.\n",
    "    if info.get(\"is_success\", ep_reward > 0):\n",
    "        num_successes += 1\n",
    "    total_reward += ep_reward\n",
    "    total_steps += t\n",
    "\n",
    "    if record:\n",
    "        desired_resolution = (945, 540)\n",
    "        original_aspect_ratio = 112 / 64\n",
    "        width = int(desired_resolution[0] * original_aspect_ratio)\n",
    "        height = desired_resolution[1]\n",
    "\n",
    "        observations = [np.squeeze(o) for o in frames]\n",
    "\n",
    "        clip = ImageSequenceClip(observations, fps=2)\n",
    "        clip = clip.resize(width=width, height=height)\n",
    "\n",
    "        # splitext removes only the extension; str.strip('.pt') would also eat any leading/trailing '.', 'p' or 't' of the name\n",
    "        run_name = os.path.splitext(checkpoint_path.split('/')[-1])[0]\n",
    "        run_type = checkpoint_path.split('/')[-2]\n",
    "        curr_reward = float(info['reward'])\n",
    "\n",
    "        out_dir = videos_dir + f\"/{run_type}/{run_name}\"\n",
    "        os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "        clip.write_videofile(out_dir + f\"/{run_name}_seed={curr_seed}_reward={curr_reward:0.2}.mp4\", fps=2)\n",
    "\n",
    "    # Running means over the episodes finished so far (not over the planned total)\n",
    "    episodes_done = i + 1\n",
    "    print(f'Episode: {i}, seed: {curr_seed} Reward: {ep_reward}, Steps: {t} Mean reward: {total_reward / episodes_done}, Mean steps: {total_steps / episodes_done}')\n",
    "\n",
    "\n",
    "# max(..., 1) keeps the summary printable when the seed list is empty\n",
    "denominator = max(n_episode, 1)\n",
    "print(f'Total num episodes: {n_episode} Success rate: {num_successes / denominator}, Mean reward: {total_reward / denominator}, Mean steps: {total_steps / denominator}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAGFCAYAAAASI+9IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAGgklEQVR4nO3dvXFTWxSAUfPGHeAecA84VooacSEvdgpFkCp+LoIiqEEv+4a5cxlrQDrnylork/yjbSXf7Nlgfzgej8c7ALi7u/tn9gAAbIcoABBRACCiAEBEAYCIAgARBQAiCgDk/tRPfHl5ueQcAFzY8/Pzm59jUwAgogBARAGAnHxT4LYdDofZI3Ahu91u9ghsiE0BgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAkPvZA3Addrvd7BGAAWwKAEQUAIgoABBRACAOzZzkcDjMHoEL8Y8I+JVNAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoABBRACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiCgAEFEAIKIAQEQBgIgCABEFACIKAEQUAIgoAJD72QNwHXa73ewRgAFsCgBEFACIKAAQNwVOcjgcZo/AhbgX8SubAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAiF+Ix836/MbHX4dMAdtiUwAgogBARAGAiAIAcWjmZu3f+LhDM7fIpgBARAGAiAIAcVPgJnxaee7xD77mxxlmgS2zKQAQUQAgogBA3BS4CU9n+ho3Bd47mwIAEQUAIgoARBQAiEMzN2F/pq/59ndjwObZFACIKAAQUQAgbgq8S18Gfd/vF3odmMWmAEBEAYCIAgARBQDi0My7tB/0fR2aeW9sCgBEFACIKAAQNwWu3qeV5x4u9FrL77v22v46G9fMpgBARAGAiAIAEQUA4tDM1dtv7LX/HT0EnJFNAYCIAgARBQDipsDV+bh4/DRlit+/9tfF458jBoEzsSkAEFEAIKIAQNwUuDozbwinWM7nD/FwTWwKAEQUAIgoABBRACAOzVyd/ewB3rBfPHZo5prYFACIKAAQUQAgbgps3ufF44cpU5xuOd9y/tdRg8AfsCkAEFEAIKIAQEQBgDg0s3n72QP8pf3isUMz
W2ZTACCiAEBEAYC4KbApH1eeexw+xXkt51/7GX+OGAROYFMAIKIAQEQBgLgpsCn72QMMsF957tvoIeA3bAoARBQAiCgAEFEAIA7NbMrT7AEGWPsZHZrZCpsCABEFACIKAMRNgam+LB4/TJlirLWfcfk+fB8xCKywKQAQUQAgogBARAGAODQz1S38Z7VTLN8Hh2ZmsSkAEFEAIKIAQNwUGObTynOPw6fYpuX7sPZe/RgxCDfPpgBARAGAiAIAcVNgmLVfBPff8Cmuw9p75abACDYFACIKAEQUAIgoABCHZoZ5PfE5YB6bAgARBQAiCgBEFACIKAAQUQAgogBARAGAiAIAEQUAIgoARBQAyIfj8XicPQQA22BTACCiAEBEAYCIAgARBQAiCgBEFACIKAAQUQAg/wMXGTZYQ50GKAAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "obs_np = obs.transpose(1, 2, 0)\n",
    "\n",
    "\n",
    "plt.imshow(obs_np)\n",
    "plt.axis('off')  \n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "env = Minigrid('MiniGrid-MemoryS13-v0', 31)\n",
    "\n",
    "env.reset() \n",
    "init_obs = env.render()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2976, 3, 2976)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "init_obs_np.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2976, 2976, 3)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "init_obs.transpose(0, 1, 2).shape\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAGFCAYAAAASI+9IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7lUlEQVR4nO3dS4xk250u9G+v/YrYEbHjHZl1Xn3sti3ZbreP3Xa7dXuIQAikKwHiji66ozMCRgwQMEBISEgIBkhIDM4AiQEMQICEQCAhLpe+jeBKLatbXMnSPfXKU6cqs/IRmZGveOwdm0F6bVfftvt8/+za6Sj7+0lHpW5/tWrnjrXjH3my1neCqqoqiIiIAHC/7gsQEZHdoaEgIiI1DQUREalpKIiISE1DQUREahoKIiJS01AQEZGahoKIiNQiNvjpp582eR0iItKwzz777Csz+k5BRERqGgoiIlLTUBARkZqGgoiI1DQURESkpqEgIiI1DQUREalpKIiISE1DQUREahoKIiJSo2suLPSffRYRuRMEAZ29z3unZX1GI0MBAP7iL/4Cr1+/prKdTgdRFOHi4oLK93o9AMDl5SWVHwwGWK/XuLm5ofKj0Qg3NzdYLpdUfjKZYLFYYL1eU/nZbIazszMURUHl9/b2cHJygrIsqfz+/j5ev36N7XZL5R89eoTDw0NqQwZBgP39fbx69Ypa25p3zmE2m+Hw8JDKh2GIyWSCo6MjKh/HMQaDAY6Pj6l8mqbodrs4PT2l8u12G2ma4vz8nMp3u10457BYLKh8nufYbre4urqi8sPhEMvlEre3t1R+PB7j6uoKq9WKyk+nU5yfn2Oz2VD5XdrL1vx99vKPfvQj9Pt9Kg8AT58+xePHj6lsFEX4oz/6I7TbbXp9at23utobXr9+jefPn1PZfr+PNE3pITIajQAAZ2dnVP729hbL5ZJ+8NbrNRaLBa6vr6l8WZY4OTmhhwgAvHz5kn6QnHM4ODigH6QoivDs2TP6wYjjGM+ePaOyfn32tQ2CAGEY0nnnHIIgoPNRFGG73dL5NE2xXq9xcHBA5bMsw3A4xJdffknle70esiyjh9RwOEQYhjg5OaHyk8kERVHQQ2e1WuHq6ooeIkVR4OzsjB4iVVXh8PCQ/kAUBAFevHhBfyDye4cdCnEc4/nz5zux951z+N73vkevDQDn5+f0+nEc40c/+pFpfYZ+piAiIjUNBRERqWkoiIhITUNBRERqGgoiIlLTUBARkZqGgoiI1DQURESkpqEgIiK1xk40dzod+nh3r9dDHMemfFVV9AnfbreLOI7pjpBut4uqqhBF3O3pdDpYr9dI05TO53lOn+r095L9erMsQ7/fp091ZlmGwWBgyrOvVRAEprxzzpSPosiUT5LElG+1Wqa93Ol00G636dPt3W4XYRjSp9u73S7KsqRfq06nA+ccwjCk80VRIEkSOt/v9+kTzT7P7n3/WrEnmtvttnnvN7mX2fvupWlKrx/HMZx7+5/rGxsKURTRb5JxHCNJElMegClfVZX5eixH69mHCPjFvWE3TBRFpuvx67MPhl+fZXltgyBAHMd03jlnyodhaMrHcWzKJ0nSeN45Z8oXRWH6esuypN+E/V627h32A5fPs3vf30vLs2jd++y9tOaDIDC/aVvWj6LorZfhAQ0OhYuLC7rLyNp95Dc4230EwNR9FIahqfsoSRJT91G73cbx8TH96bDT6eD4+Jj+TqHX6+H169f0g5HnOX3vrfkgCNDtdum8/06BzfuHiM37YczmsyzDdrul8777iM1vNhtT99F2uzV1HznnTN1HcRybuo9arRaOj4/p7xSyLMPx8TE9pPzeYYeC35u7sPedc/Qz7l1fX9Pr+4H/tulnCiIiUtNQEBGRmoaCiIjUNBRERKSmoSAiIjUNBRERqWkoiIhITUNBRERqGgoiIlJr7ERzr9fDaDSis3Ec06cc2W4QL89ztFotussoz3NT
9UCe5yjLEqvVisr7e8OedvR5S9fTeDymT4H6PHMK1J9QZl9ba945Z8pHUWTaa0mSmPLtdtuU991H7Gub5znCMKRfK9+BxdYn5HluqmHJ8xxVVaHdblP5Xq+H9XpNn2j295J91n3espdHo5FpLze1951z9HuO12636fWb6j7aie8U2CPpu8z6NTT9NTe1/i6+Vk3e+/t8vU3vhd+mr/ch1m7qenbxWWE09p3C5eUl3U1UliXSNDV1GQF891EURabuozRNTd1HWZbh/Pyc7j7q9XqYz+f0p8l+v4+zszP6O4XhcIizszN6U45GI5yenlJZn2fvfRAEGAwGdN45hzzPTa9tlmV0Pk1TJElC57MsQxAEdH6z2Ziup6oqhGFouj+W7qMkSUzdR+12G/P5nO4+6na7mM/n9HcKeZ5jPp/T3yn4vcN+p+D3pmXvW953/LPF8K+Vxe3tLb1+HMf0fbHYie8URERkN2goiIhITUNBRERqGgoiIlLTUBARkZqGgoiI1DQURESkpqEgIiI1DQUREak1dqJ5MBiYTkXGcUyvnec5ANC9IsPhEOv1Gq1Wi84nSYJOp0PlB4MBANCnOgeDATabjanraTab0Sea8zzHbDajT3XmeY69vT1TfjabUdkgCOrrZzjnTPkoikz5OI5N+VarhV6vR58+z7IMrVaLvpe9Xg9hGNIdNoPBAGVZ0l1Gw+EQrVYLWZbR64dhSJ/O911MltP5q9WK3sv+tWJP7t5n71v2siXvnDO9rwF33VmWvR+GoWl9at23vuLPrddremPFcYyqqui8f3Nn876wy5JfrVZ0frPZYLVa0YV4fn32QdpsNlgul/SDVBQFVqsV/SD59dlSMP/1MnnnXL0+IwxDUz6KIlN+u92a8kEQIE1T0/VY3lTTNDXl1+s1iqJofC9b8+wHIp9nPxD5PLv3m9zLPs/eG+ecuYbC8tr69823rbGhcHNzQ3cN+QePzfvvENh8q9UydR91Oh1cXl7S3Ud5nuPy8pJ+MYfDIRaLBT0U/L1kH4ybmxtcXFzQG+b29hYXFxdU1poPggDj8Zi+9865+v4woihCnuemXqssy+h8URSIoojOV1WFsizpfBiGCMOQzidJgqIo6HyWZabuo16vh8ViQX+XPxgMsFgs6KHg9zI7FPxeZt9cb29vsVgsGt37lr3MPrPearWi11f3kYiINE5DQUREahoKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEitsRPNo9GIPuXou4/YHg/ffZSmKZX33Udsl9FkMkG73aZPdY7HY4RhSH+9o9EIVVXRJ5pHoxE2mw19OnI4HOK9996jTjsGQYDBYID33nuPPupvyfsTyo8ePaKuPQxDUz6KIoxGI7piJEkSDIdD+t777iP25KjvPmK7jHq9nqkjx3cftdttKj8ajdDpdNDr9aj8eDxGkiT06fzRaAQApr1cliV9onk4HNKVLX5vPnr0iN7L/llpau9bu496vR699y3vmRY7UXNRVRWSJDEdHwf4moskSbBarXB5eUnl2+02Li8vcXNzQ+W73S4uLy/pNyZfy8A+SL5KwDIUFosF/Ubm82w1wHA4NNVc+OtnOOdMtRVRFKHT6Zj2gmWvbTYbOOfo/Ha7NdVcBEFgqrmIosi0fpqmuLm5oStbsizD5eUl/YGo1+vh8vKS/kDU7/dNNRe7tpct+Xe15qKxobBcLumNGEURttstnfffIbD5Tqdjup7b21vTg+Tz7Kcrfy3sUPB5doOtVitcXV3RD4bPs1arFX1vgiAw3XvnnHnvWPK+LJDNV1WFNE1N1w/wezNJEoRhSOfb7TaKoqDzvV7PtJf9vWSHgs+zQ8Hn2aHg8+ybn39tLXufvTfW/H0K8dbrNb1+U4V4+pmCiIjUNBRERKSmoSAiIjUNBRERqWkoiIhITUNBRERqGgoiIlLTUBARkZqGgoiI1Bo70TyZTOgTuJ1OB3EcI0kSKu+7j7Iso/KD
wQCbzab+fV9lPB6j2+3Spzqn0ymSJKFPdU4mEwRBQJ/qnEwmdX3CVwmCAOPxGJvNhu6LGY1G+PDDD+n+F0veOYfxeIz1em3OM6IowmQyoe9lkiQYDAb0SdNWq4Vut4sgCKh8lmVI05TuvPHdR61Wi8r3+32UZYlut0vlR6MR8jynK1smkwlarRZd2TKZTBCGIX06fzKZAAD9eo3HYxRFYdrLq9Wqkb18n73Pvqd5eZ7jgw8+oLJxHCOK3v5beGNDYbFY4OTkhMqu12skSULny7JEVVU4Pz+nr8fSfRSGoan7KEkSnJ6e0g9Su93G6ekp/SB1Oh2cnJyYhuzx8TF9BL7b7eL4+JjKWvNBENTXw3DOod1u03shiiLT3kmSBEEQ0Pl2u42yLOl8t9s1Xf9ms0EYhjg7O6PyVVWhKApT/87NzQ1dYxLHMc7Pz+kPRK1WC6enp/QQz7IMJycn9FDodrs4OTmhh7jPN7H3gyAw5Z1z9Nfp3dzc0HsnjmNztxKjsaGwXq/pLiDfZcTm/Zsvm1+v11itVjuVXy6X9FDweXYDbDYbLJdL+sHweZYlHwSBaS8450zrR1FkWt+301qux7K+b65k8+1225RfrVYoisK815rOs0PB59k3S59nh8Iu7f37dB9ZXlv/4fht088URESkpqEgIiI1DQUREalpKIiISE1DQUREahoKIiJS01AQEZGahoKIiNQ0FEREpNbYiebZbEZnO50OoihCu92m8r1e7y/9+lUGgwHW6zWGwyGV930x7MnCyWSCdrtNn+qczWaIoog+1TmdThEEAX2ieTqdYrvd0qcpfXcQ2/8ymUzo09hBEGA6ndJfq3MO0+mU/lqjKMJ4PKZPdsZxjMFgQHcZpWmKbrdLd8y0222kaUp3GXW7XTjn0Ol0qHye59hut+j3+1R+OBxiuVzStRXj8RidToeubPG9X+x+mM1mcM6Z9nJVVY3u5ab2vnOubmtgDQYD/M7v/A6VjaLo3eo+Ojs7w8uXL6lsnudI05TuFBmNRqiqCvP5nMpvNhusVissFgsqX1UVFosFrq+vqXwQBDg9PTVVM7x69YreXEmS4OXLl/SD1Gq18PLlS1Pp28uXL+kHw+cZQRAgTVM6H4Yh4jim83EcIwgCOp+mKcqypPNZlmG1WtH5Xq+HdruN169fU/nBYIAwDHF6ekrl1+s1iqKge7/KssT19TXdfQQA8/mcHiJhGOLo6Ij+QBTHMV69ekV/SPB7p4m9bM1b97KvSLG4vLyk10+S5N3qPiqKgn7TK4rC1LToc5b1N5uNaX1L3rr+ffPsBiiKgm4lBe7eOCyb1/La+jZYNl+WpSnvC+Isn97KsjTtBcv6m80GcRybXlvfx9TE9TzU3rTm2aHg8+xQ8K8tu/f9s8Ky7B3nnLmbyLI+AHUfiYhIszQURESkpqEgIiI1DQUREalpKIiISE1DQUREahoKIiJS01AQEZGahoKIiNQaO9G8t7cH57iZ47uP2P4X33nE9r/0+31sNhvc3NxQ+dFohMFgYOo+6nQ69MlI3xfDnurc29tDGIb0iWbfO8WcAvXdRNvtlj7qP5vN7pVn+O4j9qRmFEUYjUZ0l1Ecx+j3+wjDkMqnaYpOp4MkSai87z7KsozK++4jtscrz3OUZUn3eA0GA6xWK1P3Ua/Xo7uPJpMJWq2WqfsoiiLTXg6CgN4/Te9lSy/XfbqPhsMhvva1r1HZd6776OTkBAcHB1S23+8jSRJT9xFw16/EmM1mWC6XdPfRZrMxdR9tt1ucnJzQQ8R39bAPUhiGODg4MJXEHRwc0A+Sz1veiJ8/f05lgyCo12eEYYggCOi8f4Nh82maYr1e44svvqDyWZZhMBiYuo+yLMPR0RGVt3Yf+QI3tvtotVqZuo+KojB1H1VVZeo+cs7hxYsX9Aci616O43hn9rJzDr/3e79HZb2Liwt6/SRJ8MMf/tC0PqOxoVCWJf0mVpYlttutKf/mr+y1/Lbkt9st3fz4
Zp5lea18uyub942YlvWte6fpvPW1sjTg3mf9Xcrfd++zQ8Ffj2XvW0rlLPmqqszdRJb1Lc+shX6mICIiNQ0FERGpaSiIiEhNQ0FERGoaCiIiUtNQEBGRmoaCiIjUNBRERKSmofAWsBULIiK7rrETzfv7+3QvR5ZliKKI7n/pdrsAQPe/5HmOoijo7qPhcIjhcEj3v4zHY3Q6Hbq2YjqdotVq0ScSfV8Me6pzb28PQRDQpyl9VxKb39vbo7O+X4blnMNkMqEHbRiGGI1GdJdRHMfI8xxxHFN5333UarWofLvdRpIk9R79Kp1OB845user1+thu91iPB5Ted/hxVawjEYj5HlO11ZMJhO02216L/veL0v3URiG9N5/iL1sybP7xhuNRvjd3/1dKvvOdR+9fv0az549o7L9fh9pmuL169dUfjweo6oqU/fRarXCxcUFlX/vvfewWCzovpjNZoPj42P6wdtut3j58iX94AHAwcEB/eAFQYCnT5/ShXjOOTx58oQuBXPO4enTp1TeOYcgCPDkyRPq2v0bAJuP4xibzYbea2maYm9vj+6XybIMw+EQX375JZXP8xxZluHw8JDKD4dDhGGIk5MTKj+dTut+Isb+/j6ur69xeXlJ5VerFebzOf0BqigKHB0d0R+gqqrCixcv6A9QQRDg2bNnphI6695n93IQBPWzxe7973znO8jznLp2AJjP5/TeT5IEn3zyCb02q7GhwDYPAr/oCLH0lfjfx65vuZ7tdmvOW6+/yeuxfL3+utlPYtb8Q9xLS9661+6zN5ve+03thYfI/zbtfWvv0Zt/BoO9Div9TEFERGoaCiIiUtNQEBGRmoaCiIjUNBRERKSmoSAiIjUNBRERqWkoiIhITUPhl7B2GTXdfXSf9Xfpa1A31K/XLt3/XboWq1279qaup7ETzY8ePaL7ZXz3EXsc3PfKjEYjKp/nOTabDW5vb6n8YDAwdR+NRiN0u1366P5kMkGr1aKP7k+nU1P30Ww2g3PO1P9iYcnft/vIOe7ziu8+YjtgoihCv99HkiRUPkkSdDodtNttKt9qtZCmKd3j5buPBoMBle/1eijLEpPJhMr3+32sViu6gmU4HKLf79MVLOPxGFmWNdp91GTvV9N7/z7dR9/4xjeo7DvXfXR4eEj30QwGAyRJYuo+AoDT01Mqv7e3h+Vy2Vj30YcffmjqPiqKwtR9tN1uTd1HAPDkyRO6/wUAHj9+bOp/YfN+OH3++edffdG4e5MvyxKPHz+m8nEcY71e4+nTp1Re3Ud/vQ8++ABnZ2d099F6vTZ1H5Vlaeo+qqrK1H0E2Pa+ZS/7Z4Xdy845fPvb3zZ1H52dndHrJ0mC73//+/TarMaGgqX3w9oRcp+89Xqavv6mr8fSFfPmr+za1jyr6XzTmr6e+6zf9P3ctde3yb28S5q6Hv1MQUREahoKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEitsaGwa106lt/TdG+QP0nZ1Pr3+T1s/iG+1ibzD6Hpvf8Q+7Op/C6+vk3t/aa9c91H+/v7dC/Hb2v3EVtbcZ/+F2v3UVN9MfftPmI3vO8+CsOQysdxjDzPG+s+arfbSJKk3qNf5T7dR9vttq56+Sq/Cd1HYRjuRPeRdS+r++if8urVKzx//pzK9vt9pGlKdx/5YXB2dkblZ7MZVquVqfvo4uIC19fXVN7afbTZbPDy5Ut6iGy3Wzx//px+kKqqwtOnT+kHCeC7jzy2n8U/oJbuo6Io8OTJEyofRRFWqxXds9V091Gv10OWZTg6OqLy1u6jyWSCoihwfn5O5a3dR++//z7m83mj3UdffPEFPUTe5b38rnYf/db8TGGXupis7rP+Ln0NTd57+WoP0S22C2s3bZeuBVD3kYiIPAANBRERqWkoiIhITUNBRERqGgoiIlLTUBARkZqGgoiI1DQURESk1tiJZktnic9aO0gseeccnffZpvL3+XqteeccfQrU
55nDME1f+2/Cvd+1vGXvP0S+ya/X/x5Ltun3KQvr3m9Co91HbB9NlmWI45jui/E5ti+m3+9js9nQfTHD4RCDwYCurRiPx+h0OnRfzHQ6RZqm5u4jS19MEATUUPB9LlVV0UPhvnmGcw7T6ZTKAr/oPmIfkDiO0e/36c6YNE3R6XSQpimVb7fb9e9h+O4jtgohz3OUZUn3fg0GA6xWK7r3azQaIc9zurZiMpmg3W7TlS2z2QxxHNN72fd4sR9wfDdRk3uZvRbnnLn7aDgc4utf/zqVVffRG+7TfbRcLrFYLKj8o0ePsFgs6O6j9Xpt6j4qisLcfXRwcEA/SADw5MmTRvpfrHnfF/P48WMq75xDWZam7qP1eq3uo19hf38fV1dXuLq6ovLL5RJnZ2f0ENlsNjg8PKQ/EJVliRcvXpi6j549e2b6rrep7iPPspfv033Erh/HMT755BN6bZZ+pvAW7FoniojIfWkoiIhITUNBRERqGgoiIlLTUBARkZqGgoiI1DQURESkpqEgIiI1DQUREak1dqLZOUdXD/isJf/mr0w+DEM677NNXn8YhvQJZZ+3VEWEYWg6js+u77tu2Lz13ljv/X1eK/81sPmm98JD7LVdy1v3JisIAnp96172eev7lIVlb1rui0VjQ+HHP/4xvv/971PZMAwRBAF99N33fbD5OI6x3W5Nb8I/+9nPTN1Hlv6X6XSKOI7p659Op3SXEXBXhbDdbukhMplMUJYl/WD4qgVGEASYTqemez+dTumv1XcfsXz3EftA+R6jOI6pvO8+arfbVN53H7G9X777iO39uk/3UbfbNXUfpWlq6j6yfCDyPVjsfvB7x7L3LXvZmmc7s7zBYICPP/6Yyr5T3UdBEJj6PnbNarXC8+fPTd1HJycn9BApy9LUfVRVlan7KAgCU/eRc47uW3lzfTYLgM778jNL99Fms1H30a9g7T5arVam7qOiKEzdR9vt1tR9BMDUfeScM+99dq8Btr3vnMN3v/tdem0AmM/n9PpxHOMHP/iBaX1GY98piIjI30AI4A8B/HMAUgD/EMD/AYD77HlvGgoiIrsmBPBvA/h3APgW9g2A/xrAvwGA+5cY96K/fSQismv+Fu4GQhdA8PN/EgB/F8C/1uwfraEgIrJr/nn84juEN0UA/jYafefWUBAR2TV/3V9ea+HuO4eGaCiIiOya/xvAL/vLhtVf87+9JRoKIiK75n8B8D8AePNv4lYA/gzAf9HsH62/fSQismtuAHwK4P8F8C/i7ofM/xDAfw6AOzJzbxoKIiK76ALAfwrgP8PdzxC4s65/YxoKv4LlCHkcx0iShD51mSQJ0jStT/uyeUsNiK/2YIRhiDiO6ZqLMAzpe+O7Zdh8GIamfBzHpnwURY3nLXvHr21Z/81fmfXjODbn2dP2Ps/uNWs1g1+fPc3v76dlLze599ln/M3f80vXr/BX3q3Z6hWroCLPg3/66af0olVV4fPPP//lR/FjAAXuvsifa7VaCMOQrpXIsgxVVdFH8fM8x7e+9S36BaqqCldXV/RGH4/HmE6n9NF633vUVH4+n+P169dU1q/PvgnsWj4Igrrqgs2HYWjqr7Hk/RuBpevJkvflbZaBb8lHUUT3YN0nf5+9vyt77T75TqdjGoJPnjwxVbz85Cc/oXu2AOCzzz776nXp1YyePXuG58+f/+L/kQH4t3D392//MYD/APW/G+v3+0jTlH4j8wVoZ2dnVP69997DN7/5TXooBEGAXq9HZYG7vpvJZGL+VNCEqqqw2WzoQjMR2R3n5+d0j1ccx/iDP/iDt34ND/e3j/4e7k7obQD8qwD+owf900VEhPBwb8s/AHCEuyPafwLgE9yVPImIyM54uKHwDwDsAfjfAPwzP/+/G277ExERm4f720f/LYAJgP8Yd4cy/n38pR82i4jIr9/DfaewBfC/A1gB+H8AcD8jFhGRB/Rw3yn8Pdz9PIH/21MiIvLAHm4o/DcA/gJ3/+UgERHZ
SQ/3r48eAfjWg/6JIiJi9HBv0b+Hu//e6H8J4M8f7E8VERGDxv71URzHSNM3DiL8rz//x3vjf0qS5K/m/xpJktwtQeZ9Rwh7tP6+ml7fYpeuRUQ4YRia3teaaFFobCgMBgOs12sqm2VZXWzG6PV6qKqqHg5fpd/v46c//SndlTQYDHB7e0tXRXzzm9/Ezc0N3V/T6/VwfX1N99HkeY7Ly0v6jf78/Bx//ud/TudnsxmOj4/pUrDpdEpXkgRBgMlkguPjYyrvnMN4PKbzYRhiOBzi5OSEykdRhDzP6YqUJEmQZdkv7/H6JVqtFpIkwWKxoPJZlsE5h6urKyrf7Xax3W5xc3ND5fM8x3q9xnLJHQoaDAa4ubmhn93RaITFYkF3Q00mE8znc/pZmU6nOD09pZ8Vy172ectebnrv93o97O3tUXlf1vi2NTYUjo+PcXBwQGWb7j5aLpdYLpf0g/ro0SMsFgu6oK/dbptK2WazGU5PT+kHY29vD8fHx/SDcXt7i5/97Gf0g1EUBT7//HMqCwCbzYbOB0GAr3/963j8+DGVd87h448/NpWCffDBB3RfTJqm2Nvbo/dmlmUYDof48kuuxL7X6yHLMhwdHVH54XCIMAzpoTaZTFAUBT2k9vf3cXV1RQ+d999/H2dnZ/QHqI8++giHh4f0EPn444/x4sULeois12s8e/aM3vtFUeDx48c7sfedc1iv1/TeD8MQH330Eb034zjGj3/8YyproR/7viUpgH8BwAfQTRWRd5f+ewpvSQTgX8LdUYw/x905vf8PavIQkXeLhsJbFADoAvhbAH4C4DmAv4+7/872CdTqISK7T0OhAQHubuzvAvg6gH8FwD/CXRfgP/k1XpeIyFfRv/5+IMHP/xER2WX6TqEBFYASwAGA/xPAnwI4hv71kYjsPg2Ft6gCcIVf/KD5HwPg/mKfiMhu0FB4SwoA/yPuOv++xF1TuIjIu0ZD4S1ZAfiff90XISLyN9TYUEjTFFmWUVlfDcDm2+02qqoyrR8EAX2KstVqYbPZ0Kci0zRFmqZwjvu5ve95Yk80+7zlVGen06HzaZqi0+mYvl723gdBYMo750z5KIpM+SRJTPl2u23Ot1ot094Mw9CUL8uSPkHcarWw3W7pveCvne3U8fcmiri3Ep9nn0WfZ6/fv480tZct71PWvex7j9h8U91HQUXevU8//ZRetKoq/PSnP6WP+nc6HcRxTB/d7/V6AIDLy0sqPx6P8fu///tUFrh7MauqojfWZDLBhx9+SOet6zvn6IcCAF69ekXfe+BuM7IDahfzlvsTBAGCIHhn8/fZO7uWt+zlXdtrTecPDg7oWowoivDHf/zH9BABgM8+++yr16VXMzo9PaX7YpruPvLleewneas4jhFFUSNT26qqKkRRhFarZfp9vkn2Xc3Lb65d22tN5q+uruj3zTiOTQOHpXMKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEhNQ0FERGoaCiIiUmvsRHOWZcjznMp2u10kSWLKV1VF96dYulwA0Ef2vbIssVqt6HwQBKY/w5ovy9L8NYhIMyzvPWma1jU+XyWO40ZaGhobCj/5yU8ae+PzN9maZxVFgT/90z/Fcrmk8rPZDIvFgi4p29vbw+npKT3UHj16hNevX9NH2vf39/Hq1Sv6/rz33nvm/MuXL6lsEAR49OgRnXfOYW9vD69evaLyYRhiMpnQXU9xHGM4HNKVKmmaotvt4vT0lMpnWYY0TTGfz6l8t9tFGIa4uLig8v1+H2VZ4urqisoPh0Msl0vc3nL/ZY/JZILLy0v6Q85sNsN8Psdms6Hy+/v7OD4+pvfyo0ePcHR0RPcl7dpe/sM//EMMBgMqD9ztz7/SZRQCGOKuivkK9X+tq6lCvEaGQhAEdGviLtput3j16hWur6/p33NyckIPkSAI8PLlS/pBCsMQBwcHplbVFy9e0A9Gq9XCF198QWV9/sWLF1TWN0uy
eeccoiii875zis37dlo2n2UZhsMh3UfT6/WQZRk9pIbDIcIwxMnJCZVfLpcoioIujyyKAldXV/QQqaoKZ2dn9BBxzuHw8JD+QBRFEb788kv6A5HfO5aWV+veZ/cCcLd/LHv5k08+odcG7rqP/tLeaQH4DwH8HQDnAP49AP/T3f+k7iMRkd82/yyAfx3APwCwAfCf4O67hgZpKIiI7KpHuPvPOP5XAP4EwBhAt9k/8t39dzwiIr/p/j6AIwD/HYDk578eNvtH6jsFEZFd9U8A/MsArgH8XwD+Tdz9a6QGaSiIiOyqDMDy5/8c4u6HzQ3TUBAR2VU/wt0Pl6cP90fqZwoiIrvqTwD8BYB/9HB/pIaCiMiu+haAvw2g/3B/pIaCiMiuKgCcAfh3AXz+MH9kI0Ohqqp3vnun3+8jSRIq2+v1sNls0G63qXy328VgMKBPdfo8e6qz0+lgOBzSr4HPsyz5IAhMeeecKR+GIbrdLp2P49i0fqvVMq3f6XTQarXoE755nsM5R59MzfMcRVHQ9Qa9Xg9hGCKOYyrf7Xax3W7RarXo/GAwoE/n+zz79frXit37/gT6rux9a7NDq9X6y+ufAfjv3wi88T9FUfRudR/92Z/9GQ4Pub9Q2+l0kCQJ3Rfji/MWiwWVHw6HWK/XdG3FeDxGGIYIw5DKO+fulWc3rs+zbwT3XZ9lyQdBYMo750x5/2Cwef86WfLW63+IvdNU3np//Prsm7Z1r/lr2YW9b93L9+klsu7lJjQ2FM7Pz+n+l36/jzRN6ZIy/6nk7OyMyldVheVySQ8R5xwWiwU9RFqtFs7Ozujuo06ng9PTU/rTVa/Xw+npKf3pqt/v4+TkhH4wBoMB3b1jzQdBgDzP6bxzDt1ul85HUYRWq0Xn0zRFHMd0PssyVFVF53u9HlarFZ0vy9LUfQTA1H0URZGp+yhNU1P3UZZlOD09pb8z8uWC7HfJfu+wQ8fvzab2vn+2GM45+uv0bm5u6PXjOKbvi4X+SqqIiNQ0FEREpKahICIiNQ0FERGpaSiIiEhNQ0FERGoaCiIiUtNQEBGRmoaCiIjUGjvRnOc5JpMJle12u0iShD6d1+/fVQayvR+DwQDr9ZruMhoMBoiiiO4y6vf7qKoKq9WKyud5jvV6TZ9o9veSPdHc6/UwnU7p++nzzCnQIAjQ6/Xo19afaGbzzjlTPooiUz5JElO+1Wohz3P6tfXdR5YuI0t/zXA4RFEUdKeO7/Biu4z6/T6CIKBP5/suJkvX02QyMZ1onkwmpr08mUzoE81N7n3nnLn7KMsyev04jt+t7qPtdku/8GVZoigKU76qqkbX97+nifX9vbGuz77R+PXZB8mvzz4Yltc2CALTvfTlcJZ6gCbXt+4F/zpZ8mEYNrZ+03v5ofZ+E3v5zeth9751r1mLQa3PVhMaGwpXV1d0P0tVVUjTlM776cjmkyQxdR+1221T91G328XFxQX96arf7+P8/Jz+TmE4HOL8/JweCuPxGPP5nN6Qk8mELiP067P3PggCjEYj02s7GAxM3T7dbpfOp2mKVqtF59frNZxzdN6/aVjuTxiGpq/Xsn6r1TJ1H3U6HVxcXNDdR3me4/z8nP5Owb+27Buf3zvsUJhMJjg/P9+JvW9pv/WWyyW9vrqPRESkcRoKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEhNQ0FERGqNnWgeDoemvhhLj0ee5wBAdxkNh0Os12tkWUblR6MR0jRFr9ej8845+lTncDg0HZcfDAZYrVb06cXBYID9/X36VGe/38f+/j6VteaDIKivh+FPNLP5MAxN+TiO6y4sht8H7MnULMvQarXoCoJer2fqyOn3+yjLku4yGo1GaLfb6Ha7dD6KIvrZHQ6HqKrKdDp/s9nQ99O/tpZetF3a+3Ec02sDd+0I7PpRFCEMQ9P61LpvfcWfWy6X9NF6fxyczcdxjKqq6Hyr1cJqtTId9b+5ucHNzQ2V
z/McNzc3dM3FcrnE9fU1/SCtVitcX1/TD5Jfn3mQgiCo12f7Xyx555zp3odhaMrHcWzKp2mKLMvofFmWiOOYzgMw7c0wDBGGIZ1PksT0rLTbbdze3tL5breLm5sbuuai3+/j+vqaHrJ+b7IfiCx7/829uSt731pDsV6vTXuhiZqLxoaCZSP6QjDLzQBA57MsMw2pXq+H6+truvvo5uYGV1dX9FDw94YdCj5vGQqXl5f0pyWfZ1nyQRCY9oJzzpSPogj9fp/ObzYbdDodOr/dbpEkCZ0PgsA0FOI4Ng2FVqtlela63a6p+8jfe3Yo+Dw7FHyeHQq3t7e4vLyk3/x2ae/fp/vIMhT8h+O3TT9TEBGRmoaCiIjUNBRERKSmoSAiIjUNBRERqWkoiIhITUNBRERqGgoiIlLTUBARkVpjJ5rH4zF9atF3H7E9IXmeo6oqtNttKj8YDLDZbOguo/F4XJ+CZkwmE8RxTJ/qHI/HAEDfH38v2VOdo9EIq9WKPu04HA7xwQcfmPLvv/8+lQ2CAKPRiL6Xzrn6+hlhGGI8HtOnw+M4rrunGJYOLODu9HyapnSXUbfbRRiGSNOUyvvuo06nQ+VHoxG63S59QnkymSBNU/r+j8djOOfo+z8ej7Hdbun7PxqNsNls6L3v9ya7l0ejkWkvW/a+c47uZ/PyPKfXf+e6j66urnB2dkZli6JAkiR03r/g8/mcyvs+ncViQeWTJMHl5SVdc9FqtXB+fk6/8XU6Hcznc/pB6vV6mM/n9IOU5znm87mpROzs7Izuf+n3+5jP53T/S57n9GsbhiG63S6dj+MY7Xabzvs3bDbvP3iweV+8yOa32y3CMKTzQRCgKApcXFxQ+SiK6hoWRpqmOD8/p4dIlmWYz+f0B6Jut4v5fE5/IPJ7md37Te7lIAhMe9k5R3+d3u3tLb3+O9d9tFqt6I2VJAmqqqLz/kFl88vlEsvl0pS/vb2l8/5rZYeCz1sK8W5vb+kHY71e4+bmhv605PMsS96XiLH30hfosfnNZmPKb7dbrNdrOm+9/iiK6v4mRqvVQhiGpr1ZFIV5bzadZ4eCz1sK8W5ubug3P//aNrn3LXvZ+qa92Wzo9YuiUPeRiIg0S0NBRERqGgoiIlLTUBARkZqGgoiI1DQURESkpqEgIiI1DQUREalpKIiISK2xE83T6ZQ+bdfpdBBFEVqtFpX3XTTdbpfK9/t9bDYbDAYDKj8ajdDr9UzdR61Wiz7VOZ1OEYYhfapzMpmgqir6dORkMjGddhyPx/S1W/NBEGAymdCnt51z9fUzwjDEZDKh700cx/Q+AO5qH3w/EaPdbiNNU7rLqNvtwjmHLMuofJ7nKMsSeZ5T+eFwiH6/T5+S9b1fbPfRdDpFHMf06zudThEEAX0637+27Ovre7Ca2PtBEJjz1u6jfr+Pjz76iMpGUUR3bFk0NhTOz89xeHhIZfv9PpIkwfHxMZVfr9eoqoruPirL0tR9BMDUfRSGIU5PT+khkiQJjo6O6Aep1Wrh6OiIfpDa7TaOjo7oBynLMhwdHdH9L5a8cw7tdpveC74cjs3HcYwoiuh8mqaoqorOZ1mGzWZD53u9HtrtNl6/fk3lB4NBvX8YRVGgKAqcn59T+aqqcH19TXcfOecwn8/pIRLHMV6/fk0PkTRNcXR0RA99v5fZve/3JrP3rXs5CALTXrYUBXpXV1f0+kmS0PfForGhsNls6Im6Xq8RBIEp/+av7LXsWp7dMD7PboCiKEwtqT7PsuSDIDDtBf8gsfntdmvK+0I5Nh9FkXkvR1Fk2gu+j4ld33L9D7GXV6uVeX12KPj12Q84D7H3LXvZ2k1UliW9flVV6j4SEZFmaSiIiEhNQ0FERGoaCiIiUtNQEBGRmoaCiIjUNBRERKSmoSAiIjUNBRERqTV2onlvbw9BEFBZ333E9r/47iO2/8V3H93c3FD50WiEfr9v6j7Ksow+iTibzRDHMX2qczabwTlHn2j2vVPsUf/p
dIqyLOmj/tb8bDajr905h9lsRp9gjaIIo9GIygK/6D5yjvs85LuP4jim8r77qN1uU3nffcT2eOV5ju12S/c3DYdDLJdLU/dRt9s1dR+laUqfzp/NZgjDkN4Ps9kMAOj90PRetuSdc3QHljcYDPDxxx9T2Xeu++jk5AQvXrygstbuo9FoZOo+Wq1Wpu6jsiyxWCzo7iPg7utlh4hzDq9evaIfpCiK8OLFC/pBSpIEL168oB8kn2cfjCRJ8MUXX1BrB0GAOI7pvRCGIcIwpPNRFKGqKjqfpimKoqDzWZZhMBjg5cuXVL7X69V9Ogxr95EvC2S7jzabjan7aLvdmrqPgiDA0dER/YEoDEN8+eWX9Aciv3ea2Ms+b9nLlrxzDt/73veorLdYLOi9mSQJfR8tGhsKZVnSF1wUhak11OfYvL8Wy/pN5q3X4/PsUCjL0tQU6fMsy2vrGzHZ/Ha7NeWt1+M/pVpeW2ve+tr639fU+r8JeXYobLdbU0Nwk3vtPt1H/voZ7L+JsdLPFEREpKahICIiNQ0FERGpaSiIiEhNQ0FERGoaCiIiUtNQEBGRmoaCiIjUNBRERKTW2Inm/f19hGFIZbMsQxzHdP9Lr9dDVVV0/4u1+2g4HNadMQzfF8Me9fd9MZbuoyiKTH0xQRDQp0BnsxmqqjJ1GVnye3t79MlO5xym0ymVBe5OKI/HY7rLKI5j9Pt9ujMmTVN0Oh26w8Z3H3U6HSrf6XQQhiHd45XnOcqypPueBoMBVqsVXVsxGo2Q5zndfTSZTNBut03dR3Ec03t5b28Pzrmd2cuWXq77dB8Nh0N8/etfp7LvXPfR69ev8fz5cyrb7/eRpilev35N5f0DcXZ2RuVnsxlWqxUuLi6ovO9JYruPiqIwdR9VVYWXL1/SD1IQBDg4OKAfpDAM8ezZM1Op3LNnz+gHIwxDPH36lFo7CAI45/Ds2TMq7z9IsHn/BsPm0zTFbDaj+2uyLMNwOMSXX35J5a3dR8PhEGEY4uTkhMpbu4/29/dxfX2Ny8tLKr/ZbDCfz+kPUNvtFkdHR/QQAYAXL16YqiKeP39u3vvshxBL3rqXnXP4zne+Q2W98/Nzev0kSfCDH/zAtD6jsaGw3W5NfSXW/Ju/MvmyLBu9nl3Ls02Ob+ZZlmvx37Gweev6/nW13Bu2QfbNa2kyb/mu7r57YZfyTV6/f22b2vuWvXMflvUt122hnymIiEhNQ0FERGoaCiIiUtNQEBGRmoaCiIjUNBRERKSmoSAiIjUNBRERqWko/BLW/yB2U/8BbZG3Qfvz7fhtuY+NnWh+9OgR4jimsu12G3Ec0/0v3W4XVVXR/S95nmOz2dD9L4PBAMPhkD66PxqN0O126dqKyWSCVqtFn0icTqeI49jU/+Kco0917u3tUbn75n0XEyMIAkynU7rLKAxDDIdDugMmiiL0+30kSULlkyRBp9NBu92m8q1WC2maotfrUfksy+Cco3u8er0eyrLEZDKh8v1+H6vViq5gGQ6H6Pf7dI/XeDxGlmV0bcV0OkWSJKa9HIbhzuxlSz4IArRaLdP6o9EI3/jGN6hsGIbvVvfR4eHhTnUfLZdLLBYLKv/o0SNT99FqtTJ1HxVFYeo+2m63pu4jAHj69KmpiuLJkyf0gxcEAR4/fkxnAdD5MAyx3W7x5MkTKh9FET744AN1H/0K1u6j999/H2dnZ/QHqM1mY+o+KsvS1H1UVZWpxwtAo3vZknfO4dvf/jb9YRcA5vM5vX6SJPjkk0/otVmNDQW2efDNrCX/5q9NrP+u5y39L9Y+F8u1WPP+urV33l7euhd2bf0m99tD7DUL67PSBP1MQUREahoKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEjtnRwK9+kgsfyepruPHqJbaZf6m3bp3v822qX9+S6/vrt0LUBz17MT3UdZliGKIlP3EYDGuo+Gw+GDdB+xR/1nsxmiKKJPMO7t7Zm7j4IgaKRf
JggCzGYzOu+cw2QyMXUfjUYjugPGd2w11X3UbreRJAndfdTpdMzdR9vttrHuo9Fo9CDdR2xli+8+suz9XdnLTXcfRVH07nUfsX001u6j8XiMqqpM3Uer1QoXFxdU/r333sPFxQXdffThhx/i+PjY3H3EPnjb7RbPnz+nH6SqqkzdR8BdNxHzIPlPJ59//jm1rn9A2XwYhiiKgu4+iuMY6/UaT58+pfJpmmJvbw8HBwdU/j7dR51OB4eHh1Te2n00nU5RFAXm8zmVv0/30Xw+x83NDZVfr9fm7qMvvvjC3H1k6f1i97I1b93L9+k+Ojs7o9dPkgTf//736bVZjf3ro/v0fjS5trU7pam1fb7J67nP72m6z6WJrM83udfuo+m9/xD7s6n8Lr6+Te79JjV1Pe/kzxRERKQZGgoiIlLTUBARkZqGgoiI1DQURESkpqEgIiI1DQUREalpKIiISK2xodB0H4p1feV/dfbNX9m1rXnWQ+Qtmt6b1j9j19bfpfxD7GXL+k1757qP9vf36V6O+3QfVVWF4XBI5a3dR4PBoPHuozRN6aP70+nU1H00m80a63/x67Nr37f7iN3wvvsoDEMqH0UR+v1+Y91HrVYLaZrW/VxfxXcf9ft9Kt/r9VCWJcbjMZW3dh8Nh0PkeW7qPmq32412HzXZ42XZy359lrqP/imvXr3C8+fPqay1+8gX4Vm6j5bLJRaLBZVvuvtos9ng5cuX9BApyxIHBweNdh+xfSvWfBAE2G63ePz4MZV3zpm6j6Iowmq1onu2HqL7KMsyHB0dUXlr99FkMkFRFDg/P6fy9+k+Ojs7oz9ArddrHB4e0kPE2n203W7x7NmzRnq8PMtetuSb7j6K4/jd6j56lzXdFfMQdvGa5Ndjl/bCLl2L1bt87RYaCiIiUtNQEBGRmoaCiIjUNBRERKSmoSAiIjUNBRERqWkoiIhITUNBRERqjZ1ods7BOW7mOOcQBIEpX1WVKW+9nl3MW6ol7pNnWfI+2+ReaHr9Jq/Hd+80uX6Te7Pp9S3ZN69nF/a+Zd2HWp/R2FDwnSUM333E9sX43GAwoPLW7qPhcFh3xjDG4zGyLDN1H8VxbO5/sXQfAfwJzOl0aqoRsOSDIDDlffcRy3cfsV1JvvuI7Yzx3UdpmlL5druNJEmQZRmV991HbBVCr9fDdrule7+s3Ue+x8vSfdRqtUzdR1EUmXq/fFUKm6+qamf2PrtvvMFggK997WtU9p3rPjo8PHxnu48ePXqExWJBdx+tViucnJzQD15RFKbuo+12a+o+AoAnT56YPi2x3UTWvC8ns3QflWVp6j5ar9fqPvoV9vf3cXV1haurKyq/XC5N3UebzcbcffTixQt6iFRVZeo+8ntzF/a+cw7f+c536LUBYD6f0+vHcYxPPvnEtD5DP1MQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEhNQ0FERGoaCiIiUtNQEBGRWmMnmsMwRBiGdNY5Z8q/+St7LbuWZ09pWtYG7k5SRlFkqpaIoog6Beq7WdjrCYKg0b0QRVHje82at1yPX9uyflVVpvX9PWLXt+T9+uxpe/+1sieO/fWw6/v7ya7f5N73vVkW1r3fhMaGwmQyoV+YLMsQxzHa7TaV7/V6qKqK7krq9/vYbDZ0V9JwOESv16NrK3z/C3vUfzKZIAxD+qi/v5fsm/xkMkFZlvT9H4/HKIqCfjDG4zFd0WHN++4jy5vMeDym700cx+j3+/TDmqYpOp0O/QC2222kaUp33nQ6HYRhSHcl5XmOsizprqTBYIDVakXXVoxGI2RZRvd+TSYTJEli6v0KgoB+ff1ry76+lr1szd9nLydJQmW9fr+Pjz76iMq+c91HR0dH72z3kc+y3UebzcbUfbTdbk3dRwBM3UfOOTx9+tT0aYztGnpzfYZvAWXzvuGSzUdRhKIo1H30K1i7j9brtan7qCxLU/dRVVWm7qMgCEzdR2EY7szed87hu9/9Lr02
cNd9xK4fxzF++MMfmtZn6GcKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEhNQ0FERGoaCiIiUmvsRHMURYjjmMrGcWzOv/krey2W9e+TZ08cR1FkOv7u8+wpUJ9nToEGQVDn2aP+lrzvlmHvpe+6aXLvNJ237B3fM2RZPwgC0/pJkpjz1r3GniD2ebZmxOcttSdN7332Xt6n+8iyFyz30SKoyFfz008/pRetqgrL5ZLeWP7mWWocAJiOvlu6g3xZnWWjW7qG7pNn76Xyfz1f0GfJO+dMe7PJvXyfvW/Zyw+x93dlLzxEPssyU5nlarWiK0OCIECWZfWeYHz22WdfmWnkO4UgCOhyOxERuWMpU2yKfqYgIiI1DQUREalpKIiISE1DQUREahoKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGp0d1HIiLym0/fKYiISE1DQUREahoKIiJS01AQEZGahoKIiNQ0FEREpKahICIiNQ0FERGpaSiIiEjt/weX0LSFrx76+gAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "init_obs_np_transposed = init_obs.transpose(0, 1, 2)\n",
    "\n",
    "\n",
    "plt.imshow(init_obs_np_transposed)\n",
    "plt.axis('off')  \n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'random_length': False,\n",
       " 'actions': <enum 'Actions'>,\n",
       " 'action_space': Discrete(7),\n",
       " 'agent_view_size': 3,\n",
       " 'observation_space': Dict(image:Box(0, 255, (7, 7, 3), uint8)),\n",
       " 'reward_range': (0, 1),\n",
       " 'window': <gym_minigrid.window.Window at 0x7fad707e8c40>,\n",
       " 'width': 31,\n",
       " 'height': 31,\n",
       " 'max_steps': 845,\n",
       " 'see_through_walls': False,\n",
       " 'agent_pos': (6, 15),\n",
       " 'agent_dir': 0,\n",
       " 'np_random': RandomState(MT19937) at 0x7FAC8BE02640,\n",
       " 'grid': <gym_minigrid.minigrid.Grid at 0x7fac8be101f0>,\n",
       " 'success_pos': (29, 16),\n",
       " 'failure_pos': (29, 14),\n",
       " 'mission': 'go to the matching object at the end of the hallway',\n",
       " 'carrying': None,\n",
       " 'step_count': 0,\n",
       " 'spec': EnvSpec(MiniGrid-MemoryS13-v0)}"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env._env.env.env.env.__dict__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([22, 15])"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np_random = np.random.Generator = (\n",
    "        np.random.default_rng()\n",
    "    )\n",
    "\n",
    "def _rand_int(low: int, high: int) -> int:\n",
    "    \"\"\"\n",
    "    Generate random integer in [low,high[\n",
    "    \"\"\"\n",
    "\n",
    "    return np_random.integers(low, high)\n",
    "\n",
    "random_length = False\n",
    "height = 31\n",
    "width = 31\n",
    "\n",
    "if random_length:\n",
    "    hallway_end = _rand_int(4, width - 2)\n",
    "else:\n",
    "    hallway_end = width - 3\n",
    "agent_pos = np.array((_rand_int(1, hallway_end + 1), height // 2))\n",
    "agent_pos\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Moviepy - Building video /opt/Memory-RL-Codebase/eval/Minigrid_Memory/GTRXL/GTRXL_2024_09_29-00_10_37_seed=45_reward=0.0.mp4.\n",
      "Moviepy - Writing video /opt/Memory-RL-Codebase/eval/Minigrid_Memory/GTRXL/GTRXL_2024_09_29-00_10_37_seed=45_reward=0.0.mp4\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                            \r"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Moviepy - Done !\n",
      "Moviepy - video ready /opt/Memory-RL-Codebase/eval/Minigrid_Memory/GTRXL/GTRXL_2024_09_29-00_10_37_seed=45_reward=0.0.mp4\n"
     ]
    }
   ],
   "source": [
    "desired_resolution = (945, 540)\n",
    "original_aspect_ratio = 112 / 64\n",
    "width = int(desired_resolution[0] * original_aspect_ratio)\n",
    "height = desired_resolution[1]\n",
    "\n",
    "observations = [np.squeeze(o) for o in frames]\n",
    "\n",
    "clip = ImageSequenceClip(observations, fps=2)\n",
    "clip = clip.resize(width=width, height=height)\n",
    "\n",
    "\n",
    "run_name = checkpoint_path.split('/')[-1].strip('.pt')\n",
    "curr_seed = eval_seeds[i]\n",
    "curr_reward = float(info['reward'])\n",
    "clip.write_videofile(videos_dir + f\"/GTRXL_{run_name}_seed={curr_seed}_reward={curr_reward:0.2}.mp4\", fps=2)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/opt/Memory-RL-Codebase/eval/Minigrid_Memory/GTRXL/GTRXL_2024_09_29-00_10_37_seed=123_reward=0.98.mp4'"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "curr_reward = info['reward']\n",
    "videos_dir + f\"/GTRXL_{run_name}_seed={curr_seed}_reward={curr_reward:0.2}.mp4\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "t:   7%|▋         | 7/97 [02:13<00:29,  3.07it/s, now=None]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Moviepy - Building video /opt/Memory-RL-Codebase/eval/Minigrid_Memory/DTQN/GTRXL_eval_0.mp4.\n",
      "Moviepy - Writing video /opt/Memory-RL-Codebase/eval/Minigrid_Memory/DTQN/GTRXL_eval_0.mp4\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "t:   7%|▋         | 7/97 [02:53<00:29,  3.07it/s, now=None]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Moviepy - Done !\n",
      "Moviepy - video ready /opt/Memory-RL-Codebase/eval/Minigrid_Memory/DTQN/GTRXL_eval_0.mp4\n"
     ]
    }
   ],
   "source": [
    "desired_resolution = (945, 540)\n",
    "original_aspect_ratio = 112 / 64\n",
    "width = int(desired_resolution[0] * original_aspect_ratio)\n",
    "height = desired_resolution[1]\n",
    "\n",
    "observations = [np.squeeze(o) for o in frames]\n",
    "\n",
    "clip = ImageSequenceClip(observations, fps=2)\n",
    "clip = clip.resize(width=width, height=height)\n",
    "\n",
    "clip.write_videofile(videos_dir + f\"/GTRXL_eval_{i}.mp4\", fps=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda:1 cuda:1 cuda:1\n"
     ]
    }
   ],
   "source": [
    "print(memory.device, memory_mask.device, memory_indices.device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'update_step': 26,\n",
       " 'reward_mean': 0.5638745562130177,\n",
       " 'reward_std': 0.4801215678308591,\n",
       " 'length_mean': 26.55,\n",
       " 'length_std': 22.75933874259092,\n",
       " 'model_state_dict': OrderedDict([('conv1.weight',\n",
       "               tensor([[[[-0.1030,  0.1816, -0.0085,  ..., -0.2673, -0.0289,  0.0434],\n",
       "                         [-0.0463,  0.0559, -0.1273,  ..., -0.0939,  0.0140, -0.0055],\n",
       "                         [-0.1798,  0.0932, -0.0120,  ..., -0.2887,  0.0191, -0.2366],\n",
       "                         ...,\n",
       "                         [ 0.0284, -0.1768,  0.1065,  ..., -0.1002, -0.0793, -0.0806],\n",
       "                         [ 0.0165, -0.0451,  0.1857,  ...,  0.1559, -0.1615, -0.1717],\n",
       "                         [-0.0352, -0.0589,  0.0175,  ..., -0.0666, -0.0857, -0.0859]],\n",
       "               \n",
       "                        [[-0.0376,  0.0336, -0.1207,  ...,  0.0291,  0.0863, -0.0077],\n",
       "                         [-0.2151, -0.0857, -0.1363,  ...,  0.0156, -0.1348, -0.1234],\n",
       "                         [ 0.0077, -0.0359, -0.0547,  ...,  0.0794,  0.1162,  0.0082],\n",
       "                         ...,\n",
       "                         [ 0.1282,  0.0962,  0.0126,  ..., -0.0686, -0.1002,  0.0303],\n",
       "                         [-0.0182,  0.0039, -0.2335,  ..., -0.1667, -0.0787, -0.0088],\n",
       "                         [ 0.1475,  0.0231,  0.1384,  ..., -0.0896, -0.1658, -0.0255]],\n",
       "               \n",
       "                        [[-0.0845, -0.0851, -0.1135,  ..., -0.0371,  0.0707,  0.0653],\n",
       "                         [-0.0636, -0.2064,  0.0950,  ..., -0.0292,  0.0792,  0.0590],\n",
       "                         [-0.0071, -0.1271, -0.0796,  ..., -0.1635, -0.1634, -0.0440],\n",
       "                         ...,\n",
       "                         [ 0.0358,  0.0723,  0.0291,  ...,  0.0329,  0.0329,  0.1627],\n",
       "                         [ 0.0975, -0.0478, -0.0890,  ..., -0.1285, -0.0836, -0.0160],\n",
       "                         [ 0.1666, -0.0044,  0.0569,  ..., -0.1539,  0.0200, -0.0019]]],\n",
       "               \n",
       "               \n",
       "                       [[[-0.1798, -0.1748, -0.0771,  ...,  0.1399,  0.2000,  0.2504],\n",
       "                         [ 0.2708, -0.0552, -0.0093,  ..., -0.0690,  0.0356, -0.0164],\n",
       "                         [-0.0858,  0.1052, -0.0584,  ...,  0.0421,  0.0290,  0.1050],\n",
       "                         ...,\n",
       "                         [ 0.0604, -0.0646,  0.0562,  ..., -0.1232,  0.0328,  0.0905],\n",
       "                         [ 0.0467, -0.0761,  0.0128,  ...,  0.0190,  0.0990, -0.0516],\n",
       "                         [-0.0135, -0.0381, -0.0857,  ...,  0.0068,  0.1284, -0.0425]],\n",
       "               \n",
       "                        [[ 0.0273, -0.0714,  0.0367,  ...,  0.0617, -0.0508, -0.1068],\n",
       "                         [-0.1313,  0.0923, -0.2658,  ..., -0.0186, -0.0746,  0.0466],\n",
       "                         [-0.0960, -0.0679, -0.0022,  ...,  0.1422, -0.0706,  0.0220],\n",
       "                         ...,\n",
       "                         [ 0.0473, -0.1026,  0.0933,  ..., -0.2839,  0.0730,  0.1225],\n",
       "                         [ 0.2417,  0.0936, -0.0146,  ...,  0.0703,  0.1119, -0.0497],\n",
       "                         [ 0.1048,  0.0407, -0.0319,  ...,  0.0249,  0.0416, -0.0223]],\n",
       "               \n",
       "                        [[ 0.0394,  0.0337,  0.2558,  ..., -0.0775,  0.0749, -0.1275],\n",
       "                         [-0.0715, -0.0334,  0.0692,  ..., -0.0676,  0.1093,  0.0475],\n",
       "                         [-0.2536,  0.0890,  0.0061,  ...,  0.1656,  0.1057, -0.0822],\n",
       "                         ...,\n",
       "                         [ 0.0455, -0.0448, -0.0281,  ...,  0.1600,  0.0212,  0.0913],\n",
       "                         [-0.1390, -0.0737, -0.0326,  ...,  0.1436, -0.0312, -0.1172],\n",
       "                         [-0.0235,  0.0271, -0.0217,  ..., -0.0668, -0.1330, -0.0454]]],\n",
       "               \n",
       "               \n",
       "                       [[[ 0.0417,  0.0076, -0.0812,  ...,  0.0566,  0.1288, -0.0421],\n",
       "                         [-0.0762,  0.1253, -0.1127,  ..., -0.1247, -0.0392, -0.1893],\n",
       "                         [ 0.0579,  0.0717,  0.0329,  ..., -0.1271,  0.0281, -0.0513],\n",
       "                         ...,\n",
       "                         [-0.0577, -0.1361,  0.0608,  ...,  0.0350, -0.1253, -0.1377],\n",
       "                         [ 0.1031, -0.0573, -0.0592,  ..., -0.1873,  0.0141,  0.0391],\n",
       "                         [ 0.1364, -0.1435, -0.0543,  ...,  0.0031, -0.1035,  0.1800]],\n",
       "               \n",
       "                        [[-0.0177,  0.0051,  0.1291,  ...,  0.2471,  0.1172,  0.1059],\n",
       "                         [ 0.1578,  0.1129,  0.0910,  ...,  0.0570, -0.1503, -0.0136],\n",
       "                         [ 0.0893,  0.0581,  0.0153,  ..., -0.0063, -0.1170, -0.0941],\n",
       "                         ...,\n",
       "                         [ 0.0506,  0.0546, -0.0449,  ..., -0.0238,  0.0197,  0.0377],\n",
       "                         [ 0.0504, -0.0119, -0.0333,  ...,  0.0866,  0.0807, -0.1358],\n",
       "                         [-0.0470,  0.0541, -0.0731,  ...,  0.0451, -0.0496, -0.1772]],\n",
       "               \n",
       "                        [[ 0.0451,  0.0426,  0.0236,  ..., -0.0397,  0.0006, -0.0802],\n",
       "                         [ 0.1692, -0.0161, -0.0229,  ...,  0.0125, -0.0233,  0.0876],\n",
       "                         [-0.0801,  0.0670, -0.0601,  ..., -0.0062, -0.0237,  0.3113],\n",
       "                         ...,\n",
       "                         [-0.1142,  0.0303,  0.2881,  ..., -0.0432,  0.1201, -0.0364],\n",
       "                         [ 0.1021,  0.1163,  0.0348,  ..., -0.0807,  0.0478,  0.2037],\n",
       "                         [-0.1086,  0.1288, -0.1158,  ...,  0.0120, -0.1154, -0.1304]]],\n",
       "               \n",
       "               \n",
       "                       ...,\n",
       "               \n",
       "               \n",
       "                       [[[ 0.0489, -0.0707, -0.1285,  ...,  0.2418, -0.0505,  0.0285],\n",
       "                         [-0.0160, -0.0012, -0.0849,  ..., -0.0277,  0.1679, -0.0176],\n",
       "                         [ 0.0798,  0.1718, -0.0036,  ..., -0.0531,  0.0862, -0.0810],\n",
       "                         ...,\n",
       "                         [-0.0321,  0.0124, -0.1826,  ..., -0.0857,  0.0612, -0.0151],\n",
       "                         [ 0.0088, -0.0500,  0.0588,  ..., -0.0362,  0.0456, -0.0397],\n",
       "                         [-0.0801, -0.1370,  0.1078,  ...,  0.0834, -0.1561, -0.0859]],\n",
       "               \n",
       "                        [[ 0.2081,  0.0139, -0.0042,  ...,  0.0421,  0.1044, -0.1129],\n",
       "                         [ 0.0829,  0.1095,  0.0870,  ..., -0.0751,  0.2007,  0.1095],\n",
       "                         [ 0.0174,  0.0566,  0.2276,  ...,  0.0186, -0.0254,  0.0158],\n",
       "                         ...,\n",
       "                         [ 0.1823,  0.0590, -0.1064,  ...,  0.0396, -0.0559, -0.0746],\n",
       "                         [ 0.0394,  0.1219,  0.0895,  ...,  0.0920,  0.0412, -0.0207],\n",
       "                         [ 0.2159,  0.1835,  0.0047,  ...,  0.0339,  0.1117,  0.0600]],\n",
       "               \n",
       "                        [[-0.0342,  0.0680, -0.1168,  ..., -0.0144,  0.1969,  0.0155],\n",
       "                         [-0.0027,  0.1520,  0.0624,  ...,  0.1853,  0.0328, -0.0218],\n",
       "                         [ 0.1488,  0.0661, -0.0401,  ...,  0.1295, -0.0306, -0.1738],\n",
       "                         ...,\n",
       "                         [-0.0907, -0.1351,  0.1236,  ..., -0.0601, -0.1251, -0.1526],\n",
       "                         [ 0.0079,  0.2151, -0.1328,  ..., -0.0261, -0.0699,  0.0809],\n",
       "                         [ 0.0100, -0.1390, -0.0237,  ..., -0.0107, -0.0888, -0.1185]]],\n",
       "               \n",
       "               \n",
       "                       [[[-0.2238,  0.0663, -0.0553,  ...,  0.1677,  0.0405,  0.1334],\n",
       "                         [-0.1248,  0.0175,  0.0110,  ..., -0.1060, -0.0053,  0.2138],\n",
       "                         [-0.1424, -0.0217,  0.1968,  ...,  0.0269, -0.0338,  0.0909],\n",
       "                         ...,\n",
       "                         [ 0.0655,  0.0883,  0.1541,  ..., -0.1543,  0.0965,  0.0118],\n",
       "                         [-0.0834,  0.0266, -0.0379,  ..., -0.1661, -0.1638, -0.0984],\n",
       "                         [-0.1588, -0.0732, -0.0213,  ..., -0.1114, -0.0366, -0.1088]],\n",
       "               \n",
       "                        [[ 0.0922,  0.0844, -0.2748,  ..., -0.0394, -0.0649, -0.1026],\n",
       "                         [ 0.1368,  0.1053,  0.0882,  ...,  0.1803, -0.1917, -0.0060],\n",
       "                         [-0.0073, -0.1967, -0.1370,  ..., -0.0630, -0.0948, -0.0026],\n",
       "                         ...,\n",
       "                         [-0.0439,  0.2129, -0.1330,  ..., -0.0671, -0.0539, -0.0093],\n",
       "                         [-0.1091, -0.0959,  0.0918,  ..., -0.1709, -0.0801, -0.0817],\n",
       "                         [-0.0877, -0.0929, -0.0209,  ...,  0.1012, -0.0198,  0.1763]],\n",
       "               \n",
       "                        [[-0.1693,  0.0600, -0.0397,  ..., -0.0203,  0.0253, -0.0395],\n",
       "                         [-0.0401,  0.0189,  0.0578,  ..., -0.0548,  0.0471, -0.0919],\n",
       "                         [ 0.0114, -0.1153,  0.0233,  ..., -0.0752, -0.0190,  0.0443],\n",
       "                         ...,\n",
       "                         [ 0.0302,  0.0653,  0.0539,  ..., -0.1316,  0.1135,  0.0195],\n",
       "                         [-0.0257, -0.0734, -0.0326,  ...,  0.0797, -0.1483, -0.1787],\n",
       "                         [-0.0952, -0.1368, -0.0083,  ..., -0.1311,  0.1327, -0.0028]]],\n",
       "               \n",
       "               \n",
       "                       [[[-0.1398,  0.2464, -0.0117,  ...,  0.0494, -0.0512, -0.1523],\n",
       "                         [-0.0980, -0.0150,  0.0491,  ..., -0.2235, -0.1099, -0.0520],\n",
       "                         [ 0.0351,  0.0581,  0.0987,  ...,  0.0411, -0.0609, -0.0961],\n",
       "                         ...,\n",
       "                         [ 0.0168, -0.1151,  0.1397,  ..., -0.0747, -0.0161,  0.0183],\n",
       "                         [-0.0699,  0.0786, -0.1294,  ..., -0.0796,  0.0624, -0.2355],\n",
       "                         [-0.1085, -0.0542,  0.0677,  ..., -0.0388, -0.0704, -0.0252]],\n",
       "               \n",
       "                        [[ 0.1742, -0.0322,  0.0446,  ...,  0.0077,  0.0478,  0.1906],\n",
       "                         [-0.1243, -0.0817,  0.0578,  ...,  0.0520, -0.0869,  0.0407],\n",
       "                         [ 0.1290,  0.0705,  0.1489,  ..., -0.0977, -0.1171,  0.0190],\n",
       "                         ...,\n",
       "                         [ 0.0104, -0.0071,  0.0191,  ..., -0.0329,  0.0417,  0.1238],\n",
       "                         [ 0.0327, -0.0122, -0.0053,  ..., -0.0368, -0.1141, -0.0229],\n",
       "                         [-0.0442, -0.1011, -0.1207,  ..., -0.0849, -0.0054, -0.0072]],\n",
       "               \n",
       "                        [[-0.1090, -0.3142,  0.1783,  ..., -0.1286, -0.0612, -0.0338],\n",
       "                         [-0.2002, -0.1561, -0.1819,  ...,  0.0622,  0.0513, -0.0723],\n",
       "                         [-0.0331,  0.1348, -0.0462,  ...,  0.1328, -0.0016, -0.0147],\n",
       "                         ...,\n",
       "                         [-0.1686, -0.0095, -0.1689,  ..., -0.0052, -0.0436,  0.2119],\n",
       "                         [-0.0683,  0.0572,  0.0030,  ..., -0.0652, -0.0943,  0.1284],\n",
       "                         [ 0.0629,  0.0477, -0.0253,  ...,  0.0498, -0.1456, -0.1083]]]],\n",
       "                      device='cuda:0')),\n",
       "              ('conv1.bias',\n",
       "               tensor([-0.0337, -0.0644, -0.0318, -0.0569, -0.0927, -0.0723,  0.0176, -0.0660,\n",
       "                       -0.0233, -0.0469, -0.0074, -0.0661,  0.0286,  0.0312, -0.0494,  0.0135,\n",
       "                        0.0606,  0.0227,  0.0336,  0.0433,  0.0669,  0.0460,  0.0728, -0.0536,\n",
       "                       -0.0588,  0.0215, -0.0755, -0.0376,  0.0323, -0.0569,  0.0525,  0.0373],\n",
       "                      device='cuda:0')),\n",
       "              ('conv2.weight',\n",
       "               tensor([[[[-2.8355e-02,  1.5114e-02, -1.1207e-01, -7.3594e-02],\n",
       "                         [-4.8132e-02,  1.5903e-02,  1.9384e-02,  2.8060e-02],\n",
       "                         [-4.1076e-02, -5.1976e-02,  1.0334e-01, -8.4486e-02],\n",
       "                         [ 2.8774e-02,  8.7286e-02, -4.2861e-02,  7.3355e-02]],\n",
       "               \n",
       "                        [[ 9.0555e-02, -6.7356e-03, -4.6698e-02,  1.6398e-01],\n",
       "                         [-9.6674e-02, -1.5999e-01,  7.3323e-02,  8.5899e-02],\n",
       "                         [ 2.3537e-02, -5.8850e-02, -5.9235e-02, -1.7907e-01],\n",
       "                         [-1.1156e-01,  1.3365e-01, -7.4774e-03,  5.4081e-02]],\n",
       "               \n",
       "                        [[-5.1644e-02, -1.8687e-02, -4.9776e-02, -3.0554e-02],\n",
       "                         [-9.8148e-02,  6.6204e-02, -1.5090e-02,  6.1360e-02],\n",
       "                         [-2.9549e-02, -3.4677e-02,  7.1593e-02,  1.6650e-02],\n",
       "                         [ 8.6887e-02, -1.2235e-02,  8.9428e-02,  5.7109e-02]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-6.9709e-02, -2.8460e-02,  1.1946e-01, -6.8952e-02],\n",
       "                         [-1.7618e-02, -9.7680e-02,  6.4974e-02,  6.7507e-02],\n",
       "                         [ 1.4768e-02,  1.0855e-03,  1.6402e-03, -6.1430e-02],\n",
       "                         [-1.7975e-01,  5.5856e-02,  8.6438e-03, -3.7317e-02]],\n",
       "               \n",
       "                        [[ 1.0095e-01, -4.6966e-02,  1.1848e-01, -5.8220e-02],\n",
       "                         [ 2.0593e-02, -8.8952e-02, -6.1793e-02,  1.8969e-02],\n",
       "                         [-2.4639e-02,  1.8106e-02, -1.3091e-01,  7.2168e-03],\n",
       "                         [-9.0159e-03, -3.7833e-02, -1.2645e-02, -3.1186e-03]],\n",
       "               \n",
       "                        [[-7.2195e-02,  2.4170e-02,  5.0964e-03, -2.3764e-02],\n",
       "                         [ 4.9572e-02, -1.2573e-01, -2.6449e-02, -3.1689e-02],\n",
       "                         [ 1.1434e-02,  1.4838e-02, -2.2065e-02, -7.8483e-02],\n",
       "                         [ 4.6302e-02,  3.7550e-02, -7.3332e-02, -4.4166e-02]]],\n",
       "               \n",
       "               \n",
       "                       [[[-1.0131e-01, -4.1065e-02, -6.5976e-02, -2.2955e-02],\n",
       "                         [-1.2984e-01, -2.3372e-03, -4.4732e-02, -9.6575e-02],\n",
       "                         [-1.1689e-01,  4.6262e-02, -7.4231e-02,  4.0226e-02],\n",
       "                         [-5.8720e-02, -6.6346e-02, -1.4292e-02,  2.1881e-02]],\n",
       "               \n",
       "                        [[-3.2088e-02,  4.7944e-02, -4.5784e-02, -1.3018e-01],\n",
       "                         [ 2.1668e-02, -5.1684e-02, -1.0988e-01, -1.2130e-01],\n",
       "                         [-2.8551e-02,  1.0546e-01, -1.0119e-01, -5.4390e-02],\n",
       "                         [-7.1229e-02, -1.4711e-01,  2.1566e-02, -7.2669e-02]],\n",
       "               \n",
       "                        [[ 7.4136e-02, -4.6205e-02,  9.0398e-03, -8.4802e-03],\n",
       "                         [ 1.0492e-01,  7.1850e-02,  6.2272e-02, -2.7634e-02],\n",
       "                         [-5.7775e-02,  1.9925e-02, -1.2878e-02,  1.6113e-02],\n",
       "                         [-3.4428e-03,  2.3066e-02, -2.1262e-02,  8.2213e-02]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-2.8908e-03,  2.6200e-02,  5.6202e-02,  1.4728e-02],\n",
       "                         [ 3.6545e-02, -1.7412e-02,  6.2859e-02,  9.8367e-02],\n",
       "                         [ 4.4857e-02,  2.3507e-02, -6.7329e-02,  8.5262e-03],\n",
       "                         [-2.3218e-03, -1.6280e-02,  1.9268e-02, -3.3805e-02]],\n",
       "               \n",
       "                        [[ 2.8901e-02, -3.3576e-02,  1.1272e-01, -6.5658e-02],\n",
       "                         [ 9.7370e-02,  8.7903e-02,  1.4011e-01, -7.3992e-02],\n",
       "                         [-1.3010e-02,  9.1001e-02, -4.2895e-02,  4.8036e-02],\n",
       "                         [ 3.8677e-02,  4.2147e-02, -2.5946e-02,  2.2028e-02]],\n",
       "               \n",
       "                        [[ 2.7153e-02, -1.6031e-02, -2.0765e-02,  4.9804e-02],\n",
       "                         [ 1.0813e-01,  7.7308e-02, -4.6036e-02, -6.8371e-02],\n",
       "                         [-9.4346e-02,  1.3447e-02,  4.5810e-02, -1.0642e-01],\n",
       "                         [-1.8213e-01,  1.2782e-02, -1.4215e-01,  7.9141e-02]]],\n",
       "               \n",
       "               \n",
       "                       [[[ 7.4527e-02,  2.5897e-02,  2.6738e-02, -1.1083e-01],\n",
       "                         [ 5.4087e-02,  1.6588e-02, -7.2409e-02, -1.1798e-02],\n",
       "                         [-4.4851e-02, -9.3891e-03,  7.0292e-02, -1.8008e-02],\n",
       "                         [ 1.5368e-02, -8.1679e-03,  8.7940e-02, -1.6714e-01]],\n",
       "               \n",
       "                        [[ 6.0062e-02,  1.0963e-04,  9.4351e-03,  4.2298e-02],\n",
       "                         [ 3.9223e-02, -4.8699e-03, -6.8419e-02,  1.0467e-02],\n",
       "                         [-8.4078e-02,  4.0355e-02,  1.6541e-03, -7.6280e-02],\n",
       "                         [ 6.5760e-02,  6.6998e-03,  8.9974e-02, -1.1237e-01]],\n",
       "               \n",
       "                        [[ 1.1636e-01,  1.3244e-01, -2.4728e-02,  1.3012e-02],\n",
       "                         [ 2.4170e-02, -5.4360e-02,  1.2687e-01,  1.5468e-02],\n",
       "                         [-2.0782e-02,  1.7590e-02,  3.9919e-02,  3.9347e-03],\n",
       "                         [ 4.3840e-02, -4.3606e-02, -1.1180e-01, -4.6810e-02]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[ 5.0761e-02, -1.7982e-02,  6.0619e-02,  1.0923e-01],\n",
       "                         [-1.4183e-01, -5.3023e-02,  6.7810e-02,  7.5303e-02],\n",
       "                         [ 3.6312e-02, -3.4286e-02,  3.0829e-02, -8.9743e-02],\n",
       "                         [-6.6515e-02,  3.6128e-02,  2.0191e-03, -5.5726e-02]],\n",
       "               \n",
       "                        [[ 4.6428e-02, -1.2951e-01, -4.9731e-02, -5.4111e-04],\n",
       "                         [-1.4047e-01,  3.2706e-02, -1.4212e-01, -6.4760e-02],\n",
       "                         [-1.1329e-01,  1.5482e-02, -2.9563e-02, -1.1206e-01],\n",
       "                         [-8.6744e-02,  6.3069e-02,  4.0474e-02,  8.6882e-02]],\n",
       "               \n",
       "                        [[ 7.1450e-02,  3.4104e-02,  1.2687e-01, -3.1847e-02],\n",
       "                         [ 3.1338e-02,  3.4749e-02, -5.8284e-02, -1.5590e-01],\n",
       "                         [-2.7358e-02, -5.9885e-02, -3.6864e-02, -1.8738e-01],\n",
       "                         [ 1.4093e-01, -2.8408e-03,  1.5943e-03,  4.6650e-02]]],\n",
       "               \n",
       "               \n",
       "                       ...,\n",
       "               \n",
       "               \n",
       "                       [[[-1.8575e-02,  4.3635e-02,  2.6154e-02, -6.5353e-02],\n",
       "                         [ 5.9794e-02, -2.4970e-02, -3.4280e-02,  1.8625e-03],\n",
       "                         [ 3.2270e-02,  4.3552e-02,  4.4607e-02,  6.7870e-02],\n",
       "                         [ 1.1163e-01, -3.8083e-02,  5.1830e-03,  2.7837e-02]],\n",
       "               \n",
       "                        [[ 3.3298e-02, -9.9624e-02,  1.3857e-02,  2.9217e-02],\n",
       "                         [-3.5334e-02, -8.8811e-02, -3.3878e-02, -8.2005e-02],\n",
       "                         [-2.8351e-02, -9.1782e-02, -2.3281e-02,  3.4104e-02],\n",
       "                         [ 8.5224e-02,  3.8067e-03, -1.0662e-01,  2.9747e-02]],\n",
       "               \n",
       "                        [[-4.7163e-02, -3.1249e-02, -2.3444e-02,  2.1661e-03],\n",
       "                         [-1.8970e-03, -4.9290e-02, -3.8793e-02, -4.2231e-02],\n",
       "                         [-6.3660e-03,  6.9501e-04,  4.0733e-03, -1.0799e-01],\n",
       "                         [-1.6069e-02, -6.3591e-02, -4.0325e-02, -8.4963e-02]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[ 1.1278e-01,  1.0043e-01, -1.1950e-01,  1.9557e-03],\n",
       "                         [ 2.6866e-02, -4.8331e-02,  9.8909e-03,  2.1363e-02],\n",
       "                         [ 2.8101e-02, -4.6342e-02, -1.5973e-01, -5.1610e-02],\n",
       "                         [ 9.5840e-03,  2.1849e-02,  8.7096e-02, -7.6147e-02]],\n",
       "               \n",
       "                        [[ 1.5029e-02, -6.5097e-02,  7.0662e-02,  3.2989e-02],\n",
       "                         [-2.4756e-03, -7.1897e-02,  6.1624e-02,  6.6389e-02],\n",
       "                         [ 4.1043e-02, -2.5516e-02,  1.5068e-02, -4.4453e-02],\n",
       "                         [-2.7414e-02,  8.3398e-02,  9.1899e-03, -5.2079e-02]],\n",
       "               \n",
       "                        [[ 1.8746e-02, -1.1645e-01, -3.3041e-02,  5.6260e-02],\n",
       "                         [-4.9196e-02, -2.4284e-03, -2.3261e-03,  2.8175e-02],\n",
       "                         [ 4.8837e-02,  3.5774e-02, -1.9686e-02, -8.4421e-02],\n",
       "                         [ 1.8898e-02, -6.0040e-02,  2.2454e-02, -8.4292e-03]]],\n",
       "               \n",
       "               \n",
       "                       [[[-3.7345e-02, -9.9811e-02, -7.9408e-02, -3.8134e-03],\n",
       "                         [ 9.0868e-03, -7.6678e-03,  1.9297e-04,  4.4155e-03],\n",
       "                         [-7.8906e-03,  3.1966e-02,  6.3454e-02,  7.1939e-02],\n",
       "                         [-7.3239e-02, -6.1034e-02,  1.5877e-02,  8.3946e-02]],\n",
       "               \n",
       "                        [[ 2.4992e-02, -1.1378e-01,  1.1938e-01, -5.1244e-02],\n",
       "                         [ 1.4412e-01, -2.0504e-02, -2.6163e-02,  6.7927e-02],\n",
       "                         [ 1.3495e-02,  1.0018e-01,  8.6094e-03,  4.4661e-02],\n",
       "                         [-2.8484e-02,  4.7296e-02, -4.8001e-02, -3.4375e-02]],\n",
       "               \n",
       "                        [[-2.3020e-02,  8.8681e-02, -1.4896e-01, -2.0687e-02],\n",
       "                         [ 4.6598e-02,  7.9913e-03,  1.7125e-02, -7.9225e-02],\n",
       "                         [ 2.5587e-02, -6.1928e-02, -6.0768e-02, -3.6407e-02],\n",
       "                         [ 9.5514e-02, -5.9290e-02, -6.8351e-02, -2.0666e-02]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[ 5.6886e-02, -4.0316e-02, -1.4723e-01, -1.0748e-01],\n",
       "                         [-4.1073e-02, -9.1886e-02,  2.9240e-03,  6.1096e-02],\n",
       "                         [-4.6383e-02, -2.0147e-02, -1.8721e-02, -5.7836e-03],\n",
       "                         [ 5.0190e-02, -1.7873e-02, -8.1281e-02,  5.0774e-02]],\n",
       "               \n",
       "                        [[ 3.2500e-02,  6.5973e-03, -2.0381e-02, -5.6201e-02],\n",
       "                         [-4.5590e-02, -1.9136e-02,  3.8400e-02,  6.7649e-02],\n",
       "                         [-7.9646e-02,  4.7375e-02,  2.6182e-03,  1.8110e-02],\n",
       "                         [ 8.3328e-03, -4.6558e-02,  9.1693e-02, -1.1597e-01]],\n",
       "               \n",
       "                        [[-1.7883e-02,  6.5449e-02, -8.8734e-02, -1.1599e-01],\n",
       "                         [-3.1211e-02, -2.6609e-02,  4.8774e-02,  3.4387e-02],\n",
       "                         [ 6.2407e-02,  8.7752e-03,  4.1070e-02,  7.2146e-02],\n",
       "                         [-5.5587e-02, -6.5677e-02, -8.2715e-02,  1.8951e-02]]],\n",
       "               \n",
       "               \n",
       "                       [[[-2.6354e-02, -1.3477e-02,  1.6074e-02, -5.0831e-02],\n",
       "                         [ 8.5651e-03,  1.2299e-02, -4.9471e-02,  4.7955e-02],\n",
       "                         [ 1.1288e-01,  9.7374e-02,  2.8401e-02, -4.6414e-02],\n",
       "                         [-4.5148e-02, -8.1960e-02,  1.9849e-02,  7.6048e-03]],\n",
       "               \n",
       "                        [[ 4.3373e-02,  5.5981e-02, -1.3661e-02, -7.4425e-02],\n",
       "                         [-3.2052e-02, -5.9672e-02,  1.0024e-01,  8.6221e-02],\n",
       "                         [ 1.4461e-01,  1.3099e-02,  2.9052e-02, -2.5541e-02],\n",
       "                         [ 5.5576e-02, -7.3380e-02,  5.5788e-02,  4.0066e-02]],\n",
       "               \n",
       "                        [[-4.9924e-02,  2.7419e-02,  9.3213e-02, -4.9591e-02],\n",
       "                         [-8.9468e-02, -8.4228e-03, -3.0626e-02, -2.6159e-02],\n",
       "                         [-2.4461e-02, -2.0376e-02,  7.9179e-02,  1.3270e-03],\n",
       "                         [ 2.7509e-02, -2.1788e-02,  8.3526e-02,  5.3258e-02]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[ 1.0744e-02,  5.5216e-02,  6.5713e-02,  6.9011e-02],\n",
       "                         [-9.0014e-02,  7.1095e-02, -5.9443e-02,  5.2992e-03],\n",
       "                         [-8.6710e-02,  4.9320e-02,  8.9853e-02,  8.9173e-02],\n",
       "                         [-1.0332e-02,  9.3802e-02, -1.3408e-01,  3.3231e-02]],\n",
       "               \n",
       "                        [[-9.3538e-03, -1.5706e-02, -1.1759e-02, -8.1942e-02],\n",
       "                         [ 1.0634e-01, -6.0052e-02,  2.4943e-02, -2.2518e-01],\n",
       "                         [-6.9473e-03, -5.0506e-02,  2.8475e-02, -1.7141e-02],\n",
       "                         [-7.9396e-02,  6.7909e-02,  9.5751e-02,  3.5187e-02]],\n",
       "               \n",
       "                        [[-1.2880e-01, -6.4922e-02, -8.9240e-02, -5.2907e-03],\n",
       "                         [ 5.1576e-02, -6.5294e-02,  3.3370e-02, -4.2961e-02],\n",
       "                         [ 5.7679e-04, -1.3431e-01, -1.3508e-02, -4.2492e-02],\n",
       "                         [ 9.3310e-02, -4.0335e-02, -6.4199e-02, -1.0902e-01]]]],\n",
       "                      device='cuda:0')),\n",
       "              ('conv2.bias',\n",
       "               tensor([-9.1998e-03,  3.9223e-02, -3.4318e-03, -2.7268e-03, -1.2995e-02,\n",
       "                        1.9056e-02,  2.3036e-02,  1.7714e-02, -6.8315e-03, -1.1337e-02,\n",
       "                        2.4957e-03, -3.7331e-02, -3.2944e-02,  5.0344e-02,  3.5672e-02,\n",
       "                       -1.7305e-02,  1.5173e-02, -3.7785e-02, -1.6190e-02, -3.7113e-02,\n",
       "                       -2.0398e-02, -8.3584e-03,  1.6131e-02,  2.6342e-02,  2.8681e-02,\n",
       "                       -2.3016e-02,  3.5739e-03, -2.4635e-02,  3.2166e-02, -4.5032e-02,\n",
       "                        2.9074e-02,  1.7690e-02,  3.6170e-02, -2.5891e-02, -5.1900e-02,\n",
       "                       -4.5675e-02, -3.8083e-02, -9.7133e-03,  1.3940e-02, -4.3141e-02,\n",
       "                        3.1660e-02,  2.9456e-02, -8.9575e-05, -2.3784e-02, -2.4061e-02,\n",
       "                       -3.4429e-02,  2.9625e-02,  1.7725e-02, -1.8560e-02, -3.7091e-02,\n",
       "                        3.9093e-02,  5.2603e-03, -9.9675e-03, -1.3009e-02, -1.0208e-02,\n",
       "                       -1.2589e-02, -2.8042e-02,  4.6032e-02, -3.4642e-02, -1.4101e-02,\n",
       "                       -3.0300e-02, -9.3979e-03, -3.1213e-03, -5.0488e-02], device='cuda:0')),\n",
       "              ('conv3.weight',\n",
       "               tensor([[[[-0.0612, -0.0711, -0.0595],\n",
       "                         [ 0.0044,  0.0299,  0.0043],\n",
       "                         [-0.0510, -0.0105, -0.0153]],\n",
       "               \n",
       "                        [[ 0.1310,  0.0350, -0.1303],\n",
       "                         [ 0.0416, -0.0698, -0.0456],\n",
       "                         [-0.0086,  0.0269, -0.0721]],\n",
       "               \n",
       "                        [[-0.0806,  0.0203,  0.0758],\n",
       "                         [-0.0756, -0.0828,  0.1220],\n",
       "                         [ 0.0155, -0.0201,  0.0346]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-0.0835, -0.1733,  0.0379],\n",
       "                         [ 0.1052, -0.0121, -0.0495],\n",
       "                         [ 0.0712, -0.0232,  0.1531]],\n",
       "               \n",
       "                        [[ 0.0115, -0.0044,  0.0359],\n",
       "                         [-0.0296, -0.0017,  0.0699],\n",
       "                         [-0.1097, -0.0086, -0.0207]],\n",
       "               \n",
       "                        [[ 0.0507, -0.0195, -0.0424],\n",
       "                         [-0.0306,  0.0541, -0.0617],\n",
       "                         [-0.0193, -0.0560, -0.0492]]],\n",
       "               \n",
       "               \n",
       "                       [[[ 0.0561, -0.0192,  0.0131],\n",
       "                         [-0.0510,  0.0598, -0.0789],\n",
       "                         [ 0.1128,  0.0494,  0.1336]],\n",
       "               \n",
       "                        [[ 0.0151,  0.0365, -0.0417],\n",
       "                         [ 0.0601, -0.0584,  0.0403],\n",
       "                         [-0.0148, -0.0162, -0.0059]],\n",
       "               \n",
       "                        [[-0.0842, -0.0819, -0.0669],\n",
       "                         [ 0.0407,  0.0354,  0.0572],\n",
       "                         [-0.0171, -0.0382,  0.1168]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-0.0327, -0.0063,  0.1063],\n",
       "                         [ 0.0095, -0.0317,  0.1006],\n",
       "                         [ 0.0203, -0.0163,  0.0602]],\n",
       "               \n",
       "                        [[ 0.0420, -0.1371,  0.0917],\n",
       "                         [ 0.0033,  0.0526,  0.0542],\n",
       "                         [ 0.0263, -0.0182, -0.0100]],\n",
       "               \n",
       "                        [[ 0.0936, -0.0013, -0.0328],\n",
       "                         [-0.0287, -0.1146, -0.1245],\n",
       "                         [-0.0647,  0.0209,  0.0203]]],\n",
       "               \n",
       "               \n",
       "                       [[[ 0.0969,  0.0057,  0.0257],\n",
       "                         [ 0.0338,  0.0005, -0.0413],\n",
       "                         [-0.0443,  0.0362, -0.0399]],\n",
       "               \n",
       "                        [[-0.0474, -0.0818, -0.1264],\n",
       "                         [-0.0863,  0.0068,  0.0981],\n",
       "                         [-0.0019, -0.0366, -0.0761]],\n",
       "               \n",
       "                        [[-0.0263,  0.1243, -0.0244],\n",
       "                         [-0.0202, -0.0802, -0.1082],\n",
       "                         [ 0.0164,  0.0036,  0.1195]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-0.0982, -0.0198,  0.0727],\n",
       "                         [-0.0858,  0.0083, -0.0421],\n",
       "                         [-0.0938,  0.1728, -0.0181]],\n",
       "               \n",
       "                        [[-0.0534, -0.0305, -0.0138],\n",
       "                         [ 0.0037, -0.0657,  0.0188],\n",
       "                         [-0.0917,  0.0578, -0.1126]],\n",
       "               \n",
       "                        [[-0.0732, -0.1214, -0.0338],\n",
       "                         [-0.0563,  0.0114,  0.0491],\n",
       "                         [ 0.0520,  0.0128,  0.0660]]],\n",
       "               \n",
       "               \n",
       "                       ...,\n",
       "               \n",
       "               \n",
       "                       [[[ 0.0379, -0.0319,  0.0485],\n",
       "                         [ 0.0736, -0.1053,  0.0331],\n",
       "                         [-0.0078, -0.0761,  0.0411]],\n",
       "               \n",
       "                        [[-0.0313, -0.0384,  0.0492],\n",
       "                         [-0.0196, -0.0033, -0.0008],\n",
       "                         [ 0.0115,  0.0026,  0.0484]],\n",
       "               \n",
       "                        [[-0.0491,  0.0900,  0.0477],\n",
       "                         [ 0.1081, -0.0608, -0.0619],\n",
       "                         [ 0.0482,  0.0534,  0.0045]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-0.0865, -0.0251, -0.0258],\n",
       "                         [-0.1007,  0.0677,  0.0826],\n",
       "                         [-0.1244, -0.0690, -0.0010]],\n",
       "               \n",
       "                        [[-0.0637, -0.0043, -0.0456],\n",
       "                         [ 0.0226,  0.0105,  0.0851],\n",
       "                         [ 0.0163,  0.0174,  0.0186]],\n",
       "               \n",
       "                        [[-0.1660, -0.0385,  0.0242],\n",
       "                         [-0.0210,  0.0253,  0.0386],\n",
       "                         [-0.0434,  0.0326,  0.0317]]],\n",
       "               \n",
       "               \n",
       "                       [[[-0.0749,  0.0754, -0.0038],\n",
       "                         [ 0.0216, -0.0303, -0.0114],\n",
       "                         [ 0.0236, -0.0030,  0.0654]],\n",
       "               \n",
       "                        [[ 0.0016,  0.0705,  0.0718],\n",
       "                         [-0.0705,  0.0288,  0.0144],\n",
       "                         [-0.0852, -0.1298,  0.0381]],\n",
       "               \n",
       "                        [[ 0.0692, -0.0633,  0.1270],\n",
       "                         [-0.0226,  0.0207, -0.0117],\n",
       "                         [ 0.0759, -0.0186,  0.0119]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[ 0.0699,  0.0206,  0.0845],\n",
       "                         [-0.0414, -0.0376, -0.0206],\n",
       "                         [-0.0177,  0.0283, -0.0415]],\n",
       "               \n",
       "                        [[ 0.0893,  0.0376,  0.0200],\n",
       "                         [ 0.0198,  0.0149,  0.0607],\n",
       "                         [ 0.0254, -0.0172, -0.0239]],\n",
       "               \n",
       "                        [[ 0.0980, -0.0155, -0.0502],\n",
       "                         [-0.0641, -0.0156, -0.0337],\n",
       "                         [-0.0977, -0.0350,  0.0897]]],\n",
       "               \n",
       "               \n",
       "                       [[[ 0.0024,  0.0157, -0.0657],\n",
       "                         [-0.0476, -0.1128, -0.0249],\n",
       "                         [ 0.0762, -0.0147,  0.0458]],\n",
       "               \n",
       "                        [[-0.0210, -0.1063,  0.0706],\n",
       "                         [-0.0446,  0.0301, -0.0295],\n",
       "                         [-0.0259,  0.0356,  0.0516]],\n",
       "               \n",
       "                        [[ 0.0020,  0.0187, -0.0121],\n",
       "                         [ 0.0883,  0.0508, -0.0930],\n",
       "                         [-0.0202, -0.0366,  0.0476]],\n",
       "               \n",
       "                        ...,\n",
       "               \n",
       "                        [[-0.0011,  0.0097,  0.0094],\n",
       "                         [ 0.0173, -0.0595, -0.1246],\n",
       "                         [ 0.0570, -0.0793, -0.0046]],\n",
       "               \n",
       "                        [[ 0.0499, -0.0433, -0.0464],\n",
       "                         [ 0.1144,  0.0207,  0.0307],\n",
       "                         [-0.0376,  0.0701, -0.0171]],\n",
       "               \n",
       "                        [[-0.0026, -0.0192, -0.0028],\n",
       "                         [-0.0595, -0.1116,  0.0215],\n",
       "                         [-0.0107, -0.0708,  0.0302]]]], device='cuda:0')),\n",
       "              ('conv3.bias',\n",
       "               tensor([-0.0139,  0.0349, -0.0248,  0.0035,  0.0100, -0.0001,  0.0148, -0.0055,\n",
       "                        0.0373,  0.0434, -0.0259,  0.0182, -0.0096,  0.0348, -0.0041, -0.0092,\n",
       "                       -0.0068,  0.0343, -0.0433,  0.0072,  0.0178,  0.0053, -0.0103, -0.0048,\n",
       "                       -0.0411, -0.0393, -0.0304, -0.0330, -0.0287,  0.0268, -0.0160, -0.0372,\n",
       "                       -0.0053,  0.0030,  0.0407,  0.0219, -0.0247, -0.0298,  0.0072, -0.0153,\n",
       "                       -0.0033, -0.0176, -0.0425,  0.0298, -0.0189,  0.0444,  0.0092,  0.0379,\n",
       "                        0.0033, -0.0222, -0.0362,  0.0295, -0.0327,  0.0316, -0.0191, -0.0358,\n",
       "                        0.0114, -0.0299, -0.0326,  0.0191,  0.0070, -0.0323, -0.0055,  0.0250],\n",
       "                      device='cuda:0')),\n",
       "              ('lin_hidden.weight',\n",
       "               tensor([[-0.0559, -0.0021, -0.0333,  ...,  0.0316, -0.0217, -0.0404],\n",
       "                       [ 0.0350,  0.0063,  0.0229,  ...,  0.0306,  0.0258,  0.0491],\n",
       "                       [-0.0091, -0.0026,  0.0079,  ..., -0.0257,  0.0045,  0.0367],\n",
       "                       ...,\n",
       "                       [ 0.0121, -0.0366, -0.0094,  ..., -0.0196, -0.0069, -0.0009],\n",
       "                       [-0.0054, -0.0114, -0.0082,  ...,  0.0163, -0.0467,  0.0264],\n",
       "                       [-0.0028,  0.0069,  0.0149,  ..., -0.0641,  0.0305, -0.0024]],\n",
       "                      device='cuda:0')),\n",
       "              ('lin_hidden.bias',\n",
       "               tensor([ 0.0069,  0.0078,  0.0010,  0.0244,  0.0097, -0.0086,  0.0098, -0.0084,\n",
       "                        0.0101, -0.0052,  0.0028,  0.0094, -0.0204, -0.0035,  0.0147,  0.0129,\n",
       "                        0.0147, -0.0159,  0.0133, -0.0011,  0.0097, -0.0020, -0.0196, -0.0040,\n",
       "                        0.0086,  0.0093, -0.0073,  0.0102, -0.0163, -0.0164, -0.0090, -0.0029,\n",
       "                       -0.0082, -0.0018,  0.0069, -0.0151,  0.0042, -0.0054,  0.0055,  0.0114,\n",
       "                       -0.0073, -0.0194, -0.0050, -0.0135,  0.0032, -0.0102,  0.0087, -0.0043,\n",
       "                        0.0086,  0.0248, -0.0133, -0.0067, -0.0179,  0.0191, -0.0090,  0.0074,\n",
       "                       -0.0160, -0.0112, -0.0118, -0.0113, -0.0033,  0.0048, -0.0005, -0.0206,\n",
       "                       -0.0119, -0.0026, -0.0101, -0.0115, -0.0157,  0.0055,  0.0118,  0.0103,\n",
       "                        0.0102, -0.0049,  0.0128,  0.0015, -0.0223, -0.0144,  0.0153, -0.0162,\n",
       "                        0.0199, -0.0101,  0.0016,  0.0105,  0.0113,  0.0058,  0.0090, -0.0086,\n",
       "                        0.0065, -0.0010, -0.0183,  0.0068,  0.0052,  0.0062,  0.0080, -0.0011,\n",
       "                       -0.0163,  0.0139,  0.0166,  0.0068, -0.0104, -0.0116,  0.0039, -0.0039,\n",
       "                       -0.0191, -0.0051, -0.0191,  0.0294, -0.0065, -0.0011, -0.0060, -0.0072,\n",
       "                        0.0138, -0.0078, -0.0004,  0.0154, -0.0180, -0.0141, -0.0073,  0.0075,\n",
       "                        0.0065, -0.0203,  0.0032,  0.0094, -0.0040,  0.0017, -0.0206, -0.0154],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.linear_embedding.weight',\n",
       "               tensor([[-0.0091, -0.0617,  0.0728,  ...,  0.1958, -0.0022, -0.0109],\n",
       "                       [ 0.1381,  0.1041,  0.1929,  ...,  0.0639, -0.0125,  0.1207],\n",
       "                       [-0.0174,  0.2496,  0.0960,  ..., -0.2239,  0.0604, -0.0269],\n",
       "                       ...,\n",
       "                       [-0.0099,  0.0444, -0.0700,  ..., -0.0238,  0.1447, -0.1724],\n",
       "                       [ 0.1224, -0.0210,  0.1585,  ...,  0.2776, -0.0078, -0.1133],\n",
       "                       [ 0.0892,  0.1334,  0.1215,  ..., -0.0425, -0.3079,  0.3615]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.linear_embedding.bias',\n",
       "               tensor([-0.0915,  0.0404,  0.0640, -0.0587, -0.0141,  0.0818, -0.0576, -0.0731,\n",
       "                       -0.0671,  0.0178,  0.0982, -0.0019, -0.0224, -0.0246,  0.0240,  0.0486,\n",
       "                        0.0723,  0.0621,  0.0892, -0.0514,  0.0611,  0.0028,  0.0897, -0.0077,\n",
       "                        0.0410, -0.0482, -0.0071, -0.0461,  0.0080, -0.0551, -0.0031,  0.0364,\n",
       "                        0.0022, -0.0660,  0.0108,  0.0336,  0.0023, -0.0639,  0.0491,  0.0211,\n",
       "                        0.0379, -0.0117, -0.0631, -0.0752,  0.0188, -0.0293, -0.0399, -0.0461,\n",
       "                       -0.0255,  0.0430, -0.0355, -0.0626, -0.0503, -0.0403,  0.0415,  0.0176,\n",
       "                        0.0292,  0.0329,  0.0385, -0.0454,  0.0618, -0.0298,  0.0384,  0.0212,\n",
       "                        0.0292,  0.0375, -0.0872, -0.0866, -0.0082, -0.0530, -0.0399, -0.0528,\n",
       "                       -0.0395, -0.0578,  0.0364, -0.0047,  0.0093,  0.0091,  0.0624, -0.0262,\n",
       "                        0.0110, -0.0791,  0.0514, -0.0262,  0.0768, -0.0904,  0.0170, -0.0210,\n",
       "                       -0.0528, -0.0143, -0.0353,  0.0803,  0.0744,  0.0828,  0.0098, -0.0532,\n",
       "                        0.0553, -0.0512, -0.0508,  0.0484,  0.0614,  0.0033, -0.0528, -0.0038,\n",
       "                        0.0882,  0.0707, -0.0603, -0.0042, -0.0430, -0.0568,  0.0625, -0.0278,\n",
       "                       -0.0697, -0.0685, -0.0443, -0.0590, -0.0731,  0.0601,  0.0722, -0.0865,\n",
       "                       -0.0062,  0.0001,  0.0225, -0.0756, -0.0112,  0.0297,  0.0015, -0.0403],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.pos_embedding.inv_freqs',\n",
       "               tensor([1.0000e+00, 8.6596e-01, 7.4989e-01, 6.4938e-01, 5.6234e-01, 4.8697e-01,\n",
       "                       4.2170e-01, 3.6517e-01, 3.1623e-01, 2.7384e-01, 2.3714e-01, 2.0535e-01,\n",
       "                       1.7783e-01, 1.5399e-01, 1.3335e-01, 1.1548e-01, 1.0000e-01, 8.6596e-02,\n",
       "                       7.4989e-02, 6.4938e-02, 5.6234e-02, 4.8697e-02, 4.2170e-02, 3.6517e-02,\n",
       "                       3.1623e-02, 2.7384e-02, 2.3714e-02, 2.0535e-02, 1.7783e-02, 1.5399e-02,\n",
       "                       1.3335e-02, 1.1548e-02, 1.0000e-02, 8.6596e-03, 7.4989e-03, 6.4938e-03,\n",
       "                       5.6234e-03, 4.8697e-03, 4.2170e-03, 3.6517e-03, 3.1623e-03, 2.7384e-03,\n",
       "                       2.3714e-03, 2.0535e-03, 1.7783e-03, 1.5399e-03, 1.3335e-03, 1.1548e-03,\n",
       "                       1.0000e-03, 8.6596e-04, 7.4989e-04, 6.4938e-04, 5.6234e-04, 4.8697e-04,\n",
       "                       4.2170e-04, 3.6517e-04, 3.1623e-04, 2.7384e-04, 2.3714e-04, 2.0535e-04,\n",
       "                       1.7783e-04, 1.5399e-04, 1.3335e-04, 1.1548e-04], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.attention.values.weight',\n",
       "               tensor([[ 4.8885e-02, -5.2476e-02, -6.7935e-02,  ...,  2.2648e-02,\n",
       "                        -4.6310e-02,  2.2412e-02],\n",
       "                       [-5.7038e-02,  6.3690e-02, -2.6936e-02,  ..., -4.7239e-02,\n",
       "                        -7.6012e-02,  7.1286e-02],\n",
       "                       [ 6.4054e-02,  6.4172e-02, -8.0573e-02,  ...,  6.3974e-05,\n",
       "                         3.0026e-02,  5.3770e-02],\n",
       "                       ...,\n",
       "                       [-5.5395e-02, -6.3951e-02,  4.3000e-02,  ..., -4.8983e-02,\n",
       "                         9.4099e-02, -5.4643e-02],\n",
       "                       [-3.3643e-02,  2.3233e-02,  3.8821e-02,  ..., -7.1746e-02,\n",
       "                         2.4493e-02,  7.5715e-03],\n",
       "                       [-1.3754e-02, -7.4159e-02,  4.8493e-02,  ..., -7.7458e-03,\n",
       "                        -5.3149e-03, -5.4444e-02]], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.attention.keys.weight',\n",
       "               tensor([[-0.1121, -0.1260, -0.0357,  ...,  0.0158,  0.0623, -0.0014],\n",
       "                       [ 0.0417,  0.0895,  0.0537,  ..., -0.0054,  0.0304, -0.0910],\n",
       "                       [ 0.0519, -0.0409, -0.0889,  ...,  0.0069,  0.0557, -0.0477],\n",
       "                       ...,\n",
       "                       [-0.0359,  0.0596,  0.0594,  ..., -0.0314,  0.0133, -0.0638],\n",
       "                       [ 0.0359, -0.0252,  0.0534,  ...,  0.0360,  0.0793,  0.0358],\n",
       "                       [-0.0068,  0.0913,  0.0501,  ..., -0.0605,  0.0102, -0.0129]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.attention.queries.weight',\n",
       "               tensor([[ 0.0107,  0.0586, -0.0959,  ...,  0.0105,  0.0105, -0.0925],\n",
       "                       [-0.0829,  0.0674, -0.0314,  ..., -0.0200, -0.0999,  0.0034],\n",
       "                       [ 0.0267, -0.0015,  0.0559,  ...,  0.0871, -0.0985,  0.0641],\n",
       "                       ...,\n",
       "                       [ 0.0487,  0.0114,  0.0159,  ..., -0.0036, -0.0896, -0.0490],\n",
       "                       [ 0.0699, -0.0181, -0.0767,  ..., -0.0685,  0.0779,  0.0772],\n",
       "                       [ 0.0805, -0.0753, -0.0194,  ...,  0.0518, -0.0737, -0.1018]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.attention.fc_out.weight',\n",
       "               tensor([[-0.0156,  0.0025, -0.0633,  ..., -0.0428, -0.0371,  0.0904],\n",
       "                       [ 0.0472,  0.0191,  0.0385,  ..., -0.0658, -0.0102, -0.0067],\n",
       "                       [-0.0436, -0.0201, -0.0318,  ...,  0.0120,  0.0080,  0.0047],\n",
       "                       ...,\n",
       "                       [-0.0763, -0.0140, -0.0465,  ...,  0.0324, -0.0421, -0.0134],\n",
       "                       [-0.0435, -0.0546,  0.0649,  ...,  0.0817, -0.0701,  0.0087],\n",
       "                       [ 0.0564,  0.0185,  0.0583,  ...,  0.0717, -0.0513, -0.0537]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.attention.fc_out.bias',\n",
       "               tensor([ 0.0121, -0.0505, -0.0563, -0.0070,  0.0210,  0.0765, -0.0556, -0.0269,\n",
       "                        0.0130,  0.0643,  0.0086, -0.0140, -0.0532,  0.0288,  0.0042, -0.0369,\n",
       "                        0.0203,  0.0845, -0.0264, -0.0012, -0.0136,  0.0674, -0.0460,  0.0607,\n",
       "                       -0.0137, -0.0472, -0.0856, -0.0533,  0.0463, -0.0671, -0.0336, -0.0734,\n",
       "                        0.0049,  0.0159, -0.0647,  0.0932,  0.0186,  0.0122,  0.0503, -0.0131,\n",
       "                       -0.0589, -0.0590,  0.0586,  0.0327, -0.0551,  0.0808,  0.0661,  0.0591,\n",
       "                       -0.0515,  0.0671,  0.0823, -0.0410, -0.0248, -0.0922,  0.0455, -0.0142,\n",
       "                       -0.0537, -0.0850, -0.0221, -0.0487, -0.0721, -0.0666,  0.0038,  0.0087,\n",
       "                        0.0563,  0.0614, -0.0646, -0.0426, -0.0286, -0.0398, -0.0832,  0.0604,\n",
       "                        0.0886,  0.0064,  0.0172,  0.0479, -0.0034,  0.0474,  0.0689, -0.0825,\n",
       "                       -0.0509, -0.0796,  0.0200, -0.0119, -0.0139,  0.0563,  0.0014, -0.0461,\n",
       "                        0.0025, -0.0105,  0.0407,  0.0558, -0.0237, -0.0674, -0.0379, -0.0331,\n",
       "                       -0.0190, -0.0872,  0.0440,  0.0170, -0.0573, -0.0357, -0.0379, -0.0329,\n",
       "                        0.0674,  0.0745, -0.0460,  0.0132,  0.0581, -0.0009, -0.0769, -0.0014,\n",
       "                       -0.0228, -0.0078,  0.0596, -0.0896,  0.0243, -0.0435, -0.0441,  0.0778,\n",
       "                        0.0014, -0.0489, -0.0354, -0.0551,  0.0645,  0.0532, -0.0100,  0.0305],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.bg',\n",
       "               tensor([-6.2769e-03,  1.9576e-03, -1.3246e-02, -9.0300e-03, -5.5558e-03,\n",
       "                        2.3216e-03,  9.9925e-03, -6.9255e-03, -2.8577e-04, -1.3911e-03,\n",
       "                        2.7344e-02, -3.3115e-04,  6.8371e-03, -2.7270e-03,  1.7186e-03,\n",
       "                       -2.8197e-03, -1.3615e-02, -5.9917e-03,  2.0292e-02, -1.8898e-03,\n",
       "                        7.3794e-03,  2.8063e-03, -1.7681e-03,  2.7172e-03,  1.7123e-03,\n",
       "                        3.8297e-03, -9.0189e-03,  2.1207e-03, -5.3080e-03,  9.3260e-03,\n",
       "                       -1.1846e-03,  2.5758e-02,  2.4279e-03, -2.8154e-03, -9.1150e-03,\n",
       "                       -1.4935e-02,  4.8208e-03,  7.6588e-03, -5.3771e-03, -2.3129e-03,\n",
       "                        5.4231e-03, -6.3029e-04,  9.9807e-03,  1.8623e-05,  1.1452e-03,\n",
       "                       -8.1793e-03, -5.6032e-03,  5.2134e-03, -9.3303e-03,  8.8825e-03,\n",
       "                       -4.7478e-03,  2.2942e-03,  2.3970e-06, -1.3735e-02, -3.1937e-03,\n",
       "                       -6.2970e-04, -5.5429e-03, -6.7279e-03,  1.7321e-03,  4.0113e-03,\n",
       "                        5.1622e-03,  5.8720e-04,  4.1946e-03,  2.2908e-03, -1.3391e-03,\n",
       "                       -6.5090e-03, -1.8025e-03, -4.5434e-03,  6.0472e-03,  1.0891e-02,\n",
       "                       -1.3371e-02, -6.0985e-03, -9.7167e-04,  3.1012e-03,  1.1391e-02,\n",
       "                       -1.1119e-03, -1.1484e-03,  2.8760e-03, -5.1442e-03,  1.5645e-03,\n",
       "                       -6.4550e-03, -5.4790e-03,  9.6654e-03,  1.5819e-03,  1.5221e-02,\n",
       "                       -1.9785e-03, -3.2526e-03, -5.5953e-03,  1.1121e-02, -1.1743e-03,\n",
       "                       -1.5304e-04, -1.9857e-03,  3.0726e-03,  1.7047e-02, -1.1028e-04,\n",
       "                        9.1047e-03,  1.1564e-02, -4.7523e-03, -2.4027e-04,  1.1288e-02,\n",
       "                        5.8723e-03, -6.9990e-03, -1.4341e-03, -4.5513e-03,  1.8290e-02,\n",
       "                       -6.3893e-03,  2.4459e-02,  1.5376e-03,  5.6072e-03, -1.7683e-02,\n",
       "                        5.0275e-03, -2.0329e-03,  1.9791e-02,  7.6471e-03,  3.8026e-03,\n",
       "                       -3.2299e-03, -4.9412e-03, -6.4598e-03, -9.3364e-04,  1.4406e-03,\n",
       "                        3.3057e-03, -9.7918e-03,  8.8697e-03,  8.2065e-03, -3.9829e-03,\n",
       "                        6.9429e-03, -1.4357e-02, -1.1714e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.Wr.weight',\n",
       "               tensor([[ 0.0495,  0.0595, -0.0544,  ...,  0.0519,  0.0027,  0.0900],\n",
       "                       [-0.0613, -0.1070,  0.0262,  ..., -0.1001, -0.0514,  0.1392],\n",
       "                       [-0.0652, -0.0605,  0.0102,  ..., -0.0312, -0.0520,  0.0527],\n",
       "                       ...,\n",
       "                       [-0.0995, -0.0713, -0.0183,  ..., -0.0714,  0.1523, -0.0980],\n",
       "                       [ 0.1177,  0.0148, -0.0808,  ...,  0.1107, -0.1354,  0.0341],\n",
       "                       [ 0.0152, -0.0646, -0.0789,  ...,  0.0940, -0.1209,  0.0704]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.Ur.weight',\n",
       "               tensor([[-0.1557, -0.1153,  0.0806,  ..., -0.1162, -0.0705, -0.1296],\n",
       "                       [-0.1015, -0.1201, -0.0399,  ...,  0.0150, -0.1032,  0.1470],\n",
       "                       [ 0.1027,  0.0779,  0.0770,  ...,  0.0656,  0.0649,  0.1048],\n",
       "                       ...,\n",
       "                       [ 0.0372,  0.1100,  0.0300,  ..., -0.0705,  0.0250,  0.0704],\n",
       "                       [-0.0091,  0.0899,  0.0394,  ..., -0.0746,  0.0436, -0.0564],\n",
       "                       [ 0.0869, -0.0602, -0.0670,  ...,  0.1025, -0.0839, -0.0492]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.Wz.weight',\n",
       "               tensor([[ 0.0265,  0.0443, -0.0755,  ...,  0.0072,  0.0928, -0.1451],\n",
       "                       [ 0.0095,  0.1122, -0.0221,  ...,  0.0307, -0.1044, -0.0860],\n",
       "                       [ 0.0399,  0.1462,  0.1296,  ..., -0.1401, -0.0777, -0.0918],\n",
       "                       ...,\n",
       "                       [ 0.0259, -0.0796, -0.0438,  ..., -0.1347, -0.1391,  0.0061],\n",
       "                       [ 0.0002,  0.0857, -0.0255,  ..., -0.0840, -0.0640,  0.0082],\n",
       "                       [ 0.1190,  0.1327,  0.0866,  ..., -0.1384, -0.0242,  0.1250]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.Uz.weight',\n",
       "               tensor([[-0.1318,  0.1143, -0.1368,  ...,  0.0803, -0.0105,  0.0744],\n",
       "                       [ 0.0197, -0.0185,  0.1275,  ...,  0.0511,  0.0155,  0.1321],\n",
       "                       [ 0.0354,  0.0068,  0.0093,  ..., -0.0810, -0.0591, -0.1124],\n",
       "                       ...,\n",
       "                       [ 0.0787, -0.0319, -0.0300,  ..., -0.1513,  0.1170,  0.0807],\n",
       "                       [ 0.0473,  0.1106,  0.0334,  ..., -0.0160,  0.0473, -0.0133],\n",
       "                       [ 0.1303,  0.0705,  0.0843,  ...,  0.1263, -0.1414, -0.1079]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.Wg.weight',\n",
       "               tensor([[-0.0850, -0.1087, -0.0341,  ...,  0.0791, -0.0402,  0.0636],\n",
       "                       [ 0.0923,  0.0472, -0.0046,  ...,  0.1186, -0.0837, -0.0865],\n",
       "                       [-0.0978,  0.0944,  0.1340,  ...,  0.1325, -0.1376,  0.1110],\n",
       "                       ...,\n",
       "                       [ 0.0291, -0.1097,  0.1263,  ..., -0.1292,  0.0016,  0.0227],\n",
       "                       [ 0.0700, -0.0353,  0.0467,  ..., -0.0787, -0.0332, -0.0160],\n",
       "                       [-0.1318, -0.1081, -0.0261,  ..., -0.0646,  0.0874, -0.0120]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate1.Ug.weight',\n",
       "               tensor([[ 0.0268,  0.1284, -0.0793,  ...,  0.0281,  0.0130, -0.1025],\n",
       "                       [ 0.1286, -0.0729, -0.1145,  ..., -0.1157,  0.1515,  0.1161],\n",
       "                       [-0.0808, -0.0412,  0.1148,  ...,  0.0170,  0.0023, -0.0825],\n",
       "                       ...,\n",
       "                       [-0.0519,  0.0653, -0.0595,  ...,  0.0762,  0.1358,  0.0013],\n",
       "                       [-0.0151, -0.0437, -0.0581,  ..., -0.1290, -0.0758,  0.1327],\n",
       "                       [ 0.0256,  0.0699, -0.1018,  ...,  0.0855, -0.0029, -0.1026]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.bg',\n",
       "               tensor([-6.3828e-03,  5.1359e-03,  6.0573e-04, -9.9653e-03,  1.6482e-03,\n",
       "                        2.7519e-03, -3.6387e-04,  2.8434e-03,  1.5263e-03,  9.1266e-03,\n",
       "                        5.7449e-03,  9.9525e-04,  9.3196e-03,  4.2020e-03, -1.2308e-02,\n",
       "                       -6.1258e-03, -5.4945e-03,  3.9519e-03, -1.1006e-03,  1.0261e-03,\n",
       "                        8.4540e-05, -4.1440e-03, -1.0228e-03,  9.3769e-04,  2.6025e-03,\n",
       "                       -1.1123e-02, -8.4793e-03, -1.0141e-02,  9.6313e-03,  4.4575e-03,\n",
       "                       -3.4331e-03,  1.3951e-02, -5.1066e-03, -4.7156e-03,  4.5191e-03,\n",
       "                        2.2254e-03,  1.3623e-02, -9.0024e-03,  7.5053e-03,  1.6532e-04,\n",
       "                        3.1081e-03,  4.2736e-03, -5.1817e-03, -2.0160e-03, -2.0529e-03,\n",
       "                       -7.6662e-03,  1.2603e-03, -8.3067e-03, -4.7830e-03, -2.3415e-03,\n",
       "                        1.8675e-02,  1.3460e-02, -3.1382e-03,  6.9406e-03,  6.3797e-03,\n",
       "                        2.9080e-03,  4.2029e-04,  1.6801e-03, -5.9945e-03, -5.8056e-03,\n",
       "                       -1.9600e-03, -8.4453e-03,  1.6571e-04, -3.7880e-03, -5.4602e-03,\n",
       "                       -1.3289e-02, -2.0408e-04, -1.3221e-02, -5.2904e-03, -1.3328e-02,\n",
       "                        7.1177e-03, -2.1503e-02,  9.1161e-03, -6.9814e-03, -3.4762e-03,\n",
       "                        4.5937e-03, -6.0329e-04, -2.7939e-03, -4.5670e-03,  2.0349e-03,\n",
       "                       -8.3230e-03,  4.3461e-03,  9.0985e-03, -1.4123e-03,  7.6943e-03,\n",
       "                        2.0583e-04,  3.2312e-03, -4.1800e-03, -3.9373e-03, -3.7415e-03,\n",
       "                       -3.4589e-03,  9.3158e-04, -1.9038e-02,  9.1050e-04,  3.7849e-03,\n",
       "                        1.2588e-03,  2.2407e-03,  1.4093e-02, -2.3240e-03,  8.8454e-03,\n",
       "                       -4.1990e-03,  5.4279e-03, -8.0844e-03,  1.0230e-02,  1.0579e-02,\n",
       "                        2.1516e-04,  2.9398e-02,  2.1635e-02, -1.2635e-03, -2.6404e-03,\n",
       "                        2.7715e-03, -3.0811e-03,  6.0604e-03, -7.8447e-03,  7.1729e-03,\n",
       "                       -4.5416e-03,  8.3583e-03, -7.1016e-03, -2.5914e-03,  1.1134e-02,\n",
       "                       -3.4363e-03,  3.7173e-04,  1.0006e-02, -5.6164e-03,  7.0527e-04,\n",
       "                       -1.2492e-03, -1.2270e-03,  2.6200e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.Wr.weight',\n",
       "               tensor([[ 0.0698, -0.0478,  0.0018,  ..., -0.0019,  0.0445, -0.1125],\n",
       "                       [-0.1116, -0.0561, -0.0983,  ...,  0.1464,  0.0454,  0.0400],\n",
       "                       [-0.0879, -0.0084, -0.0264,  ...,  0.1014,  0.0715,  0.0457],\n",
       "                       ...,\n",
       "                       [-0.0958, -0.1106,  0.0512,  ...,  0.0212,  0.1100,  0.0869],\n",
       "                       [ 0.0507,  0.0131,  0.0414,  ..., -0.1025,  0.0910,  0.1389],\n",
       "                       [-0.1194, -0.1186, -0.0683,  ...,  0.0325,  0.1196,  0.0726]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.Ur.weight',\n",
       "               tensor([[ 0.1069,  0.0772, -0.0002,  ..., -0.0907,  0.0154, -0.1002],\n",
       "                       [ 0.0903,  0.1085, -0.0960,  ...,  0.0570, -0.1559,  0.0204],\n",
       "                       [ 0.0977, -0.1481,  0.0848,  ...,  0.0469,  0.1024,  0.0482],\n",
       "                       ...,\n",
       "                       [-0.0713,  0.1257,  0.0584,  ..., -0.0194, -0.0584,  0.0169],\n",
       "                       [ 0.0824, -0.1188,  0.0142,  ...,  0.0925,  0.1310,  0.1694],\n",
       "                       [-0.0702,  0.0181, -0.0420,  ...,  0.0605,  0.1256, -0.0266]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.Wz.weight',\n",
       "               tensor([[ 0.0212, -0.0683, -0.1106,  ...,  0.0886,  0.1464,  0.0196],\n",
       "                       [-0.0322, -0.0580, -0.1425,  ...,  0.0408,  0.0447, -0.0742],\n",
       "                       [ 0.0138, -0.1056, -0.0385,  ...,  0.0928,  0.0219, -0.1267],\n",
       "                       ...,\n",
       "                       [-0.0208, -0.0345, -0.0605,  ...,  0.1338, -0.0703,  0.0676],\n",
       "                       [ 0.1271,  0.0488, -0.0852,  ...,  0.0951,  0.0793,  0.1175],\n",
       "                       [ 0.0378,  0.0599,  0.1340,  ...,  0.0323, -0.0830, -0.1450]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.Uz.weight',\n",
       "               tensor([[-0.0322, -0.0567, -0.0747,  ...,  0.0678,  0.0058, -0.0166],\n",
       "                       [ 0.0542, -0.0322,  0.1202,  ..., -0.1474, -0.0642,  0.1352],\n",
       "                       [-0.0709, -0.0464,  0.1433,  ..., -0.1315, -0.0972,  0.0116],\n",
       "                       ...,\n",
       "                       [ 0.1581, -0.0885, -0.1294,  ..., -0.1129,  0.0651, -0.0776],\n",
       "                       [-0.0070,  0.0920, -0.1499,  ...,  0.0368,  0.1262, -0.0924],\n",
       "                       [-0.0465,  0.0408,  0.1141,  ...,  0.1074, -0.0461, -0.0369]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.Wg.weight',\n",
       "               tensor([[-0.1008, -0.0620,  0.0107,  ..., -0.1072,  0.1265, -0.1395],\n",
       "                       [-0.0146,  0.0815,  0.1154,  ...,  0.0881, -0.0607,  0.0150],\n",
       "                       [ 0.1262, -0.1422, -0.1486,  ..., -0.0663,  0.0601,  0.0328],\n",
       "                       ...,\n",
       "                       [ 0.1569,  0.0478,  0.0626,  ...,  0.0314,  0.0183, -0.0663],\n",
       "                       [ 0.0494, -0.1144, -0.0494,  ...,  0.1073, -0.0849, -0.0580],\n",
       "                       [-0.0575,  0.0175, -0.1361,  ...,  0.1364, -0.0797,  0.0111]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.gate2.Ug.weight',\n",
       "               tensor([[ 0.1141,  0.1439, -0.1088,  ..., -0.0209, -0.0879, -0.0267],\n",
       "                       [-0.1475, -0.0148,  0.0181,  ...,  0.1340,  0.0210, -0.0233],\n",
       "                       [ 0.1103, -0.0866, -0.1355,  ..., -0.0509,  0.1512, -0.1403],\n",
       "                       ...,\n",
       "                       [ 0.1588, -0.0931,  0.1504,  ...,  0.0692, -0.0526,  0.0891],\n",
       "                       [ 0.1474, -0.1051,  0.1313,  ...,  0.1373,  0.0590,  0.1160],\n",
       "                       [-0.0379,  0.1190, -0.0218,  ...,  0.0270,  0.1356, -0.0254]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.norm1.weight',\n",
       "               tensor([0.9961, 0.9858, 1.0028, 1.0046, 1.0001, 0.9812, 0.9872, 1.0107, 1.0085,\n",
       "                       0.9894, 1.0303, 0.9859, 1.0090, 0.9919, 0.9979, 0.9991, 0.9956, 1.0086,\n",
       "                       0.9917, 1.0055, 1.0087, 1.0099, 1.0191, 0.9935, 0.9923, 0.9910, 1.0005,\n",
       "                       0.9929, 0.9990, 1.0049, 0.9867, 1.0213, 0.9953, 1.0057, 0.9982, 1.0007,\n",
       "                       1.0051, 0.9963, 0.9962, 0.9977, 1.0060, 1.0152, 1.0020, 1.0010, 0.9854,\n",
       "                       1.0155, 0.9717, 0.9990, 0.9922, 0.9983, 1.0107, 0.9753, 0.9993, 0.9823,\n",
       "                       1.0178, 0.9898, 0.9857, 0.9897, 0.9863, 1.0086, 1.0062, 1.0072, 1.0122,\n",
       "                       1.0010, 0.9840, 0.9975, 0.9950, 0.9714, 0.9943, 1.0023, 1.0001, 1.0025,\n",
       "                       1.0008, 1.0006, 0.9846, 0.9943, 0.9923, 0.9980, 0.9811, 0.9942, 0.9869,\n",
       "                       0.9891, 1.0042, 0.9984, 0.9974, 0.9947, 0.9988, 0.9956, 0.9893, 1.0032,\n",
       "                       1.0006, 0.9981, 1.0226, 0.9857, 1.0044, 0.9990, 0.9894, 0.9800, 1.0027,\n",
       "                       0.9950, 1.0034, 0.9715, 1.0055, 0.9986, 1.0128, 0.9955, 0.9957, 1.0048,\n",
       "                       0.9933, 1.0000, 1.0097, 1.0011, 1.0042, 0.9954, 0.9850, 0.9821, 1.0065,\n",
       "                       1.0110, 1.0053, 0.9854, 1.0069, 0.9943, 1.0057, 1.0020, 1.0083, 0.9944,\n",
       "                       0.9905, 1.0091], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.norm1.bias',\n",
       "               tensor([-9.0522e-03,  1.1528e-02, -6.9767e-03,  1.0223e-03, -6.9586e-03,\n",
       "                        1.0159e-02,  8.2686e-03, -1.1849e-02, -1.1441e-02, -1.9624e-03,\n",
       "                       -2.5109e-03,  1.0768e-02, -4.0308e-03,  4.9444e-03, -4.4406e-03,\n",
       "                       -6.0395e-03,  1.8111e-02,  1.4592e-02,  7.9959e-03,  1.2701e-02,\n",
       "                        4.9698e-03,  6.8926e-03, -8.6785e-04,  1.2413e-03,  3.8961e-03,\n",
       "                       -7.1868e-03, -3.8637e-03, -4.1104e-03, -2.0828e-03,  5.3806e-03,\n",
       "                        1.0045e-02,  1.4171e-02,  1.1778e-03, -3.9266e-03, -1.4716e-03,\n",
       "                       -1.4064e-03, -9.2785e-03,  1.6931e-02, -4.4858e-04, -2.8742e-03,\n",
       "                       -7.3743e-03,  2.4937e-03, -5.1281e-03, -6.2020e-03,  1.0717e-02,\n",
       "                       -9.3446e-03,  2.4046e-02,  7.0153e-03,  1.5628e-03, -3.5059e-03,\n",
       "                       -1.3992e-02,  6.0555e-03, -9.1643e-05,  7.0108e-03,  2.6598e-03,\n",
       "                        1.7672e-02,  8.0986e-03, -1.2523e-02,  1.1853e-02,  2.8988e-03,\n",
       "                       -5.5312e-03, -3.6232e-03, -7.2516e-03, -1.1362e-02, -1.5570e-02,\n",
       "                        2.7429e-03,  4.9295e-03,  2.6904e-02,  1.2168e-03, -3.4896e-03,\n",
       "                        1.0433e-02, -2.8388e-03, -5.8995e-03, -8.4156e-03, -5.2150e-03,\n",
       "                        7.6106e-04, -7.0731e-03, -3.7312e-03,  1.2061e-02, -2.3017e-03,\n",
       "                        8.7124e-03,  6.2644e-03,  9.1389e-03, -2.3713e-03,  1.8210e-03,\n",
       "                       -4.1310e-03, -5.9652e-03, -5.2861e-03,  7.9833e-03,  6.0503e-03,\n",
       "                       -5.9831e-03, -1.4850e-03,  3.7530e-03,  5.9552e-03,  3.9897e-03,\n",
       "                       -5.4499e-03, -1.1730e-02,  1.3103e-02, -1.0369e-03,  4.1354e-04,\n",
       "                        5.3957e-03,  2.1256e-02, -9.9344e-03,  9.3298e-04, -4.9299e-03,\n",
       "                        1.7711e-04,  1.1640e-02, -5.4893e-03, -9.2760e-03, -6.0774e-03,\n",
       "                        4.1800e-03,  9.9881e-03, -1.1074e-02,  3.7854e-03, -1.3329e-02,\n",
       "                        2.1751e-02,  9.4714e-03, -1.4480e-02,  4.4199e-03,  4.7151e-03,\n",
       "                       -1.1542e-03,  2.0199e-03,  4.0165e-03, -7.6980e-03, -8.0236e-03,\n",
       "                        2.2128e-03, -2.7006e-03, -3.4376e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.norm2.weight',\n",
       "               tensor([0.9921, 1.0053, 0.9964, 0.9972, 0.9984, 0.9852, 1.0040, 1.0289, 0.9828,\n",
       "                       0.9921, 1.0212, 1.0006, 0.9814, 1.0019, 1.0144, 0.9906, 0.9819, 1.0014,\n",
       "                       1.0164, 0.9878, 1.0010, 0.9966, 1.0230, 0.9965, 0.9984, 1.0019, 1.0006,\n",
       "                       0.9920, 1.0010, 1.0030, 0.9950, 1.0196, 0.9978, 0.9998, 0.9874, 0.9971,\n",
       "                       0.9925, 1.0188, 1.0020, 0.9930, 0.9961, 1.0055, 0.9994, 0.9950, 0.9945,\n",
       "                       0.9886, 1.0042, 0.9994, 1.0003, 0.9941, 1.0004, 0.9977, 0.9919, 0.9962,\n",
       "                       0.9936, 0.9850, 1.0000, 0.9957, 0.9903, 0.9982, 0.9925, 0.9999, 1.0061,\n",
       "                       0.9922, 1.0063, 0.9995, 0.9964, 1.0026, 0.9965, 1.0066, 0.9966, 1.0037,\n",
       "                       1.0012, 0.9852, 1.0001, 0.9995, 0.9997, 0.9873, 0.9850, 0.9906, 0.9893,\n",
       "                       1.0061, 1.0018, 0.9886, 1.0062, 0.9899, 0.9888, 0.9890, 0.9856, 1.0074,\n",
       "                       1.0058, 0.9954, 1.0226, 0.9879, 1.0113, 0.9951, 1.0040, 1.0003, 0.9922,\n",
       "                       1.0079, 0.9927, 1.0013, 1.0059, 1.0022, 1.0204, 0.9974, 1.0031, 1.0065,\n",
       "                       0.9981, 1.0294, 0.9914, 0.9974, 1.0081, 1.0007, 0.9976, 1.0049, 0.9990,\n",
       "                       0.9971, 0.9885, 0.9928, 0.9941, 1.0022, 1.0007, 0.9844, 0.9907, 1.0065,\n",
       "                       1.0114, 1.0107], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.norm2.bias',\n",
       "               tensor([ 4.6105e-03,  8.7079e-03,  6.6066e-03, -5.0124e-03,  2.1884e-03,\n",
       "                        7.6669e-03,  6.4316e-03,  6.5513e-04, -8.9574e-04,  3.5293e-03,\n",
       "                       -9.9990e-04, -3.7389e-03,  8.2267e-03, -3.1291e-03,  3.7867e-03,\n",
       "                        5.1990e-04,  1.0657e-02,  4.9596e-03,  1.2027e-03,  5.2707e-03,\n",
       "                        3.6499e-04,  6.6320e-03,  2.3147e-03, -8.1091e-04, -8.1445e-03,\n",
       "                        4.3723e-03, -5.6436e-03,  4.4049e-03,  2.9779e-03,  1.9548e-03,\n",
       "                        3.6086e-03, -7.8660e-04, -6.2185e-04,  1.1292e-03,  1.0957e-02,\n",
       "                        6.3774e-03,  5.3808e-03,  1.9408e-03,  7.4076e-05,  2.0676e-03,\n",
       "                       -7.9242e-03,  6.7957e-03,  2.7747e-03, -4.6166e-03,  4.5608e-03,\n",
       "                        5.5739e-03, -5.0035e-03,  3.5370e-04, -4.5638e-03,  2.5903e-03,\n",
       "                        1.2281e-04, -2.6211e-03, -7.4754e-04,  8.4621e-03,  1.3687e-03,\n",
       "                       -3.2620e-03, -7.6216e-03, -2.3144e-04, -1.1793e-03,  1.0207e-03,\n",
       "                       -2.8157e-03, -5.2757e-03,  8.5817e-03, -4.7317e-03,  8.9661e-03,\n",
       "                        1.5702e-03, -2.7477e-03,  7.7478e-04, -1.9893e-04,  2.4629e-03,\n",
       "                        1.3036e-03, -1.1216e-03, -1.3761e-03, -4.9898e-03, -3.3980e-03,\n",
       "                       -2.9952e-03, -3.6274e-03,  5.2953e-03, -5.4772e-03, -1.0270e-03,\n",
       "                        2.7047e-03, -5.0229e-03,  1.7979e-03,  1.0627e-02, -2.6583e-03,\n",
       "                        5.1715e-03, -1.0023e-03, -6.8278e-04,  9.1007e-03, -2.0209e-03,\n",
       "                        1.6294e-04, -4.9684e-04,  2.8864e-03,  8.0894e-03, -2.2202e-03,\n",
       "                        7.1661e-05,  3.5533e-03, -2.3046e-03, -3.2104e-03, -4.5441e-03,\n",
       "                       -4.5358e-03, -7.8522e-04, -2.6722e-03,  5.9921e-03, -3.8972e-03,\n",
       "                        1.3171e-04,  2.1761e-03,  4.7621e-03,  5.5854e-03,  4.7922e-03,\n",
       "                       -1.5660e-03, -1.6325e-03, -2.2280e-03, -6.0642e-03,  6.4544e-04,\n",
       "                        6.9077e-03, -4.6529e-03,  8.0403e-03, -4.1867e-03, -2.5594e-03,\n",
       "                        2.4687e-03, -4.2195e-03, -4.4243e-03,  1.3178e-02, -9.6387e-03,\n",
       "                       -1.1874e-03,  9.1556e-03, -8.0998e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.norm_kv.weight',\n",
       "               tensor([0.9937, 0.9957, 0.9895, 1.0031, 0.9959, 0.9954, 1.0021, 0.9959, 1.0006,\n",
       "                       0.9891, 0.9942, 0.9987, 0.9983, 0.9909, 0.9919, 0.9995, 0.9910, 0.9860,\n",
       "                       0.9881, 0.9980, 1.0251, 1.0068, 0.9887, 0.9906, 0.9976, 1.0013, 1.0152,\n",
       "                       0.9929, 1.0038, 0.9860, 0.9945, 0.9954, 0.9915, 1.0017, 0.9949, 0.9934,\n",
       "                       0.9964, 0.9967, 0.9879, 0.9964, 0.9937, 0.9832, 0.9830, 0.9772, 0.9935,\n",
       "                       0.9924, 0.9918, 0.9851, 0.9944, 0.9944, 0.9917, 0.9943, 0.9962, 0.9921,\n",
       "                       0.9977, 0.9948, 0.9972, 0.9904, 0.9845, 1.0103, 1.0003, 0.9897, 0.9892,\n",
       "                       0.9844, 0.9958, 0.9901, 0.9928, 0.9948, 0.9902, 0.9907, 0.9914, 0.9987,\n",
       "                       0.9978, 0.9956, 1.0030, 0.9982, 0.9822, 0.9884, 1.0203, 0.9951, 0.9933,\n",
       "                       0.9913, 1.0006, 1.0002, 0.9935, 0.9973, 0.9937, 0.9968, 0.9850, 0.9952,\n",
       "                       0.9838, 0.9985, 0.9974, 0.9875, 1.0035, 1.0089, 0.9874, 0.9922, 1.0071,\n",
       "                       0.9962, 1.0043, 0.9948, 1.0038, 1.0096, 0.9993, 1.0010, 0.9874, 0.9996,\n",
       "                       0.9907, 1.0001, 0.9939, 0.9861, 0.9876, 1.0002, 0.9918, 1.0001, 0.9807,\n",
       "                       1.0065, 0.9765, 0.9872, 1.0010, 0.9963, 0.9885, 0.9951, 1.0045, 0.9925,\n",
       "                       1.0019, 0.9907], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.norm_kv.bias',\n",
       "               tensor([-4.1460e-03,  3.5339e-03,  2.9238e-03, -6.2495e-04,  3.5773e-03,\n",
       "                        4.4552e-03,  3.5482e-03,  6.4739e-03,  1.9545e-03, -2.0440e-03,\n",
       "                        3.7045e-04, -2.8552e-03,  2.9600e-03,  4.6488e-03,  6.7121e-03,\n",
       "                       -1.9249e-03, -2.6208e-03, -6.9952e-03, -1.7485e-03, -4.0250e-03,\n",
       "                       -5.3730e-03, -2.1106e-03, -1.9007e-03,  9.4437e-03,  4.1571e-03,\n",
       "                        4.2639e-03,  2.6666e-04,  5.1527e-03, -6.5665e-03, -6.5452e-03,\n",
       "                        3.9962e-03, -7.9982e-03,  4.9295e-03, -2.4920e-03,  4.1504e-03,\n",
       "                        8.6284e-03,  2.7228e-03, -5.2478e-03,  9.7016e-03,  1.0634e-03,\n",
       "                        4.3239e-03, -6.0861e-03, -3.6671e-03, -9.8270e-03,  2.7541e-03,\n",
       "                        3.0229e-03,  4.3189e-03,  7.3414e-04,  9.1946e-03, -6.1461e-03,\n",
       "                        6.6033e-03, -7.1928e-04, -4.8480e-03,  2.6010e-03, -4.2122e-03,\n",
       "                       -6.9555e-04,  1.2056e-03, -1.3587e-03,  7.3114e-03,  8.2866e-03,\n",
       "                       -3.6026e-03,  8.6393e-03, -2.4810e-03,  1.5326e-02, -5.2069e-03,\n",
       "                       -1.3416e-03,  1.1154e-02,  1.2943e-03,  1.0240e-02, -7.7300e-05,\n",
       "                        6.3995e-03, -1.4949e-03,  9.9638e-06,  3.2023e-03, -5.3399e-03,\n",
       "                        2.5860e-03, -1.2284e-03, -1.2229e-03,  3.1936e-03, -3.5414e-03,\n",
       "                        2.6831e-03,  5.3264e-03, -3.9145e-03,  1.7267e-04,  3.3444e-03,\n",
       "                        1.9509e-03,  4.0193e-03, -2.4264e-03,  9.4891e-03, -4.0975e-04,\n",
       "                        6.6262e-03,  3.6873e-03, -6.1095e-03,  4.3510e-03,  2.5584e-03,\n",
       "                        4.5253e-03, -6.8771e-03, -3.7878e-03,  3.7838e-03,  1.0809e-03,\n",
       "                       -8.3052e-03, -1.2011e-03, -5.1670e-03,  2.4858e-03,  5.5842e-04,\n",
       "                        9.5868e-04, -1.1573e-02, -1.9737e-03, -5.1393e-03,  1.1637e-05,\n",
       "                       -2.1720e-03, -3.8875e-03, -3.4330e-03,  5.9628e-03, -6.9192e-03,\n",
       "                        4.7925e-03, -8.9741e-03, -1.8953e-04, -1.3238e-02, -1.1355e-02,\n",
       "                       -9.6124e-03, -1.9420e-04, -5.1930e-03,  8.2105e-06,  5.0728e-03,\n",
       "                       -8.4803e-03,  3.2042e-03, -1.3462e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.fc.0.weight',\n",
       "               tensor([[ 0.0707, -0.0425, -0.0630,  ...,  0.0190, -0.0623, -0.0094],\n",
       "                       [ 0.0831,  0.0682,  0.0479,  ..., -0.0271, -0.0832,  0.0490],\n",
       "                       [ 0.0699, -0.0618,  0.0432,  ..., -0.0872,  0.0094,  0.0284],\n",
       "                       ...,\n",
       "                       [ 0.0569,  0.0587, -0.0021,  ...,  0.0264, -0.0476,  0.0151],\n",
       "                       [ 0.0228, -0.0223,  0.0619,  ...,  0.0475,  0.0670, -0.0818],\n",
       "                       [ 0.0711,  0.0889,  0.0449,  ...,  0.0236,  0.0754, -0.0385]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.0.fc.0.bias',\n",
       "               tensor([-0.0712,  0.0450,  0.0589,  0.0557, -0.0493, -0.0100,  0.0571,  0.0648,\n",
       "                        0.0246,  0.0053, -0.0614,  0.0200, -0.0685,  0.0266,  0.0241, -0.0330,\n",
       "                        0.0485,  0.0781, -0.0116,  0.0722,  0.0125,  0.0165, -0.0356, -0.0040,\n",
       "                       -0.0477,  0.0198,  0.0503, -0.0587,  0.0651,  0.0502, -0.0164,  0.0402,\n",
       "                        0.0499, -0.0891, -0.0784, -0.0796, -0.0251,  0.0794,  0.0432, -0.0564,\n",
       "                       -0.0358, -0.0505,  0.0765,  0.0226, -0.0268, -0.0326, -0.0616,  0.0372,\n",
       "                        0.0618,  0.0419, -0.0152, -0.0587, -0.0379,  0.0818,  0.0063,  0.0371,\n",
       "                       -0.0110,  0.0119, -0.0664, -0.0174,  0.0123, -0.0456,  0.0162,  0.0628,\n",
       "                       -0.0613, -0.0260, -0.0496, -0.0092, -0.0285,  0.0113,  0.0077, -0.0574,\n",
       "                       -0.0426,  0.0510, -0.0589,  0.0775,  0.0464,  0.0137,  0.0587, -0.0901,\n",
       "                       -0.0117, -0.0120,  0.0638,  0.0552, -0.0290,  0.0009, -0.0497, -0.0377,\n",
       "                       -0.0381,  0.0076,  0.0766, -0.0281,  0.0423,  0.0371,  0.0651,  0.0938,\n",
       "                       -0.0478, -0.0120,  0.0033, -0.0917,  0.0872,  0.0503,  0.0036, -0.0529,\n",
       "                        0.0016, -0.0354,  0.0289, -0.0691, -0.0480, -0.0037,  0.0168, -0.0077,\n",
       "                       -0.0359, -0.0470, -0.0621,  0.0414,  0.0424,  0.0031,  0.0752, -0.0613,\n",
       "                        0.0137,  0.0538, -0.0712,  0.0836,  0.0201,  0.0048, -0.0352,  0.0423],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.attention.values.weight',\n",
       "               tensor([[ 0.0005, -0.0420,  0.0491,  ...,  0.0739, -0.0177, -0.0262],\n",
       "                       [ 0.0377, -0.0164,  0.0613,  ..., -0.0747, -0.0603, -0.0913],\n",
       "                       [-0.0360, -0.0868, -0.0779,  ..., -0.0153,  0.0407,  0.0659],\n",
       "                       ...,\n",
       "                       [-0.0766,  0.0168,  0.0326,  ...,  0.0088, -0.0092, -0.0565],\n",
       "                       [ 0.0770,  0.0078,  0.0545,  ..., -0.0361, -0.0748,  0.0688],\n",
       "                       [-0.0632,  0.0032, -0.0113,  ...,  0.0830,  0.0139, -0.0180]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.attention.keys.weight',\n",
       "               tensor([[-0.0010,  0.0789, -0.0875,  ..., -0.0429, -0.0890, -0.0003],\n",
       "                       [ 0.0039, -0.0218, -0.0026,  ..., -0.0072,  0.0003,  0.0123],\n",
       "                       [-0.0091, -0.0090, -0.0646,  ...,  0.0194,  0.0708,  0.0469],\n",
       "                       ...,\n",
       "                       [-0.0549, -0.0157,  0.0054,  ...,  0.0566, -0.0226, -0.0021],\n",
       "                       [-0.0464,  0.0191, -0.0078,  ..., -0.0031,  0.0211, -0.0838],\n",
       "                       [-0.0751, -0.0244, -0.0769,  ...,  0.0803,  0.0500,  0.0423]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.attention.queries.weight',\n",
       "               tensor([[ 0.0782,  0.0282, -0.0012,  ..., -0.0125, -0.0340, -0.0829],\n",
       "                       [-0.0356,  0.0685,  0.0065,  ..., -0.0223, -0.0453,  0.0358],\n",
       "                       [ 0.0817,  0.0463, -0.0156,  ..., -0.0287,  0.0196, -0.0472],\n",
       "                       ...,\n",
       "                       [ 0.0348, -0.0527,  0.0274,  ..., -0.0491, -0.0267, -0.1019],\n",
       "                       [ 0.0122, -0.0247, -0.0402,  ...,  0.0213, -0.0320, -0.0341],\n",
       "                       [ 0.0104,  0.0970, -0.0024,  ...,  0.0189, -0.1066, -0.1297]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.attention.fc_out.weight',\n",
       "               tensor([[ 0.0538,  0.0159, -0.0635,  ..., -0.0692, -0.0725, -0.0385],\n",
       "                       [-0.0226,  0.0710, -0.0348,  ..., -0.0614,  0.0654,  0.0537],\n",
       "                       [ 0.0400,  0.0610, -0.0661,  ...,  0.0452,  0.0270, -0.0218],\n",
       "                       ...,\n",
       "                       [-0.0277, -0.0495,  0.0656,  ..., -0.0494,  0.0059, -0.0137],\n",
       "                       [-0.0115, -0.0298,  0.0683,  ..., -0.0617, -0.0203, -0.0304],\n",
       "                       [-0.0226,  0.0673,  0.0468,  ...,  0.0582,  0.0862,  0.0309]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.attention.fc_out.bias',\n",
       "               tensor([-0.0787, -0.0628,  0.0331, -0.0851, -0.0513,  0.0519, -0.0089, -0.0112,\n",
       "                       -0.0165,  0.0102,  0.0316, -0.0345, -0.0377, -0.0595,  0.0834, -0.0536,\n",
       "                       -0.0832,  0.0497,  0.0258, -0.0341,  0.0673,  0.0315,  0.0144,  0.0854,\n",
       "                        0.0359,  0.0260,  0.0064,  0.0864, -0.0013, -0.0444, -0.0674, -0.0270,\n",
       "                        0.0817, -0.0132, -0.0017, -0.0389, -0.0492, -0.0783, -0.0732,  0.0126,\n",
       "                        0.0720, -0.0061,  0.0759, -0.0305,  0.0726, -0.0170,  0.0443, -0.0628,\n",
       "                       -0.0181,  0.0149,  0.0593, -0.0537,  0.0751, -0.0839,  0.0272,  0.0274,\n",
       "                       -0.0546,  0.0393, -0.0591, -0.0593, -0.0456, -0.0472,  0.0724,  0.0660,\n",
       "                        0.0764,  0.0684, -0.0497,  0.0400,  0.0481, -0.0250, -0.0809,  0.0443,\n",
       "                        0.0348, -0.0009, -0.0722,  0.0722,  0.0670,  0.0398,  0.0705,  0.0812,\n",
       "                       -0.0072,  0.0166,  0.0046, -0.0632, -0.0007, -0.0739, -0.0442,  0.0431,\n",
       "                        0.0652,  0.0577, -0.0717, -0.0545,  0.0383,  0.0536, -0.0585, -0.0873,\n",
       "                        0.0156,  0.0154, -0.0552, -0.0404, -0.0015, -0.0639,  0.0655,  0.0895,\n",
       "                       -0.0286,  0.0614,  0.0404,  0.0709,  0.0276,  0.0400, -0.0039, -0.0315,\n",
       "                       -0.0919,  0.0748, -0.0558,  0.0220,  0.0840, -0.0619, -0.0303, -0.0737,\n",
       "                        0.0696, -0.0609,  0.0326, -0.0352, -0.0253, -0.0462,  0.0102,  0.0038],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.bg',\n",
       "               tensor([-4.3612e-03, -1.8133e-03,  5.0528e-04, -2.9553e-05, -4.8448e-03,\n",
       "                        4.9949e-04,  8.6653e-03, -1.8370e-04, -1.2187e-02, -1.1998e-02,\n",
       "                        9.1640e-03, -1.7884e-03, -1.6279e-03, -8.5885e-04,  9.1774e-03,\n",
       "                       -1.8494e-03,  7.1248e-04,  1.7421e-03, -3.7673e-03,  1.5028e-03,\n",
       "                       -9.4396e-03, -3.5292e-03,  8.5346e-03, -8.9580e-04, -1.3240e-02,\n",
       "                        6.8860e-03,  2.6612e-03, -1.3143e-03, -1.7382e-03,  1.1671e-02,\n",
       "                        2.0330e-03,  1.5505e-02, -1.1429e-02, -3.3655e-03, -4.1196e-03,\n",
       "                       -7.4539e-03,  1.1529e-02,  7.0805e-03, -1.2910e-03, -5.4752e-03,\n",
       "                        5.3191e-04, -5.1092e-03,  5.4899e-03,  4.9921e-03, -7.3887e-04,\n",
       "                        1.0384e-03, -2.7623e-04, -2.0839e-03, -3.2470e-03,  5.5313e-03,\n",
       "                        1.0361e-02,  7.1305e-04, -1.1700e-03,  1.3740e-02,  4.0780e-03,\n",
       "                       -2.9954e-03, -3.8234e-03,  8.4007e-04,  1.5376e-03,  7.8941e-03,\n",
       "                       -7.6985e-03,  3.6420e-03, -5.7665e-03, -3.7031e-03,  1.2478e-02,\n",
       "                       -1.8032e-03, -1.5440e-04,  1.7155e-03,  5.3868e-03,  7.2082e-03,\n",
       "                       -3.2565e-04, -1.7630e-03, -1.6889e-03, -6.9983e-03,  9.0082e-03,\n",
       "                       -2.7873e-03, -8.4852e-03, -5.3159e-04,  4.4263e-04,  6.2791e-03,\n",
       "                        6.1226e-03, -3.9382e-03,  8.5327e-03, -2.3575e-03, -8.6966e-03,\n",
       "                       -2.4612e-02,  4.7058e-03, -3.6553e-03,  1.0129e-02, -1.7608e-02,\n",
       "                       -7.2023e-03, -8.0108e-04,  3.9090e-03,  2.7544e-03, -1.3102e-02,\n",
       "                       -1.4686e-02,  5.6201e-03,  2.2443e-03,  5.8372e-03,  7.8531e-03,\n",
       "                        4.3574e-03,  6.3418e-04,  9.9059e-03, -6.3307e-03, -5.9660e-03,\n",
       "                       -1.2035e-03,  5.0810e-03, -6.3008e-03, -7.1315e-03, -3.2475e-03,\n",
       "                        1.8278e-03, -1.0813e-02,  1.1597e-02,  6.6339e-03,  1.3736e-03,\n",
       "                        3.6712e-03, -7.0220e-03,  4.9545e-03,  5.9316e-03,  9.9058e-03,\n",
       "                       -1.5305e-02, -4.2381e-03,  3.0384e-04, -1.0425e-02,  2.3594e-03,\n",
       "                        4.1150e-03, -9.3184e-03,  1.0622e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.Wr.weight',\n",
       "               tensor([[ 0.0543, -0.0544,  0.0253,  ..., -0.0458, -0.1126,  0.1307],\n",
       "                       [ 0.0096, -0.0387,  0.0482,  ..., -0.0327, -0.1384, -0.1576],\n",
       "                       [-0.0904,  0.0842, -0.0640,  ..., -0.0581, -0.1350,  0.1137],\n",
       "                       ...,\n",
       "                       [-0.1345, -0.0481,  0.0988,  ...,  0.0547, -0.1334, -0.0408],\n",
       "                       [-0.0023, -0.1135,  0.0328,  ..., -0.0440,  0.0852,  0.1464],\n",
       "                       [-0.1069,  0.0569,  0.0689,  ...,  0.1163, -0.0418,  0.0635]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.Ur.weight',\n",
       "               tensor([[ 0.1261, -0.1092,  0.1540,  ...,  0.0642,  0.1684,  0.0871],\n",
       "                       [-0.1138, -0.0964,  0.0150,  ..., -0.1088, -0.0121,  0.0857],\n",
       "                       [-0.0084, -0.1008, -0.0951,  ...,  0.1384, -0.0447, -0.0182],\n",
       "                       ...,\n",
       "                       [-0.0845, -0.0675,  0.0143,  ...,  0.0908, -0.0648, -0.1107],\n",
       "                       [-0.1203,  0.0270,  0.0203,  ..., -0.0347, -0.0024,  0.0924],\n",
       "                       [-0.0560,  0.0321,  0.1385,  ..., -0.0472, -0.1457,  0.1352]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.Wz.weight',\n",
       "               tensor([[-0.1285,  0.0421, -0.0015,  ...,  0.0365, -0.0208,  0.0276],\n",
       "                       [-0.1283, -0.0234,  0.1101,  ...,  0.0799, -0.1282,  0.1662],\n",
       "                       [ 0.0614, -0.0970,  0.0697,  ...,  0.0299,  0.1113,  0.0718],\n",
       "                       ...,\n",
       "                       [ 0.0471,  0.0818,  0.1164,  ..., -0.0616, -0.1180,  0.1018],\n",
       "                       [-0.0532, -0.1049, -0.0710,  ...,  0.0504, -0.1304,  0.0085],\n",
       "                       [-0.0918, -0.0515,  0.0406,  ..., -0.0816,  0.0108,  0.0354]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.Uz.weight',\n",
       "               tensor([[ 0.0035, -0.1339, -0.1408,  ..., -0.1306,  0.0729,  0.1513],\n",
       "                       [ 0.0611,  0.0997, -0.0356,  ...,  0.0480, -0.0634, -0.0581],\n",
       "                       [-0.0058, -0.0944, -0.0747,  ...,  0.0537, -0.0317,  0.0368],\n",
       "                       ...,\n",
       "                       [ 0.0958,  0.0026, -0.0378,  ...,  0.0777, -0.1075,  0.0982],\n",
       "                       [-0.0683, -0.0929,  0.0115,  ..., -0.1245,  0.0088, -0.0092],\n",
       "                       [-0.0515,  0.1479,  0.0538,  ..., -0.0150,  0.1599,  0.0433]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.Wg.weight',\n",
       "               tensor([[ 0.0273, -0.1280, -0.0167,  ...,  0.1246, -0.0050, -0.1556],\n",
       "                       [-0.0871, -0.1320, -0.0814,  ...,  0.0137,  0.0876,  0.1293],\n",
       "                       [-0.1296, -0.0624, -0.1363,  ..., -0.0049, -0.0862,  0.0164],\n",
       "                       ...,\n",
       "                       [-0.0633,  0.0791, -0.1103,  ...,  0.1508, -0.0294,  0.0251],\n",
       "                       [ 0.0289, -0.0736,  0.0786,  ...,  0.0048, -0.1263, -0.0853],\n",
       "                       [-0.0700,  0.0009,  0.1134,  ...,  0.0943,  0.1011,  0.1438]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate1.Ug.weight',\n",
       "               tensor([[ 0.1236,  0.0806, -0.1111,  ..., -0.0682,  0.0408,  0.1065],\n",
       "                       [-0.0700, -0.0659,  0.0888,  ..., -0.0342, -0.0987,  0.1414],\n",
       "                       [-0.1013, -0.0221,  0.1344,  ...,  0.1112, -0.1216, -0.0872],\n",
       "                       ...,\n",
       "                       [ 0.0032,  0.0093, -0.0231,  ..., -0.1409,  0.1369, -0.0405],\n",
       "                       [ 0.1252, -0.1206,  0.0251,  ...,  0.1332, -0.0798,  0.0240],\n",
       "                       [-0.0061, -0.0239, -0.0550,  ..., -0.0255,  0.0919, -0.1041]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.bg',\n",
       "               tensor([ 0.0036,  0.0142, -0.0027, -0.0027,  0.0104, -0.0117,  0.0010,  0.0038,\n",
       "                       -0.0063,  0.0003,  0.0059,  0.0008, -0.0071, -0.0060, -0.0054,  0.0009,\n",
       "                        0.0169, -0.0029, -0.0052,  0.0008,  0.0038,  0.0030,  0.0112, -0.0041,\n",
       "                       -0.0023, -0.0024, -0.0197, -0.0142,  0.0072,  0.0065, -0.0041,  0.0327,\n",
       "                        0.0037,  0.0050, -0.0076,  0.0024, -0.0037,  0.0110,  0.0026, -0.0034,\n",
       "                       -0.0029,  0.0062,  0.0064,  0.0051, -0.0059, -0.0054,  0.0063,  0.0035,\n",
       "                       -0.0009,  0.0019,  0.0065, -0.0004,  0.0012, -0.0122, -0.0046,  0.0046,\n",
       "                       -0.0098,  0.0018,  0.0097,  0.0029,  0.0073,  0.0055, -0.0019,  0.0031,\n",
       "                        0.0118,  0.0021, -0.0091, -0.0033, -0.0006, -0.0055,  0.0019,  0.0072,\n",
       "                        0.0053,  0.0021,  0.0044, -0.0160, -0.0013,  0.0014,  0.0009, -0.0076,\n",
       "                        0.0004, -0.0011,  0.0067, -0.0027,  0.0130, -0.0010, -0.0030, -0.0052,\n",
       "                       -0.0088, -0.0055,  0.0073,  0.0085, -0.0051, -0.0018, -0.0030, -0.0115,\n",
       "                        0.0044, -0.0045, -0.0072,  0.0203, -0.0022, -0.0087,  0.0027, -0.0035,\n",
       "                        0.0034, -0.0025,  0.0214, -0.0027,  0.0022,  0.0008,  0.0023,  0.0155,\n",
       "                        0.0167,  0.0012, -0.0078,  0.0024,  0.0025,  0.0122, -0.0016, -0.0047,\n",
       "                       -0.0061, -0.0014,  0.0114,  0.0076, -0.0040,  0.0046, -0.0045,  0.0025],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.Wr.weight',\n",
       "               tensor([[-0.0754, -0.0494,  0.0293,  ..., -0.1465, -0.1481,  0.1279],\n",
       "                       [-0.1448, -0.0448, -0.1384,  ..., -0.0597, -0.1448,  0.1046],\n",
       "                       [-0.1274, -0.0928,  0.0731,  ...,  0.1096,  0.0608, -0.0042],\n",
       "                       ...,\n",
       "                       [-0.0034, -0.1247,  0.1514,  ..., -0.1495,  0.0766, -0.1555],\n",
       "                       [-0.1106, -0.0280, -0.0765,  ..., -0.0061,  0.0942, -0.0856],\n",
       "                       [-0.0502, -0.1109,  0.0390,  ..., -0.1564, -0.1432, -0.0126]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.Ur.weight',\n",
       "               tensor([[ 0.0045,  0.0602, -0.0302,  ..., -0.0322, -0.0199,  0.0678],\n",
       "                       [ 0.1253, -0.0614, -0.0346,  ...,  0.0873, -0.0707,  0.0716],\n",
       "                       [-0.1287, -0.0855,  0.0206,  ..., -0.0035, -0.1008, -0.1616],\n",
       "                       ...,\n",
       "                       [-0.0224, -0.1144,  0.1230,  ..., -0.0044,  0.1175,  0.1333],\n",
       "                       [ 0.0612,  0.0658,  0.0735,  ..., -0.0028,  0.0093,  0.0625],\n",
       "                       [-0.0690, -0.1110, -0.0368,  ...,  0.1089,  0.1502, -0.1014]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.Wz.weight',\n",
       "               tensor([[-0.1475, -0.0072, -0.0202,  ..., -0.1046, -0.1344, -0.0800],\n",
       "                       [ 0.1272, -0.0897, -0.0258,  ..., -0.0661, -0.0392,  0.1335],\n",
       "                       [-0.0578, -0.1000, -0.0266,  ...,  0.0448, -0.1487, -0.0687],\n",
       "                       ...,\n",
       "                       [-0.1309, -0.0427, -0.0356,  ..., -0.0949,  0.1129,  0.0667],\n",
       "                       [ 0.0442,  0.0689,  0.0152,  ..., -0.0211, -0.1477,  0.0336],\n",
       "                       [-0.0611, -0.0325,  0.0764,  ...,  0.1231,  0.1108, -0.0453]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.Uz.weight',\n",
       "               tensor([[-0.1356,  0.0512, -0.0065,  ..., -0.1016, -0.1123,  0.0773],\n",
       "                       [ 0.0194, -0.1448,  0.1179,  ..., -0.0160, -0.0838, -0.0846],\n",
       "                       [-0.0347,  0.1506, -0.1199,  ...,  0.0404, -0.0331,  0.0461],\n",
       "                       ...,\n",
       "                       [ 0.1223,  0.0235, -0.0899,  ..., -0.0680, -0.1052,  0.1351],\n",
       "                       [ 0.0868,  0.0176,  0.0735,  ..., -0.0569, -0.0301,  0.0716],\n",
       "                       [ 0.1147, -0.1187,  0.1307,  ...,  0.0107,  0.1342,  0.0326]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.Wg.weight',\n",
       "               tensor([[-0.1226,  0.0246,  0.1715,  ..., -0.0864, -0.1131, -0.0051],\n",
       "                       [-0.0177,  0.0010, -0.0114,  ...,  0.0237,  0.0940, -0.1104],\n",
       "                       [-0.1341,  0.1054,  0.1312,  ..., -0.0599,  0.0760,  0.0734],\n",
       "                       ...,\n",
       "                       [-0.0968,  0.0302, -0.1094,  ..., -0.0507, -0.1290, -0.0889],\n",
       "                       [ 0.0374, -0.0381,  0.0379,  ..., -0.0651,  0.0121, -0.1363],\n",
       "                       [-0.1273, -0.0382,  0.0532,  ..., -0.0846, -0.1084,  0.1211]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.gate2.Ug.weight',\n",
       "               tensor([[ 0.0156, -0.1350, -0.0649,  ...,  0.1074, -0.1208, -0.0166],\n",
       "                       [-0.0783,  0.0921,  0.0494,  ..., -0.0675, -0.0543, -0.0279],\n",
       "                       [-0.1315, -0.1367, -0.0057,  ...,  0.0580, -0.0402,  0.1137],\n",
       "                       ...,\n",
       "                       [ 0.1418,  0.0626,  0.0425,  ..., -0.1145,  0.0147,  0.0058],\n",
       "                       [ 0.0433, -0.1286, -0.1268,  ..., -0.0266, -0.1430, -0.1297],\n",
       "                       [-0.0903, -0.0914, -0.1463,  ..., -0.1424, -0.1153,  0.0651]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.norm1.weight',\n",
       "               tensor([1.0061, 1.0223, 0.9838, 1.0046, 1.0160, 0.9823, 1.0210, 0.9829, 1.0225,\n",
       "                       1.0136, 1.0565, 1.0099, 1.0302, 1.0211, 1.0081, 1.0509, 1.0032, 1.0157,\n",
       "                       0.9802, 1.0133, 1.0115, 1.0269, 1.0026, 1.0222, 1.0115, 1.0083, 1.0393,\n",
       "                       1.0101, 1.0058, 1.0259, 1.0503, 1.0417, 1.0000, 1.0201, 1.0098, 1.0120,\n",
       "                       1.0252, 1.0044, 1.0383, 0.9765, 1.0278, 0.9951, 0.9891, 1.0301, 1.0345,\n",
       "                       0.9994, 0.9981, 0.9828, 1.0244, 1.0058, 0.9821, 0.9904, 0.9945, 1.0250,\n",
       "                       1.0120, 0.9964, 1.0068, 0.9829, 0.9931, 0.9954, 0.9980, 1.0082, 1.0218,\n",
       "                       1.0109, 0.9832, 1.0103, 1.0444, 0.9923, 0.9942, 0.9840, 0.9824, 1.0069,\n",
       "                       1.0011, 1.0161, 1.0059, 1.0070, 0.9884, 1.0283, 0.9947, 0.9939, 0.9901,\n",
       "                       0.9999, 1.0064, 1.0091, 0.9998, 1.0262, 1.0205, 1.0012, 0.9970, 1.0156,\n",
       "                       0.9762, 1.0182, 1.0045, 1.0268, 1.0400, 1.0285, 1.0310, 1.0139, 0.9892,\n",
       "                       1.0151, 0.9751, 0.9882, 1.0327, 0.9967, 1.0253, 1.0041, 1.0179, 0.9871,\n",
       "                       1.0114, 0.9925, 1.0105, 1.0102, 1.0266, 1.0042, 1.0327, 1.0188, 1.0039,\n",
       "                       0.9984, 1.0648, 1.0338, 1.0234, 0.9934, 0.9771, 1.0045, 0.9804, 1.0015,\n",
       "                       0.9951, 1.0522], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.norm1.bias',\n",
       "               tensor([-0.0161,  0.0346, -0.0013, -0.0198, -0.0046, -0.0102,  0.0046,  0.0046,\n",
       "                        0.0356, -0.0150, -0.0333, -0.0248,  0.0102, -0.0297, -0.0187, -0.0459,\n",
       "                       -0.0081,  0.0156,  0.0134, -0.0159,  0.0289, -0.0394,  0.0037,  0.0104,\n",
       "                       -0.0361, -0.0199, -0.0466, -0.0156,  0.0090,  0.0262, -0.0225,  0.0271,\n",
       "                       -0.0296, -0.0061, -0.0136, -0.0047, -0.0323,  0.0145, -0.0246, -0.0027,\n",
       "                        0.0241, -0.0008,  0.0262,  0.0216, -0.0443,  0.0189, -0.0211, -0.0031,\n",
       "                        0.0166, -0.0116, -0.0137, -0.0560, -0.0070, -0.0326, -0.0231,  0.0069,\n",
       "                       -0.0247, -0.0011,  0.0078, -0.0166, -0.0189, -0.0164, -0.0233, -0.0285,\n",
       "                       -0.0154, -0.0202, -0.0383,  0.0253,  0.0029, -0.0474,  0.0093, -0.0288,\n",
       "                        0.0241,  0.0165, -0.0063, -0.0113, -0.0069, -0.0293,  0.0200,  0.0173,\n",
       "                        0.0256, -0.0025,  0.0158,  0.0163, -0.0133, -0.0246, -0.0246,  0.0385,\n",
       "                        0.0146, -0.0135, -0.0115, -0.0168, -0.0216,  0.0190, -0.0422, -0.0012,\n",
       "                       -0.0281, -0.0254,  0.0062, -0.0113, -0.0002, -0.0034,  0.0028,  0.0224,\n",
       "                        0.0323,  0.0044,  0.0243, -0.0083,  0.0232,  0.0396,  0.0282, -0.0163,\n",
       "                        0.0251, -0.0146,  0.0218,  0.0071,  0.0249,  0.0132,  0.0402, -0.0341,\n",
       "                       -0.0226, -0.0028,  0.0465,  0.0093, -0.0121,  0.0008, -0.0193, -0.0193],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.norm2.weight',\n",
       "               tensor([1.0003, 1.0038, 0.9903, 0.9952, 0.9924, 0.9912, 0.9869, 0.9952, 0.9882,\n",
       "                       0.9922, 1.0165, 0.9923, 0.9956, 1.0024, 0.9950, 0.9980, 0.9851, 0.9852,\n",
       "                       0.9925, 0.9962, 1.0105, 0.9959, 1.0075, 0.9966, 0.9893, 0.9957, 1.0022,\n",
       "                       1.0049, 0.9966, 1.0037, 0.9933, 1.0132, 0.9982, 0.9947, 0.9975, 0.9889,\n",
       "                       0.9985, 0.9948, 0.9914, 0.9968, 0.9863, 0.9952, 0.9939, 0.9951, 0.9869,\n",
       "                       1.0021, 1.0078, 0.9973, 0.9893, 0.9942, 0.9996, 0.9936, 0.9980, 1.0004,\n",
       "                       0.9930, 0.9930, 0.9991, 0.9999, 0.9945, 1.0026, 1.0057, 0.9979, 1.0110,\n",
       "                       0.9956, 0.9948, 0.9930, 0.9877, 1.0015, 0.9910, 1.0089, 0.9909, 1.0011,\n",
       "                       0.9859, 1.0027, 1.0000, 0.9845, 1.0004, 1.0000, 0.9995, 0.9934, 0.9970,\n",
       "                       1.0062, 1.0008, 0.9955, 0.9922, 1.0075, 0.9956, 1.0005, 1.0060, 0.9967,\n",
       "                       1.0027, 0.9998, 1.0080, 0.9977, 0.9942, 1.0011, 0.9945, 1.0048, 0.9966,\n",
       "                       0.9979, 0.9942, 0.9972, 1.0004, 1.0023, 0.9944, 0.9893, 1.0225, 0.9955,\n",
       "                       1.0015, 1.0006, 1.0074, 0.9980, 1.0075, 0.9983, 1.0009, 0.9953, 1.0099,\n",
       "                       0.9933, 0.9897, 1.0052, 0.9984, 0.9913, 1.0032, 0.9987, 0.9951, 1.0001,\n",
       "                       1.0070, 0.9988], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.norm2.bias',\n",
       "               tensor([ 5.1248e-03, -4.4909e-04,  8.3017e-03, -4.3177e-03,  2.1833e-03,\n",
       "                       -5.1358e-03,  2.6970e-03,  8.6450e-04, -6.1966e-03, -4.0346e-03,\n",
       "                       -6.1417e-03, -9.2171e-04, -4.8204e-04, -8.8571e-03, -4.4527e-03,\n",
       "                        3.0571e-04,  2.6900e-03, -3.3407e-03, -5.7263e-03,  1.4637e-03,\n",
       "                        7.3379e-03, -1.7866e-03, -3.5763e-03, -6.5239e-04,  2.8056e-03,\n",
       "                        8.6169e-04, -5.9376e-03, -3.5803e-03, -3.8230e-03, -4.2923e-03,\n",
       "                       -1.5114e-02, -1.2713e-03,  2.4577e-03,  8.2706e-03, -1.9487e-03,\n",
       "                        4.5015e-03, -7.2629e-04, -4.5048e-03,  3.5339e-03, -1.2464e-03,\n",
       "                       -2.4504e-03,  1.7583e-03, -9.3045e-04,  5.0642e-03, -4.8510e-04,\n",
       "                        2.1557e-04,  8.8071e-03, -3.4866e-04,  2.0414e-03, -9.7472e-04,\n",
       "                        1.8265e-03,  3.3272e-03, -9.8439e-03,  8.3519e-03,  6.0840e-03,\n",
       "                        4.1108e-03, -5.6784e-03,  2.3949e-03,  3.1484e-03, -3.7543e-03,\n",
       "                       -6.4677e-03, -1.3634e-04,  4.4077e-03, -3.1282e-03, -3.5666e-03,\n",
       "                       -1.0026e-03, -2.3091e-03,  2.9359e-03,  9.7155e-03,  7.4000e-03,\n",
       "                        4.0983e-03,  4.0815e-03, -1.2172e-02, -8.7166e-04,  3.0493e-03,\n",
       "                       -1.6611e-03,  3.5742e-03, -6.3833e-03, -1.3411e-03, -2.7533e-04,\n",
       "                       -7.6715e-05, -7.5907e-03, -3.7200e-04,  8.6305e-03,  1.2765e-03,\n",
       "                        5.1936e-03,  5.0820e-04, -2.3196e-03, -4.7051e-03,  6.0465e-03,\n",
       "                        1.6737e-03,  2.1269e-03, -2.4775e-03, -1.9479e-03, -3.9203e-03,\n",
       "                       -1.6188e-03, -3.0208e-03, -6.8283e-03,  2.3255e-05,  4.8027e-03,\n",
       "                       -4.9322e-03, -1.0911e-03, -3.9596e-03,  3.0827e-03, -5.9458e-03,\n",
       "                        1.1198e-03,  3.3626e-03, -8.2911e-03,  4.5172e-03, -5.0724e-03,\n",
       "                       -3.4160e-03, -1.2372e-03,  2.4036e-03, -3.2366e-03,  4.1560e-03,\n",
       "                        2.1456e-03,  5.4570e-03, -6.1917e-04,  9.5694e-04,  1.0251e-03,\n",
       "                       -1.4089e-03,  4.0375e-03,  1.4464e-03, -1.5867e-03, -4.2235e-04,\n",
       "                       -5.0263e-04, -6.0445e-04,  6.8005e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.norm_kv.weight',\n",
       "               tensor([0.9989, 0.9977, 0.9849, 0.9904, 1.0219, 1.0122, 1.0064, 1.0160, 0.9984,\n",
       "                       1.0000, 0.9980, 1.0074, 1.0086, 1.0147, 1.0021, 0.9997, 0.9959, 1.0066,\n",
       "                       1.0078, 0.9902, 0.9958, 1.0023, 0.9975, 1.0223, 0.9952, 1.0091, 1.0090,\n",
       "                       0.9951, 1.0086, 0.9746, 1.0097, 1.0123, 1.0112, 1.0344, 0.9918, 1.0147,\n",
       "                       1.0031, 1.0034, 0.9860, 1.0182, 1.0026, 0.9874, 1.0023, 1.0005, 0.9907,\n",
       "                       0.9843, 1.0234, 1.0202, 1.0084, 0.9909, 0.9963, 0.9947, 0.9990, 0.9934,\n",
       "                       0.9949, 0.9946, 0.9992, 0.9990, 1.0015, 0.9964, 0.9836, 0.9887, 0.9973,\n",
       "                       0.9947, 1.0061, 1.0126, 0.9764, 0.9928, 0.9906, 1.0010, 1.0067, 0.9981,\n",
       "                       0.9898, 1.0083, 0.9911, 0.9933, 1.0141, 0.9995, 1.0166, 0.9855, 1.0089,\n",
       "                       1.0060, 1.0273, 0.9938, 0.9900, 0.9887, 0.9976, 1.0020, 0.9943, 0.9884,\n",
       "                       1.0117, 0.9843, 1.0026, 1.0228, 1.0053, 1.0129, 1.0226, 1.0139, 1.0029,\n",
       "                       0.9977, 0.9913, 0.9930, 0.9968, 1.0126, 0.9843, 1.0044, 1.0052, 1.0061,\n",
       "                       0.9982, 0.9945, 0.9955, 0.9955, 1.0048, 0.9977, 0.9923, 0.9897, 1.0032,\n",
       "                       0.9986, 0.9827, 0.9941, 0.9958, 0.9902, 1.0004, 1.0002, 1.0063, 0.9902,\n",
       "                       0.9988, 0.9948], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.norm_kv.bias',\n",
       "               tensor([ 9.0433e-03, -1.2558e-05, -1.8036e-03,  8.5454e-03,  3.6463e-03,\n",
       "                        2.2387e-03, -7.6920e-04,  7.5545e-03, -4.9479e-03,  2.8330e-04,\n",
       "                       -6.5842e-04,  4.5216e-03, -3.5510e-04,  4.9223e-03,  3.2504e-03,\n",
       "                        3.9407e-03,  6.2439e-03, -7.4451e-03, -1.8192e-03,  1.2223e-02,\n",
       "                       -3.5911e-03,  1.6161e-03,  3.7270e-03,  8.5172e-04,  4.2375e-03,\n",
       "                        1.8919e-03, -1.3688e-03, -6.1060e-04,  3.3561e-05, -1.2999e-02,\n",
       "                       -3.4491e-03, -1.4801e-03,  3.1883e-04, -3.4314e-04,  3.8569e-03,\n",
       "                        3.4070e-03,  5.3619e-03, -1.4462e-03,  5.1833e-03,  3.3377e-03,\n",
       "                        2.2530e-03,  9.1794e-04, -1.4017e-03, -7.9325e-04,  6.4120e-03,\n",
       "                        5.5344e-03,  3.9508e-03,  8.1258e-03, -6.9057e-04,  2.3924e-03,\n",
       "                        3.5139e-03,  1.3530e-03, -2.9260e-03,  7.8579e-03,  6.6704e-03,\n",
       "                        1.6794e-03, -2.3971e-03,  2.0046e-03,  9.7414e-04, -5.4630e-03,\n",
       "                        9.4159e-03,  5.2708e-03,  8.9394e-03,  2.5640e-03,  3.5416e-04,\n",
       "                        4.7343e-03,  9.0459e-03,  2.4663e-04,  2.3393e-03, -1.3441e-03,\n",
       "                       -1.0002e-03,  7.5932e-04,  3.3506e-03,  7.4219e-03,  6.4889e-03,\n",
       "                        2.5267e-03,  3.1932e-03,  1.3211e-02,  4.6242e-03,  2.5404e-04,\n",
       "                       -2.5022e-03, -2.4248e-03, -6.2022e-03, -2.7442e-03,  4.0764e-03,\n",
       "                        5.9866e-03,  3.9775e-03, -1.9522e-03,  4.3519e-03,  1.5130e-02,\n",
       "                       -9.6239e-04,  1.1697e-02, -1.8611e-04, -9.7100e-03, -3.2927e-03,\n",
       "                       -8.2008e-04, -4.8508e-03, -1.1103e-02, -9.1912e-03, -6.7324e-04,\n",
       "                       -4.6103e-03,  2.4253e-03, -1.4753e-03, -9.0686e-04, -7.6164e-03,\n",
       "                        3.7395e-03,  1.7703e-03, -1.0426e-02,  2.5836e-03, -2.5392e-03,\n",
       "                       -5.1861e-03, -3.3536e-03, -4.3966e-03,  4.1842e-04, -4.5011e-03,\n",
       "                       -3.2935e-03, -8.4354e-03, -2.4299e-04, -7.0434e-03, -1.8755e-03,\n",
       "                       -3.8029e-03, -4.4522e-03, -4.0443e-03, -5.3533e-03, -6.0546e-03,\n",
       "                       -8.1861e-03, -1.8818e-03, -1.7246e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.fc.0.weight',\n",
       "               tensor([[ 0.0287,  0.0831,  0.0361,  ..., -0.0265, -0.0081, -0.0639],\n",
       "                       [-0.0147,  0.0460, -0.0431,  ..., -0.0084,  0.0621, -0.0882],\n",
       "                       [ 0.0074, -0.0742,  0.0904,  ...,  0.0652,  0.0420,  0.0334],\n",
       "                       ...,\n",
       "                       [ 0.0203,  0.0192, -0.1017,  ...,  0.0606,  0.0458,  0.0629],\n",
       "                       [ 0.0041, -0.0322,  0.0301,  ..., -0.0507,  0.0131, -0.0572],\n",
       "                       [ 0.0656, -0.0567, -0.0722,  ...,  0.0549, -0.0940,  0.0383]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.1.fc.0.bias',\n",
       "               tensor([ 0.0264,  0.0127,  0.0017,  0.0173,  0.0667,  0.0686, -0.0632, -0.0141,\n",
       "                       -0.0204, -0.0649, -0.0724,  0.0189,  0.0173,  0.0430,  0.0744, -0.0465,\n",
       "                        0.0556, -0.0614, -0.0540, -0.0432, -0.0110,  0.0631, -0.0487,  0.0410,\n",
       "                        0.0347,  0.0862,  0.0390,  0.0830, -0.0697, -0.0138,  0.0271,  0.0036,\n",
       "                        0.0462, -0.0507, -0.0280,  0.0267,  0.0317,  0.0541, -0.0820,  0.0618,\n",
       "                       -0.0751, -0.0569, -0.0113,  0.0580, -0.0311,  0.0189,  0.0034, -0.0456,\n",
       "                       -0.0378, -0.0473,  0.0804, -0.0746, -0.0730, -0.0171, -0.0590,  0.0452,\n",
       "                       -0.0648,  0.0306, -0.0405,  0.0099, -0.0571,  0.0191, -0.0318, -0.0262,\n",
       "                       -0.0817,  0.0039,  0.0112, -0.0041,  0.0432, -0.0545,  0.0371, -0.0260,\n",
       "                        0.0274,  0.0321, -0.0131, -0.0623,  0.0253,  0.0465, -0.0131, -0.0209,\n",
       "                       -0.0223, -0.0425, -0.0071, -0.0585, -0.0435, -0.0326, -0.0064,  0.0528,\n",
       "                       -0.0713,  0.0799, -0.0121, -0.0506, -0.0610, -0.0730, -0.0354,  0.0151,\n",
       "                        0.0577,  0.0078, -0.0599, -0.0770, -0.0175, -0.0076,  0.0781,  0.0046,\n",
       "                        0.0606, -0.0798, -0.0682,  0.0350,  0.0315,  0.0056, -0.0690, -0.0632,\n",
       "                        0.0471, -0.0435,  0.0779,  0.0755,  0.0334,  0.0795,  0.0627, -0.0741,\n",
       "                        0.0337,  0.0355,  0.0285, -0.0631,  0.0552, -0.0361, -0.0079, -0.0429],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.attention.values.weight',\n",
       "               tensor([[-0.0046, -0.0378,  0.0597,  ...,  0.0499, -0.0257, -0.0155],\n",
       "                       [ 0.0486,  0.0189, -0.0588,  ..., -0.0002, -0.0458,  0.0529],\n",
       "                       [-0.0451, -0.0597, -0.0212,  ...,  0.0029, -0.0630, -0.0835],\n",
       "                       ...,\n",
       "                       [-0.0811, -0.0525, -0.0144,  ..., -0.0618,  0.0462, -0.0215],\n",
       "                       [-0.0481, -0.0537, -0.0548,  ..., -0.0145, -0.0239,  0.0658],\n",
       "                       [ 0.0317, -0.0387,  0.0547,  ...,  0.0712, -0.0398,  0.0596]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.attention.keys.weight',\n",
       "               tensor([[ 0.0080,  0.0558, -0.0183,  ..., -0.0960,  0.0538, -0.0499],\n",
       "                       [ 0.0640,  0.0046, -0.0150,  ...,  0.0547,  0.0504, -0.0492],\n",
       "                       [ 0.0904,  0.0044, -0.0927,  ...,  0.0929,  0.0699, -0.0776],\n",
       "                       ...,\n",
       "                       [ 0.0177, -0.0249,  0.0049,  ...,  0.0512,  0.0223, -0.0384],\n",
       "                       [-0.0629, -0.0321, -0.0631,  ...,  0.0438,  0.0416, -0.0738],\n",
       "                       [ 0.0835, -0.0091, -0.0819,  ...,  0.0142,  0.0401,  0.0570]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.attention.queries.weight',\n",
       "               tensor([[ 0.0017,  0.0330, -0.0528,  ...,  0.0934,  0.0650,  0.0107],\n",
       "                       [-0.0087,  0.0365,  0.0336,  ...,  0.0666, -0.0562, -0.0425],\n",
       "                       [ 0.0920, -0.0475, -0.0536,  ..., -0.0722,  0.0289,  0.0773],\n",
       "                       ...,\n",
       "                       [ 0.0353, -0.0466, -0.0565,  ..., -0.0827, -0.0561, -0.0265],\n",
       "                       [-0.1036,  0.1121, -0.0940,  ..., -0.0022,  0.0111, -0.0580],\n",
       "                       [ 0.0519,  0.0893,  0.0193,  ..., -0.0639,  0.0463,  0.0646]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.attention.fc_out.weight',\n",
       "               tensor([[-0.0493, -0.0118, -0.0145,  ...,  0.0701,  0.0331, -0.0300],\n",
       "                       [-0.0602,  0.0699,  0.0028,  ...,  0.0141, -0.0274,  0.0516],\n",
       "                       [-0.0881,  0.0667,  0.0446,  ..., -0.0230,  0.0103,  0.0004],\n",
       "                       ...,\n",
       "                       [-0.0278, -0.0505,  0.0182,  ...,  0.0351, -0.0393,  0.0068],\n",
       "                       [ 0.0804, -0.0356,  0.0206,  ..., -0.0601,  0.0199,  0.0523],\n",
       "                       [ 0.0131,  0.0535, -0.0643,  ...,  0.0134,  0.0587, -0.0354]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.attention.fc_out.bias',\n",
       "               tensor([-0.0643,  0.0084, -0.0182, -0.0731,  0.0400, -0.0346, -0.0809, -0.0422,\n",
       "                        0.0287,  0.0233, -0.0556, -0.0567, -0.0114,  0.0012, -0.0306, -0.0265,\n",
       "                       -0.0085,  0.0629,  0.0305, -0.0412,  0.0542,  0.0299, -0.0772,  0.0163,\n",
       "                       -0.0619, -0.0649,  0.0186, -0.0312,  0.0379, -0.0418, -0.0491, -0.0625,\n",
       "                       -0.0093, -0.0568,  0.0499, -0.0673,  0.0285,  0.0473, -0.0065, -0.0012,\n",
       "                        0.0739,  0.0678, -0.0474, -0.0218,  0.0201,  0.0569, -0.0310, -0.0739,\n",
       "                        0.0241,  0.0091,  0.0468, -0.0305, -0.0817, -0.0504,  0.0455,  0.0045,\n",
       "                        0.0671, -0.0606, -0.0563,  0.0353, -0.0152, -0.0735,  0.0050,  0.0645,\n",
       "                       -0.0800, -0.0185, -0.0039,  0.0247, -0.0642,  0.0553, -0.0218,  0.0082,\n",
       "                        0.0777,  0.0455,  0.0086,  0.0800, -0.0646,  0.0681, -0.0414,  0.0850,\n",
       "                        0.0079, -0.0164,  0.0685, -0.0278,  0.0491,  0.0493,  0.0651, -0.0762,\n",
       "                       -0.0287, -0.0665,  0.0529, -0.0532, -0.0733, -0.0500,  0.0452, -0.0452,\n",
       "                        0.0688, -0.0078,  0.0030, -0.0649,  0.0152, -0.0168,  0.0242,  0.0107,\n",
       "                        0.0015,  0.0761,  0.0479, -0.0467,  0.0707,  0.0849, -0.0846, -0.0212,\n",
       "                       -0.0041,  0.0603, -0.0366, -0.0866,  0.0401,  0.0204,  0.0801, -0.0430,\n",
       "                        0.0628, -0.0349, -0.0109,  0.0728,  0.0904, -0.0609,  0.0312,  0.0544],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.bg',\n",
       "               tensor([-2.0077e-03, -1.1657e-02, -3.1355e-03,  3.0838e-03, -8.2803e-03,\n",
       "                        1.3756e-03, -4.1594e-03,  6.7417e-06,  7.1883e-03,  2.6204e-04,\n",
       "                       -7.7513e-03, -9.4796e-03,  6.1506e-03, -3.8656e-03,  7.2799e-03,\n",
       "                       -7.0816e-03,  1.2812e-02,  5.0713e-03, -4.5852e-03, -2.5649e-03,\n",
       "                        1.6120e-02, -2.8788e-03, -4.6423e-04, -2.1231e-03, -1.3458e-02,\n",
       "                        5.2748e-03, -2.9668e-03, -2.8203e-03,  3.4520e-03,  1.4646e-02,\n",
       "                       -7.3945e-03,  1.3866e-02,  5.5027e-03,  5.9588e-04,  1.1192e-02,\n",
       "                        5.4269e-03,  3.9126e-03,  3.1740e-06,  1.6760e-03, -1.5433e-03,\n",
       "                       -8.6628e-03, -1.7613e-02, -2.3171e-03,  6.6848e-03,  6.3874e-03,\n",
       "                        8.3924e-03, -7.6017e-03,  1.3460e-03,  2.3919e-04, -9.5768e-03,\n",
       "                       -5.4022e-03,  2.3614e-03,  1.3547e-03,  4.3577e-03, -3.5685e-03,\n",
       "                       -2.5983e-03, -8.9086e-03,  4.3602e-03, -1.5172e-04,  2.8244e-04,\n",
       "                        1.1046e-03,  1.6779e-02, -2.2155e-03, -3.9259e-04, -3.8636e-03,\n",
       "                        3.7173e-03,  2.1907e-02,  7.9454e-03, -5.7141e-03,  6.7792e-03,\n",
       "                        2.8534e-03,  1.1347e-02, -6.7824e-03, -8.9235e-04, -9.4297e-03,\n",
       "                       -9.3196e-03, -1.6563e-02, -1.5540e-03, -5.9879e-05, -1.7781e-02,\n",
       "                       -5.2706e-05, -5.8854e-03,  3.9702e-03, -2.1713e-03, -4.3081e-03,\n",
       "                       -5.1381e-03,  4.0305e-03, -1.1979e-02, -9.1783e-03,  2.4620e-03,\n",
       "                       -1.8558e-03, -3.3162e-03,  1.0079e-03,  3.3698e-03, -5.3844e-03,\n",
       "                        1.7592e-02, -6.9350e-03,  1.5742e-03,  1.1382e-02, -2.0444e-03,\n",
       "                        2.3207e-03, -2.3264e-03,  4.1110e-03,  1.8724e-03,  3.7220e-03,\n",
       "                       -1.1297e-03,  2.5283e-02,  5.1259e-03,  6.0379e-03,  2.7443e-03,\n",
       "                        4.2806e-03, -7.1963e-03,  3.0981e-03, -2.9664e-03,  4.4440e-03,\n",
       "                        3.0856e-03,  1.2558e-02, -9.7179e-04, -7.4489e-03,  3.1985e-03,\n",
       "                       -1.5592e-03, -5.7152e-03,  1.0063e-02,  1.3086e-02,  2.6873e-03,\n",
       "                        7.1557e-03,  4.1822e-03, -2.7799e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.Wr.weight',\n",
       "               tensor([[ 0.0242,  0.0498,  0.1041,  ..., -0.1043, -0.0917, -0.0678],\n",
       "                       [-0.1417,  0.0643,  0.0621,  ..., -0.1098, -0.0546, -0.1258],\n",
       "                       [ 0.1351, -0.1225,  0.1233,  ...,  0.0510, -0.1683, -0.1228],\n",
       "                       ...,\n",
       "                       [ 0.0090, -0.0134, -0.1474,  ..., -0.0433, -0.0982, -0.0550],\n",
       "                       [-0.0739, -0.0273, -0.0871,  ..., -0.0256, -0.0222, -0.0266],\n",
       "                       [ 0.1227, -0.0069,  0.0590,  ..., -0.0697,  0.1189,  0.1267]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.Ur.weight',\n",
       "               tensor([[ 0.0991, -0.1275, -0.1295,  ...,  0.1045,  0.0651,  0.0740],\n",
       "                       [ 0.0076,  0.0044, -0.1018,  ..., -0.0493,  0.0073, -0.0109],\n",
       "                       [ 0.0325,  0.1511,  0.1368,  ..., -0.1238,  0.0044,  0.1118],\n",
       "                       ...,\n",
       "                       [ 0.0117, -0.1187, -0.0349,  ..., -0.0476, -0.0537, -0.0491],\n",
       "                       [ 0.1534, -0.1256,  0.0604,  ..., -0.1368, -0.1474,  0.0223],\n",
       "                       [-0.1188, -0.0476, -0.1092,  ...,  0.0410, -0.1554,  0.0255]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.Wz.weight',\n",
       "               tensor([[-0.0919,  0.0884, -0.0637,  ...,  0.0447, -0.1065, -0.1464],\n",
       "                       [ 0.0263, -0.0346, -0.0016,  ...,  0.1371, -0.0115,  0.0551],\n",
       "                       [ 0.1135, -0.0438,  0.1125,  ..., -0.0118,  0.1541,  0.1179],\n",
       "                       ...,\n",
       "                       [-0.1516, -0.0732,  0.1147,  ..., -0.0693, -0.0187,  0.0468],\n",
       "                       [ 0.0678,  0.1137,  0.0445,  ...,  0.0044,  0.0900, -0.0802],\n",
       "                       [-0.1007,  0.0004,  0.0549,  ...,  0.1170,  0.0301,  0.1216]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.Uz.weight',\n",
       "               tensor([[-0.0043,  0.0657,  0.0419,  ...,  0.0335,  0.1444, -0.1349],\n",
       "                       [ 0.0944, -0.0451, -0.0248,  ..., -0.0755,  0.1214, -0.0868],\n",
       "                       [ 0.0801, -0.0693, -0.0201,  ...,  0.0440, -0.0478, -0.1128],\n",
       "                       ...,\n",
       "                       [-0.0826,  0.0777,  0.0615,  ..., -0.0303, -0.0730,  0.1310],\n",
       "                       [-0.0260,  0.1007, -0.0797,  ...,  0.0944,  0.0685, -0.1342],\n",
       "                       [-0.1064, -0.0025, -0.0066,  ...,  0.1351,  0.0297,  0.0684]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.Wg.weight',\n",
       "               tensor([[ 0.0749, -0.1351, -0.1460,  ..., -0.1165,  0.1291,  0.0123],\n",
       "                       [ 0.0378, -0.0665,  0.1662,  ..., -0.0036, -0.0247, -0.0555],\n",
       "                       [ 0.1353,  0.0501, -0.0941,  ..., -0.0786,  0.0629, -0.0720],\n",
       "                       ...,\n",
       "                       [-0.0361,  0.0843, -0.0521,  ...,  0.1060, -0.0007,  0.0302],\n",
       "                       [-0.1325, -0.1262, -0.1132,  ..., -0.0440, -0.0210,  0.0301],\n",
       "                       [-0.0699,  0.0378, -0.0329,  ..., -0.1126,  0.0586,  0.0007]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate1.Ug.weight',\n",
       "               tensor([[-0.0569,  0.0765,  0.1279,  ...,  0.0553, -0.0541,  0.1298],\n",
       "                       [-0.1329, -0.0378,  0.1224,  ..., -0.0281,  0.0461, -0.0067],\n",
       "                       [-0.0387, -0.0360,  0.0249,  ...,  0.1266,  0.0034,  0.1422],\n",
       "                       ...,\n",
       "                       [-0.0825, -0.0656,  0.0242,  ...,  0.0441, -0.0100, -0.0692],\n",
       "                       [ 0.0289,  0.0463,  0.0930,  ..., -0.1175,  0.1267,  0.0986],\n",
       "                       [-0.0886,  0.0801,  0.1014,  ..., -0.0966, -0.1103, -0.0814]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.bg',\n",
       "               tensor([ 0.0108,  0.0045, -0.0026,  0.0013, -0.0122, -0.0091,  0.0050,  0.0074,\n",
       "                        0.0055, -0.0056, -0.0048,  0.0035, -0.0052, -0.0030,  0.0011,  0.0034,\n",
       "                       -0.0027, -0.0066,  0.0016,  0.0009,  0.0084, -0.0118,  0.0222,  0.0011,\n",
       "                        0.0116, -0.0056, -0.0177,  0.0055, -0.0026,  0.0058, -0.0003,  0.0193,\n",
       "                        0.0005,  0.0027,  0.0012, -0.0138,  0.0047, -0.0087, -0.0044, -0.0004,\n",
       "                        0.0060,  0.0015,  0.0066, -0.0068,  0.0044, -0.0049, -0.0055, -0.0002,\n",
       "                        0.0019,  0.0076, -0.0062, -0.0138,  0.0064,  0.0117, -0.0057,  0.0043,\n",
       "                        0.0030,  0.0111, -0.0067,  0.0017,  0.0011,  0.0024,  0.0168,  0.0021,\n",
       "                        0.0086,  0.0066, -0.0020,  0.0068, -0.0036,  0.0022, -0.0083,  0.0011,\n",
       "                       -0.0001, -0.0061,  0.0043,  0.0071, -0.0072,  0.0013,  0.0127, -0.0113,\n",
       "                        0.0038,  0.0177,  0.0043, -0.0019, -0.0123,  0.0010,  0.0014,  0.0094,\n",
       "                        0.0132,  0.0011, -0.0005,  0.0119, -0.0026, -0.0020,  0.0011, -0.0047,\n",
       "                        0.0043, -0.0048,  0.0092, -0.0042,  0.0097, -0.0073,  0.0100,  0.0006,\n",
       "                        0.0022,  0.0019,  0.0203,  0.0079, -0.0064, -0.0087, -0.0003,  0.0147,\n",
       "                        0.0140,  0.0104, -0.0020,  0.0011, -0.0046, -0.0012, -0.0140, -0.0083,\n",
       "                        0.0025,  0.0054,  0.0006, -0.0009, -0.0009, -0.0011,  0.0001, -0.0156],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.Wr.weight',\n",
       "               tensor([[ 6.8144e-02,  1.0194e-01, -1.1244e-01,  ...,  1.1493e-02,\n",
       "                         1.2900e-01, -5.0126e-02],\n",
       "                       [-1.1745e-01, -7.7140e-02,  2.5626e-02,  ..., -9.1500e-02,\n",
       "                         9.0239e-02, -5.1452e-02],\n",
       "                       [-4.1192e-02, -4.4796e-02, -1.4287e-01,  ..., -4.3439e-02,\n",
       "                        -6.3698e-02, -1.0609e-01],\n",
       "                       ...,\n",
       "                       [ 3.4122e-02, -1.0549e-01, -1.4510e-01,  ..., -3.8346e-02,\n",
       "                        -3.7078e-02,  1.6582e-01],\n",
       "                       [-2.4520e-02, -1.0415e-01, -1.4829e-01,  ...,  8.1661e-02,\n",
       "                        -7.0092e-02,  1.9493e-02],\n",
       "                       [-6.8151e-03,  7.5399e-02,  4.2588e-05,  ..., -1.0223e-01,\n",
       "                         1.1754e-01,  8.1377e-02]], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.Ur.weight',\n",
       "               tensor([[ 0.1163, -0.0197, -0.1591,  ...,  0.0112,  0.0955,  0.0188],\n",
       "                       [ 0.0675,  0.0708,  0.0044,  ..., -0.0608,  0.1536, -0.0046],\n",
       "                       [-0.0165, -0.0407,  0.0781,  ...,  0.0652, -0.0075,  0.0790],\n",
       "                       ...,\n",
       "                       [ 0.0613, -0.0729,  0.0314,  ..., -0.0961, -0.0248,  0.0955],\n",
       "                       [-0.1487,  0.0016,  0.0198,  ...,  0.0631,  0.0770,  0.1243],\n",
       "                       [ 0.0785,  0.0863, -0.1385,  ..., -0.0951, -0.1165, -0.0708]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.Wz.weight',\n",
       "               tensor([[ 0.0224, -0.1561, -0.0561,  ...,  0.1323,  0.1268,  0.0783],\n",
       "                       [-0.0237,  0.1281,  0.0561,  ..., -0.0286,  0.1032,  0.0182],\n",
       "                       [ 0.0716,  0.0181,  0.0903,  ..., -0.1473, -0.1272,  0.0999],\n",
       "                       ...,\n",
       "                       [-0.0036,  0.0425,  0.0863,  ..., -0.1531, -0.0538,  0.1619],\n",
       "                       [-0.1076, -0.0914,  0.0741,  ..., -0.0868,  0.1554,  0.1059],\n",
       "                       [ 0.1231,  0.0785,  0.0793,  ..., -0.0220,  0.0630, -0.0135]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.Uz.weight',\n",
       "               tensor([[-0.1115, -0.1518,  0.0108,  ...,  0.0470, -0.1263,  0.0528],\n",
       "                       [ 0.1013, -0.0033, -0.0883,  ..., -0.0887,  0.1147,  0.0869],\n",
       "                       [ 0.0244,  0.0629,  0.1249,  ...,  0.0364, -0.0988, -0.0719],\n",
       "                       ...,\n",
       "                       [-0.0930, -0.0004,  0.0690,  ...,  0.0311,  0.0322, -0.0581],\n",
       "                       [-0.0054,  0.0589, -0.1276,  ...,  0.0073, -0.1314, -0.1496],\n",
       "                       [ 0.1363,  0.0481, -0.1373,  ..., -0.0631,  0.0667, -0.1237]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.Wg.weight',\n",
       "               tensor([[ 0.0104,  0.1084, -0.0903,  ..., -0.0105, -0.1317, -0.0654],\n",
       "                       [ 0.1120,  0.0840,  0.0394,  ..., -0.0644,  0.0922, -0.1224],\n",
       "                       [-0.1156, -0.0578,  0.1271,  ..., -0.0351,  0.1345,  0.0683],\n",
       "                       ...,\n",
       "                       [-0.0035, -0.0810,  0.0851,  ...,  0.0404, -0.0903, -0.1183],\n",
       "                       [ 0.0648, -0.0160,  0.0706,  ...,  0.1140, -0.0318, -0.0734],\n",
       "                       [ 0.0824, -0.1313, -0.0500,  ...,  0.0949, -0.0614,  0.1591]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.gate2.Ug.weight',\n",
       "               tensor([[-0.0315, -0.0028,  0.1302,  ..., -0.1286, -0.0982,  0.1081],\n",
       "                       [-0.0060,  0.1410, -0.1282,  ..., -0.0896, -0.1167, -0.1302],\n",
       "                       [-0.0404,  0.1214, -0.0105,  ..., -0.0110, -0.0348, -0.1316],\n",
       "                       ...,\n",
       "                       [ 0.0884, -0.1435,  0.1074,  ..., -0.0967,  0.0528, -0.0630],\n",
       "                       [ 0.0269,  0.1085, -0.0574,  ...,  0.0989, -0.0524,  0.1246],\n",
       "                       [ 0.0897, -0.0753,  0.0222,  ..., -0.0185,  0.1314, -0.0433]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.norm1.weight',\n",
       "               tensor([1.0086, 1.0234, 1.0231, 1.0209, 1.0422, 1.0357, 1.0172, 1.0035, 1.0360,\n",
       "                       0.9946, 1.0379, 1.0344, 1.0147, 1.0209, 1.0101, 0.9628, 0.9903, 1.0051,\n",
       "                       1.0499, 1.0177, 1.0206, 1.0402, 1.0256, 0.9924, 1.0274, 0.9881, 1.0522,\n",
       "                       1.0267, 1.0226, 1.0081, 1.0201, 1.0592, 1.0090, 0.9834, 1.0074, 1.0053,\n",
       "                       0.9985, 1.0263, 1.0195, 1.0569, 1.0042, 1.0483, 1.0104, 1.0219, 0.9911,\n",
       "                       1.0348, 1.0299, 1.0208, 1.0183, 0.9765, 1.0039, 1.0484, 1.0299, 1.0241,\n",
       "                       0.9816, 1.0255, 0.9926, 1.0352, 1.0177, 1.0329, 1.0250, 1.0196, 1.0191,\n",
       "                       0.9902, 1.0207, 0.9953, 1.0312, 1.0219, 1.0009, 1.0644, 0.9834, 0.9999,\n",
       "                       0.9932, 1.0191, 1.0279, 1.0280, 1.0444, 1.0446, 1.0000, 1.0090, 1.0005,\n",
       "                       1.0081, 1.0421, 1.0034, 1.0051, 1.0352, 1.0184, 1.0027, 0.9858, 1.0315,\n",
       "                       1.0252, 1.0041, 1.0671, 1.0344, 1.0195, 1.0361, 1.0224, 1.0174, 1.0197,\n",
       "                       1.0075, 0.9906, 1.0286, 1.0336, 1.0192, 1.0053, 1.0241, 1.0209, 1.0046,\n",
       "                       1.0000, 1.0513, 0.9994, 1.0122, 1.0345, 1.0121, 1.0556, 0.9935, 1.0395,\n",
       "                       1.0169, 1.0342, 1.0087, 1.0204, 1.0327, 0.9660, 1.0302, 1.0270, 1.0170,\n",
       "                       0.9795, 1.0451], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.norm1.bias',\n",
       "               tensor([-0.0078, -0.0036, -0.0051, -0.0366,  0.0242,  0.0456, -0.0240, -0.0084,\n",
       "                        0.0188, -0.0156,  0.0677,  0.0011, -0.0433, -0.0078, -0.0254,  0.0235,\n",
       "                       -0.0340,  0.0118,  0.0096, -0.0310,  0.0215,  0.0466, -0.0109, -0.0042,\n",
       "                       -0.0217, -0.0278, -0.0459, -0.0369, -0.0022,  0.0188, -0.0338,  0.0238,\n",
       "                       -0.0026, -0.0055, -0.0011,  0.0212,  0.0032, -0.0268, -0.0251,  0.0408,\n",
       "                        0.0014, -0.0559, -0.0015,  0.0007,  0.0043,  0.0048,  0.0005,  0.0045,\n",
       "                       -0.0011,  0.0218,  0.0123,  0.0475,  0.0037, -0.0107,  0.0064,  0.0108,\n",
       "                        0.0364, -0.0199, -0.0191,  0.0269, -0.0067, -0.0302,  0.0135,  0.0098,\n",
       "                       -0.0015,  0.0289,  0.0358, -0.0077,  0.0053,  0.0313,  0.0350, -0.0204,\n",
       "                        0.0079, -0.0013, -0.0322, -0.0152, -0.0025, -0.0432, -0.0058,  0.0007,\n",
       "                       -0.0037,  0.0307,  0.0563,  0.0061, -0.0075,  0.0383, -0.0451, -0.0026,\n",
       "                        0.0137,  0.0221, -0.0083,  0.0056,  0.0243,  0.0018,  0.0139, -0.0206,\n",
       "                       -0.0024, -0.0143, -0.0233,  0.0267, -0.0184, -0.0470,  0.0312, -0.0297,\n",
       "                       -0.0142, -0.0108,  0.0132,  0.0018, -0.0220, -0.0276,  0.0061, -0.0421,\n",
       "                        0.0392, -0.0378, -0.0161,  0.0045, -0.0317, -0.0412,  0.0061,  0.0200,\n",
       "                        0.0064,  0.0084,  0.0033, -0.0154,  0.0165,  0.0158, -0.0226,  0.0198],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.norm2.weight',\n",
       "               tensor([1.0021, 1.0007, 0.9967, 0.9926, 0.9937, 1.0057, 1.0008, 1.0014, 0.9917,\n",
       "                       1.0025, 0.9915, 0.9923, 0.9955, 1.0013, 1.0015, 0.9955, 0.9913, 0.9965,\n",
       "                       0.9962, 0.9942, 1.0186, 0.9934, 1.0017, 0.9931, 1.0110, 0.9917, 0.9991,\n",
       "                       1.0027, 0.9871, 0.9939, 0.9932, 1.0106, 1.0096, 0.9854, 0.9962, 0.9960,\n",
       "                       0.9828, 0.9883, 0.9940, 1.0008, 0.9898, 1.0003, 0.9986, 1.0023, 0.9969,\n",
       "                       1.0004, 0.9938, 0.9913, 0.9989, 0.9933, 0.9727, 1.0008, 1.0002, 1.0082,\n",
       "                       0.9957, 1.0012, 1.0002, 1.0049, 1.0031, 0.9977, 0.9951, 1.0087, 0.9802,\n",
       "                       1.0029, 0.9967, 0.9945, 0.9898, 1.0001, 1.0034, 0.9940, 0.9976, 0.9948,\n",
       "                       0.9889, 1.0015, 1.0134, 0.9948, 0.9969, 0.9938, 0.9886, 1.0177, 1.0039,\n",
       "                       1.0015, 0.9898, 0.9926, 0.9918, 1.0065, 1.0056, 0.9981, 0.9885, 1.0064,\n",
       "                       1.0009, 0.9892, 1.0009, 0.9812, 0.9997, 0.9905, 1.0035, 1.0013, 0.9964,\n",
       "                       0.9981, 0.9954, 0.9933, 1.0011, 1.0020, 1.0013, 0.9877, 1.0156, 0.9915,\n",
       "                       0.9899, 0.9979, 0.9855, 1.0012, 0.9976, 0.9896, 0.9878, 1.0019, 0.9983,\n",
       "                       1.0015, 1.0122, 0.9962, 0.9958, 0.9927, 0.9995, 0.9963, 1.0021, 0.9957,\n",
       "                       0.9937, 1.0021], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.norm2.bias',\n",
       "               tensor([-4.4472e-03, -2.8235e-03,  1.4654e-04,  1.4069e-03, -6.9724e-03,\n",
       "                        8.0097e-03,  3.3929e-03, -1.1231e-02,  5.5535e-03, -7.2218e-04,\n",
       "                       -5.3186e-03,  1.9208e-03,  6.6105e-03, -9.0510e-04,  1.8057e-03,\n",
       "                       -1.9680e-03,  3.7551e-03,  5.0928e-03, -2.0396e-03,  6.0715e-03,\n",
       "                       -5.0951e-04, -1.5503e-03, -9.9511e-04,  2.6722e-03,  2.9824e-03,\n",
       "                        6.8758e-03,  8.3329e-03, -4.5419e-03,  4.2484e-05,  1.9506e-03,\n",
       "                        3.8561e-03,  4.7603e-03, -2.1418e-03, -5.4131e-03,  4.2482e-03,\n",
       "                        7.5968e-04,  6.8408e-03,  2.8887e-03, -2.3817e-03, -2.9132e-03,\n",
       "                       -4.0455e-03, -1.0517e-04, -3.3890e-03,  3.1267e-03, -7.1450e-04,\n",
       "                       -3.4779e-04, -6.4841e-03, -4.1372e-03, -3.5971e-03,  1.0238e-03,\n",
       "                        7.1451e-03,  2.1003e-03,  6.0493e-03,  8.8320e-04, -3.0231e-03,\n",
       "                       -3.7287e-03,  6.7717e-03, -3.0929e-03,  2.1327e-03, -4.5560e-03,\n",
       "                        2.9107e-03, -6.2585e-03,  8.8022e-03, -1.4453e-03,  7.1326e-03,\n",
       "                        2.3974e-03,  1.5865e-03, -5.3400e-03,  1.2191e-03,  1.2417e-03,\n",
       "                        4.4789e-03, -3.6169e-03, -6.6765e-03, -1.0866e-03,  5.9378e-03,\n",
       "                        4.8892e-03, -1.8237e-03, -4.7082e-04,  2.7367e-03, -1.1025e-02,\n",
       "                        1.1072e-03, -3.5230e-03,  1.5286e-03,  6.8922e-03,  1.7831e-03,\n",
       "                       -8.1494e-04, -1.1023e-02, -3.1711e-03,  1.1896e-02,  7.6999e-03,\n",
       "                       -1.2029e-03, -2.0443e-03,  4.5637e-03, -4.6508e-03,  1.6343e-03,\n",
       "                       -7.3266e-03, -5.3039e-03,  5.4916e-03,  2.9288e-04, -8.2314e-04,\n",
       "                       -1.3398e-03,  1.4584e-03,  6.1141e-04, -4.3312e-03, -2.9811e-03,\n",
       "                       -7.8981e-05, -9.4451e-04,  3.0262e-03, -5.5552e-03, -1.1830e-03,\n",
       "                       -1.0098e-02,  5.7706e-03, -5.1955e-03, -9.7191e-04,  4.0887e-03,\n",
       "                       -1.0075e-03,  1.6894e-03, -3.3724e-03,  5.7428e-03,  3.0266e-03,\n",
       "                       -9.9993e-04, -4.8128e-03, -1.9630e-03, -2.0949e-03,  1.9947e-03,\n",
       "                        1.3707e-04,  2.9059e-03, -1.2093e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.norm_kv.weight',\n",
       "               tensor([1.0367, 1.0188, 1.0131, 0.9920, 1.0052, 1.0226, 0.9942, 0.9951, 0.9984,\n",
       "                       1.0067, 1.0079, 0.9939, 1.0047, 1.0362, 1.0031, 1.0184, 1.0414, 1.0004,\n",
       "                       0.9899, 0.9869, 1.0286, 1.0040, 0.9963, 1.0100, 1.0021, 0.9802, 1.0094,\n",
       "                       0.9903, 1.0087, 0.9812, 1.0116, 1.0048, 1.0014, 1.0072, 1.0106, 1.0030,\n",
       "                       1.0164, 1.0131, 0.9854, 1.0068, 0.9979, 0.9868, 1.0097, 1.0285, 1.0085,\n",
       "                       0.9977, 1.0221, 1.0003, 1.0108, 1.0028, 0.9888, 1.0066, 0.9958, 1.0078,\n",
       "                       0.9927, 0.9855, 0.9970, 0.9801, 0.9965, 1.0025, 0.9938, 0.9956, 1.0312,\n",
       "                       0.9975, 1.0186, 1.0477, 0.9707, 1.0070, 0.9915, 1.0406, 1.0056, 0.9866,\n",
       "                       0.9973, 1.0005, 1.0078, 1.0300, 1.0222, 1.0114, 1.0245, 1.0023, 0.9901,\n",
       "                       1.0152, 1.0059, 0.9959, 1.0181, 1.0079, 0.9968, 0.9985, 0.9997, 0.9993,\n",
       "                       0.9971, 1.0146, 1.0062, 1.0001, 0.9982, 1.0032, 1.0102, 1.0068, 0.9921,\n",
       "                       0.9843, 0.9953, 0.9927, 0.9814, 1.0129, 1.0094, 0.9900, 0.9796, 0.9936,\n",
       "                       0.9902, 1.0012, 0.9933, 1.0019, 1.0294, 0.9953, 0.9998, 0.9949, 1.0053,\n",
       "                       0.9927, 0.9986, 1.0054, 1.0054, 1.0095, 1.0039, 0.9944, 0.9930, 1.0011,\n",
       "                       1.0068, 1.0005], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.norm_kv.bias',\n",
       "               tensor([-2.5468e-03, -2.4968e-03,  1.6228e-03,  6.9439e-03, -1.7116e-03,\n",
       "                        4.8649e-04,  7.2948e-03,  3.7522e-03, -9.3203e-04,  7.2196e-03,\n",
       "                       -3.0223e-04,  4.1920e-03,  5.4663e-03,  3.6419e-03,  1.0865e-02,\n",
       "                       -3.3837e-04, -3.2148e-03, -3.8847e-03,  1.4875e-03,  1.1051e-02,\n",
       "                       -9.9019e-04,  3.0715e-03,  7.6337e-03,  3.2146e-03,  8.0933e-03,\n",
       "                       -8.9721e-03,  4.1139e-03,  2.1512e-03, -1.3042e-03, -9.2422e-03,\n",
       "                       -4.1165e-03, -1.1624e-02, -6.3305e-03, -3.6393e-03, -6.9854e-03,\n",
       "                       -7.1621e-03, -8.0732e-05, -3.3390e-03,  1.2929e-02, -2.0980e-03,\n",
       "                        2.8057e-03,  9.7872e-03,  5.0378e-03, -1.1958e-02,  1.9146e-03,\n",
       "                        5.7440e-04, -3.6930e-03,  1.4691e-03, -1.2935e-03,  6.4631e-03,\n",
       "                        4.2762e-03, -2.8594e-04,  6.2735e-03,  6.5398e-03,  2.0713e-03,\n",
       "                        7.4144e-03,  5.3214e-03,  7.5821e-03,  9.2398e-04, -3.2354e-03,\n",
       "                        4.3733e-03,  9.1746e-03,  1.7921e-03,  6.0580e-03, -6.2406e-03,\n",
       "                       -3.7637e-03,  2.8843e-03,  7.7816e-04,  2.2985e-03,  3.5155e-03,\n",
       "                        1.3398e-03,  2.7568e-03,  2.6366e-03,  2.5028e-03,  4.6460e-03,\n",
       "                       -4.3886e-03,  1.0917e-02,  5.5394e-03,  1.6947e-03, -5.6973e-04,\n",
       "                        5.8197e-04, -4.6184e-03, -4.1670e-03,  2.2768e-03, -3.4633e-03,\n",
       "                        3.5549e-03,  5.2507e-03,  3.4323e-03,  4.7777e-03,  9.6443e-03,\n",
       "                        1.0085e-02,  5.3180e-03,  7.1369e-03, -2.5434e-03, -2.5848e-03,\n",
       "                        2.5999e-03,  2.5701e-04,  3.3666e-03, -2.1271e-03, -7.9870e-03,\n",
       "                       -6.7017e-03, -7.8365e-03, -9.1323e-03, -8.6666e-03, -1.6620e-03,\n",
       "                        3.3831e-03, -1.9249e-02, -2.3410e-03, -9.9326e-03, -3.1663e-03,\n",
       "                       -1.0730e-02,  4.5216e-03, -3.6350e-03, -6.2759e-03, -4.5727e-03,\n",
       "                       -2.1615e-03,  4.1230e-03,  7.0460e-05, -1.9291e-03,  7.5146e-04,\n",
       "                       -3.4654e-03, -2.6252e-03, -7.0343e-03, -5.2525e-03, -9.4401e-03,\n",
       "                       -3.9789e-03,  2.9025e-03,  1.8893e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.fc.0.weight',\n",
       "               tensor([[ 0.0294,  0.0573, -0.0889,  ..., -0.0791, -0.0021, -0.0407],\n",
       "                       [-0.0483, -0.0437, -0.0849,  ...,  0.0246,  0.0516,  0.0842],\n",
       "                       [-0.0646, -0.0587, -0.0228,  ...,  0.0352, -0.0202,  0.0655],\n",
       "                       ...,\n",
       "                       [ 0.0586, -0.1031, -0.0473,  ...,  0.0144,  0.0811,  0.0770],\n",
       "                       [-0.0538,  0.0869, -0.0009,  ...,  0.0111, -0.0763,  0.0057],\n",
       "                       [-0.0985,  0.0018, -0.0110,  ...,  0.0173,  0.0062,  0.0664]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.2.fc.0.bias',\n",
       "               tensor([-0.0816, -0.0518, -0.0848, -0.0743, -0.0537,  0.0359, -0.0696,  0.0410,\n",
       "                       -0.0520,  0.0464,  0.0030,  0.0925, -0.0435,  0.0578,  0.0263,  0.0209,\n",
       "                       -0.0007, -0.0445,  0.0648, -0.0272,  0.0647, -0.0362,  0.0602,  0.0511,\n",
       "                        0.0896,  0.0050,  0.0584, -0.0900,  0.0717,  0.0367,  0.0186,  0.0561,\n",
       "                       -0.0344,  0.0438, -0.0526,  0.0696, -0.0455, -0.0069,  0.0401,  0.0801,\n",
       "                       -0.0720,  0.0717,  0.0214,  0.0140,  0.0168,  0.0763,  0.0469,  0.0317,\n",
       "                        0.0161, -0.0736, -0.0031,  0.0569, -0.0777,  0.0651,  0.0830, -0.0389,\n",
       "                        0.0507, -0.0098, -0.0350,  0.0669,  0.0183,  0.0755, -0.0516, -0.0617,\n",
       "                       -0.0057, -0.0950,  0.0879,  0.0068,  0.0057, -0.0642,  0.0430,  0.0239,\n",
       "                        0.0394,  0.0784,  0.0459, -0.0030,  0.0248, -0.0833, -0.0568,  0.0862,\n",
       "                       -0.0643,  0.0190, -0.0349,  0.0231,  0.0594,  0.0039,  0.0392, -0.0537,\n",
       "                        0.0067,  0.0334, -0.0474,  0.0038,  0.0237,  0.0496, -0.0044,  0.0386,\n",
       "                       -0.0100, -0.0473,  0.0346, -0.0281,  0.0602,  0.0508, -0.0403,  0.0657,\n",
       "                        0.0008,  0.0392, -0.0356,  0.0760, -0.0687,  0.0247,  0.0628,  0.0088,\n",
       "                       -0.0737, -0.0391,  0.0144, -0.0023,  0.0582, -0.0061, -0.0404,  0.0640,\n",
       "                       -0.0596, -0.0629, -0.0393, -0.0104, -0.0578,  0.0720, -0.0565,  0.0482],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.attention.values.weight',\n",
       "               tensor([[-0.0424,  0.0104,  0.0321,  ..., -0.0892, -0.0085, -0.0667],\n",
       "                       [ 0.0597,  0.0547, -0.0101,  ..., -0.0471, -0.0691, -0.0707],\n",
       "                       [-0.0583,  0.0211, -0.0776,  ..., -0.0373,  0.0408, -0.0193],\n",
       "                       ...,\n",
       "                       [-0.0581, -0.0572, -0.0345,  ..., -0.0762,  0.0056, -0.0064],\n",
       "                       [ 0.0653,  0.0190,  0.0546,  ..., -0.0060, -0.0641, -0.0006],\n",
       "                       [ 0.0761,  0.0207,  0.0461,  ..., -0.0092,  0.0806, -0.0692]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.attention.keys.weight',\n",
       "               tensor([[-9.2845e-02, -6.8438e-02,  4.9710e-02,  ..., -2.0353e-02,\n",
       "                        -6.2348e-02,  4.7027e-02],\n",
       "                       [-6.5805e-02, -1.1683e-01, -2.9814e-02,  ...,  5.3141e-02,\n",
       "                        -2.4172e-03,  9.7224e-02],\n",
       "                       [ 9.0318e-05,  1.4867e-02, -3.8494e-02,  ...,  2.5948e-02,\n",
       "                        -3.3201e-03,  5.7673e-02],\n",
       "                       ...,\n",
       "                       [ 9.1751e-03, -2.0675e-02,  5.5556e-02,  ..., -8.4443e-02,\n",
       "                        -5.1491e-02, -9.6628e-02],\n",
       "                       [-7.3108e-02, -1.9107e-02,  4.7525e-03,  ..., -1.4956e-01,\n",
       "                         2.9869e-02,  7.1133e-02],\n",
       "                       [ 8.5485e-02,  2.6276e-02, -4.7781e-02,  ..., -2.7869e-02,\n",
       "                         2.3688e-02,  6.3244e-02]], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.attention.queries.weight',\n",
       "               tensor([[ 0.0451, -0.0107,  0.0162,  ...,  0.0518, -0.0258, -0.0247],\n",
       "                       [-0.0520, -0.0775, -0.0797,  ...,  0.0257,  0.0477,  0.0749],\n",
       "                       [-0.0689, -0.1017, -0.0014,  ...,  0.0401, -0.0400,  0.0498],\n",
       "                       ...,\n",
       "                       [ 0.0448,  0.0778,  0.0221,  ...,  0.0708,  0.1013,  0.0008],\n",
       "                       [-0.0256, -0.0111,  0.0335,  ..., -0.0906, -0.0461, -0.0025],\n",
       "                       [-0.0269, -0.0665, -0.0192,  ..., -0.0110,  0.0462,  0.0161]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.attention.fc_out.weight',\n",
       "               tensor([[-0.0565, -0.0399, -0.0157,  ..., -0.0556,  0.0145,  0.0793],\n",
       "                       [-0.0341,  0.0291,  0.0578,  ..., -0.0849, -0.0540,  0.0079],\n",
       "                       [ 0.0617,  0.0865,  0.0836,  ..., -0.0725,  0.0469, -0.0141],\n",
       "                       ...,\n",
       "                       [ 0.0627,  0.0622, -0.0674,  ..., -0.0716, -0.0040,  0.0465],\n",
       "                       [-0.0133, -0.0404, -0.0438,  ..., -0.0670, -0.0239, -0.0290],\n",
       "                       [-0.0766,  0.0212, -0.0092,  ..., -0.0511,  0.0344,  0.0292]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.attention.fc_out.bias',\n",
       "               tensor([-0.0117,  0.0658,  0.0788, -0.0537,  0.0019,  0.0427, -0.0098, -0.0356,\n",
       "                       -0.0584, -0.0037, -0.0577, -0.0617,  0.0573, -0.0843,  0.0535, -0.0821,\n",
       "                        0.0309,  0.0413,  0.0479, -0.0489, -0.0623, -0.0711,  0.0251, -0.0373,\n",
       "                       -0.0883,  0.0567,  0.0636, -0.0180, -0.0824,  0.0109,  0.0615, -0.0357,\n",
       "                        0.0469, -0.0609, -0.0474,  0.0130,  0.0495, -0.0484, -0.0758, -0.0228,\n",
       "                        0.0693,  0.0669,  0.0603, -0.0688, -0.0680,  0.0283, -0.0472,  0.0811,\n",
       "                        0.0760,  0.0315, -0.0535,  0.0762,  0.0743,  0.0614,  0.0793, -0.0008,\n",
       "                        0.0412, -0.0119, -0.0367,  0.0016, -0.0430,  0.0285,  0.0815, -0.0519,\n",
       "                       -0.0092, -0.0176, -0.0113,  0.0352, -0.0862, -0.0243,  0.0751, -0.0267,\n",
       "                       -0.0808,  0.0085, -0.0529, -0.0627,  0.0615,  0.0806, -0.0904, -0.0278,\n",
       "                       -0.0235,  0.0028,  0.0591,  0.0589, -0.0745, -0.0731, -0.0465, -0.0765,\n",
       "                        0.0488, -0.0345, -0.0093, -0.0816,  0.0358, -0.0269, -0.0480, -0.0542,\n",
       "                        0.0298,  0.0723,  0.0320, -0.0623, -0.0759,  0.0641,  0.0862,  0.0087,\n",
       "                        0.0385,  0.0462,  0.0221, -0.0018,  0.0477,  0.0452,  0.0417,  0.0258,\n",
       "                        0.0262,  0.0053, -0.0496, -0.0394, -0.0326, -0.0290, -0.0437, -0.0496,\n",
       "                        0.0301, -0.0060,  0.0607, -0.0695, -0.0261,  0.0359,  0.0796,  0.0028],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.bg',\n",
       "               tensor([ 0.0093, -0.0203,  0.0018,  0.0009,  0.0060,  0.0004,  0.0112, -0.0054,\n",
       "                        0.0010, -0.0092,  0.0116, -0.0163, -0.0048, -0.0051,  0.0051, -0.0002,\n",
       "                        0.0015,  0.0006,  0.0004,  0.0021,  0.0032,  0.0172, -0.0035, -0.0148,\n",
       "                        0.0133,  0.0055,  0.0041, -0.0003,  0.0140,  0.0034, -0.0054,  0.0111,\n",
       "                        0.0081,  0.0110, -0.0034, -0.0026, -0.0065,  0.0060,  0.0127, -0.0004,\n",
       "                       -0.0003, -0.0098,  0.0097,  0.0021,  0.0026, -0.0036,  0.0164,  0.0174,\n",
       "                        0.0006, -0.0023,  0.0014,  0.0033, -0.0060,  0.0093,  0.0027, -0.0014,\n",
       "                        0.0059,  0.0014,  0.0004,  0.0033,  0.0032, -0.0013,  0.0054, -0.0028,\n",
       "                        0.0099,  0.0087,  0.0109, -0.0084,  0.0118, -0.0026,  0.0053,  0.0070,\n",
       "                       -0.0036, -0.0008, -0.0017,  0.0057, -0.0164,  0.0031, -0.0036,  0.0028,\n",
       "                        0.0255,  0.0027, -0.0010,  0.0222,  0.0020,  0.0084,  0.0054,  0.0015,\n",
       "                       -0.0009, -0.0049, -0.0036,  0.0005,  0.0082,  0.0019, -0.0026, -0.0089,\n",
       "                        0.0081, -0.0154,  0.0040,  0.0044, -0.0039,  0.0069,  0.0029,  0.0009,\n",
       "                       -0.0071,  0.0147,  0.0242, -0.0080,  0.0069,  0.0066, -0.0051,  0.0057,\n",
       "                       -0.0064,  0.0010,  0.0033,  0.0018,  0.0028,  0.0033, -0.0100, -0.0070,\n",
       "                       -0.0029, -0.0098, -0.0156, -0.0114, -0.0009,  0.0027, -0.0007,  0.0180],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.Wr.weight',\n",
       "               tensor([[ 0.0037,  0.1063,  0.0887,  ...,  0.0182, -0.1341,  0.0713],\n",
       "                       [ 0.0692, -0.0268,  0.0417,  ..., -0.0556, -0.0098, -0.0899],\n",
       "                       [ 0.0158,  0.0315,  0.0285,  ..., -0.1508,  0.1071,  0.0438],\n",
       "                       ...,\n",
       "                       [ 0.1031,  0.0935, -0.0151,  ...,  0.0722, -0.0961, -0.1350],\n",
       "                       [ 0.1398,  0.0684,  0.1272,  ..., -0.0916, -0.0974,  0.1352],\n",
       "                       [-0.0624,  0.1567, -0.0098,  ...,  0.0197, -0.0751,  0.0927]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.Ur.weight',\n",
       "               tensor([[ 0.0708,  0.1236,  0.1121,  ..., -0.0401,  0.0487,  0.0401],\n",
       "                       [-0.0410, -0.0219, -0.1231,  ..., -0.1425, -0.0374, -0.0011],\n",
       "                       [ 0.0214, -0.1210, -0.0982,  ...,  0.0218,  0.0910,  0.0254],\n",
       "                       ...,\n",
       "                       [-0.0958,  0.0246,  0.0838,  ..., -0.1246,  0.1209, -0.0542],\n",
       "                       [ 0.1215,  0.1482, -0.1092,  ...,  0.0980,  0.0457, -0.0944],\n",
       "                       [-0.0102, -0.1661,  0.0049,  ...,  0.1462,  0.1273,  0.1212]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.Wz.weight',\n",
       "               tensor([[-0.1033,  0.0065,  0.0843,  ..., -0.0726,  0.1018,  0.0410],\n",
       "                       [-0.0849, -0.1566,  0.1396,  ..., -0.1105,  0.1471, -0.1412],\n",
       "                       [ 0.0161,  0.0603,  0.0527,  ..., -0.1111, -0.0682, -0.0761],\n",
       "                       ...,\n",
       "                       [ 0.0650, -0.1025, -0.0660,  ...,  0.0426, -0.0454,  0.1273],\n",
       "                       [-0.0543,  0.1138,  0.0223,  ...,  0.1153,  0.1033,  0.0169],\n",
       "                       [-0.0269,  0.0783,  0.0279,  ..., -0.0026,  0.1310,  0.1393]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.Uz.weight',\n",
       "               tensor([[-0.1248,  0.0871,  0.0771,  ...,  0.0637,  0.0508, -0.0116],\n",
       "                       [ 0.0339, -0.0759, -0.0279,  ..., -0.0834,  0.0229, -0.0441],\n",
       "                       [-0.0291, -0.0798, -0.0427,  ..., -0.0335,  0.0474,  0.0020],\n",
       "                       ...,\n",
       "                       [ 0.1010,  0.0145,  0.0267,  ..., -0.0130, -0.1201, -0.0685],\n",
       "                       [ 0.0872, -0.0295,  0.0303,  ...,  0.1042,  0.0457, -0.0053],\n",
       "                       [ 0.0479,  0.1500, -0.0512,  ...,  0.1239, -0.0391,  0.0677]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.Wg.weight',\n",
       "               tensor([[-0.0569,  0.1013, -0.1051,  ..., -0.0376, -0.0541,  0.1535],\n",
       "                       [-0.0167, -0.0446,  0.0937,  ..., -0.0869,  0.0433,  0.0475],\n",
       "                       [-0.0797, -0.0268, -0.1637,  ...,  0.0481,  0.0033,  0.0462],\n",
       "                       ...,\n",
       "                       [-0.1020,  0.0338,  0.0600,  ...,  0.0877,  0.0670, -0.0662],\n",
       "                       [-0.0681, -0.0069, -0.1348,  ..., -0.0238,  0.0794, -0.0789],\n",
       "                       [ 0.0468,  0.0858, -0.0555,  ..., -0.1168,  0.0632, -0.1176]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate1.Ug.weight',\n",
       "               tensor([[ 0.0650, -0.0644, -0.0388,  ...,  0.0723,  0.0231,  0.0518],\n",
       "                       [ 0.0329,  0.0609,  0.1114,  ...,  0.1225,  0.0438,  0.0733],\n",
       "                       [-0.1079, -0.1283, -0.0722,  ..., -0.0907, -0.1162,  0.1221],\n",
       "                       ...,\n",
       "                       [-0.0275,  0.0673, -0.1440,  ...,  0.0022,  0.0666, -0.1178],\n",
       "                       [-0.0747, -0.0955, -0.0365,  ..., -0.0755,  0.0841, -0.1014],\n",
       "                       [-0.1299, -0.0228,  0.0310,  ...,  0.0862, -0.0743,  0.0088]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.bg',\n",
       "               tensor([ 6.1189e-03, -5.0517e-03, -6.5784e-03, -5.1492e-03,  5.0265e-03,\n",
       "                        2.7579e-04,  5.8235e-04,  3.8735e-03, -2.4501e-03,  2.5541e-03,\n",
       "                        5.0843e-04, -9.4377e-05,  9.3451e-03,  9.9430e-03,  1.4541e-02,\n",
       "                        7.2444e-03, -3.0961e-03, -7.2668e-04, -7.5169e-04,  1.0481e-03,\n",
       "                        1.2072e-02,  1.1645e-02,  4.1368e-03, -4.8416e-03,  1.0765e-02,\n",
       "                        1.4371e-03, -1.1814e-02, -3.0600e-03,  1.4696e-02,  7.3313e-03,\n",
       "                       -5.3280e-03,  4.7803e-03, -2.8415e-03, -7.6469e-03, -1.2171e-02,\n",
       "                       -4.8461e-03,  1.0280e-02, -4.9337e-03,  2.3049e-03, -9.2241e-03,\n",
       "                       -1.6683e-03,  3.8704e-03,  8.2665e-03,  2.5153e-03,  1.5121e-02,\n",
       "                        1.2326e-02, -3.1225e-03,  2.2852e-03, -4.6901e-03,  1.1366e-02,\n",
       "                        3.7746e-03,  1.6212e-02,  1.1150e-02,  4.1027e-04,  4.6414e-03,\n",
       "                       -9.6536e-03,  2.6750e-03,  6.0211e-03,  3.7846e-03, -3.7787e-03,\n",
       "                       -3.8652e-03, -1.2596e-02, -6.8237e-03,  7.3363e-03,  1.2154e-02,\n",
       "                       -2.2317e-03,  1.5137e-02, -6.1543e-03, -3.4363e-03,  4.4242e-03,\n",
       "                        6.6917e-03, -1.7973e-03,  3.6398e-03,  7.1512e-03,  4.7340e-03,\n",
       "                        1.4427e-03, -4.7760e-04,  8.2638e-03, -6.9629e-03, -4.2576e-04,\n",
       "                        6.8936e-03,  5.3569e-03,  1.0722e-02,  2.5180e-03, -5.9483e-03,\n",
       "                        8.3971e-03,  2.2236e-03,  1.0429e-02,  6.6905e-03, -2.9873e-03,\n",
       "                       -6.9174e-03,  2.7248e-03,  1.7432e-02, -6.2773e-03,  1.1628e-02,\n",
       "                       -4.9168e-03,  5.2343e-04,  5.4650e-03, -6.7748e-03,  7.8766e-03,\n",
       "                       -1.0033e-03,  1.1298e-03, -2.7228e-03,  6.2435e-04, -6.3724e-03,\n",
       "                       -7.5953e-03,  2.3800e-02,  5.2114e-03, -1.0882e-03, -2.1008e-04,\n",
       "                       -1.8621e-03,  1.1767e-04, -6.3449e-03,  7.9811e-04,  5.0162e-03,\n",
       "                       -2.1806e-03,  4.8015e-03,  3.4319e-03,  7.5062e-04, -6.4701e-03,\n",
       "                       -5.0889e-03, -1.1118e-02,  6.1927e-03,  7.1496e-03, -5.8197e-03,\n",
       "                        4.7503e-03,  8.7994e-03,  4.8653e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.Wr.weight',\n",
       "               tensor([[ 0.0535, -0.0237, -0.0995,  ..., -0.0300,  0.0418, -0.1192],\n",
       "                       [-0.0946,  0.1478, -0.0080,  ...,  0.0002, -0.1140, -0.0873],\n",
       "                       [ 0.1124,  0.0599,  0.1492,  ...,  0.0388, -0.0909, -0.1275],\n",
       "                       ...,\n",
       "                       [ 0.1548,  0.1204,  0.0186,  ..., -0.1484,  0.1296,  0.0679],\n",
       "                       [-0.0439, -0.0258, -0.0201,  ...,  0.0990, -0.0875, -0.0396],\n",
       "                       [-0.1076, -0.1110,  0.0642,  ..., -0.1322,  0.1189,  0.0618]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.Ur.weight',\n",
       "               tensor([[ 0.0180, -0.0566, -0.0052,  ..., -0.0624, -0.0535, -0.0921],\n",
       "                       [-0.0068, -0.0357, -0.1118,  ..., -0.0694, -0.0591, -0.0309],\n",
       "                       [-0.0622,  0.0977, -0.0976,  ...,  0.0760, -0.1288, -0.0327],\n",
       "                       ...,\n",
       "                       [ 0.1323,  0.0796,  0.0665,  ..., -0.0297,  0.0158, -0.1234],\n",
       "                       [-0.0963, -0.0761, -0.0123,  ...,  0.0419,  0.1486,  0.1340],\n",
       "                       [ 0.0785, -0.0838, -0.0041,  ...,  0.0479, -0.1321, -0.1556]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.Wz.weight',\n",
       "               tensor([[-0.0675,  0.0331,  0.1135,  ...,  0.0239,  0.0047, -0.0810],\n",
       "                       [ 0.1465,  0.0044, -0.0275,  ..., -0.1103, -0.0690, -0.0855],\n",
       "                       [ 0.1245,  0.1064,  0.0293,  ...,  0.1399, -0.0696,  0.1146],\n",
       "                       ...,\n",
       "                       [-0.1386,  0.0943,  0.1230,  ..., -0.0326, -0.1119, -0.1404],\n",
       "                       [-0.0851, -0.1002,  0.0753,  ...,  0.0975,  0.0656, -0.1680],\n",
       "                       [ 0.0836,  0.0853, -0.1462,  ...,  0.0152, -0.0091, -0.1398]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.Uz.weight',\n",
       "               tensor([[ 0.0552, -0.0172,  0.1375,  ...,  0.1109,  0.0830,  0.1311],\n",
       "                       [-0.1315,  0.1508, -0.0420,  ...,  0.0727, -0.0109,  0.1214],\n",
       "                       [-0.0403,  0.1211, -0.0784,  ..., -0.0506, -0.0174, -0.0027],\n",
       "                       ...,\n",
       "                       [-0.1196,  0.0085,  0.1184,  ...,  0.1196, -0.1076,  0.1128],\n",
       "                       [-0.1307,  0.1543,  0.0279,  ..., -0.1562,  0.0515,  0.0871],\n",
       "                       [ 0.0750, -0.0132,  0.1091,  ..., -0.1219, -0.0133, -0.1383]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.Wg.weight',\n",
       "               tensor([[-0.1284,  0.1244, -0.0582,  ...,  0.0827,  0.1097,  0.0302],\n",
       "                       [-0.1102,  0.1342,  0.0982,  ...,  0.0826,  0.0136,  0.1297],\n",
       "                       [ 0.1054,  0.0046, -0.0842,  ..., -0.0843, -0.0592,  0.0009],\n",
       "                       ...,\n",
       "                       [-0.0771, -0.0833,  0.1169,  ..., -0.0060, -0.0827,  0.0364],\n",
       "                       [-0.0046,  0.1082,  0.0211,  ...,  0.0857,  0.0098,  0.1424],\n",
       "                       [ 0.1097, -0.0732,  0.0409,  ..., -0.0664,  0.0398, -0.0222]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.gate2.Ug.weight',\n",
       "               tensor([[ 0.0106, -0.0791,  0.0790,  ..., -0.1041,  0.0225,  0.1165],\n",
       "                       [-0.0139, -0.1469, -0.0432,  ...,  0.0580, -0.1027,  0.0007],\n",
       "                       [ 0.0233,  0.0134, -0.0812,  ..., -0.1009,  0.0486, -0.0321],\n",
       "                       ...,\n",
       "                       [ 0.0638,  0.1653, -0.0023,  ...,  0.0498, -0.1354,  0.1024],\n",
       "                       [ 0.0233, -0.1025,  0.1390,  ...,  0.1012,  0.0887,  0.0633],\n",
       "                       [ 0.1421, -0.1277,  0.1599,  ...,  0.0949, -0.1155, -0.1059]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.norm1.weight',\n",
       "               tensor([1.0033, 0.9944, 0.9808, 0.9945, 0.9964, 1.0151, 1.0216, 0.9893, 1.0184,\n",
       "                       0.9972, 1.0294, 1.0178, 1.0174, 1.0094, 0.9697, 1.0185, 1.0163, 1.0198,\n",
       "                       1.0070, 1.0058, 1.0261, 1.0190, 1.0236, 1.0206, 1.0021, 0.9982, 1.0111,\n",
       "                       1.0060, 0.9988, 1.0131, 1.0096, 1.0243, 1.0225, 0.9937, 1.0286, 1.0238,\n",
       "                       0.9816, 1.0091, 1.0218, 1.0049, 1.0075, 1.0054, 1.0316, 1.0289, 1.0156,\n",
       "                       0.9996, 1.0025, 0.9943, 1.0235, 1.0176, 1.0151, 0.9920, 1.0030, 1.0195,\n",
       "                       1.0302, 1.0107, 1.0060, 1.0082, 0.9839, 0.9995, 1.0224, 1.0201, 1.0095,\n",
       "                       1.0138, 0.9895, 1.0134, 1.0160, 1.0014, 0.9623, 1.0003, 1.0079, 1.0414,\n",
       "                       1.0297, 1.0220, 1.0177, 0.9885, 1.0173, 0.9971, 1.0104, 1.0086, 1.0057,\n",
       "                       1.0110, 1.0208, 1.0003, 1.0250, 0.9983, 0.9954, 1.0171, 1.0267, 1.0186,\n",
       "                       0.9943, 0.9834, 1.0249, 1.0009, 1.0149, 1.0079, 0.9925, 0.9954, 1.0257,\n",
       "                       0.9938, 1.0106, 0.9923, 0.9839, 0.9983, 0.9857, 1.0035, 1.0280, 0.9709,\n",
       "                       1.0156, 1.0013, 1.0020, 1.0412, 1.0504, 0.9972, 1.0032, 1.0087, 1.0026,\n",
       "                       1.0103, 0.9745, 1.0331, 1.0095, 1.0025, 1.0184, 1.0182, 1.0204, 0.9911,\n",
       "                       1.0041, 0.9864], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.norm1.bias',\n",
       "               tensor([-0.0110,  0.0126,  0.0199,  0.0205, -0.0029,  0.0015,  0.0205, -0.0197,\n",
       "                       -0.0142,  0.0205, -0.0226,  0.0117,  0.0015, -0.0078,  0.0066,  0.0160,\n",
       "                       -0.0071, -0.0009, -0.0123, -0.0289, -0.0242,  0.0033, -0.0121, -0.0193,\n",
       "                        0.0083,  0.0182,  0.0013, -0.0162, -0.0017, -0.0196, -0.0171,  0.0217,\n",
       "                       -0.0238, -0.0229, -0.0086, -0.0293, -0.0025,  0.0080, -0.0115,  0.0131,\n",
       "                        0.0093,  0.0028,  0.0383,  0.0284,  0.0217,  0.0177, -0.0062,  0.0040,\n",
       "                        0.0283, -0.0044, -0.0019,  0.0038,  0.0161, -0.0110, -0.0395, -0.0295,\n",
       "                        0.0069, -0.0088, -0.0508, -0.0048, -0.0153,  0.0408,  0.0280, -0.0064,\n",
       "                        0.0033,  0.0212, -0.0186,  0.0079, -0.0417,  0.0031,  0.0166,  0.0059,\n",
       "                        0.0045,  0.0254,  0.0274,  0.0170,  0.0240,  0.0361,  0.0116,  0.0166,\n",
       "                        0.0076, -0.0172,  0.0136, -0.0092, -0.0178, -0.0118, -0.0035, -0.0176,\n",
       "                        0.0175, -0.0058, -0.0081,  0.0127,  0.0190,  0.0076,  0.0131,  0.0146,\n",
       "                       -0.0108, -0.0175,  0.0274,  0.0120,  0.0337,  0.0122, -0.0068, -0.0025,\n",
       "                        0.0097, -0.0132,  0.0386,  0.0059,  0.0012, -0.0159, -0.0008, -0.0118,\n",
       "                        0.0197, -0.0149, -0.0086, -0.0294, -0.0089,  0.0176, -0.0233,  0.0254,\n",
       "                       -0.0114, -0.0136,  0.0069, -0.0283,  0.0060, -0.0014, -0.0358, -0.0087],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.norm2.weight',\n",
       "               tensor([0.9947, 1.0004, 0.9938, 1.0002, 0.9992, 0.9921, 0.9921, 1.0057, 0.9903,\n",
       "                       0.9957, 0.9870, 0.9966, 1.0029, 0.9980, 0.9933, 1.0031, 0.9971, 0.9933,\n",
       "                       0.9889, 0.9867, 0.9980, 1.0003, 0.9943, 0.9961, 0.9995, 1.0202, 0.9935,\n",
       "                       1.0005, 0.9949, 1.0031, 1.0000, 0.9974, 1.0025, 0.9842, 0.9975, 1.0043,\n",
       "                       0.9906, 0.9900, 1.0064, 1.0003, 0.9948, 0.9934, 0.9960, 0.9900, 0.9991,\n",
       "                       0.9885, 1.0055, 0.9953, 0.9848, 0.9900, 1.0005, 1.0002, 0.9887, 1.0031,\n",
       "                       1.0006, 0.9904, 0.9973, 0.9908, 0.9933, 0.9907, 1.0106, 0.9948, 0.9977,\n",
       "                       0.9832, 1.0057, 0.9884, 1.0019, 0.9951, 0.9879, 1.0048, 1.0016, 1.0017,\n",
       "                       1.0017, 0.9893, 0.9949, 0.9939, 1.0037, 0.9940, 0.9894, 0.9972, 0.9920,\n",
       "                       0.9948, 1.0013, 1.0026, 1.0000, 0.9979, 0.9937, 0.9976, 0.9951, 0.9988,\n",
       "                       0.9914, 0.9946, 0.9952, 0.9957, 0.9984, 0.9894, 0.9854, 1.0043, 0.9896,\n",
       "                       1.0052, 0.9911, 1.0036, 0.9828, 1.0016, 0.9996, 1.0000, 1.0094, 0.9896,\n",
       "                       0.9863, 0.9938, 0.9802, 0.9975, 0.9922, 0.9948, 0.9932, 0.9940, 0.9987,\n",
       "                       0.9845, 1.0101, 1.0118, 1.0005, 0.9959, 1.0061, 0.9960, 0.9990, 0.9908,\n",
       "                       0.9905, 0.9907], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.norm2.bias',\n",
       "               tensor([-1.7790e-03,  5.9887e-03, -3.7868e-03, -3.5263e-03, -2.1438e-03,\n",
       "                        1.1694e-03,  1.7735e-03, -1.6655e-03, -3.7628e-03, -8.1688e-03,\n",
       "                       -3.7308e-03, -5.8844e-03, -4.6393e-03, -2.7047e-04,  7.6169e-03,\n",
       "                        2.7202e-03,  3.6855e-03, -3.9095e-03, -9.2427e-04, -3.3926e-03,\n",
       "                        1.1420e-03, -3.5965e-03,  1.4564e-03,  3.0932e-04, -8.9132e-03,\n",
       "                       -7.5266e-03,  1.3522e-03, -3.6258e-03, -2.7788e-03, -6.6674e-04,\n",
       "                       -5.1106e-03, -1.3338e-03,  3.5172e-03, -5.1185e-03, -2.4258e-03,\n",
       "                       -3.2186e-03,  3.8475e-03, -4.7723e-03, -5.9346e-03, -6.8136e-03,\n",
       "                        5.0124e-04,  3.8922e-04,  4.5580e-03,  1.4172e-03, -3.7302e-03,\n",
       "                        2.6157e-03, -9.7353e-03, -9.3375e-03,  8.1538e-04,  1.1256e-03,\n",
       "                       -3.1190e-03,  2.1061e-04, -2.5455e-04, -3.6864e-03, -4.7426e-03,\n",
       "                        5.1704e-04,  5.5040e-03, -6.9282e-03, -2.2505e-03, -2.7902e-03,\n",
       "                       -2.8101e-03, -1.4329e-03,  4.2811e-04,  1.3042e-03,  8.1136e-04,\n",
       "                        2.0323e-03,  6.7001e-03, -2.2594e-06,  3.0213e-03,  1.6940e-03,\n",
       "                        1.8062e-03, -7.4335e-03,  9.0100e-04, -4.1727e-03, -6.0354e-03,\n",
       "                       -3.3870e-03, -1.3554e-04,  2.7972e-03,  4.7114e-03,  5.6503e-04,\n",
       "                       -9.5495e-03,  2.9795e-04, -6.5445e-03,  4.7028e-03,  2.8155e-03,\n",
       "                       -1.2445e-03, -4.3513e-03, -3.1267e-03,  1.4795e-03, -8.8598e-03,\n",
       "                        4.0363e-03, -5.1356e-03, -1.2570e-02,  3.7572e-04,  2.1883e-03,\n",
       "                       -7.4697e-04, -2.4689e-03, -7.1223e-03, -4.3985e-03,  4.3451e-03,\n",
       "                       -5.5670e-04, -1.3389e-03,  2.9691e-03, -1.6310e-04,  5.2859e-03,\n",
       "                       -1.8102e-03, -5.9850e-03, -1.1897e-02,  7.4381e-03,  1.5827e-03,\n",
       "                        5.5678e-03, -2.2124e-03, -5.1337e-03, -2.6615e-04, -4.6131e-04,\n",
       "                       -5.2172e-03, -1.4477e-03, -5.0169e-03, -7.7006e-03, -4.0834e-03,\n",
       "                       -3.7937e-04, -1.1166e-03,  7.5969e-03,  3.5948e-03, -2.5445e-03,\n",
       "                       -2.9261e-03,  7.0807e-03,  8.1854e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.norm_kv.weight',\n",
       "               tensor([1.0109, 1.0230, 1.0125, 1.0032, 1.0329, 1.0547, 1.0083, 0.9845, 0.9916,\n",
       "                       1.0350, 1.0194, 0.9915, 0.9988, 1.0209, 0.9987, 1.0266, 1.0056, 1.0167,\n",
       "                       1.0029, 0.9905, 1.0118, 0.9901, 1.0016, 0.9921, 0.9882, 0.9903, 1.0000,\n",
       "                       1.0111, 1.0012, 0.9910, 1.0493, 1.0049, 1.0087, 1.0238, 0.9968, 1.0050,\n",
       "                       0.9967, 0.9990, 0.9977, 0.9977, 0.9851, 1.0052, 1.0128, 0.9993, 0.9918,\n",
       "                       1.0061, 0.9964, 0.9856, 0.9873, 0.9891, 0.9986, 0.9967, 0.9832, 1.0081,\n",
       "                       0.9887, 0.9916, 0.9960, 0.9964, 0.9887, 0.9879, 1.0147, 0.9957, 0.9982,\n",
       "                       0.9882, 1.0333, 1.0331, 1.0183, 1.0076, 0.9946, 1.0021, 1.0104, 1.0028,\n",
       "                       1.0084, 1.0060, 1.0018, 1.0059, 0.9945, 1.0056, 1.0069, 0.9980, 1.0046,\n",
       "                       1.0013, 0.9984, 1.0076, 0.9957, 0.9874, 0.9882, 0.9928, 0.9935, 0.9977,\n",
       "                       0.9939, 1.0003, 1.0062, 1.0131, 0.9911, 0.9741, 1.0077, 0.9964, 0.9841,\n",
       "                       0.9906, 0.9965, 1.0017, 0.9956, 1.0015, 0.9980, 0.9977, 0.9930, 0.9907,\n",
       "                       0.9968, 0.9896, 0.9912, 0.9984, 1.0074, 1.0011, 0.9922, 1.0038, 0.9953,\n",
       "                       1.0016, 0.9891, 0.9965, 1.0065, 0.9966, 0.9935, 0.9969, 0.9901, 0.9893,\n",
       "                       0.9952, 1.0015], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.norm_kv.bias',\n",
       "               tensor([ 5.2234e-03, -9.7597e-04, -1.8752e-03,  9.1566e-03,  4.5755e-03,\n",
       "                        5.0060e-03, -8.4246e-03,  4.4859e-03,  3.0321e-03,  4.4554e-03,\n",
       "                        3.4934e-04,  3.2487e-03,  4.1481e-03,  7.0689e-03, -3.5468e-03,\n",
       "                       -7.4929e-04,  2.0056e-03, -3.1662e-03,  3.2790e-03,  6.1699e-03,\n",
       "                        2.2586e-03,  5.5905e-03,  7.9434e-03, -1.2789e-03, -7.8591e-03,\n",
       "                       -2.6692e-03, -4.9588e-03, -3.0999e-03, -7.2519e-03, -8.2456e-03,\n",
       "                        2.5712e-03, -1.5153e-02, -1.9441e-03, -2.0622e-03, -9.6167e-04,\n",
       "                       -6.6928e-03, -4.9547e-03,  2.0749e-03,  1.0782e-02, -1.3422e-03,\n",
       "                        4.0614e-03,  7.3910e-03,  5.3303e-03, -1.4838e-02,  1.2308e-02,\n",
       "                       -4.4506e-03,  4.8266e-03,  3.9346e-03,  1.3810e-02,  1.1562e-02,\n",
       "                        7.8505e-04, -2.0445e-03,  1.2521e-02,  1.4191e-03,  7.8331e-03,\n",
       "                        8.4386e-03,  3.2960e-03, -8.4705e-04,  6.4729e-03,  6.9330e-03,\n",
       "                       -7.8570e-03,  9.4706e-03,  8.6466e-05,  1.1561e-03, -3.0726e-03,\n",
       "                       -5.5080e-03, -1.7381e-04,  1.9193e-04,  7.0354e-03,  1.3950e-04,\n",
       "                       -5.1665e-03,  3.2025e-03,  5.7602e-03, -1.1394e-03, -2.4328e-03,\n",
       "                        2.7358e-03, -7.8132e-05, -6.5328e-03,  3.6804e-03,  6.0372e-03,\n",
       "                        3.8307e-04,  5.8154e-03, -1.4134e-03, -5.4640e-03,  3.1602e-03,\n",
       "                        5.3442e-03,  1.3974e-02,  6.5857e-03,  1.5385e-03,  2.4183e-03,\n",
       "                        1.1426e-02,  2.8698e-03,  3.5919e-03,  5.1858e-03, -2.7244e-03,\n",
       "                       -9.1467e-03,  6.7674e-04,  4.6455e-03, -1.1959e-02, -6.1646e-03,\n",
       "                       -2.3841e-03,  1.8123e-03, -6.6395e-03, -1.6875e-03, -5.0892e-04,\n",
       "                       -4.3204e-03, -1.7160e-02, -3.6985e-03, -6.5854e-03, -7.2990e-03,\n",
       "                       -8.1374e-03, -1.4965e-03, -6.5766e-03,  7.6124e-04, -2.1173e-05,\n",
       "                        6.0420e-04, -3.8396e-03,  1.9250e-03, -7.9739e-03, -1.4216e-03,\n",
       "                       -6.7756e-03, -1.7658e-03, -6.7060e-03, -5.5841e-03, -1.3044e-02,\n",
       "                       -6.8726e-03,  6.7213e-04,  1.0470e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.fc.0.weight',\n",
       "               tensor([[ 0.0643, -0.0396,  0.0616,  ...,  0.0390,  0.0050, -0.0118],\n",
       "                       [ 0.0872,  0.0904,  0.0663,  ..., -0.0628, -0.0860,  0.0213],\n",
       "                       [-0.0800, -0.0752,  0.0346,  ...,  0.0296,  0.0194,  0.0296],\n",
       "                       ...,\n",
       "                       [-0.0834, -0.0650, -0.0053,  ..., -0.0588, -0.0406, -0.0519],\n",
       "                       [-0.0223,  0.0212,  0.0072,  ...,  0.0399, -0.0638, -0.0417],\n",
       "                       [ 0.0504, -0.0700, -0.0092,  ...,  0.0476,  0.0402, -0.0252]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.3.fc.0.bias',\n",
       "               tensor([ 6.5188e-02,  1.5871e-02,  7.6472e-03,  5.4902e-02,  3.6113e-02,\n",
       "                        1.1954e-02,  8.4413e-02, -2.1411e-02, -1.9737e-02, -3.3957e-02,\n",
       "                       -8.6058e-02,  1.0112e-02,  7.0491e-02, -7.6851e-02,  3.5845e-02,\n",
       "                        6.4516e-02,  8.3591e-02,  4.0154e-02, -5.3215e-02, -4.2141e-02,\n",
       "                       -8.9088e-02,  1.7900e-02,  7.4483e-02, -2.9562e-02,  4.7122e-02,\n",
       "                        5.8694e-02,  5.4646e-02, -1.8836e-02, -6.5518e-03, -4.7962e-03,\n",
       "                        1.8343e-02,  3.8256e-02, -7.5333e-02,  4.0278e-02, -2.0997e-02,\n",
       "                        3.3211e-02,  4.3407e-02, -7.2489e-02,  6.3695e-02,  5.0098e-03,\n",
       "                        7.3053e-02, -1.0495e-02, -3.0034e-02,  5.4600e-02,  2.8313e-02,\n",
       "                       -1.8884e-02,  5.4533e-02, -2.6134e-02, -7.7094e-02, -5.0656e-02,\n",
       "                       -5.1637e-02, -5.4082e-02, -5.3955e-02, -9.2466e-02, -2.4874e-02,\n",
       "                       -4.3344e-03, -5.6289e-02, -3.5179e-02, -2.4736e-02,  1.6661e-02,\n",
       "                        1.5967e-02,  4.1141e-05, -1.0658e-02, -9.0823e-03,  7.0521e-02,\n",
       "                       -6.4222e-04,  3.8032e-02,  5.2678e-03, -5.9268e-02, -1.7436e-02,\n",
       "                       -2.1937e-02, -5.2460e-02,  5.2449e-02,  7.2945e-02, -3.8135e-02,\n",
       "                        4.1260e-02, -3.1030e-03,  9.4464e-02,  5.7731e-03,  1.0935e-02,\n",
       "                        2.8661e-02, -3.7113e-02,  3.9174e-02, -4.6459e-03,  7.9822e-02,\n",
       "                        4.7075e-02,  8.5152e-02, -2.9527e-02, -4.3423e-02,  6.7214e-02,\n",
       "                       -2.1749e-02, -7.3253e-02, -2.9884e-02,  1.4748e-02, -5.4236e-02,\n",
       "                       -5.8239e-02, -2.4188e-02, -5.0222e-02,  5.6625e-02,  5.1613e-02,\n",
       "                        3.9418e-02, -3.9777e-02, -7.9393e-02,  1.0023e-02, -9.1310e-03,\n",
       "                       -7.7236e-03,  1.1531e-02, -8.7558e-02, -8.7712e-05,  6.0569e-02,\n",
       "                       -7.7948e-02, -1.4746e-02, -5.3293e-02, -2.6344e-02, -2.0754e-02,\n",
       "                        5.4119e-02,  5.3004e-02, -8.0122e-02,  1.8321e-02,  2.5573e-03,\n",
       "                        6.0398e-02,  5.6989e-03, -5.8659e-02, -6.7142e-02,  5.3345e-02,\n",
       "                        4.2533e-02,  2.7184e-02,  6.9212e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.attention.values.weight',\n",
       "               tensor([[ 0.0409,  0.0069,  0.0607,  ...,  0.0605,  0.0370,  0.0102],\n",
       "                       [-0.0754, -0.0118, -0.0708,  ...,  0.0720, -0.0075,  0.0104],\n",
       "                       [ 0.0438, -0.0564, -0.0237,  ..., -0.0572,  0.0880, -0.0676],\n",
       "                       ...,\n",
       "                       [-0.0053, -0.0233,  0.0331,  ...,  0.0241, -0.0416,  0.0191],\n",
       "                       [ 0.0851,  0.0141, -0.0800,  ..., -0.0710,  0.0091,  0.0636],\n",
       "                       [-0.0952,  0.0840,  0.0613,  ..., -0.0444,  0.0159,  0.0390]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.attention.keys.weight',\n",
       "               tensor([[ 0.0584, -0.0709,  0.0117,  ...,  0.0446,  0.0277, -0.0698],\n",
       "                       [-0.0979,  0.0428, -0.0375,  ..., -0.0371,  0.0844, -0.0332],\n",
       "                       [-0.0246,  0.0214,  0.0141,  ...,  0.0134, -0.0195,  0.0384],\n",
       "                       ...,\n",
       "                       [-0.0781,  0.0605, -0.0852,  ..., -0.0715, -0.0455, -0.0102],\n",
       "                       [-0.0377, -0.0600, -0.0442,  ...,  0.0417, -0.0135,  0.0393],\n",
       "                       [-0.0439, -0.0534, -0.0141,  ..., -0.0421,  0.0396, -0.0198]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.attention.queries.weight',\n",
       "               tensor([[-0.0195, -0.0727, -0.0113,  ..., -0.0351,  0.0419, -0.0279],\n",
       "                       [ 0.0375,  0.0650, -0.0758,  ...,  0.0836,  0.0425, -0.0376],\n",
       "                       [ 0.0650, -0.0684, -0.0132,  ..., -0.0341, -0.0672, -0.0613],\n",
       "                       ...,\n",
       "                       [ 0.0813,  0.0286,  0.0266,  ...,  0.0245,  0.0275, -0.0302],\n",
       "                       [-0.0241, -0.0540,  0.0220,  ..., -0.0834,  0.0256,  0.0073],\n",
       "                       [ 0.0901, -0.1194,  0.0304,  ..., -0.1076,  0.0194,  0.0950]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.attention.fc_out.weight',\n",
       "               tensor([[-0.0953, -0.0302,  0.0108,  ...,  0.0734, -0.0388, -0.0017],\n",
       "                       [-0.0765, -0.0197,  0.0295,  ..., -0.1014,  0.0754,  0.0111],\n",
       "                       [ 0.0838,  0.0262,  0.0670,  ..., -0.0862, -0.0091,  0.0044],\n",
       "                       ...,\n",
       "                       [-0.0833, -0.0142,  0.0026,  ..., -0.0034, -0.0722, -0.0037],\n",
       "                       [-0.0191,  0.0470, -0.0551,  ..., -0.0475, -0.0656,  0.0520],\n",
       "                       [-0.0757, -0.0456,  0.0737,  ..., -0.0788, -0.0723,  0.0010]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.attention.fc_out.bias',\n",
       "               tensor([ 0.0089, -0.0531,  0.0348,  0.0417,  0.0753, -0.0139, -0.0028, -0.0142,\n",
       "                        0.0259, -0.0566, -0.0223,  0.0835, -0.0604, -0.0390, -0.0453, -0.0184,\n",
       "                       -0.0136,  0.0621,  0.0111, -0.0544, -0.0843, -0.0430,  0.0155,  0.0154,\n",
       "                        0.0650,  0.0083,  0.0778, -0.0009, -0.0241, -0.0111,  0.0405, -0.0262,\n",
       "                       -0.0152, -0.0247,  0.0065, -0.0762, -0.0806,  0.0376, -0.0427, -0.0060,\n",
       "                        0.0020,  0.0409,  0.0630,  0.0399, -0.0320, -0.0065,  0.0601, -0.0588,\n",
       "                       -0.0731, -0.0228,  0.0819, -0.0231, -0.0377,  0.0018, -0.0754,  0.0522,\n",
       "                        0.0529, -0.0540,  0.0759, -0.0441, -0.0380, -0.0568, -0.0229,  0.0256,\n",
       "                        0.0567,  0.0556,  0.0627,  0.0486, -0.0383,  0.0408,  0.0649, -0.0384,\n",
       "                        0.0876, -0.0603, -0.0525,  0.0159, -0.0057, -0.0377, -0.0162,  0.0246,\n",
       "                       -0.0459,  0.0575,  0.0204,  0.0718,  0.0450,  0.0021, -0.0081,  0.0147,\n",
       "                       -0.0130, -0.0666, -0.0180,  0.0292, -0.0825,  0.0120, -0.0223,  0.0387,\n",
       "                        0.0586, -0.0463,  0.0132,  0.0488, -0.0795, -0.0048, -0.0194, -0.0319,\n",
       "                        0.0725, -0.0131, -0.0322, -0.0193,  0.0720,  0.0810, -0.0552,  0.0845,\n",
       "                       -0.0338, -0.0662,  0.0746, -0.0819,  0.0281, -0.0490,  0.0111, -0.0045,\n",
       "                        0.0496, -0.0016, -0.0640, -0.0492,  0.0191, -0.0174,  0.0664, -0.0064],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.bg',\n",
       "               tensor([-1.5991e-03,  1.1472e-04, -5.2152e-03, -2.2174e-03,  5.5716e-03,\n",
       "                        2.4298e-02, -2.1049e-03,  5.5593e-03,  1.7598e-03,  1.4317e-02,\n",
       "                        1.9310e-03, -1.0843e-03, -8.3522e-03,  3.4594e-03,  3.0949e-05,\n",
       "                        1.8961e-03, -1.8731e-02, -5.5275e-03,  4.1426e-04, -2.2316e-04,\n",
       "                       -8.8688e-04, -3.4205e-03, -1.3157e-02,  3.2974e-03,  2.5429e-03,\n",
       "                       -4.6122e-03, -1.6286e-04,  1.0442e-03,  2.3746e-03, -4.2535e-03,\n",
       "                       -8.0559e-03,  2.6233e-03,  2.6508e-03,  4.8614e-03,  5.2888e-03,\n",
       "                        3.3087e-03, -3.7717e-03, -6.9930e-03,  1.2416e-04, -6.1901e-03,\n",
       "                        6.1791e-03, -1.0853e-02, -6.2896e-03, -7.3777e-03,  1.9869e-03,\n",
       "                       -7.5751e-04, -1.1412e-03,  1.1001e-03,  2.7652e-04, -8.3100e-03,\n",
       "                        3.7255e-03,  8.0557e-03, -1.4636e-02,  9.7672e-05,  1.0861e-02,\n",
       "                       -2.9440e-03,  1.2340e-02,  2.9647e-03,  2.4305e-03, -7.2564e-04,\n",
       "                        1.6580e-03, -4.7647e-03, -1.0515e-02, -1.5262e-02, -2.6157e-02,\n",
       "                        1.3562e-03,  1.3729e-02, -6.1007e-03,  3.3302e-03, -1.2096e-03,\n",
       "                       -7.1132e-03, -1.6584e-02,  8.7989e-03,  1.4035e-03, -6.2540e-03,\n",
       "                       -1.5257e-03, -1.3537e-02,  4.4396e-03,  1.2567e-02, -7.5833e-03,\n",
       "                       -3.7589e-03, -9.3356e-03, -6.6949e-03, -2.0896e-03, -1.9897e-03,\n",
       "                       -1.7771e-03, -8.9254e-03, -3.5581e-03,  7.2557e-03, -1.7021e-02,\n",
       "                       -2.1870e-03, -7.2831e-03,  2.0699e-02, -9.3611e-03,  3.4230e-03,\n",
       "                        9.4333e-03,  1.8681e-03, -6.6928e-03, -3.1821e-03,  2.6861e-03,\n",
       "                       -6.2452e-03, -2.0598e-04,  1.5974e-03, -4.2032e-03,  2.7523e-03,\n",
       "                       -3.3640e-03,  1.1967e-02, -2.0940e-03, -1.2531e-03, -1.0829e-02,\n",
       "                       -3.7010e-03, -2.9628e-03, -5.9643e-03, -3.3937e-03, -1.6454e-02,\n",
       "                        5.3478e-03,  3.1709e-03,  1.7681e-03,  2.3641e-03,  5.6512e-03,\n",
       "                       -8.6255e-04, -7.8636e-03, -6.0690e-03, -1.9159e-03, -9.7437e-03,\n",
       "                       -1.1542e-02, -4.7704e-03,  2.0221e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.Wr.weight',\n",
       "               tensor([[-0.1417,  0.0935, -0.0644,  ..., -0.1181, -0.1340, -0.0356],\n",
       "                       [ 0.0315,  0.0059, -0.0447,  ...,  0.0911, -0.0167, -0.1040],\n",
       "                       [-0.1283, -0.1307,  0.0015,  ..., -0.1005, -0.1292,  0.1161],\n",
       "                       ...,\n",
       "                       [-0.0888, -0.0747,  0.1254,  ...,  0.1132,  0.0346, -0.1484],\n",
       "                       [ 0.0750, -0.0595, -0.0457,  ..., -0.0843, -0.0334, -0.0150],\n",
       "                       [ 0.1548,  0.0879,  0.0397,  ...,  0.0530,  0.0372, -0.1613]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.Ur.weight',\n",
       "               tensor([[ 0.0907,  0.0804,  0.0565,  ...,  0.0371, -0.1242,  0.0126],\n",
       "                       [-0.1331,  0.1045,  0.0545,  ..., -0.1220, -0.0210,  0.0232],\n",
       "                       [ 0.0643,  0.0458,  0.0346,  ...,  0.0387,  0.0837, -0.0279],\n",
       "                       ...,\n",
       "                       [-0.1098, -0.1032,  0.1121,  ...,  0.1136,  0.0970,  0.0561],\n",
       "                       [-0.1104, -0.0074, -0.1000,  ...,  0.1105,  0.1431, -0.1386],\n",
       "                       [ 0.0764, -0.1151,  0.0943,  ...,  0.0929, -0.1166, -0.1578]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.Wz.weight',\n",
       "               tensor([[ 0.1002,  0.0779,  0.0782,  ...,  0.0216, -0.0961, -0.1074],\n",
       "                       [-0.1545, -0.0441, -0.0690,  ..., -0.1229, -0.0793, -0.0941],\n",
       "                       [ 0.0936,  0.1428,  0.0506,  ..., -0.0453,  0.0332, -0.1517],\n",
       "                       ...,\n",
       "                       [ 0.1027, -0.1281,  0.0741,  ..., -0.0298,  0.1047,  0.0214],\n",
       "                       [ 0.0599,  0.1345,  0.0584,  ...,  0.1452, -0.0852, -0.0666],\n",
       "                       [ 0.1259, -0.0868,  0.0999,  ...,  0.1045, -0.1410, -0.1295]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.Uz.weight',\n",
       "               tensor([[ 0.0081,  0.1328, -0.0524,  ...,  0.1149, -0.1261,  0.1283],\n",
       "                       [-0.0305,  0.0644, -0.0763,  ..., -0.1212,  0.0928, -0.1379],\n",
       "                       [-0.0451, -0.0898, -0.1003,  ..., -0.1041,  0.0182,  0.1089],\n",
       "                       ...,\n",
       "                       [ 0.0839,  0.0433,  0.0072,  ...,  0.0438, -0.0190,  0.0766],\n",
       "                       [ 0.1047,  0.1354,  0.0554,  ...,  0.0357,  0.0213, -0.0037],\n",
       "                       [-0.0580,  0.0527,  0.0216,  ...,  0.0346, -0.1062,  0.0615]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.Wg.weight',\n",
       "               tensor([[-0.0217, -0.1409,  0.0460,  ...,  0.0290, -0.0404,  0.0409],\n",
       "                       [ 0.0216,  0.0149, -0.1111,  ...,  0.1207,  0.1249,  0.1581],\n",
       "                       [-0.0507,  0.1186,  0.0415,  ..., -0.0051, -0.0930, -0.1621],\n",
       "                       ...,\n",
       "                       [ 0.1088, -0.0706,  0.0453,  ..., -0.1177,  0.0478, -0.0653],\n",
       "                       [ 0.1019,  0.0238,  0.1401,  ..., -0.0362,  0.0248, -0.0190],\n",
       "                       [ 0.1117,  0.0639, -0.1038,  ...,  0.0079,  0.0999,  0.0970]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate1.Ug.weight',\n",
       "               tensor([[-0.0572, -0.0927,  0.0082,  ...,  0.0223,  0.0489,  0.0383],\n",
       "                       [-0.1119, -0.0975, -0.0794,  ..., -0.1509, -0.0814, -0.0729],\n",
       "                       [-0.0397, -0.1156,  0.0875,  ..., -0.1061,  0.0432,  0.0281],\n",
       "                       ...,\n",
       "                       [-0.0007,  0.1413,  0.0197,  ..., -0.0535,  0.1248,  0.0589],\n",
       "                       [ 0.0255,  0.1406, -0.0419,  ..., -0.0836,  0.0609,  0.0590],\n",
       "                       [ 0.0917, -0.0890, -0.1241,  ...,  0.0458, -0.0876, -0.0587]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.bg',\n",
       "               tensor([-7.8281e-03,  1.5205e-03, -9.2063e-03, -1.3356e-02,  7.0215e-04,\n",
       "                        2.7629e-03, -2.9099e-03,  1.2033e-02,  4.2288e-03,  3.7559e-03,\n",
       "                        5.0892e-03, -3.1023e-03,  4.0148e-03,  2.9866e-03, -4.2554e-04,\n",
       "                        4.0358e-03,  2.0732e-03, -2.8272e-03, -7.7911e-03, -4.2698e-03,\n",
       "                        1.6511e-03,  4.8361e-03,  2.1239e-03,  1.2361e-03, -4.1513e-03,\n",
       "                       -5.6824e-03,  1.1839e-02,  4.0543e-04, -3.2545e-03, -1.7929e-03,\n",
       "                        3.0798e-03, -1.6968e-03,  1.9704e-03, -7.1677e-03, -1.3281e-03,\n",
       "                        6.2521e-03,  4.0353e-03, -1.2250e-02, -3.2130e-03,  4.9506e-03,\n",
       "                       -9.9263e-04,  1.6588e-03,  2.4783e-03, -4.3720e-03, -8.2626e-03,\n",
       "                       -1.7221e-03, -4.8839e-03, -5.9917e-03, -4.4387e-03, -5.2961e-03,\n",
       "                       -3.6848e-03, -1.7522e-03,  6.3501e-03,  3.6740e-03,  8.2023e-03,\n",
       "                       -1.5034e-03, -1.0135e-03,  6.8279e-03, -6.2769e-04, -7.6704e-03,\n",
       "                        4.0732e-03, -1.9282e-02,  2.6231e-03, -1.2439e-03,  9.3431e-03,\n",
       "                        2.9803e-03,  8.1072e-03, -3.8546e-04, -1.0113e-03, -4.4430e-03,\n",
       "                        1.5991e-03,  5.5910e-03,  2.8605e-03,  2.2570e-03, -5.7749e-03,\n",
       "                        4.4106e-03,  2.7600e-03,  3.0664e-05, -7.8488e-03,  5.2311e-03,\n",
       "                       -8.3782e-03, -1.3401e-03,  1.6861e-02, -2.0578e-03,  7.8063e-03,\n",
       "                       -1.8971e-03, -8.4027e-04, -5.2105e-03, -3.8745e-03,  2.6503e-03,\n",
       "                       -6.2390e-04, -4.7588e-03,  4.4511e-03, -3.9299e-03, -9.8954e-03,\n",
       "                       -6.6512e-03, -6.5868e-03,  7.7333e-03,  5.9969e-03, -2.6479e-03,\n",
       "                       -5.6653e-04,  1.1785e-02,  4.5372e-03,  7.3975e-03,  2.7361e-03,\n",
       "                        1.7348e-03,  1.0133e-02, -9.0980e-04,  1.8530e-02,  1.2635e-02,\n",
       "                       -3.3086e-03, -4.5022e-03,  4.7266e-03,  6.5481e-03, -1.7974e-03,\n",
       "                        1.2777e-02, -2.2466e-04, -7.0580e-03, -2.5918e-03,  6.3941e-03,\n",
       "                        7.4143e-03,  1.0384e-02,  3.6889e-03,  8.5902e-03,  1.7757e-02,\n",
       "                       -6.6189e-03,  1.2849e-03,  3.6177e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.Wr.weight',\n",
       "               tensor([[-0.1138, -0.0929,  0.0212,  ...,  0.0471,  0.1195,  0.0964],\n",
       "                       [-0.0538,  0.0048,  0.1041,  ...,  0.0012,  0.1147, -0.1272],\n",
       "                       [-0.0326, -0.1483, -0.0198,  ..., -0.0410,  0.1114,  0.1244],\n",
       "                       ...,\n",
       "                       [-0.0314, -0.0985, -0.1009,  ..., -0.1539,  0.0847, -0.1330],\n",
       "                       [ 0.0479, -0.1161, -0.0446,  ...,  0.0214,  0.1016, -0.0936],\n",
       "                       [-0.0582,  0.1181, -0.1371,  ...,  0.1371, -0.0659, -0.0985]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.Ur.weight',\n",
       "               tensor([[ 0.0536,  0.0123,  0.1071,  ...,  0.0908, -0.1307,  0.0252],\n",
       "                       [-0.0098,  0.1038,  0.0097,  ...,  0.0093, -0.0085, -0.0195],\n",
       "                       [-0.0477, -0.0890,  0.1246,  ..., -0.1566, -0.1300, -0.0150],\n",
       "                       ...,\n",
       "                       [-0.1179,  0.0582,  0.1131,  ...,  0.1095,  0.1365,  0.1295],\n",
       "                       [ 0.0952,  0.1178,  0.0480,  ...,  0.0920,  0.0189, -0.1269],\n",
       "                       [ 0.0785,  0.0171, -0.0113,  ..., -0.0248,  0.1188,  0.0319]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.Wz.weight',\n",
       "               tensor([[ 0.0682,  0.0557,  0.1650,  ...,  0.0669,  0.0940,  0.0946],\n",
       "                       [ 0.1439,  0.0015,  0.0140,  ...,  0.0544,  0.0399, -0.0311],\n",
       "                       [ 0.1370,  0.0395, -0.0757,  ...,  0.1421,  0.0743,  0.0408],\n",
       "                       ...,\n",
       "                       [ 0.0183, -0.1173,  0.0298,  ...,  0.0145, -0.1187, -0.1021],\n",
       "                       [ 0.0047, -0.0347,  0.0574,  ...,  0.1222,  0.1248,  0.0494],\n",
       "                       [-0.0333, -0.1402, -0.1444,  ...,  0.0840,  0.1466, -0.1108]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.Uz.weight',\n",
       "               tensor([[ 0.0428, -0.0267, -0.0997,  ..., -0.0328,  0.1256, -0.1566],\n",
       "                       [-0.0766, -0.0741, -0.1051,  ..., -0.1437, -0.0321,  0.0212],\n",
       "                       [-0.0717, -0.0269,  0.0836,  ...,  0.0016,  0.0874, -0.0460],\n",
       "                       ...,\n",
       "                       [ 0.0153,  0.0961,  0.0317,  ..., -0.0169, -0.0215,  0.1118],\n",
       "                       [ 0.0618, -0.0794, -0.0055,  ..., -0.0316, -0.0600, -0.0993],\n",
       "                       [-0.0558,  0.0094,  0.0592,  ..., -0.0328, -0.1572,  0.0524]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.Wg.weight',\n",
       "               tensor([[ 0.0454, -0.0625, -0.1269,  ..., -0.0164, -0.1057,  0.0891],\n",
       "                       [ 0.0061, -0.0530, -0.0475,  ..., -0.0041, -0.1211,  0.1263],\n",
       "                       [-0.0051,  0.1220,  0.1563,  ..., -0.1483,  0.0966,  0.0637],\n",
       "                       ...,\n",
       "                       [ 0.1321,  0.0726, -0.0860,  ..., -0.1022,  0.1445,  0.0394],\n",
       "                       [ 0.0132, -0.1488,  0.0060,  ...,  0.0652,  0.0928,  0.0579],\n",
       "                       [ 0.1221, -0.0208, -0.0966,  ...,  0.0594, -0.1541,  0.1172]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.gate2.Ug.weight',\n",
       "               tensor([[ 0.0111, -0.0839, -0.1158,  ...,  0.0469, -0.0622, -0.0630],\n",
       "                       [ 0.0908,  0.1444,  0.0253,  ..., -0.0421, -0.0100,  0.0008],\n",
       "                       [ 0.0080,  0.0695,  0.0355,  ..., -0.0388,  0.0751, -0.0539],\n",
       "                       ...,\n",
       "                       [ 0.0092, -0.0595,  0.1301,  ...,  0.0424,  0.1527,  0.0326],\n",
       "                       [-0.0147,  0.0408, -0.0287,  ...,  0.0813,  0.1053, -0.0772],\n",
       "                       [-0.0360, -0.1006,  0.0040,  ...,  0.0530,  0.1031, -0.0044]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.norm1.weight',\n",
       "               tensor([0.9970, 1.0056, 0.9925, 1.0497, 1.0387, 0.9981, 1.0749, 1.0111, 1.0689,\n",
       "                       1.0327, 1.0106, 1.0268, 1.0351, 1.0453, 0.9968, 1.0405, 1.0476, 1.0412,\n",
       "                       1.0418, 1.0497, 1.0345, 1.0122, 1.0276, 1.0664, 1.0280, 1.0129, 1.0285,\n",
       "                       1.0169, 1.0349, 1.0123, 1.0060, 1.0339, 1.0441, 1.0113, 1.0425, 1.0340,\n",
       "                       1.0159, 1.0120, 1.0266, 1.0268, 1.0175, 1.0156, 1.0019, 1.0074, 1.0265,\n",
       "                       1.0251, 1.0889, 1.0409, 0.9796, 1.0287, 1.0031, 1.0116, 0.9850, 1.0472,\n",
       "                       1.0352, 1.0557, 1.0244, 1.0152, 1.0408, 1.0297, 1.0068, 1.0017, 1.0361,\n",
       "                       1.0443, 1.0214, 0.9955, 1.0374, 1.0225, 1.0224, 1.0349, 0.9680, 1.0274,\n",
       "                       1.0317, 1.0180, 1.0336, 0.9958, 1.0445, 1.0067, 1.0324, 1.0173, 1.0423,\n",
       "                       1.0049, 0.9936, 0.9765, 1.0206, 1.0490, 1.0494, 1.0127, 0.9893, 1.0165,\n",
       "                       1.0186, 1.0113, 1.0493, 0.9923, 1.0153, 1.0024, 1.0047, 1.0039, 1.0150,\n",
       "                       1.0027, 1.0512, 1.0212, 1.0224, 0.9960, 0.9981, 1.0388, 0.9964, 1.0331,\n",
       "                       1.0055, 0.9982, 1.0082, 1.0017, 1.0302, 1.0066, 1.0100, 1.0204, 1.0528,\n",
       "                       1.0301, 1.0424, 1.0447, 1.0625, 1.0132, 1.0201, 1.0138, 1.0066, 1.0019,\n",
       "                       0.9975, 1.0111], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.norm1.bias',\n",
       "               tensor([ 0.0352,  0.0059, -0.0218,  0.0371,  0.0361,  0.0126,  0.0411,  0.0139,\n",
       "                       -0.0508, -0.0092, -0.0484, -0.0673,  0.0310,  0.0337,  0.0220,  0.0406,\n",
       "                        0.0534, -0.0034,  0.0769,  0.0088, -0.0396,  0.0381, -0.0283, -0.0723,\n",
       "                        0.0515,  0.0052, -0.0471, -0.0407, -0.0121,  0.0393, -0.0147, -0.0427,\n",
       "                        0.0390, -0.0246,  0.0514,  0.0162, -0.0472, -0.0448,  0.0545,  0.0273,\n",
       "                        0.0275, -0.0087,  0.0252,  0.0066, -0.0252, -0.0522, -0.0694,  0.0465,\n",
       "                       -0.0005, -0.0218, -0.0370,  0.0090,  0.0130, -0.0406, -0.0411, -0.0454,\n",
       "                        0.0229, -0.0381,  0.0571, -0.0292,  0.0062, -0.0091, -0.0341, -0.0590,\n",
       "                       -0.0091,  0.0054, -0.0595, -0.0330,  0.0362,  0.0382,  0.0303,  0.0121,\n",
       "                        0.0502,  0.0260, -0.0452,  0.0065,  0.0589,  0.0245,  0.0415,  0.0203,\n",
       "                        0.0141, -0.0253,  0.0199, -0.0039, -0.0087, -0.0503,  0.0582,  0.0112,\n",
       "                       -0.0262, -0.0323,  0.0080, -0.0005,  0.0893, -0.0239, -0.0032,  0.0347,\n",
       "                        0.0111, -0.0162,  0.0240,  0.0238,  0.0510,  0.0150, -0.0367, -0.0122,\n",
       "                        0.0132, -0.0335,  0.0060, -0.0247,  0.0650, -0.0072, -0.0311,  0.0054,\n",
       "                       -0.0177, -0.0208, -0.0360, -0.0545, -0.0418,  0.0479, -0.0602,  0.0497,\n",
       "                        0.0803, -0.0170, -0.0018, -0.0173, -0.0214,  0.0263,  0.0453,  0.0197],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.norm2.weight',\n",
       "               tensor([1.0026, 1.0020, 0.9948, 0.9848, 0.9877, 0.9997, 1.0030, 1.0015, 0.9892,\n",
       "                       0.9924, 1.0032, 1.0003, 0.9980, 1.0005, 0.9846, 0.9842, 0.9966, 0.9972,\n",
       "                       0.9954, 0.9899, 0.9995, 0.9893, 1.0121, 0.9968, 0.9940, 1.0040, 0.9994,\n",
       "                       0.9945, 1.0009, 0.9903, 1.0069, 0.9882, 0.9940, 0.9993, 1.0060, 0.9909,\n",
       "                       1.0015, 0.9948, 1.0008, 1.0083, 1.0023, 0.9945, 1.0011, 0.9827, 0.9941,\n",
       "                       0.9912, 0.9915, 0.9950, 0.9908, 0.9987, 0.9801, 0.9999, 0.9909, 0.9998,\n",
       "                       0.9908, 0.9963, 1.0059, 0.9935, 0.9973, 0.9891, 0.9869, 0.9970, 0.9979,\n",
       "                       1.0101, 1.0081, 0.9992, 1.0014, 0.9917, 1.0134, 0.9912, 0.9989, 0.9959,\n",
       "                       1.0002, 0.9949, 0.9830, 0.9952, 0.9979, 0.9907, 1.0039, 0.9975, 0.9966,\n",
       "                       0.9945, 0.9964, 1.0018, 0.9978, 0.9854, 1.0039, 0.9874, 1.0020, 0.9960,\n",
       "                       1.0032, 1.0013, 1.0035, 1.0001, 1.0061, 0.9894, 1.0123, 0.9993, 1.0149,\n",
       "                       1.0043, 1.0014, 0.9951, 0.9971, 1.0094, 1.0029, 0.9908, 1.0078, 0.9962,\n",
       "                       1.0066, 0.9955, 0.9957, 0.9820, 1.0103, 0.9957, 1.0028, 0.9976, 0.9822,\n",
       "                       1.0009, 0.9961, 0.9901, 0.9867, 1.0067, 1.0134, 1.0081, 0.9983, 0.9898,\n",
       "                       1.0059, 1.0074], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.norm2.bias',\n",
       "               tensor([ 4.2299e-03,  2.1136e-03,  3.5341e-03,  1.3390e-03, -3.5470e-03,\n",
       "                       -4.7600e-03, -1.0079e-04, -3.4646e-03, -5.9345e-04,  4.5596e-04,\n",
       "                        8.7975e-04, -2.6880e-03,  4.0212e-03,  5.4240e-03, -3.5186e-03,\n",
       "                       -3.6424e-03, -6.4816e-03,  1.0703e-03, -1.8969e-03,  3.3554e-03,\n",
       "                       -4.9674e-03, -1.8070e-03, -1.5357e-02,  3.9789e-03,  4.3510e-04,\n",
       "                        2.5803e-03,  3.0543e-03,  2.5378e-03,  2.9494e-03, -4.1905e-03,\n",
       "                        2.7031e-03, -1.5072e-04, -2.4195e-03,  4.2392e-03,  1.0357e-02,\n",
       "                        6.7469e-03, -4.3417e-04,  4.9869e-03, -1.6131e-03, -1.4017e-03,\n",
       "                       -6.2639e-03,  2.3751e-04, -2.7657e-03,  4.1067e-03,  4.8454e-03,\n",
       "                        8.3590e-03,  2.6508e-03, -4.3727e-03, -7.1070e-03,  7.3812e-04,\n",
       "                        7.8327e-03, -3.4753e-04,  3.4607e-03,  2.6994e-03, -8.0840e-04,\n",
       "                       -1.5207e-03,  1.0054e-04,  3.3590e-03,  1.4139e-02, -9.4529e-03,\n",
       "                        2.5884e-03,  1.0099e-03, -6.2454e-03, -4.1373e-03,  7.6374e-05,\n",
       "                        4.8847e-04,  1.9136e-03,  1.1425e-02,  8.8970e-03,  2.3672e-03,\n",
       "                        1.2153e-03, -6.1025e-03, -4.9805e-03, -4.2959e-03,  3.3350e-03,\n",
       "                        4.6932e-03, -1.1793e-03, -1.7254e-03, -7.8526e-05, -2.1378e-03,\n",
       "                        4.4442e-03,  2.6916e-03, -6.5256e-04, -3.4836e-03,  3.0582e-03,\n",
       "                        6.1895e-03, -1.9376e-03, -7.9467e-05,  3.0949e-03, -2.6435e-03,\n",
       "                       -4.1345e-04, -5.9170e-04, -5.3346e-03, -2.5140e-03,  5.8450e-03,\n",
       "                       -5.3944e-03, -1.1895e-03, -3.9495e-03,  1.0400e-02,  5.0847e-03,\n",
       "                       -6.7264e-04, -4.8576e-03, -7.9734e-04,  9.1672e-03,  4.4977e-04,\n",
       "                       -2.2070e-04,  1.6547e-03,  2.8909e-03,  1.4357e-04,  2.5546e-03,\n",
       "                        2.7087e-03,  1.1943e-04,  6.4615e-03,  1.5603e-03,  4.0467e-03,\n",
       "                        9.1236e-03, -8.4011e-03,  2.0128e-03,  1.9799e-03, -1.5314e-03,\n",
       "                       -5.3331e-03, -4.1712e-03,  1.7025e-03,  2.5812e-03,  2.6922e-05,\n",
       "                       -2.4771e-03, -1.3738e-03, -7.7632e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.norm_kv.weight',\n",
       "               tensor([1.0175, 1.0606, 1.0228, 1.0130, 1.0058, 0.9886, 1.0168, 1.0085, 1.0052,\n",
       "                       1.0053, 1.0044, 1.0167, 1.0168, 1.0183, 1.0070, 1.0139, 1.0148, 1.0076,\n",
       "                       1.0205, 0.9897, 0.9984, 1.0020, 0.9943, 0.9960, 1.0201, 0.9898, 1.0198,\n",
       "                       1.0040, 0.9964, 0.9888, 1.0038, 1.0099, 0.9997, 1.0268, 0.9826, 0.9780,\n",
       "                       1.0055, 0.9980, 0.9852, 0.9999, 1.0061, 1.0060, 1.0107, 0.9915, 0.9926,\n",
       "                       0.9912, 0.9990, 1.0019, 0.9900, 1.0111, 0.9882, 0.9828, 0.9908, 1.0081,\n",
       "                       0.9888, 0.9833, 1.0082, 1.0023, 0.9959, 0.9975, 1.0165, 1.0005, 0.9909,\n",
       "                       0.9888, 1.0582, 1.0351, 1.0137, 0.9855, 1.0003, 0.9962, 1.0227, 0.9908,\n",
       "                       1.0324, 1.0067, 1.0099, 1.0116, 1.0088, 1.0120, 1.0127, 1.0050, 1.0111,\n",
       "                       1.0227, 1.0009, 0.9927, 1.0012, 0.9814, 1.0042, 0.9972, 0.9887, 1.0012,\n",
       "                       0.9926, 0.9981, 1.0119, 0.9859, 0.9818, 1.0004, 0.9950, 0.9938, 0.9860,\n",
       "                       0.9885, 0.9931, 0.9902, 0.9997, 0.9932, 0.9884, 1.0012, 0.9968, 0.9769,\n",
       "                       0.9979, 0.9991, 1.0048, 0.9987, 0.9880, 1.0076, 0.9899, 1.0013, 1.0035,\n",
       "                       0.9953, 0.9865, 0.9932, 0.9895, 0.9933, 1.0022, 1.0040, 0.9942, 0.9918,\n",
       "                       1.0027, 0.9907], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.norm_kv.bias',\n",
       "               tensor([ 1.3815e-02, -1.7500e-04,  3.9015e-03,  8.4001e-05,  4.9411e-03,\n",
       "                        2.1651e-02,  5.0801e-03,  4.6945e-03,  3.1323e-03,  2.4241e-03,\n",
       "                        7.5004e-04,  4.3517e-03,  2.3851e-03,  1.1284e-02,  2.8303e-03,\n",
       "                        7.1990e-03, -3.9303e-03, -5.4306e-03,  5.3969e-03,  1.0733e-02,\n",
       "                        4.6703e-03,  6.2952e-03,  5.7846e-03,  1.4409e-02,  3.8762e-03,\n",
       "                       -9.6686e-03, -4.9630e-03, -4.2355e-03, -1.3141e-02, -9.8962e-03,\n",
       "                       -1.3816e-03, -1.0345e-02, -4.8108e-03,  7.1706e-03, -1.1638e-02,\n",
       "                       -7.6441e-03, -7.6332e-03, -1.5281e-03, -6.1201e-03, -2.2051e-03,\n",
       "                        7.5580e-03,  1.4484e-03, -5.0826e-05,  3.3128e-03,  1.1122e-02,\n",
       "                        6.2780e-03,  3.4596e-03,  7.8224e-03,  1.1435e-02,  1.5426e-03,\n",
       "                        9.5898e-03,  3.4490e-03,  7.0381e-03,  3.5845e-03,  6.4911e-03,\n",
       "                        1.0827e-02,  4.7548e-03, -9.8463e-04,  6.5682e-03,  3.7445e-03,\n",
       "                       -6.0173e-03, -5.9000e-03,  4.7592e-03,  4.6951e-03, -5.0327e-03,\n",
       "                       -3.7683e-03,  6.1521e-03,  1.5853e-02,  1.2235e-02,  2.2574e-03,\n",
       "                       -3.8512e-03,  1.0320e-02, -6.5048e-03,  2.0724e-03,  5.0048e-03,\n",
       "                       -2.4810e-03,  3.5610e-04, -1.6340e-03,  7.9173e-04,  2.1581e-03,\n",
       "                       -5.1598e-04,  1.2022e-03,  1.1411e-03,  1.8170e-03,  3.7812e-03,\n",
       "                        1.8744e-02,  7.5119e-03,  8.3711e-03,  1.7438e-02,  1.2833e-02,\n",
       "                        2.8561e-03,  6.3026e-03,  3.5666e-04,  1.0004e-02, -1.2096e-02,\n",
       "                        4.5858e-03, -1.1712e-03, -5.7324e-03, -9.4580e-03, -1.4337e-02,\n",
       "                       -7.6516e-03, -1.0221e-02, -2.2608e-03, -4.6837e-03, -1.6289e-02,\n",
       "                       -5.6251e-03, -1.6345e-02, -4.5882e-03, -2.5444e-03, -7.4049e-04,\n",
       "                       -3.7524e-03, -5.3938e-03, -1.3680e-02, -2.1962e-03, -7.5563e-03,\n",
       "                       -8.5036e-03, -1.1108e-02, -3.5288e-03, -6.1456e-03, -5.0289e-03,\n",
       "                       -7.5207e-03, -7.9578e-04, -4.0297e-03, -5.7784e-03, -8.4473e-03,\n",
       "                       -1.4958e-02,  2.7728e-03,  1.7019e-04], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.fc.0.weight',\n",
       "               tensor([[-0.0757, -0.0196,  0.0244,  ..., -0.0125,  0.0047,  0.0414],\n",
       "                       [-0.0389, -0.0118,  0.0585,  ..., -0.0624,  0.0907, -0.0494],\n",
       "                       [-0.0217, -0.0712,  0.0115,  ..., -0.0019, -0.0952,  0.0925],\n",
       "                       ...,\n",
       "                       [-0.0838, -0.0195,  0.0268,  ...,  0.0084, -0.0244, -0.0374],\n",
       "                       [-0.0420,  0.0461,  0.0265,  ...,  0.0818, -0.0593, -0.0747],\n",
       "                       [-0.0449, -0.0776,  0.0573,  ...,  0.0250, -0.0660, -0.0352]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.4.fc.0.bias',\n",
       "               tensor([ 6.7715e-02, -1.0043e-04,  2.2900e-02, -5.5316e-02, -1.0188e-01,\n",
       "                        7.9417e-02, -7.7531e-02, -3.0014e-02, -2.6368e-02, -9.0171e-02,\n",
       "                       -8.3968e-02, -4.8619e-02,  3.5575e-02, -5.1562e-02,  7.4771e-02,\n",
       "                        2.3060e-02,  6.6888e-02,  1.7391e-02, -4.7856e-02,  3.4738e-02,\n",
       "                       -6.2324e-02, -4.5019e-02, -1.0042e-01,  1.2831e-02,  8.2598e-02,\n",
       "                        4.0824e-02,  6.7819e-02, -5.6731e-02, -7.0396e-02, -9.4607e-03,\n",
       "                       -5.2807e-02, -5.0899e-03, -2.2963e-02,  5.1790e-02,  5.1290e-04,\n",
       "                       -7.7579e-02, -2.2031e-02, -6.4358e-02,  3.0354e-02, -8.1323e-02,\n",
       "                       -6.4764e-02,  5.4959e-02,  4.4120e-02,  2.6444e-02, -2.7172e-02,\n",
       "                       -8.1820e-02, -2.7254e-02, -8.1370e-03, -5.9365e-02, -5.8116e-02,\n",
       "                        3.9797e-02,  7.8100e-02, -1.7960e-02,  1.8415e-02,  1.3847e-02,\n",
       "                       -6.1896e-02, -5.3697e-02, -1.0115e-02,  4.8515e-02, -1.6480e-02,\n",
       "                        4.5044e-03, -3.1610e-02,  4.1313e-02, -1.4414e-02, -7.9586e-02,\n",
       "                       -4.1718e-02,  2.5626e-02, -7.5389e-02,  5.5119e-02,  3.8302e-03,\n",
       "                        7.3732e-02,  2.3903e-02, -2.5097e-03, -2.5526e-02,  2.5250e-02,\n",
       "                        7.0952e-02,  2.6312e-02, -4.5980e-02,  7.1968e-02, -2.0901e-02,\n",
       "                       -4.7467e-02, -8.9939e-02,  6.2743e-02, -8.4140e-02,  8.7132e-02,\n",
       "                        3.0215e-02,  9.9527e-04, -5.3082e-02,  2.0204e-02,  2.9243e-02,\n",
       "                       -6.8068e-03, -3.9796e-02, -4.2676e-02, -4.3234e-03, -3.4801e-02,\n",
       "                        3.9245e-02, -4.8214e-02, -4.9269e-02,  1.9362e-02,  9.7589e-03,\n",
       "                        3.5284e-02, -7.4853e-03,  1.5290e-02,  7.8099e-02, -9.6498e-02,\n",
       "                        7.7236e-02, -7.9140e-02,  5.4439e-02,  3.0728e-02, -7.0052e-02,\n",
       "                        6.9169e-02, -5.1002e-02, -1.4501e-02, -8.4278e-02,  1.2791e-02,\n",
       "                        7.7411e-02,  2.1722e-02,  2.0324e-02, -8.7588e-02,  2.7120e-03,\n",
       "                       -5.2753e-02,  5.8615e-02, -6.4171e-02, -9.0947e-02, -7.1214e-02,\n",
       "                       -5.9138e-02,  5.1017e-02,  2.2560e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.attention.values.weight',\n",
       "               tensor([[-0.1023,  0.0518, -0.0566,  ..., -0.0748, -0.0728,  0.0247],\n",
       "                       [-0.0507, -0.0014,  0.0769,  ..., -0.0007,  0.0629,  0.0696],\n",
       "                       [-0.0978,  0.0727, -0.0200,  ...,  0.0591, -0.0454,  0.0723],\n",
       "                       ...,\n",
       "                       [ 0.0599, -0.0421, -0.0627,  ...,  0.0375,  0.0804, -0.0335],\n",
       "                       [-0.0184,  0.0616, -0.0202,  ...,  0.0901,  0.0371, -0.0497],\n",
       "                       [-0.0535, -0.0024,  0.0850,  ..., -0.0249, -0.0323,  0.0559]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.attention.keys.weight',\n",
       "               tensor([[ 0.0516, -0.1028, -0.0257,  ...,  0.0078,  0.0336,  0.0820],\n",
       "                       [ 0.1160,  0.0168, -0.0737,  ...,  0.0414,  0.0500, -0.0532],\n",
       "                       [-0.0188, -0.0389, -0.0062,  ...,  0.0310, -0.1128,  0.0266],\n",
       "                       ...,\n",
       "                       [-0.0121,  0.0067,  0.0171,  ...,  0.0509,  0.0186,  0.0346],\n",
       "                       [-0.0485,  0.0229, -0.0579,  ...,  0.0423, -0.0998, -0.0724],\n",
       "                       [ 0.0514, -0.0038, -0.0879,  ...,  0.0299, -0.0132,  0.0227]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.attention.queries.weight',\n",
       "               tensor([[ 0.0299, -0.0360, -0.0603,  ...,  0.1188, -0.1321,  0.0203],\n",
       "                       [-0.0698,  0.0709,  0.0543,  ...,  0.0114, -0.0404,  0.0608],\n",
       "                       [ 0.0767,  0.0810,  0.0366,  ...,  0.0016, -0.0408, -0.0324],\n",
       "                       ...,\n",
       "                       [ 0.0348,  0.0813,  0.0960,  ..., -0.0033,  0.0160, -0.0589],\n",
       "                       [ 0.0815, -0.0117,  0.0542,  ..., -0.0913, -0.0335,  0.0808],\n",
       "                       [-0.0629, -0.1059, -0.0749,  ..., -0.1058,  0.0345,  0.0144]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.attention.fc_out.weight',\n",
       "               tensor([[ 0.0430, -0.0466, -0.0262,  ..., -0.0529, -0.0383, -0.0714],\n",
       "                       [ 0.0605, -0.0069,  0.0205,  ...,  0.0190,  0.0290, -0.0561],\n",
       "                       [ 0.0924,  0.0166, -0.0826,  ..., -0.0052, -0.0678,  0.0696],\n",
       "                       ...,\n",
       "                       [-0.0523, -0.0034, -0.0535,  ..., -0.0367,  0.0324, -0.0387],\n",
       "                       [-0.0659,  0.0742, -0.0724,  ...,  0.0560,  0.0698,  0.0739],\n",
       "                       [ 0.0103, -0.0350, -0.0257,  ...,  0.0466, -0.0061,  0.0432]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.attention.fc_out.bias',\n",
       "               tensor([-7.0319e-02,  6.4625e-02,  3.7406e-02, -1.3667e-02,  1.9171e-02,\n",
       "                        6.1610e-03, -8.1341e-02, -3.0460e-02,  6.2409e-03,  2.9935e-02,\n",
       "                        7.6613e-02, -2.6495e-02, -2.5271e-02,  2.7899e-02, -7.1998e-03,\n",
       "                       -8.7969e-02,  1.5107e-02, -8.5275e-02, -6.1582e-02,  5.5766e-02,\n",
       "                        2.0392e-03, -1.6226e-02,  4.9835e-02, -5.8354e-03, -5.3724e-02,\n",
       "                       -4.7340e-02,  5.4556e-02,  1.3772e-02, -6.2386e-02, -3.4103e-02,\n",
       "                       -7.5296e-03, -2.9913e-02, -7.1216e-02,  2.9713e-02,  6.0747e-02,\n",
       "                       -1.6734e-03,  3.4495e-02, -4.2836e-02,  6.0330e-02, -4.8760e-04,\n",
       "                       -6.5097e-02, -1.5431e-05, -4.9654e-03, -2.1003e-02, -6.4221e-02,\n",
       "                       -1.6675e-02, -8.6889e-02,  3.5724e-02,  7.6788e-02, -3.8803e-02,\n",
       "                       -4.8358e-03, -5.6667e-02, -1.0895e-02,  6.8964e-02, -4.4956e-02,\n",
       "                        4.0070e-02,  8.9269e-02, -9.3587e-03, -3.6334e-02,  2.6264e-02,\n",
       "                        6.6994e-02,  8.6921e-02,  1.1084e-02,  2.8714e-02, -3.4927e-02,\n",
       "                        1.6314e-02, -6.3040e-02,  2.7560e-03, -3.8458e-02, -8.3779e-02,\n",
       "                        1.3724e-03,  4.9990e-02,  4.3902e-02, -6.7823e-02, -2.4415e-03,\n",
       "                        3.1310e-02,  3.0973e-02,  5.8125e-02, -3.3074e-02, -7.7464e-02,\n",
       "                        2.5986e-02, -2.0717e-02, -7.7594e-02, -7.7778e-02, -5.8543e-02,\n",
       "                       -9.1449e-02,  7.4953e-02,  8.7630e-02,  5.2116e-02, -8.4386e-02,\n",
       "                       -6.1763e-02,  7.9949e-02,  8.4643e-03, -2.6462e-03, -6.8778e-02,\n",
       "                       -6.1989e-02, -7.2943e-02, -3.8087e-02, -6.7220e-02, -8.0576e-02,\n",
       "                       -4.2832e-02,  8.2717e-02,  8.4407e-02,  6.6624e-02, -5.3610e-02,\n",
       "                        3.6475e-02,  4.8609e-02,  6.3657e-02, -1.0514e-02, -5.6058e-02,\n",
       "                       -1.2437e-02,  3.3691e-02, -5.8504e-02,  5.9964e-02, -5.5772e-02,\n",
       "                        4.2735e-02,  1.3546e-02, -7.4966e-02, -7.7821e-02, -5.9878e-02,\n",
       "                       -6.4453e-02, -6.2934e-02, -6.0754e-02,  4.6176e-02,  8.9169e-03,\n",
       "                       -7.0842e-02, -6.2432e-02,  3.0473e-02], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.bg',\n",
       "               tensor([ 1.8700e-02, -1.9383e-03,  3.1246e-04,  1.4627e-02,  1.0694e-02,\n",
       "                        1.4804e-02, -1.9478e-03,  5.8504e-03, -7.3885e-05,  1.4511e-03,\n",
       "                       -1.4055e-02,  6.7569e-03,  9.0000e-03, -1.6214e-03,  9.7024e-03,\n",
       "                        4.1356e-03,  4.4763e-03,  4.1205e-03,  1.7656e-03,  7.1964e-03,\n",
       "                        1.4480e-03,  2.0650e-03,  3.2648e-04,  1.0311e-03,  1.0544e-02,\n",
       "                        8.0463e-03,  1.0421e-02,  3.7215e-03, -9.5722e-04, -2.4338e-03,\n",
       "                       -1.1965e-02, -4.2344e-04, -1.1592e-02,  4.8421e-04,  2.1372e-02,\n",
       "                       -4.5786e-03,  2.0547e-03, -3.7915e-03,  1.0397e-02,  6.6383e-03,\n",
       "                        1.4797e-02, -6.4745e-03, -5.4140e-03, -5.7145e-04, -7.9289e-03,\n",
       "                        4.1422e-03, -5.3715e-06,  2.9272e-03,  7.3641e-04, -1.4364e-03,\n",
       "                       -4.4942e-03, -1.9055e-03, -1.0994e-04,  1.5896e-03, -2.9603e-03,\n",
       "                       -2.1695e-03,  1.6033e-03, -9.4719e-03,  1.7487e-03, -6.9284e-04,\n",
       "                        5.6954e-03,  1.9004e-02, -2.4782e-03,  1.4109e-02,  2.0209e-03,\n",
       "                        1.0923e-02,  1.4816e-02,  3.7753e-03,  3.2932e-03,  1.0687e-02,\n",
       "                        1.0221e-02,  3.0032e-04,  2.1272e-03, -2.2763e-03,  1.0676e-03,\n",
       "                       -4.1187e-03,  1.8934e-03,  6.3908e-04,  9.9543e-03,  5.0195e-03,\n",
       "                        7.0192e-04,  4.1848e-03,  2.2606e-03,  2.6548e-03,  6.4958e-03,\n",
       "                        1.5807e-04, -2.1245e-03, -1.3210e-03, -1.0413e-02, -2.8450e-03,\n",
       "                        5.8198e-04,  2.0704e-03,  7.8815e-03,  1.0558e-03, -2.0888e-03,\n",
       "                       -1.2805e-02,  1.1530e-02,  5.0836e-03,  7.0108e-03,  2.7670e-03,\n",
       "                        3.6643e-03,  4.8157e-03,  4.1103e-04, -6.4700e-03,  4.6480e-03,\n",
       "                        1.2316e-02,  1.5484e-02,  4.7097e-03,  8.7289e-03,  9.2416e-03,\n",
       "                        9.6131e-04,  4.6481e-03, -2.0888e-03, -1.0353e-03, -9.7914e-03,\n",
       "                       -3.8506e-03,  2.1305e-03,  1.2665e-03,  1.1600e-03,  2.6635e-03,\n",
       "                       -4.4909e-03, -2.1984e-03,  7.5443e-03, -1.8828e-03,  2.3674e-03,\n",
       "                       -1.2139e-02,  3.2792e-03, -3.1759e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.Wr.weight',\n",
       "               tensor([[ 0.0990,  0.1521, -0.0451,  ..., -0.0427, -0.1188,  0.0679],\n",
       "                       [ 0.0216,  0.1053,  0.0081,  ...,  0.1099,  0.0299,  0.1629],\n",
       "                       [ 0.0596,  0.1519, -0.0898,  ..., -0.1021, -0.1393,  0.0542],\n",
       "                       ...,\n",
       "                       [ 0.0094, -0.0395,  0.1475,  ..., -0.1246,  0.0230, -0.0529],\n",
       "                       [ 0.0455,  0.1110,  0.0353,  ...,  0.1536, -0.1447,  0.0622],\n",
       "                       [ 0.1083,  0.0724, -0.0830,  ..., -0.0094, -0.0620,  0.0642]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.Ur.weight',\n",
       "               tensor([[-0.0982,  0.0852,  0.0485,  ..., -0.0208,  0.1109, -0.0363],\n",
       "                       [ 0.0097, -0.0905, -0.1257,  ..., -0.1281,  0.0842,  0.0578],\n",
       "                       [ 0.0912,  0.1357,  0.0293,  ..., -0.1020,  0.1112,  0.0958],\n",
       "                       ...,\n",
       "                       [ 0.1069,  0.0897, -0.1026,  ..., -0.1141, -0.1542, -0.0422],\n",
       "                       [ 0.0136, -0.1389,  0.0045,  ..., -0.0552, -0.1121, -0.0163],\n",
       "                       [ 0.0058,  0.0984,  0.1184,  ...,  0.1440,  0.0454, -0.1263]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.Wz.weight',\n",
       "               tensor([[-0.0566, -0.1515, -0.1382,  ...,  0.0714, -0.0250, -0.0540],\n",
       "                       [ 0.0608, -0.1340, -0.0153,  ...,  0.1112, -0.0511, -0.1246],\n",
       "                       [-0.1504,  0.0272,  0.0706,  ...,  0.0502, -0.0130, -0.0409],\n",
       "                       ...,\n",
       "                       [ 0.1062, -0.1348,  0.0541,  ...,  0.0561,  0.1514, -0.1272],\n",
       "                       [ 0.0726, -0.1032,  0.0996,  ...,  0.1341, -0.0465, -0.0634],\n",
       "                       [ 0.0929,  0.0999,  0.0641,  ...,  0.0552,  0.0644,  0.0386]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.Uz.weight',\n",
       "               tensor([[ 0.1544, -0.0947, -0.0050,  ..., -0.1568, -0.0392,  0.0056],\n",
       "                       [ 0.1361, -0.1228, -0.0747,  ...,  0.0034, -0.0955, -0.0062],\n",
       "                       [ 0.0372,  0.1201,  0.0825,  ..., -0.0039,  0.1246,  0.0176],\n",
       "                       ...,\n",
       "                       [ 0.1045,  0.1159, -0.0050,  ...,  0.0672, -0.0926,  0.1473],\n",
       "                       [ 0.0901, -0.0983, -0.0334,  ...,  0.0587, -0.0754,  0.1295],\n",
       "                       [ 0.0753,  0.0979, -0.1463,  ...,  0.0036,  0.0987,  0.0829]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.Wg.weight',\n",
       "               tensor([[ 0.0941, -0.1059,  0.1010,  ...,  0.0102,  0.1414,  0.1432],\n",
       "                       [ 0.1454, -0.0099,  0.1350,  ...,  0.0672, -0.0198,  0.0701],\n",
       "                       [ 0.0371,  0.0739,  0.0090,  ..., -0.1480, -0.0412,  0.0644],\n",
       "                       ...,\n",
       "                       [ 0.0663,  0.0557, -0.0962,  ..., -0.1524, -0.0577,  0.0535],\n",
       "                       [-0.1142, -0.0557, -0.1332,  ..., -0.1036,  0.1214, -0.0234],\n",
       "                       [ 0.0023, -0.1370, -0.1108,  ...,  0.0066, -0.0408, -0.1005]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate1.Ug.weight',\n",
       "               tensor([[ 0.0489,  0.1284, -0.0225,  ..., -0.0055, -0.1066, -0.1016],\n",
       "                       [ 0.0346, -0.0161, -0.1086,  ..., -0.1318,  0.0625, -0.1388],\n",
       "                       [-0.1563, -0.0648, -0.0750,  ...,  0.0999,  0.0630,  0.1318],\n",
       "                       ...,\n",
       "                       [ 0.0414, -0.0885, -0.0399,  ..., -0.0901,  0.1230,  0.1268],\n",
       "                       [-0.0932,  0.0636,  0.0759,  ..., -0.1210,  0.0478,  0.0373],\n",
       "                       [-0.0618, -0.0637,  0.0326,  ..., -0.0730,  0.0734,  0.0622]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.bg',\n",
       "               tensor([ 4.6709e-03, -6.2125e-03,  1.9592e-03,  8.7896e-03,  3.1357e-03,\n",
       "                        3.1898e-03,  7.2758e-03,  5.4281e-03,  1.5459e-03, -4.4880e-03,\n",
       "                        2.5875e-03,  2.6827e-03, -6.4121e-03,  4.0377e-03,  4.8860e-04,\n",
       "                       -3.9217e-03,  3.8387e-03, -8.0967e-03,  1.7306e-02, -6.1466e-03,\n",
       "                       -1.5678e-02, -4.6885e-03, -7.5064e-04, -1.2062e-04, -7.7564e-03,\n",
       "                       -6.3882e-03,  1.9632e-02, -6.1260e-04,  2.9214e-03,  2.6285e-03,\n",
       "                        8.2528e-03,  3.2875e-04, -2.9928e-03, -1.5632e-02,  8.5305e-04,\n",
       "                       -1.4334e-03,  6.0866e-03,  8.1240e-03, -4.4175e-04, -8.1799e-03,\n",
       "                        7.6321e-03, -3.0272e-03,  5.5748e-03, -1.3228e-03,  9.0181e-04,\n",
       "                       -1.6072e-03,  2.1445e-03,  1.5568e-03,  4.3768e-03,  4.2611e-03,\n",
       "                       -4.7819e-03, -2.3160e-03,  5.7179e-03,  3.9956e-03, -2.3647e-03,\n",
       "                       -1.1212e-02,  4.9210e-03, -1.7846e-02, -1.1198e-02,  1.0581e-03,\n",
       "                       -9.9802e-04, -2.9232e-03, -6.6062e-03, -1.6169e-03,  2.4532e-03,\n",
       "                        3.9500e-04,  2.6040e-03,  1.4345e-02,  6.4249e-03,  2.4980e-03,\n",
       "                        1.0102e-02, -1.2995e-03,  2.9179e-03,  7.3751e-03,  1.1090e-02,\n",
       "                        1.6689e-03, -9.4244e-03, -1.0275e-02, -1.2539e-03,  8.2128e-03,\n",
       "                        1.5733e-02, -6.4230e-03, -8.9935e-03, -6.0270e-03, -1.3298e-02,\n",
       "                       -8.8374e-03,  1.0219e-02,  2.5685e-03, -4.7143e-03,  6.2284e-03,\n",
       "                        1.9203e-03,  1.0717e-04, -1.6038e-03, -2.5916e-03, -3.9359e-04,\n",
       "                        2.1558e-03,  1.2863e-02, -3.2584e-03, -1.0212e-03,  5.5409e-03,\n",
       "                       -1.1238e-02, -5.7745e-04, -5.6274e-03,  1.1544e-03, -1.9462e-03,\n",
       "                       -6.1220e-03,  1.6088e-02, -4.6690e-03,  4.0315e-03, -1.2279e-02,\n",
       "                        6.6815e-03,  5.4752e-03, -7.6163e-03,  5.7112e-03,  8.1126e-03,\n",
       "                       -5.3892e-03,  1.0468e-04, -2.1560e-03, -9.1640e-03, -7.3456e-03,\n",
       "                       -7.1864e-03, -2.1400e-03, -7.7917e-03,  2.5807e-03,  8.9514e-05,\n",
       "                       -8.2589e-04, -2.6100e-03, -3.8813e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.Wr.weight',\n",
       "               tensor([[-0.0404,  0.0642,  0.1124,  ..., -0.0255,  0.0623, -0.0938],\n",
       "                       [ 0.1090,  0.0490, -0.0925,  ..., -0.1024,  0.1389, -0.0254],\n",
       "                       [ 0.1212,  0.0980, -0.0869,  ...,  0.1282,  0.0779,  0.0151],\n",
       "                       ...,\n",
       "                       [ 0.0073,  0.1230, -0.0659,  ..., -0.1185,  0.0340, -0.0171],\n",
       "                       [-0.0332,  0.1205,  0.0835,  ..., -0.0671,  0.0259, -0.0612],\n",
       "                       [-0.1076, -0.0817, -0.0518,  ..., -0.1384,  0.1429,  0.0152]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.Ur.weight',\n",
       "               tensor([[ 0.1490, -0.0844, -0.0513,  ..., -0.0092,  0.1044, -0.0615],\n",
       "                       [ 0.1182, -0.1325, -0.1319,  ..., -0.1083,  0.0413, -0.1035],\n",
       "                       [ 0.0381, -0.0400,  0.0804,  ..., -0.0744, -0.0625, -0.0878],\n",
       "                       ...,\n",
       "                       [ 0.0526, -0.1020,  0.0144,  ..., -0.1151, -0.0277, -0.0599],\n",
       "                       [-0.0041,  0.0616,  0.1026,  ...,  0.0935,  0.0540,  0.0145],\n",
       "                       [-0.0706, -0.0211, -0.0987,  ..., -0.1471,  0.0640,  0.1515]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.Wz.weight',\n",
       "               tensor([[-0.0746, -0.1345, -0.0326,  ..., -0.1132, -0.1143, -0.1084],\n",
       "                       [-0.0544, -0.0003,  0.0424,  ...,  0.0310, -0.0087, -0.1364],\n",
       "                       [ 0.0385,  0.0479, -0.0397,  ..., -0.0026,  0.0853,  0.1250],\n",
       "                       ...,\n",
       "                       [ 0.0811,  0.0783,  0.1017,  ...,  0.1125,  0.0295,  0.0957],\n",
       "                       [-0.0042, -0.0054, -0.1428,  ...,  0.1163, -0.0237, -0.0057],\n",
       "                       [-0.1652, -0.0692,  0.0195,  ..., -0.1324,  0.0082,  0.1434]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.Uz.weight',\n",
       "               tensor([[ 0.1182,  0.0049,  0.0459,  ..., -0.1476, -0.0508, -0.0716],\n",
       "                       [-0.0018, -0.0697,  0.0338,  ...,  0.0243,  0.0533, -0.0389],\n",
       "                       [-0.0073,  0.1479, -0.1124,  ...,  0.1405, -0.0927, -0.1365],\n",
       "                       ...,\n",
       "                       [-0.1149, -0.1423,  0.0445,  ...,  0.0401, -0.0719, -0.1148],\n",
       "                       [ 0.1304, -0.0259, -0.1301,  ...,  0.0721,  0.0250, -0.0894],\n",
       "                       [ 0.0393,  0.0431,  0.0339,  ..., -0.0268, -0.0922,  0.1223]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.Wg.weight',\n",
       "               tensor([[-0.0733,  0.1434, -0.0259,  ..., -0.0542,  0.0332,  0.1267],\n",
       "                       [-0.0785,  0.1198, -0.0753,  ..., -0.1086, -0.1456, -0.1452],\n",
       "                       [ 0.0918, -0.0086, -0.1593,  ..., -0.0616,  0.0641, -0.0925],\n",
       "                       ...,\n",
       "                       [-0.1291,  0.0762,  0.1278,  ...,  0.1132,  0.1086,  0.0467],\n",
       "                       [-0.0010, -0.0101, -0.0084,  ..., -0.0982, -0.0112,  0.1384],\n",
       "                       [-0.1497,  0.0506,  0.1409,  ...,  0.0672,  0.0067,  0.0527]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.gate2.Ug.weight',\n",
       "               tensor([[ 0.1309, -0.0567,  0.0277,  ...,  0.1217,  0.0600,  0.1551],\n",
       "                       [-0.1181, -0.1449, -0.0501,  ...,  0.0062,  0.0138, -0.1186],\n",
       "                       [ 0.1071, -0.0617, -0.0862,  ...,  0.1492, -0.0986,  0.1137],\n",
       "                       ...,\n",
       "                       [-0.0533,  0.1077,  0.0541,  ...,  0.0055, -0.1397, -0.0972],\n",
       "                       [ 0.0443, -0.0457, -0.0017,  ..., -0.1245,  0.1245, -0.0352],\n",
       "                       [-0.0412,  0.0154,  0.0918,  ..., -0.0699, -0.0660,  0.1437]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.norm1.weight',\n",
       "               tensor([1.0031, 1.0053, 0.9918, 1.0395, 1.0125, 1.0048, 1.0285, 0.9990, 0.9858,\n",
       "                       0.9801, 1.0021, 0.9933, 1.0037, 1.0186, 1.0021, 1.0147, 1.0058, 0.9805,\n",
       "                       0.9934, 0.9958, 0.9939, 0.9669, 1.0242, 0.9833, 1.0114, 1.0173, 1.0135,\n",
       "                       1.0012, 1.0343, 1.0066, 1.0013, 1.0018, 1.0195, 1.0238, 1.0528, 0.9908,\n",
       "                       0.9911, 1.0331, 1.0360, 1.0065, 1.0235, 1.0004, 1.0103, 1.0363, 1.0072,\n",
       "                       1.0133, 1.0237, 1.0361, 0.9944, 0.9952, 1.0085, 1.0363, 1.0283, 1.0233,\n",
       "                       0.9902, 1.0144, 0.9983, 0.9969, 1.0067, 1.0168, 1.0214, 0.9921, 1.0309,\n",
       "                       1.0074, 0.9953, 0.9628, 1.0091, 1.0273, 0.9909, 1.0286, 1.0164, 1.0005,\n",
       "                       1.0301, 1.0124, 1.0382, 1.0016, 1.0031, 0.9862, 0.9799, 1.0294, 0.9704,\n",
       "                       1.0356, 1.0241, 1.0316, 1.0226, 1.0109, 1.0071, 1.0309, 1.0067, 1.0192,\n",
       "                       1.0388, 1.0361, 1.0180, 1.0048, 1.0383, 1.0225, 1.0340, 1.0144, 0.9919,\n",
       "                       1.0124, 1.0086, 1.0099, 1.0047, 1.0449, 1.0349, 1.0197, 1.0349, 1.0397,\n",
       "                       1.0192, 1.0269, 0.9851, 0.9720, 1.0326, 1.0100, 1.0222, 1.0411, 1.0265,\n",
       "                       1.0220, 0.9842, 0.9962, 0.9931, 1.0282, 1.0416, 0.9955, 1.0290, 1.0202,\n",
       "                       1.0327, 1.0097], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.norm1.bias',\n",
       "               tensor([-0.0293,  0.0392,  0.0065,  0.0389,  0.0173, -0.0233, -0.0404, -0.0196,\n",
       "                       -0.0355, -0.0308, -0.0165, -0.0239,  0.0130, -0.0274,  0.0364, -0.0273,\n",
       "                        0.0158,  0.0068, -0.0010, -0.0243,  0.0137,  0.0195,  0.0085, -0.0368,\n",
       "                        0.0334, -0.0182, -0.0284,  0.0208, -0.0341,  0.0274, -0.0314,  0.0440,\n",
       "                        0.0181, -0.0369,  0.0601, -0.0007, -0.0602, -0.0420, -0.0259,  0.0115,\n",
       "                       -0.0382, -0.0263, -0.0415, -0.0457, -0.0217, -0.0327,  0.0395, -0.0432,\n",
       "                        0.0224,  0.0057, -0.0409, -0.0492, -0.0293,  0.0407,  0.0209, -0.0184,\n",
       "                       -0.0083, -0.0216,  0.0217,  0.0241, -0.0150,  0.0345, -0.0204, -0.0315,\n",
       "                        0.0285,  0.0001,  0.0314, -0.0435, -0.0148,  0.0426,  0.0372, -0.0201,\n",
       "                        0.0172,  0.0215, -0.0416,  0.0183, -0.0043,  0.0101, -0.0130,  0.0264,\n",
       "                       -0.0417,  0.0507, -0.0374,  0.0354,  0.0394, -0.0303,  0.0505, -0.0265,\n",
       "                       -0.0293,  0.0173,  0.0370,  0.0286, -0.0351, -0.0101,  0.0360,  0.0330,\n",
       "                       -0.0289, -0.0204,  0.0281,  0.0214, -0.0146, -0.0167,  0.0045,  0.0319,\n",
       "                        0.0328, -0.0420,  0.0307, -0.0065, -0.0188,  0.0274, -0.0148,  0.0336,\n",
       "                        0.0469, -0.0085, -0.0274,  0.0226, -0.0432,  0.0244, -0.0194, -0.0079,\n",
       "                       -0.0111, -0.0339,  0.0498,  0.0319, -0.0371,  0.0341, -0.0394, -0.0246],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.norm2.weight',\n",
       "               tensor([0.9955, 0.9932, 1.0076, 1.0026, 0.9966, 0.9971, 1.0013, 0.9833, 0.9893,\n",
       "                       0.9988, 0.9971, 1.0036, 0.9954, 0.9946, 0.9949, 0.9912, 0.9975, 0.9943,\n",
       "                       0.9950, 0.9969, 0.9886, 0.9940, 0.9995, 0.9948, 0.9966, 0.9998, 1.0209,\n",
       "                       0.9904, 1.0012, 1.0040, 1.0122, 0.9933, 0.9976, 0.9908, 0.9980, 0.9973,\n",
       "                       0.9754, 1.0103, 1.0019, 0.9947, 0.9844, 1.0180, 0.9899, 1.0003, 0.9953,\n",
       "                       0.9788, 1.0000, 0.9954, 1.0026, 1.0081, 0.9984, 0.9955, 0.9878, 1.0007,\n",
       "                       0.9938, 0.9987, 0.9886, 0.9864, 0.9953, 0.9990, 0.9926, 1.0044, 1.0048,\n",
       "                       0.9995, 1.0009, 1.0045, 1.0033, 0.9946, 0.9871, 0.9973, 1.0033, 0.9983,\n",
       "                       0.9859, 0.9990, 0.9979, 0.9882, 0.9870, 1.0063, 0.9944, 0.9834, 0.9942,\n",
       "                       0.9974, 0.9993, 0.9911, 0.9963, 0.9899, 1.0010, 1.0056, 0.9971, 0.9958,\n",
       "                       0.9937, 1.0055, 1.0011, 1.0035, 1.0055, 0.9950, 0.9950, 1.0069, 0.9978,\n",
       "                       0.9910, 0.9800, 1.0043, 1.0034, 0.9886, 0.9957, 0.9933, 0.9994, 0.9871,\n",
       "                       0.9969, 0.9964, 0.9996, 0.9956, 0.9810, 0.9957, 0.9924, 0.9945, 0.9904,\n",
       "                       1.0110, 0.9946, 0.9853, 0.9993, 0.9903, 1.0017, 1.0013, 1.0180, 1.0062,\n",
       "                       0.9954, 0.9930], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.norm2.bias',\n",
       "               tensor([ 3.1437e-03, -4.1689e-04, -5.1381e-04, -6.3238e-04,  1.9196e-03,\n",
       "                       -2.6171e-03, -3.9095e-03, -1.3271e-04,  1.3218e-05,  1.0610e-03,\n",
       "                        2.0340e-03,  1.1137e-03,  6.6507e-03,  5.9844e-03,  9.9840e-03,\n",
       "                        2.4350e-06, -5.1370e-03, -2.4123e-03, -2.6945e-05,  1.3372e-03,\n",
       "                        3.3341e-03,  2.5283e-03,  3.9613e-04, -4.6409e-04, -4.1712e-03,\n",
       "                       -2.6408e-03,  1.1000e-04,  2.1050e-03, -2.7946e-03, -2.5527e-03,\n",
       "                       -3.4098e-04, -6.5147e-03, -2.5398e-03,  7.7844e-03,  6.1389e-04,\n",
       "                        9.4440e-03,  1.7415e-02,  2.5635e-03, -3.6804e-03, -4.0154e-03,\n",
       "                        4.2370e-03,  8.7096e-03,  3.3153e-03, -5.4806e-03, -5.4905e-03,\n",
       "                       -3.4786e-04,  3.1132e-03, -1.5845e-03,  7.9015e-03,  2.8918e-03,\n",
       "                       -5.7707e-04,  1.1984e-03,  5.4496e-03,  4.5892e-03,  2.2312e-03,\n",
       "                        1.5060e-03,  5.4203e-03, -7.9760e-03, -8.0466e-04, -1.5863e-03,\n",
       "                       -3.4617e-03,  2.3745e-04, -8.3287e-03, -1.4050e-03, -1.6725e-03,\n",
       "                        5.4789e-03,  2.6092e-03,  2.3684e-03, -1.0861e-03, -4.4503e-03,\n",
       "                        6.8580e-03,  1.4321e-03, -5.3142e-03,  8.5422e-03,  8.2181e-03,\n",
       "                        1.7677e-03, -1.1757e-03,  4.9513e-03, -3.6211e-03,  4.7417e-03,\n",
       "                        2.6430e-03, -2.2582e-03, -4.5951e-03, -2.6939e-03,  6.5160e-05,\n",
       "                        5.6076e-03, -2.8682e-03,  6.3918e-03,  2.3257e-03, -5.8791e-03,\n",
       "                        1.8723e-03, -2.3391e-03,  3.5908e-03, -1.6303e-03,  8.0835e-03,\n",
       "                       -1.0346e-03,  2.7631e-03, -3.5541e-03, -1.3039e-02, -3.3075e-03,\n",
       "                        9.4539e-03, -6.1807e-03,  9.6886e-04, -2.5699e-03,  2.9518e-03,\n",
       "                        4.4971e-04,  2.5703e-03, -1.6105e-03, -4.6097e-03,  2.5297e-03,\n",
       "                        6.2948e-03, -6.2414e-03, -9.0489e-04,  4.1386e-03, -3.7429e-03,\n",
       "                        3.0397e-03, -7.4478e-04, -5.1249e-03,  1.5927e-03, -1.7091e-03,\n",
       "                        3.2448e-03,  4.8176e-03,  1.1746e-02,  4.5559e-04, -1.5989e-03,\n",
       "                        4.7647e-03,  6.3587e-04, -5.0034e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.norm_kv.weight',\n",
       "               tensor([1.0222, 0.9852, 1.0199, 1.0193, 1.0141, 1.0358, 1.0151, 0.9955, 1.0035,\n",
       "                       1.0098, 1.0002, 0.9933, 1.0216, 1.0186, 1.0082, 0.9922, 0.9996, 0.9961,\n",
       "                       0.9874, 0.9866, 0.9887, 0.9937, 1.0050, 1.0002, 0.9945, 0.9997, 0.9878,\n",
       "                       0.9953, 1.0012, 0.9955, 0.9900, 0.9881, 0.9955, 0.9959, 0.9920, 0.9904,\n",
       "                       0.9860, 1.0237, 1.0038, 1.0102, 0.9985, 0.9990, 1.0146, 0.9906, 0.9921,\n",
       "                       1.0001, 1.0025, 0.9923, 1.0010, 0.9945, 0.9949, 0.9883, 0.9963, 1.0182,\n",
       "                       1.0055, 1.0096, 0.9956, 1.0049, 1.0277, 1.0074, 0.9950, 0.9993, 0.9906,\n",
       "                       1.0079, 1.0074, 1.0091, 0.9893, 0.9986, 1.0127, 1.0357, 1.0172, 0.9920,\n",
       "                       1.0063, 1.0043, 0.9929, 1.0158, 1.0152, 1.0118, 1.0130, 1.0102, 1.0065,\n",
       "                       1.0298, 1.0156, 0.9912, 0.9980, 0.9876, 0.9925, 0.9894, 0.9906, 1.0049,\n",
       "                       1.0189, 0.9942, 0.9976, 1.0017, 0.9945, 1.0020, 1.0047, 1.0025, 0.9898,\n",
       "                       0.9866, 0.9917, 0.9977, 0.9897, 0.9858, 0.9739, 1.0231, 1.0037, 0.9957,\n",
       "                       1.0039, 0.9863, 0.9747, 1.0000, 0.9853, 0.9980, 1.0053, 0.9759, 0.9904,\n",
       "                       0.9927, 0.9930, 0.9955, 0.9893, 1.0076, 0.9937, 0.9861, 0.9913, 0.9869,\n",
       "                       1.0039, 0.9906], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.norm_kv.bias',\n",
       "               tensor([ 3.2842e-03, -1.6877e-04,  3.0127e-03,  1.3273e-03,  3.8934e-03,\n",
       "                        1.3639e-03,  5.7989e-03,  2.8287e-03,  6.7597e-03,  4.1322e-03,\n",
       "                        2.8917e-03,  8.5719e-03,  1.9052e-03, -5.6959e-04,  2.5390e-03,\n",
       "                        7.9879e-03, -6.3273e-03, -4.2163e-03,  8.1311e-03,  9.3529e-03,\n",
       "                        1.0228e-02,  3.8040e-03,  1.5701e-03,  3.0682e-03, -1.0625e-02,\n",
       "                        7.0559e-04, -8.5865e-03, -6.1183e-03, -5.5669e-03, -7.1436e-03,\n",
       "                       -2.8210e-03, -7.3016e-03, -6.7540e-03, -2.6155e-03, -1.3024e-02,\n",
       "                       -6.3782e-03, -4.6422e-03, -6.3413e-03,  5.6020e-03, -1.9792e-03,\n",
       "                        4.8419e-03,  1.9271e-04,  1.3101e-03,  7.4006e-03,  9.2811e-03,\n",
       "                        8.3750e-04, -2.7772e-03,  4.8327e-03,  4.8587e-03,  6.8591e-03,\n",
       "                        4.1273e-03,  8.0158e-03,  4.9864e-03, -4.5631e-03,  1.2608e-02,\n",
       "                        4.3725e-03,  4.8741e-03,  1.1365e-03, -1.3327e-03, -5.4384e-05,\n",
       "                        1.9679e-03, -8.9729e-04,  7.1787e-03,  8.9047e-03, -3.9201e-03,\n",
       "                       -4.3394e-03, -9.0100e-05,  5.2626e-03,  8.3959e-03, -5.0656e-04,\n",
       "                       -5.9821e-03,  1.1893e-02,  2.0882e-03,  8.5200e-04,  7.6032e-03,\n",
       "                        2.8182e-03,  6.2333e-03, -2.1970e-03,  1.9232e-03,  3.0590e-03,\n",
       "                        5.1665e-03,  9.4903e-05, -4.3628e-03, -6.3964e-03, -1.1907e-03,\n",
       "                        1.0050e-02,  6.6337e-03,  8.9057e-03,  5.4186e-03,  2.4871e-03,\n",
       "                        3.8597e-03,  3.3636e-03,  6.4045e-03,  5.7774e-03, -1.0580e-02,\n",
       "                       -1.7998e-03, -1.8514e-06,  1.4720e-03, -6.2166e-03, -7.5671e-03,\n",
       "                       -7.0645e-03, -3.8801e-03, -8.0856e-03, -1.3059e-02, -2.2154e-02,\n",
       "                        3.3225e-03, -5.3972e-03, -5.5558e-03, -1.0468e-03, -1.0071e-02,\n",
       "                       -1.9011e-02, -8.4905e-04, -1.0763e-02, -1.8940e-03, -2.5080e-04,\n",
       "                       -2.1397e-02, -1.2076e-02, -4.3533e-03, -6.1720e-03, -3.6285e-03,\n",
       "                       -1.2997e-02, -1.6478e-03, -4.0548e-03, -1.2004e-02, -1.0391e-02,\n",
       "                       -1.0617e-02, -3.9551e-03, -6.5573e-03], device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.fc.0.weight',\n",
       "               tensor([[-0.0302, -0.0040, -0.0923,  ...,  0.0096,  0.0130, -0.0617],\n",
       "                       [ 0.0025,  0.0620, -0.0297,  ..., -0.0519, -0.0130,  0.0773],\n",
       "                       [-0.0263,  0.0581,  0.0493,  ...,  0.0216, -0.0713, -0.0523],\n",
       "                       ...,\n",
       "                       [ 0.0176, -0.0155,  0.0315,  ...,  0.0273,  0.0780,  0.0095],\n",
       "                       [ 0.0385,  0.0053,  0.0820,  ...,  0.0079,  0.0279, -0.0450],\n",
       "                       [-0.0724,  0.0218,  0.0674,  ...,  0.0754, -0.0587, -0.0405]],\n",
       "                      device='cuda:0')),\n",
       "              ('transformer.transformer_blocks.5.fc.0.bias',\n",
       "               tensor([ 0.0383,  0.0599,  0.0369,  0.0009, -0.0385, -0.0390, -0.0149, -0.0305,\n",
       "                        0.0369,  0.0296,  0.0619, -0.0551, -0.0313, -0.0770, -0.0587,  0.0201,\n",
       "                        0.0291,  0.0735,  0.0588, -0.0411,  0.0448, -0.0384,  0.0587, -0.0516,\n",
       "                       -0.0379,  0.0253,  0.0028,  0.0681, -0.0003,  0.0470, -0.0877,  0.0687,\n",
       "                        0.0293,  0.0620, -0.0169,  0.0117,  0.0090,  0.0615,  0.0704, -0.0108,\n",
       "                        0.0508, -0.0468,  0.0829, -0.0435, -0.0837, -0.0090,  0.0568,  0.0452,\n",
       "                        0.0283,  0.0737, -0.0657, -0.0853, -0.0740,  0.0178,  0.0587,  0.0185,\n",
       "                        0.0071,  0.0610, -0.0231,  0.0661,  0.0399, -0.0235,  0.0118, -0.0138,\n",
       "                        0.0103, -0.0729,  0.0352,  0.0623, -0.0110, -0.0177,  0.0399, -0.0029,\n",
       "                       -0.0452,  0.0683,  0.0189,  0.0617,  0.0054, -0.0479,  0.0770,  0.0095,\n",
       "                        0.0011,  0.0288, -0.0941, -0.0434,  0.0049,  0.0468, -0.0620,  0.0190,\n",
       "                       -0.0605,  0.0289, -0.0854, -0.0449, -0.0035, -0.0024, -0.0120, -0.0541,\n",
       "                       -0.0295, -0.0342,  0.0058,  0.0442,  0.0022, -0.0115, -0.0675,  0.0732,\n",
       "                       -0.0375, -0.0565, -0.0191,  0.0494, -0.0255, -0.0510,  0.0335,  0.0497,\n",
       "                        0.0899,  0.0745, -0.0077,  0.0526, -0.0211, -0.0244,  0.0580, -0.1025,\n",
       "                       -0.0465, -0.0958,  0.0195,  0.0300,  0.0368, -0.0844, -0.0761,  0.0029],\n",
       "                      device='cuda:0')),\n",
       "              ('lin_policy.weight',\n",
       "               tensor([[-0.2676, -0.0271, -0.1540,  ...,  0.0733,  0.0511, -0.1259],\n",
       "                       [ 0.2052, -0.0343,  0.1441,  ..., -0.0321,  0.1480, -0.0595],\n",
       "                       [ 0.1471,  0.0486,  0.2948,  ...,  0.2022,  0.0257,  0.1122],\n",
       "                       ...,\n",
       "                       [-0.0650,  0.0273,  0.0860,  ...,  0.1511, -0.0538,  0.1119],\n",
       "                       [ 0.0276, -0.1587, -0.2223,  ...,  0.0989, -0.0594, -0.1140],\n",
       "                       [-0.0363,  0.0440,  0.0189,  ..., -0.1906, -0.0296, -0.1208]],\n",
       "                      device='cuda:0')),\n",
       "              ('lin_policy.bias',\n",
       "               tensor([ 0.0005,  0.0260,  0.0195,  0.0887, -0.0630,  0.0358, -0.0059,  0.0928,\n",
       "                       -0.0561,  0.0123,  0.0105, -0.0668,  0.0789, -0.0855,  0.0783,  0.0239,\n",
       "                        0.0764, -0.0003,  0.0150, -0.0512, -0.0580,  0.0096,  0.0111, -0.0718,\n",
       "                        0.0094,  0.0071, -0.0193,  0.0600, -0.0647, -0.0390, -0.0731,  0.0049,\n",
       "                       -0.0634, -0.0583, -0.0785, -0.0161,  0.0760,  0.0087, -0.0555,  0.0488,\n",
       "                        0.0198, -0.0957,  0.0318, -0.0172, -0.0081,  0.0451, -0.0479,  0.0388,\n",
       "                        0.0575, -0.0072, -0.0134, -0.0422, -0.0351,  0.0312,  0.0790, -0.0263,\n",
       "                       -0.0687,  0.0725,  0.0962,  0.0655, -0.0749,  0.0480,  0.0611,  0.0868,\n",
       "                        0.0797,  0.0804, -0.0215, -0.1018, -0.0852,  0.0400, -0.0277, -0.0531,\n",
       "                       -0.0424, -0.0211,  0.0453,  0.0155, -0.0486,  0.0290,  0.0792, -0.1003,\n",
       "                       -0.0731,  0.0314,  0.0878, -0.0205,  0.0473, -0.0554, -0.0326, -0.0731,\n",
       "                        0.0178, -0.0392,  0.0888, -0.0377, -0.0523, -0.0354,  0.0947, -0.0107,\n",
       "                        0.0085,  0.0051, -0.0150, -0.0491, -0.0613, -0.0264,  0.0414,  0.0786,\n",
       "                        0.0665,  0.0468, -0.0503, -0.0424,  0.0547,  0.0630, -0.0181, -0.0781,\n",
       "                       -0.0078, -0.0823,  0.0813, -0.0709,  0.0667,  0.0580,  0.0163,  0.0009,\n",
       "                       -0.0590,  0.0352,  0.0256,  0.0237, -0.0111, -0.0271, -0.0097, -0.0244],\n",
       "                      device='cuda:0')),\n",
       "              ('lin_value.weight',\n",
       "               tensor([[ 0.0747, -0.1865,  0.2131,  ..., -0.0775,  0.1820, -0.2123],\n",
       "                       [ 0.0756, -0.0621, -0.0052,  ...,  0.0108, -0.1173, -0.0041],\n",
       "                       [ 0.0352, -0.0354,  0.0151,  ...,  0.0165, -0.1258,  0.0892],\n",
       "                       ...,\n",
       "                       [ 0.0740, -0.1941, -0.0007,  ...,  0.0358,  0.1134,  0.1069],\n",
       "                       [-0.0556,  0.0148, -0.2871,  ...,  0.1920, -0.0765, -0.0401],\n",
       "                       [-0.0385, -0.1541,  0.0939,  ...,  0.0426,  0.0911,  0.1576]],\n",
       "                      device='cuda:0')),\n",
       "              ('lin_value.bias',\n",
       "               tensor([-0.0115, -0.0699, -0.0711,  0.0535, -0.0249,  0.0227,  0.0796,  0.0768,\n",
       "                        0.0218, -0.0699,  0.0604, -0.0573, -0.0826, -0.0299,  0.0533, -0.0263,\n",
       "                        0.0494,  0.0263, -0.0560,  0.0458,  0.0756, -0.0167,  0.0423, -0.0120,\n",
       "                       -0.0597, -0.0466,  0.0273,  0.0216, -0.0854,  0.0775,  0.0614, -0.0125,\n",
       "                       -0.0369,  0.0745, -0.0805,  0.0449,  0.0121,  0.0345, -0.0972,  0.0270,\n",
       "                       -0.0399, -0.0367,  0.0103, -0.0406,  0.0322,  0.0793, -0.0196,  0.0454,\n",
       "                        0.0367, -0.0713, -0.0020,  0.0818,  0.0137, -0.0791, -0.0068, -0.0975,\n",
       "                        0.0773,  0.0419, -0.0259, -0.0292, -0.0053,  0.0250,  0.0127, -0.0879,\n",
       "                       -0.0414,  0.0621,  0.0095,  0.0651, -0.0501,  0.0001, -0.0568,  0.0340,\n",
       "                        0.0125,  0.0597, -0.0988,  0.0070,  0.0176, -0.0560, -0.0877, -0.0609,\n",
       "                       -0.0374,  0.0513, -0.0764, -0.0327, -0.0211,  0.0537, -0.0766,  0.0310,\n",
       "                       -0.0819,  0.0349,  0.0641, -0.0557,  0.0645,  0.0383, -0.0819,  0.0119,\n",
       "                        0.0177, -0.0155, -0.0460,  0.0229, -0.0884, -0.0909,  0.0590,  0.0392,\n",
       "                        0.0934, -0.0786, -0.0685, -0.0888, -0.0850,  0.0238, -0.0009, -0.0296,\n",
       "                       -0.0339, -0.0012,  0.0855, -0.0689,  0.0775, -0.0151,  0.0698,  0.0024,\n",
       "                        0.0494,  0.0351, -0.0714, -0.0619,  0.0413, -0.0791, -0.0805, -0.0551],\n",
       "                      device='cuda:0')),\n",
       "              ('policy_branches.0.weight',\n",
       "               tensor([[-1.2483e-02,  8.0601e-03, -1.8599e-02, -2.4001e-02,  2.3501e-02,\n",
       "                         1.7062e-02,  2.3761e-02, -6.0259e-02, -2.1350e-02,  3.1046e-02,\n",
       "                        -3.0359e-02, -7.9488e-03,  2.0309e-03,  2.5402e-02, -1.0354e-02,\n",
       "                        -2.7165e-02, -3.6012e-02, -1.7566e-02,  5.1990e-02, -3.0358e-02,\n",
       "                         3.1770e-02, -4.5709e-02, -9.8847e-03,  4.2143e-03,  4.9367e-03,\n",
       "                        -2.2392e-02, -6.2891e-02, -1.0360e-02, -2.4987e-02, -3.3655e-02,\n",
       "                        -1.9202e-02,  2.4741e-02, -8.7804e-03, -6.5093e-02,  2.3453e-03,\n",
       "                         3.8014e-03, -3.9344e-03,  1.8427e-03,  1.3544e-02, -2.5982e-02,\n",
       "                        -1.8652e-02,  9.4656e-03, -3.2124e-02, -3.0923e-02,  1.4412e-02,\n",
       "                        -1.9591e-02, -1.5073e-02,  1.0775e-02,  2.7708e-02, -6.4025e-02,\n",
       "                         1.1055e-02, -3.1040e-02,  1.5660e-02, -1.9266e-02, -1.2370e-02,\n",
       "                        -1.3195e-02, -2.6217e-03,  3.1367e-03,  5.3648e-03,  2.1386e-02,\n",
       "                         2.9575e-02, -8.4986e-03, -2.0232e-02,  3.9345e-02, -2.2913e-02,\n",
       "                        -1.0350e-02,  3.1183e-02,  4.4371e-03, -8.4277e-03,  2.2464e-02,\n",
       "                        -1.1926e-02, -1.8500e-03, -2.4128e-03, -8.1020e-03,  2.6592e-02,\n",
       "                        -1.9387e-02,  2.1135e-02, -1.1017e-02,  1.8080e-02,  3.2057e-02,\n",
       "                         6.0469e-05,  5.4301e-04, -1.5958e-02, -1.7099e-02,  1.6040e-02,\n",
       "                         1.4396e-02,  2.3759e-02,  1.9423e-02, -2.0469e-02,  2.4916e-02,\n",
       "                        -1.2147e-02, -2.5694e-02, -4.2425e-02, -1.2691e-02, -8.3873e-04,\n",
       "                         1.6468e-02, -1.6539e-02,  2.3989e-02,  1.5796e-02, -3.8176e-02,\n",
       "                        -1.1851e-02, -1.5775e-03, -1.5571e-02, -2.2076e-02, -1.3350e-02,\n",
       "                        -3.0076e-02,  7.3596e-03, -5.6187e-03,  1.4849e-02, -9.1767e-03,\n",
       "                        -2.2112e-02,  1.8840e-02,  3.0908e-03, -3.9391e-02, -1.3618e-02,\n",
       "                         2.3126e-02,  3.0325e-02,  2.3071e-02, -1.7172e-03, -1.7758e-02,\n",
       "                        -1.5481e-02,  2.9453e-02, -4.5826e-03, -1.6876e-02, -5.6564e-02,\n",
       "                        -1.5725e-02,  7.7217e-03, -2.4141e-02],\n",
       "                       [-1.7975e-02, -1.7986e-02, -2.4509e-02, -4.1108e-03, -4.0130e-02,\n",
       "                         3.6122e-02, -3.9946e-02,  4.3195e-02, -1.2649e-02, -1.6197e-02,\n",
       "                         6.1818e-03,  8.3321e-03, -2.1251e-02, -1.4934e-02, -2.6758e-02,\n",
       "                        -5.9855e-03, -1.5071e-03, -2.5957e-02, -3.5461e-02, -1.3890e-02,\n",
       "                         7.8690e-03, -3.1151e-02, -2.4876e-02, -8.6993e-03, -1.5648e-02,\n",
       "                        -1.1284e-02, -9.5086e-03, -1.1514e-02, -5.5910e-03, -2.1973e-02,\n",
       "                         2.7951e-03, -1.5546e-02,  9.5070e-03, -1.8879e-02, -1.4486e-02,\n",
       "                        -4.3558e-02, -2.2989e-02,  1.5387e-03, -1.5663e-02, -4.0795e-03,\n",
       "                        -1.8537e-02, -2.3019e-02, -2.4046e-02, -9.5306e-04, -1.5245e-02,\n",
       "                         8.2275e-03,  3.2148e-02, -3.7601e-02, -2.0701e-02,  2.7035e-02,\n",
       "                        -7.0323e-03, -1.8086e-02, -7.5599e-03,  2.6061e-03, -8.7298e-03,\n",
       "                        -1.2211e-02, -2.1027e-02, -1.7767e-02, -2.5371e-02, -2.5015e-02,\n",
       "                        -7.2633e-04,  1.1192e-02,  3.7980e-02,  1.4927e-02,  3.1889e-04,\n",
       "                        -2.6313e-03, -1.3209e-02,  1.3228e-02, -1.2302e-02, -2.0925e-02,\n",
       "                         1.6385e-02,  7.8507e-03,  1.0549e-02, -1.1679e-02, -2.9247e-02,\n",
       "                        -7.2118e-03, -1.9474e-02, -5.3481e-03, -3.8552e-03, -1.2152e-02,\n",
       "                         1.2027e-02, -2.6948e-02, -1.7945e-02,  4.3207e-03, -2.0158e-02,\n",
       "                        -2.3706e-02, -2.1962e-02,  6.4331e-04,  2.0597e-02, -7.9135e-03,\n",
       "                         5.8524e-02, -1.1925e-02,  1.7198e-02, -2.6549e-02, -1.7001e-02,\n",
       "                        -2.2895e-02, -7.1066e-03, -8.2388e-03,  2.4580e-02, -2.0945e-02,\n",
       "                        -6.4788e-03, -1.1877e-02, -8.0594e-03, -1.2019e-02, -1.5368e-02,\n",
       "                        -1.6366e-02, -4.0429e-02, -5.5457e-03, -3.2952e-02,  3.2578e-02,\n",
       "                         3.0292e-04, -7.7635e-03, -7.2137e-02,  2.7308e-02, -3.2261e-02,\n",
       "                        -2.1087e-02, -2.6582e-02, -1.6133e-02,  3.0669e-03,  4.1570e-02,\n",
       "                        -2.5456e-02, -2.6762e-02, -2.9723e-02,  3.7295e-02,  5.5194e-02,\n",
       "                         5.1696e-03,  1.9547e-02,  1.9377e-02],\n",
       "                       [ 3.2693e-02, -4.7937e-03,  2.3625e-02,  1.0768e-02,  1.1936e-02,\n",
       "                        -4.8804e-02,  4.7617e-04,  3.7545e-03,  2.1557e-02, -3.3713e-02,\n",
       "                         1.4572e-02,  5.8538e-03,  1.0819e-02,  2.4829e-02,  2.8751e-02,\n",
       "                         2.3561e-02,  2.9975e-02,  2.4619e-02, -5.1113e-02,  3.0081e-02,\n",
       "                        -1.7622e-02,  3.5318e-02,  6.8857e-03,  4.6822e-03,  3.6605e-03,\n",
       "                         1.8819e-02,  5.5707e-02,  4.8758e-03,  2.3774e-02,  3.1386e-02,\n",
       "                         1.0343e-03, -9.2971e-03,  5.1569e-03,  5.9472e-02,  1.2901e-02,\n",
       "                        -4.7685e-03,  1.6322e-02, -6.7576e-04,  8.8652e-03,  2.6561e-02,\n",
       "                         4.2118e-02, -1.7026e-02,  2.3352e-02,  3.3626e-02, -1.4541e-02,\n",
       "                         1.2786e-02,  5.6579e-03,  2.2164e-03, -1.3989e-03, -1.8623e-03,\n",
       "                         1.3817e-02,  2.9399e-02,  7.7127e-03, -9.9805e-03,  2.8986e-02,\n",
       "                         3.8910e-02,  8.6381e-03,  1.0727e-02,  1.0476e-02, -2.8276e-03,\n",
       "                        -1.2845e-02,  2.3720e-03, -9.3776e-03, -5.3721e-02,  2.0449e-02,\n",
       "                         1.6753e-02, -1.6229e-02, -2.1246e-02, -2.2992e-03, -8.4156e-03,\n",
       "                         3.8201e-03,  5.3751e-03, -2.3410e-04,  1.5419e-02,  6.1173e-03,\n",
       "                         2.4950e-02,  1.3150e-02,  2.2401e-03, -1.2310e-03, -3.2097e-03,\n",
       "                        -9.1816e-03,  2.8148e-02,  3.4231e-02,  3.4600e-02, -1.0051e-02,\n",
       "                         1.3501e-02, -1.4539e-02, -7.8505e-03, -8.9491e-03, -2.5863e-02,\n",
       "                        -2.4386e-02,  1.9765e-02,  1.1067e-02,  3.8737e-02,  2.4342e-02,\n",
       "                         9.3922e-03,  2.4524e-02,  2.6793e-02, -3.7288e-02,  1.8905e-02,\n",
       "                         2.8306e-02,  1.1486e-02,  2.6417e-02,  1.9936e-02,  2.5570e-02,\n",
       "                         3.7021e-02,  3.2931e-02,  1.7937e-02,  1.1651e-03, -1.3773e-02,\n",
       "                         2.6260e-02,  5.4450e-03,  2.2059e-02,  3.1206e-03,  1.8082e-02,\n",
       "                        -8.5486e-03, -1.5266e-02, -2.2736e-02,  9.3899e-03, -1.2574e-02,\n",
       "                         2.1732e-02, -2.4687e-02,  2.0417e-02, -2.1262e-02, -1.9150e-02,\n",
       "                         2.5353e-02,  9.2120e-03,  6.1672e-03]], device='cuda:0')),\n",
       "              ('policy_branches.0.bias',\n",
       "               tensor([ 0.0459, -0.0675,  0.0492], device='cuda:0')),\n",
       "              ('value.weight',\n",
       "               tensor([[-0.0343, -0.1722,  0.0047, -0.0315, -0.0273, -0.1044, -0.0114,  0.0557,\n",
       "                         0.1630,  0.0647,  0.1786,  0.1042,  0.1351, -0.0656, -0.1292,  0.0348,\n",
       "                         0.1395, -0.0246, -0.0985,  0.0869,  0.0698, -0.1505, -0.0326,  0.1202,\n",
       "                        -0.0335, -0.0117, -0.0682, -0.1138,  0.0104, -0.0405, -0.0955, -0.0323,\n",
       "                        -0.0350,  0.1432, -0.0589, -0.0909, -0.0104,  0.0512, -0.0017,  0.0883,\n",
       "                         0.0344,  0.0430, -0.0724, -0.0515,  0.1219,  0.0165,  0.0164, -0.1400,\n",
       "                        -0.0966,  0.0384,  0.0947,  0.0502,  0.0195,  0.0024,  0.0178, -0.1024,\n",
       "                         0.1809, -0.0530,  0.0343,  0.0379,  0.0431, -0.0575,  0.0355,  0.0014,\n",
       "                        -0.0516, -0.0206,  0.0600, -0.0370,  0.0478, -0.0025,  0.0448, -0.1280,\n",
       "                         0.1948, -0.1113,  0.1337,  0.0563,  0.0725, -0.0196, -0.0476,  0.0371,\n",
       "                        -0.0835,  0.0514,  0.0712, -0.1465, -0.0168,  0.0694,  0.0271,  0.0913,\n",
       "                        -0.1308,  0.1175, -0.0392, -0.2161, -0.0255, -0.0316,  0.0726,  0.0775,\n",
       "                         0.0159, -0.0446, -0.0101,  0.0345, -0.0016, -0.1041,  0.1039, -0.0531,\n",
       "                         0.0333,  0.0958, -0.0025, -0.0467,  0.0629, -0.0344,  0.0019, -0.0470,\n",
       "                         0.1347, -0.0114,  0.0858, -0.0366,  0.0088, -0.0748, -0.1277, -0.0481,\n",
       "                         0.0116, -0.0791, -0.0746, -0.1280, -0.0495,  0.0882, -0.0018, -0.0174]],\n",
       "                      device='cuda:0')),\n",
       "              ('value.bias', tensor([0.0355], device='cuda:0'))]),\n",
       " 'optimizer_state_dict': {'state': {0: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              ...,\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              ...,\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              ...,\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  0.0000e+00,\n",
       "                0.0000e+00,  0.0000e+00]]],\n",
       "    \n",
       "    \n",
       "            [[[ 1.0180e-05,  1.7419e-05,  1.6581e-05,  ...,  2.4107e-05,\n",
       "                2.4646e-05,  2.4735e-05],\n",
       "              [ 9.1425e-06,  1.8809e-05,  1.7562e-05,  ...,  2.5860e-05,\n",
       "                2.6485e-05,  2.6488e-05],\n",
       "              [ 9.2891e-06,  1.9100e-05,  1.8183e-05,  ...,  2.6180e-05,\n",
       "                2.6458e-05,  2.6697e-05],\n",
       "              ...,\n",
       "              [ 9.9222e-06,  1.6820e-05,  1.7382e-05,  ...,  2.5297e-05,\n",
       "                2.5504e-05,  2.5864e-05],\n",
       "              [ 1.0485e-05,  1.7076e-05,  1.7148e-05,  ...,  2.5417e-05,\n",
       "                2.5165e-05,  2.6254e-05],\n",
       "              [ 1.0781e-05,  1.7815e-05,  1.7189e-05,  ...,  2.5819e-05,\n",
       "                2.5240e-05,  2.6119e-05]],\n",
       "    \n",
       "             [[ 6.5134e-06,  1.2566e-05,  1.2163e-05,  ...,  1.7204e-05,\n",
       "                1.6276e-05,  1.7389e-05],\n",
       "              [ 7.8614e-06,  1.4110e-05,  1.2123e-05,  ...,  2.1172e-05,\n",
       "                1.9026e-05,  2.0774e-05],\n",
       "              [ 9.6932e-06,  1.6300e-05,  1.3302e-05,  ...,  1.9425e-05,\n",
       "                2.1140e-05,  2.4287e-05],\n",
       "              ...,\n",
       "              [ 4.3554e-06,  1.1736e-05,  1.0033e-05,  ...,  1.6983e-05,\n",
       "                2.0199e-05,  2.4388e-05],\n",
       "              [ 5.3377e-06,  1.3171e-05,  1.1522e-05,  ...,  1.8736e-05,\n",
       "                2.2237e-05,  2.5257e-05],\n",
       "              [ 6.8181e-06,  1.4410e-05,  1.1474e-05,  ...,  2.1602e-05,\n",
       "                2.5281e-05,  2.5515e-05]],\n",
       "    \n",
       "             [[ 8.5475e-06,  1.6075e-05,  1.6075e-05,  ...,  2.3449e-05,\n",
       "                2.3449e-05,  2.3449e-05],\n",
       "              [ 7.7465e-06,  1.6452e-05,  1.6452e-05,  ...,  2.5019e-05,\n",
       "                2.5019e-05,  2.5019e-05],\n",
       "              [ 7.7465e-06,  1.6452e-05,  1.6452e-05,  ...,  2.5019e-05,\n",
       "                2.5019e-05,  2.5019e-05],\n",
       "              ...,\n",
       "              [ 7.3119e-06,  1.5745e-05,  1.5745e-05,  ...,  2.4583e-05,\n",
       "                2.4583e-05,  2.4583e-05],\n",
       "              [ 7.3119e-06,  1.5745e-05,  1.5745e-05,  ...,  2.4583e-05,\n",
       "                2.4583e-05,  2.4583e-05],\n",
       "              [ 7.3119e-06,  1.5745e-05,  1.5745e-05,  ...,  2.4583e-05,\n",
       "                2.4583e-05,  2.4583e-05]]],\n",
       "    \n",
       "    \n",
       "            [[[-1.7508e-05, -1.5581e-06, -2.1326e-06,  ...,  2.1789e-06,\n",
       "                2.3250e-06,  2.4369e-06],\n",
       "              [-9.4795e-06,  1.0838e-05,  1.0609e-05,  ...,  1.5524e-05,\n",
       "                1.5785e-05,  1.5923e-05],\n",
       "              [-1.0106e-05,  1.0220e-05,  1.0694e-05,  ...,  1.5750e-05,\n",
       "                1.5987e-05,  1.6387e-05],\n",
       "              ...,\n",
       "              [-9.4104e-06,  9.0068e-06,  9.7821e-06,  ...,  1.5369e-05,\n",
       "                1.5893e-05,  1.5936e-05],\n",
       "              [-1.0295e-05,  7.9636e-06,  8.8940e-06,  ...,  1.5432e-05,\n",
       "                1.5390e-05,  1.5208e-05],\n",
       "              [-1.2144e-05,  7.7723e-06,  8.6258e-06,  ...,  1.5356e-05,\n",
       "                1.5013e-05,  1.4539e-05]],\n",
       "    \n",
       "             [[-1.1672e-05,  4.2226e-06, -5.4651e-06,  ...,  3.8286e-06,\n",
       "                1.0165e-05,  8.3030e-06],\n",
       "              [-8.4707e-06,  1.5839e-05,  9.2011e-06,  ...,  1.5897e-05,\n",
       "                2.2077e-05,  1.9759e-05],\n",
       "              [-6.7877e-06,  1.9791e-05,  9.6471e-06,  ...,  1.6565e-05,\n",
       "                2.0054e-05,  1.8099e-05],\n",
       "              ...,\n",
       "              [-7.6056e-06,  2.1105e-05,  1.8088e-05,  ...,  2.6537e-05,\n",
       "                2.7776e-05,  2.4851e-05],\n",
       "              [-9.4328e-06,  1.6443e-05,  1.1276e-05,  ...,  1.8949e-05,\n",
       "                2.1022e-05,  1.9844e-05],\n",
       "              [-1.7512e-05,  8.1817e-06,  5.4948e-06,  ...,  1.2804e-05,\n",
       "                1.8498e-05,  1.3690e-05]],\n",
       "    \n",
       "             [[-1.7794e-05, -1.7511e-06, -1.7511e-06,  ...,  1.5341e-06,\n",
       "                1.5341e-06,  1.5341e-06],\n",
       "              [-9.4872e-06,  1.1408e-05,  1.1408e-05,  ...,  1.4574e-05,\n",
       "                1.4574e-05,  1.4574e-05],\n",
       "              [-9.4872e-06,  1.1408e-05,  1.1408e-05,  ...,  1.4574e-05,\n",
       "                1.4574e-05,  1.4574e-05],\n",
       "              ...,\n",
       "              [-7.4092e-06,  1.3114e-05,  1.3114e-05,  ...,  1.5398e-05,\n",
       "                1.5398e-05,  1.5398e-05],\n",
       "              [-7.4092e-06,  1.3114e-05,  1.3114e-05,  ...,  1.5398e-05,\n",
       "                1.5398e-05,  1.5398e-05],\n",
       "              [-7.4092e-06,  1.3114e-05,  1.3114e-05,  ...,  1.5398e-05,\n",
       "                1.5398e-05,  1.5398e-05]]],\n",
       "    \n",
       "    \n",
       "            ...,\n",
       "    \n",
       "    \n",
       "            [[[-1.0053e-05,  6.9680e-08,  9.4791e-09,  ...,  1.9727e-06,\n",
       "                1.9935e-06,  2.5885e-06],\n",
       "              [-8.2654e-06,  3.3655e-06,  3.1510e-06,  ...,  5.3465e-06,\n",
       "                5.4152e-06,  6.9431e-06],\n",
       "              [-9.5028e-06,  2.6292e-06,  2.3436e-06,  ...,  4.3795e-06,\n",
       "                4.4687e-06,  5.7052e-06],\n",
       "              ...,\n",
       "              [-9.4539e-06,  2.8550e-06,  4.0968e-06,  ...,  5.3564e-06,\n",
       "                5.7020e-06,  5.7560e-06],\n",
       "              [-1.0583e-05,  1.9750e-06,  3.1613e-06,  ...,  4.7445e-06,\n",
       "                4.9963e-06,  5.0141e-06],\n",
       "              [-1.1311e-05,  1.3446e-06,  1.7259e-06,  ...,  3.5603e-06,\n",
       "                3.9586e-06,  4.7785e-06]],\n",
       "    \n",
       "             [[-1.0101e-05,  1.9879e-06,  3.7291e-08,  ...,  4.9956e-06,\n",
       "                5.2577e-06,  5.2699e-06],\n",
       "              [-1.1069e-05,  7.8788e-06,  7.1000e-06,  ...,  1.2878e-05,\n",
       "                1.1051e-05,  9.3531e-06],\n",
       "              [-8.1154e-06,  1.1787e-05,  7.8963e-06,  ...,  1.4303e-05,\n",
       "                1.0625e-05,  7.0113e-06],\n",
       "              ...,\n",
       "              [-2.7258e-07,  1.9206e-05,  1.8685e-05,  ...,  1.9959e-05,\n",
       "                1.8923e-05,  1.5383e-05],\n",
       "              [ 1.0143e-06,  2.0474e-05,  1.8349e-05,  ...,  1.8999e-05,\n",
       "                1.7797e-05,  1.6542e-05],\n",
       "              [-1.7740e-06,  1.7355e-05,  1.5844e-05,  ...,  1.6529e-05,\n",
       "                1.7550e-05,  1.5505e-05]],\n",
       "    \n",
       "             [[-5.9454e-06,  4.9541e-06,  4.9541e-06,  ...,  5.0887e-06,\n",
       "                5.0887e-06,  5.0887e-06],\n",
       "              [-3.6897e-06,  9.5926e-06,  9.5926e-06,  ...,  1.0184e-05,\n",
       "                1.0184e-05,  1.0184e-05],\n",
       "              [-3.6897e-06,  9.5926e-06,  9.5926e-06,  ...,  1.0184e-05,\n",
       "                1.0184e-05,  1.0184e-05],\n",
       "              ...,\n",
       "              [-5.2623e-07,  1.2639e-05,  1.2639e-05,  ...,  1.2409e-05,\n",
       "                1.2409e-05,  1.2409e-05],\n",
       "              [-5.2623e-07,  1.2639e-05,  1.2639e-05,  ...,  1.2409e-05,\n",
       "                1.2409e-05,  1.2409e-05],\n",
       "              [-5.2623e-07,  1.2639e-05,  1.2639e-05,  ...,  1.2409e-05,\n",
       "                1.2409e-05,  1.2409e-05]]],\n",
       "    \n",
       "    \n",
       "            [[[-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -8.8266e-08,\n",
       "               -8.8266e-08, -8.8266e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -5.3652e-08,  ..., -8.8266e-08,\n",
       "               -8.8266e-08, -8.8266e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -7.4420e-08,  ..., -8.8266e-08,\n",
       "               -8.8266e-08, -8.8266e-08],\n",
       "              ...,\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08]],\n",
       "    \n",
       "             [[-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              ...,\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08]],\n",
       "    \n",
       "             [[-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              ...,\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08],\n",
       "              [-2.6307e-08, -2.6307e-08, -2.6307e-08,  ..., -2.6307e-08,\n",
       "               -2.6307e-08, -2.6307e-08]]],\n",
       "    \n",
       "    \n",
       "            [[[ 1.5982e-08,  1.5982e-08,  1.5982e-08,  ...,  1.5982e-08,\n",
       "                1.5982e-08,  1.5982e-08],\n",
       "              [ 7.4589e-26,  7.0526e-26,  7.0526e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              [ 7.4589e-26,  7.0526e-26,  7.0526e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              ...,\n",
       "              [ 7.4589e-26,  7.0526e-26,  7.0526e-26,  ...,  4.6188e-26,\n",
       "                2.4511e-26,  2.4220e-26],\n",
       "              [ 7.4589e-26,  7.0526e-26,  7.0526e-26,  ...,  6.0791e-26,\n",
       "                2.5429e-26,  2.4220e-26],\n",
       "              [ 7.4589e-26,  7.0526e-26,  7.0526e-26,  ...,  7.0526e-26,\n",
       "                3.6146e-26,  2.4220e-26]],\n",
       "    \n",
       "             [[ 1.5982e-08,  1.5982e-08,  1.5982e-08,  ...,  1.5982e-08,\n",
       "                1.5982e-08,  1.5982e-08],\n",
       "              [ 3.9021e-26,  3.4958e-26,  3.4958e-26,  ...,  3.3758e-26,\n",
       "                2.5966e-26,  2.4220e-26],\n",
       "              [ 3.7252e-26,  3.3631e-26,  3.3631e-26,  ...,  2.7213e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              ...,\n",
       "              [ 3.5064e-26,  3.0243e-26,  2.7213e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              [ 3.0518e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              [ 2.8283e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26]],\n",
       "    \n",
       "             [[ 1.5982e-08,  1.5982e-08,  1.5982e-08,  ...,  1.5982e-08,\n",
       "                1.5982e-08,  1.5982e-08],\n",
       "              [ 2.8283e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              [ 2.8283e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              ...,\n",
       "              [ 2.8283e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              [ 2.8283e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26],\n",
       "              [ 2.8283e-26,  2.4220e-26,  2.4220e-26,  ...,  2.4220e-26,\n",
       "                2.4220e-26,  2.4220e-26]]]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[[[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              ...,\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              ...,\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              ...,\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 0.0000e+00,\n",
       "               0.0000e+00, 0.0000e+00]]],\n",
       "    \n",
       "    \n",
       "            [[[1.2871e-08, 1.0384e-08, 1.0641e-08,  ..., 1.3330e-08,\n",
       "               1.3484e-08, 1.3339e-08],\n",
       "              [1.0791e-08, 8.8882e-09, 8.6683e-09,  ..., 1.0596e-08,\n",
       "               1.0880e-08, 1.0605e-08],\n",
       "              [1.0688e-08, 9.0199e-09, 8.7755e-09,  ..., 1.0817e-08,\n",
       "               1.1001e-08, 1.0715e-08],\n",
       "              ...,\n",
       "              [1.0497e-08, 8.8713e-09, 8.7419e-09,  ..., 1.1253e-08,\n",
       "               1.1248e-08, 1.1273e-08],\n",
       "              [1.0659e-08, 8.9086e-09, 8.7034e-09,  ..., 1.1304e-08,\n",
       "               1.1397e-08, 1.1511e-08],\n",
       "              [1.0911e-08, 8.9578e-09, 8.7156e-09,  ..., 1.1188e-08,\n",
       "               1.1502e-08, 1.1728e-08]],\n",
       "    \n",
       "             [[1.6595e-08, 1.3431e-08, 1.3525e-08,  ..., 1.4253e-08,\n",
       "               1.3610e-08, 1.3475e-08],\n",
       "              [1.3285e-08, 1.0669e-08, 1.0494e-08,  ..., 1.1487e-08,\n",
       "               1.1000e-08, 1.1236e-08],\n",
       "              [1.3400e-08, 1.0130e-08, 9.6241e-09,  ..., 1.0840e-08,\n",
       "               1.0621e-08, 1.1057e-08],\n",
       "              ...,\n",
       "              [1.2269e-08, 9.4416e-09, 8.8517e-09,  ..., 1.0033e-08,\n",
       "               1.0507e-08, 1.0612e-08],\n",
       "              [1.2001e-08, 9.2178e-09, 8.7260e-09,  ..., 9.9538e-09,\n",
       "               1.0315e-08, 1.0555e-08],\n",
       "              [1.1924e-08, 9.2333e-09, 8.7850e-09,  ..., 1.0091e-08,\n",
       "               1.0339e-08, 1.0332e-08]],\n",
       "    \n",
       "             [[1.3163e-08, 1.0384e-08, 1.0384e-08,  ..., 1.1605e-08,\n",
       "               1.1605e-08, 1.1605e-08],\n",
       "              [1.0403e-08, 8.5712e-09, 8.5712e-09,  ..., 9.7743e-09,\n",
       "               9.7743e-09, 9.7743e-09],\n",
       "              [1.0403e-08, 8.5712e-09, 8.5712e-09,  ..., 9.7743e-09,\n",
       "               9.7743e-09, 9.7743e-09],\n",
       "              ...,\n",
       "              [1.0370e-08, 8.5395e-09, 8.5395e-09,  ..., 9.9152e-09,\n",
       "               9.9152e-09, 9.9152e-09],\n",
       "              [1.0370e-08, 8.5395e-09, 8.5395e-09,  ..., 9.9152e-09,\n",
       "               9.9152e-09, 9.9152e-09],\n",
       "              [1.0370e-08, 8.5395e-09, 8.5395e-09,  ..., 9.9152e-09,\n",
       "               9.9152e-09, 9.9152e-09]]],\n",
       "    \n",
       "    \n",
       "            [[[2.2941e-08, 2.8182e-08, 2.7511e-08,  ..., 2.2567e-08,\n",
       "               2.2610e-08, 2.2647e-08],\n",
       "              [3.0497e-08, 4.2605e-08, 4.1185e-08,  ..., 2.6246e-08,\n",
       "               2.6275e-08, 2.6757e-08],\n",
       "              [2.9963e-08, 4.2251e-08, 4.2002e-08,  ..., 2.6275e-08,\n",
       "               2.6061e-08, 2.6979e-08],\n",
       "              ...,\n",
       "              [2.7024e-08, 3.4102e-08, 3.5521e-08,  ..., 2.1753e-08,\n",
       "               2.2161e-08, 2.3097e-08],\n",
       "              [2.6663e-08, 3.2994e-08, 3.4358e-08,  ..., 2.1749e-08,\n",
       "               2.1774e-08, 2.2990e-08],\n",
       "              [2.5924e-08, 3.2851e-08, 3.3534e-08,  ..., 2.1902e-08,\n",
       "               2.1808e-08, 2.2053e-08]],\n",
       "    \n",
       "             [[5.2223e-08, 6.2126e-08, 6.8865e-08,  ..., 5.2542e-08,\n",
       "               4.7353e-08, 4.5775e-08],\n",
       "              [6.8244e-08, 8.3162e-08, 9.2426e-08,  ..., 6.2085e-08,\n",
       "               5.3000e-08, 5.4522e-08],\n",
       "              [6.6079e-08, 7.9367e-08, 8.8861e-08,  ..., 5.4897e-08,\n",
       "               4.7532e-08, 5.2070e-08],\n",
       "              ...,\n",
       "              [5.3236e-08, 6.4984e-08, 6.7070e-08,  ..., 4.1971e-08,\n",
       "               3.9926e-08, 3.7160e-08],\n",
       "              [4.7349e-08, 5.6336e-08, 6.0115e-08,  ..., 3.6298e-08,\n",
       "               3.4193e-08, 3.3799e-08],\n",
       "              [4.4714e-08, 5.3861e-08, 5.7228e-08,  ..., 3.5836e-08,\n",
       "               3.2504e-08, 3.2952e-08]],\n",
       "    \n",
       "             [[2.2774e-08, 2.7917e-08, 2.7917e-08,  ..., 2.2935e-08,\n",
       "               2.2935e-08, 2.2935e-08],\n",
       "              [2.9927e-08, 4.1404e-08, 4.1404e-08,  ..., 2.6608e-08,\n",
       "               2.6608e-08, 2.6608e-08],\n",
       "              [2.9927e-08, 4.1404e-08, 4.1404e-08,  ..., 2.6608e-08,\n",
       "               2.6608e-08, 2.6608e-08],\n",
       "              ...,\n",
       "              [2.7271e-08, 3.7295e-08, 3.7295e-08,  ..., 2.2755e-08,\n",
       "               2.2755e-08, 2.2755e-08],\n",
       "              [2.7271e-08, 3.7295e-08, 3.7295e-08,  ..., 2.2755e-08,\n",
       "               2.2755e-08, 2.2755e-08],\n",
       "              [2.7271e-08, 3.7295e-08, 3.7295e-08,  ..., 2.2755e-08,\n",
       "               2.2755e-08, 2.2755e-08]]],\n",
       "    \n",
       "    \n",
       "            ...,\n",
       "    \n",
       "    \n",
       "            [[[3.9921e-08, 4.0951e-08, 3.9283e-08,  ..., 2.6089e-08,\n",
       "               2.7209e-08, 2.8472e-08],\n",
       "              [4.2920e-08, 4.1992e-08, 3.9633e-08,  ..., 2.3939e-08,\n",
       "               2.5342e-08, 2.7017e-08],\n",
       "              [3.9997e-08, 3.9403e-08, 3.7897e-08,  ..., 2.3302e-08,\n",
       "               2.4365e-08, 2.5830e-08],\n",
       "              ...,\n",
       "              [3.2320e-08, 3.4398e-08, 3.4798e-08,  ..., 2.1199e-08,\n",
       "               2.1011e-08, 2.1305e-08],\n",
       "              [3.1051e-08, 3.3426e-08, 3.3799e-08,  ..., 2.0695e-08,\n",
       "               2.0536e-08, 2.0139e-08],\n",
       "              [3.0720e-08, 3.2355e-08, 3.2213e-08,  ..., 2.0008e-08,\n",
       "               1.9965e-08, 1.9304e-08]],\n",
       "    \n",
       "             [[8.8484e-08, 9.3091e-08, 9.4730e-08,  ..., 6.5192e-08,\n",
       "               6.2808e-08, 6.0265e-08],\n",
       "              [9.6188e-08, 9.8416e-08, 9.9025e-08,  ..., 6.1439e-08,\n",
       "               5.9137e-08, 5.8267e-08],\n",
       "              [9.3391e-08, 9.4880e-08, 9.6096e-08,  ..., 5.8668e-08,\n",
       "               5.6459e-08, 5.7302e-08],\n",
       "              ...,\n",
       "              [8.3321e-08, 8.6288e-08, 8.7236e-08,  ..., 5.2806e-08,\n",
       "               4.9190e-08, 4.7892e-08],\n",
       "              [7.9800e-08, 8.2903e-08, 8.4286e-08,  ..., 4.8353e-08,\n",
       "               4.6915e-08, 4.6389e-08],\n",
       "              [7.8681e-08, 8.2020e-08, 8.2341e-08,  ..., 4.6807e-08,\n",
       "               4.5786e-08, 4.5308e-08]],\n",
       "    \n",
       "             [[5.1816e-08, 5.6210e-08, 5.6210e-08,  ..., 3.5695e-08,\n",
       "               3.5695e-08, 3.5695e-08],\n",
       "              [5.7198e-08, 6.2171e-08, 6.2171e-08,  ..., 3.4644e-08,\n",
       "               3.4644e-08, 3.4644e-08],\n",
       "              [5.7198e-08, 6.2171e-08, 6.2171e-08,  ..., 3.4644e-08,\n",
       "               3.4644e-08, 3.4644e-08],\n",
       "              ...,\n",
       "              [5.5392e-08, 5.9934e-08, 5.9934e-08,  ..., 3.2792e-08,\n",
       "               3.2792e-08, 3.2792e-08],\n",
       "              [5.5392e-08, 5.9934e-08, 5.9934e-08,  ..., 3.2792e-08,\n",
       "               3.2792e-08, 3.2792e-08],\n",
       "              [5.5392e-08, 5.9934e-08, 5.9934e-08,  ..., 3.2792e-08,\n",
       "               3.2792e-08, 3.2792e-08]]],\n",
       "    \n",
       "    \n",
       "            [[[1.0535e-11, 1.0321e-11, 1.0321e-11,  ..., 2.2631e-11,\n",
       "               2.2631e-11, 2.3260e-11],\n",
       "              [1.0535e-11, 1.0321e-11, 1.4181e-11,  ..., 2.2631e-11,\n",
       "               2.2913e-11, 2.3356e-11],\n",
       "              [1.0535e-11, 1.0321e-11, 1.8533e-11,  ..., 2.2631e-11,\n",
       "               2.3167e-11, 2.3356e-11],\n",
       "              ...,\n",
       "              [3.8976e-12, 3.6646e-12, 3.6974e-12,  ..., 3.7410e-12,\n",
       "               3.7410e-12, 3.7410e-12],\n",
       "              [3.8976e-12, 3.6646e-12, 3.7232e-12,  ..., 3.7410e-12,\n",
       "               3.7410e-12, 3.7410e-12],\n",
       "              [3.8976e-12, 3.6806e-12, 3.7408e-12,  ..., 3.7410e-12,\n",
       "               3.7410e-12, 3.7410e-12]],\n",
       "    \n",
       "             [[5.5937e-12, 5.2218e-12, 5.0629e-12,  ..., 5.2264e-12,\n",
       "               5.2294e-12, 5.2328e-12],\n",
       "              [5.3314e-12, 5.0370e-12, 4.9439e-12,  ..., 5.2267e-12,\n",
       "               5.2291e-12, 5.2291e-12],\n",
       "              [5.1903e-12, 4.8755e-12, 4.8750e-12,  ..., 5.2279e-12,\n",
       "               5.2291e-12, 5.2291e-12],\n",
       "              ...,\n",
       "              [3.8951e-12, 3.6603e-12, 3.6610e-12,  ..., 3.6644e-12,\n",
       "               3.6644e-12, 3.6644e-12],\n",
       "              [3.8956e-12, 3.6617e-12, 3.6618e-12,  ..., 3.6644e-12,\n",
       "               3.7176e-12, 3.7176e-12],\n",
       "              [3.8962e-12, 3.6621e-12, 3.6621e-12,  ..., 3.9139e-12,\n",
       "               3.9137e-12, 3.9135e-12]],\n",
       "    \n",
       "             [[5.1417e-12, 4.8847e-12, 4.8847e-12,  ..., 5.2291e-12,\n",
       "               5.2291e-12, 5.2291e-12],\n",
       "              [5.1417e-12, 4.8847e-12, 4.8847e-12,  ..., 5.2291e-12,\n",
       "               5.2291e-12, 5.2291e-12],\n",
       "              [5.1417e-12, 4.8847e-12, 4.8847e-12,  ..., 5.2291e-12,\n",
       "               5.2291e-12, 5.2291e-12],\n",
       "              ...,\n",
       "              [3.8976e-12, 3.6646e-12, 3.6646e-12,  ..., 3.6644e-12,\n",
       "               3.6644e-12, 3.6644e-12],\n",
       "              [3.8976e-12, 3.6646e-12, 3.6646e-12,  ..., 3.6644e-12,\n",
       "               3.6644e-12, 3.6644e-12],\n",
       "              [3.8976e-12, 3.6646e-12, 3.6646e-12,  ..., 3.6644e-12,\n",
       "               3.6644e-12, 3.6644e-12]]],\n",
       "    \n",
       "    \n",
       "            [[[2.6013e-12, 2.7326e-12, 2.7326e-12,  ..., 3.3276e-13,\n",
       "               3.3276e-13, 3.3276e-13],\n",
       "              [2.5936e-12, 2.7248e-12, 2.7248e-12,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              [2.5936e-12, 2.7248e-12, 2.7248e-12,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              ...,\n",
       "              [2.5936e-12, 2.7248e-12, 2.7248e-12,  ..., 1.2923e-12,\n",
       "               3.4620e-13, 3.2366e-13],\n",
       "              [2.5936e-12, 2.7248e-12, 2.7248e-12,  ..., 2.0770e-12,\n",
       "               4.2825e-13, 3.2366e-13],\n",
       "              [2.5936e-12, 2.7248e-12, 2.7248e-12,  ..., 2.7248e-12,\n",
       "               8.5232e-13, 3.2366e-13]],\n",
       "    \n",
       "             [[8.4778e-13, 8.5806e-13, 8.5806e-13,  ..., 8.5806e-13,\n",
       "               5.5084e-13, 4.0048e-13],\n",
       "              [8.3964e-13, 8.4995e-13, 8.4995e-13,  ..., 7.7185e-13,\n",
       "               3.6668e-13, 3.2366e-13],\n",
       "              [7.6505e-13, 7.9298e-13, 7.9298e-13,  ..., 4.5574e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              ...,\n",
       "              [6.8411e-13, 6.3974e-13, 4.5574e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              [4.1089e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              [3.1828e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13]],\n",
       "    \n",
       "             [[3.2742e-13, 3.3276e-13, 3.3276e-13,  ..., 3.3276e-13,\n",
       "               3.3276e-13, 3.3276e-13],\n",
       "              [3.1828e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              [3.1828e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              ...,\n",
       "              [3.1828e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              [3.1828e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13],\n",
       "              [3.1828e-13, 3.2366e-13, 3.2366e-13,  ..., 3.2366e-13,\n",
       "               3.2366e-13, 3.2366e-13]]]], device='cuda:0')},\n",
       "   1: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 0.0000e+00,  4.7074e-05, -3.0543e-05,  5.6052e-45,  1.9696e-04,\n",
       "             8.7055e-05, -9.2234e-05,  1.6215e-04, -4.7875e-07,  5.8801e-07,\n",
       "             0.0000e+00, -4.1202e-07,  4.1878e-06,  1.5603e-05,  4.8224e-05,\n",
       "             2.6962e-05,  5.2867e-08, -1.0972e-05,  1.2127e-04,  4.8723e-05,\n",
       "             1.0961e-06, -6.4275e-05, -1.8765e-05,  0.0000e+00,  1.8381e-38,\n",
       "             1.2678e-06,  1.5143e-04,  1.7177e-08, -8.1702e-05, -4.8003e-05,\n",
       "            -3.7242e-07, -3.6493e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([0.0000e+00, 6.8988e-08, 1.5265e-07, 1.6216e-12, 2.4790e-07, 3.1658e-08,\n",
       "            3.6350e-08, 1.6964e-07, 1.5806e-10, 3.7790e-10, 0.0000e+00, 1.3109e-10,\n",
       "            7.7520e-09, 3.0109e-07, 5.7174e-08, 1.0827e-08, 5.2314e-10, 1.6706e-07,\n",
       "            7.0676e-08, 5.0660e-08, 7.0547e-10, 1.1119e-07, 1.1667e-08, 0.0000e+00,\n",
       "            5.0709e-13, 4.9553e-10, 2.0341e-07, 1.2817e-12, 1.1692e-07, 1.4725e-07,\n",
       "            5.1546e-10, 8.0924e-10], device='cuda:0')},\n",
       "   2: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 8.5774e-07,  1.0593e-06,  1.0215e-06,  1.1489e-06],\n",
       "              [ 4.4212e-07,  9.4667e-07,  7.7398e-07,  1.3372e-06],\n",
       "              [ 6.1365e-08,  9.9238e-07,  8.6100e-07,  1.2441e-06],\n",
       "              [-9.1601e-08,  1.0469e-06,  8.0074e-07,  1.2753e-06]],\n",
       "    \n",
       "             [[ 9.8044e-07,  1.5798e-06,  1.2067e-06,  1.4281e-07],\n",
       "              [ 4.3513e-07,  1.6557e-06,  9.0293e-07,  1.4991e-07],\n",
       "              [ 4.8023e-08,  9.8408e-07,  8.9634e-07, -1.6838e-08],\n",
       "              [-1.7406e-07,  1.3219e-06,  1.1617e-06,  2.2712e-07]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 2.5722e-06,  3.0499e-06,  2.6599e-06,  3.2010e-06],\n",
       "              [ 2.9376e-06,  2.4294e-06,  2.2894e-06,  2.9483e-06],\n",
       "              [ 2.7879e-06,  2.8334e-06,  2.5441e-06,  3.3284e-06],\n",
       "              [ 2.7680e-06,  2.8666e-06,  2.4003e-06,  3.6209e-06]],\n",
       "    \n",
       "             [[ 1.3246e-08,  1.3246e-08,  2.2174e-09, -9.4471e-28],\n",
       "              [ 1.0416e-08,  8.4166e-09, -3.5952e-11,  6.8490e-10],\n",
       "              [ 1.0416e-08,  8.4166e-09, -3.5952e-11,  6.8490e-10],\n",
       "              [ 1.0416e-08,  8.4166e-09, -3.5952e-11,  6.8490e-10]],\n",
       "    \n",
       "             [[ 9.4258e-09,  1.6316e-08,  5.5620e-08, -6.2210e-28],\n",
       "              [ 7.4174e-09,  1.7427e-08,  4.7126e-08,  4.8345e-10],\n",
       "              [ 7.4174e-09,  1.7427e-08,  4.7126e-08,  4.8345e-10],\n",
       "              [ 7.4174e-09,  1.7427e-08,  4.7126e-08,  4.8345e-10]]],\n",
       "    \n",
       "    \n",
       "            [[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[-6.9549e-06, -6.4982e-06, -7.8447e-06, -8.4203e-06],\n",
       "              [-6.6608e-06, -4.3025e-06, -7.8907e-06, -6.3231e-06],\n",
       "              [-7.6046e-06, -5.3017e-06, -7.2628e-06, -7.6786e-06],\n",
       "              [-6.4718e-06, -3.2394e-06, -6.4850e-06, -5.7843e-06]],\n",
       "    \n",
       "             [[-1.6727e-05, -1.2607e-05, -1.5968e-05, -2.7292e-06],\n",
       "              [-1.6181e-05, -9.9021e-06, -1.5730e-05, -3.3003e-06],\n",
       "              [-1.0587e-05, -5.7059e-06, -1.4805e-05, -4.2844e-06],\n",
       "              [-1.2557e-05, -7.7188e-06, -1.6381e-05, -5.3979e-06]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[-3.2799e-05, -2.6671e-05, -2.6091e-05, -2.2149e-05],\n",
       "              [-2.8473e-05, -2.4840e-05, -2.4584e-05, -2.2282e-05],\n",
       "              [-2.6686e-05, -2.3672e-05, -2.5964e-05, -2.2676e-05],\n",
       "              [-2.5877e-05, -2.3255e-05, -2.4432e-05, -2.2130e-05]],\n",
       "    \n",
       "             [[-1.9455e-08, -2.9805e-08, -2.3828e-08, -3.3401e-08],\n",
       "              [-2.5034e-08, -2.3934e-08, -1.4899e-08, -3.3767e-08],\n",
       "              [-2.4612e-08, -2.3512e-08, -1.4899e-08, -2.6783e-08],\n",
       "              [ 6.8396e-09, -2.3512e-08, -7.0974e-09, -2.6783e-08]],\n",
       "    \n",
       "             [[-1.3828e-08, -3.9202e-08, -5.6889e-08, -4.7218e-08],\n",
       "              [-1.7780e-08, -3.6267e-08, -5.5333e-08, -4.9165e-08],\n",
       "              [-1.7481e-08, -3.5968e-08, -5.5333e-08, -4.4200e-08],\n",
       "              [-1.7481e-08, -3.5968e-08, -5.5333e-08, -4.4200e-08]]],\n",
       "    \n",
       "    \n",
       "            [[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[-8.7678e-06, -1.1823e-05, -1.6576e-05, -2.0337e-05],\n",
       "              [-1.0011e-05, -7.0617e-06, -1.7381e-05, -1.7731e-05],\n",
       "              [-1.1008e-05, -1.1572e-05, -1.7757e-05, -1.9850e-05],\n",
       "              [-8.6411e-06, -5.7022e-06, -1.4162e-05, -1.6384e-05]],\n",
       "    \n",
       "             [[-1.4109e-05,  2.1898e-06, -1.7900e-05, -6.9370e-07],\n",
       "              [-1.6825e-05,  4.7354e-06, -1.8519e-05, -2.0186e-06],\n",
       "              [-5.3409e-06,  1.4565e-05, -9.0637e-06,  3.0607e-06],\n",
       "              [-1.0645e-05,  7.9349e-06, -1.2334e-05, -3.2676e-06]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[-2.5220e-05, -1.8304e-05, -3.8698e-05, -3.1959e-05],\n",
       "              [-1.8446e-05, -1.8563e-05, -3.1900e-05, -3.0587e-05],\n",
       "              [-1.5790e-05, -1.3834e-05, -3.1935e-05, -3.2773e-05],\n",
       "              [-1.1205e-05, -1.1079e-05, -2.8581e-05, -3.2120e-05]],\n",
       "    \n",
       "             [[-2.4941e-09, -2.7893e-10, -5.0993e-09, -7.6281e-09],\n",
       "              [-1.4307e-08, -1.1334e-08, -1.3202e-08, -2.0978e-08],\n",
       "              [-1.4307e-08, -1.1334e-08, -1.3202e-08, -1.3125e-08],\n",
       "              [ 2.9742e-08, -1.1334e-08, -1.2098e-08, -1.3125e-08]],\n",
       "    \n",
       "             [[-1.7520e-08, -2.1404e-08, -4.6325e-10, -1.1823e-08],\n",
       "              [-2.0349e-08, -3.7862e-08, -4.2628e-09, -2.5019e-08],\n",
       "              [-2.0349e-08, -3.7862e-08, -4.2628e-09, -1.9417e-08],\n",
       "              [-2.0349e-08, -3.7862e-08, -4.2628e-09, -1.9417e-08]]],\n",
       "    \n",
       "    \n",
       "            ...,\n",
       "    \n",
       "    \n",
       "            [[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[-2.1798e-07, -7.1687e-08, -2.9650e-08, -1.1189e-08],\n",
       "              [-2.3603e-07, -5.1087e-08,  4.5002e-34,  4.3526e-34],\n",
       "              [-2.3603e-07, -5.1087e-08,  4.3464e-34,  4.1870e-34],\n",
       "              [-2.3603e-07, -5.1087e-08,  4.3859e-34,  4.0820e-34]],\n",
       "    \n",
       "             [[-1.4718e-07, -2.9774e-07, -1.1029e-07, -8.6872e-09],\n",
       "              [-1.5892e-07, -2.7213e-07, -1.0038e-07,  5.7808e-34],\n",
       "              [-1.5892e-07, -2.7213e-07, -1.0038e-07,  5.3766e-34],\n",
       "              [-1.5892e-07, -2.7213e-07, -1.0038e-07,  5.7093e-34]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[-4.7293e-07, -4.8640e-07, -1.3895e-07, -1.6409e-08],\n",
       "              [-4.8852e-07, -4.7231e-07, -1.2812e-07,  1.7975e-33],\n",
       "              [-4.8852e-07, -4.7231e-07, -1.2812e-07,  1.8197e-33],\n",
       "              [-4.8852e-07, -4.7231e-07, -1.2812e-07,  1.7900e-33]],\n",
       "    \n",
       "             [[ 8.6163e-11,  4.1900e-09,  3.6319e-09, -3.6469e-09],\n",
       "              [ 5.0895e-11,  9.9375e-09,  9.1117e-09, -8.0108e-09],\n",
       "              [ 5.0895e-11,  9.9375e-09,  9.1117e-09, -8.0108e-09],\n",
       "              [ 5.0895e-11,  9.9375e-09,  9.1117e-09, -8.0108e-09]],\n",
       "    \n",
       "             [[-3.2984e-09,  3.0149e-09,  2.6184e-09, -2.4321e-09],\n",
       "              [-3.8312e-09,  7.1624e-09,  6.5728e-09, -5.4222e-09],\n",
       "              [-3.8312e-09,  7.1624e-09,  6.5728e-09, -5.4222e-09],\n",
       "              [-3.8312e-09,  7.1624e-09,  6.5728e-09, -5.4222e-09]]],\n",
       "    \n",
       "    \n",
       "            [[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 2.2678e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 2.2678e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 2.2678e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 2.2678e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 2.8434e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 2.8434e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 2.8434e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 2.8434e-11,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  3.8713e-12,  3.8713e-12,  3.8713e-12],\n",
       "              [ 0.0000e+00,  3.8713e-12,  3.8713e-12,  3.8713e-12],\n",
       "              [ 0.0000e+00,  3.8713e-12,  3.8713e-12,  3.8713e-12],\n",
       "              [ 0.0000e+00,  3.8713e-12,  3.8713e-12,  3.8713e-12]],\n",
       "    \n",
       "             [[-8.1042e-30,  2.6938e-12,  2.6938e-12,  2.6938e-12],\n",
       "              [-8.1042e-30,  2.6938e-12,  2.6938e-12,  2.6938e-12],\n",
       "              [-8.1042e-30,  2.6938e-12,  2.6938e-12,  2.6938e-12],\n",
       "              [-8.1042e-30,  2.6938e-12,  2.6938e-12,  2.6938e-12]]],\n",
       "    \n",
       "    \n",
       "            [[[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[-3.7006e-06, -1.5890e-06, -6.9798e-06, -1.2995e-06],\n",
       "              [-3.6032e-06, -2.8093e-06, -8.7997e-06, -1.1160e-06],\n",
       "              [-2.0525e-06, -2.5455e-06, -6.9843e-06, -1.9435e-06],\n",
       "              [-2.5750e-06, -1.9276e-06, -7.5834e-06, -1.0396e-06]],\n",
       "    \n",
       "             [[-8.5773e-06, -2.9439e-06, -1.5031e-05, -1.6286e-06],\n",
       "              [-8.3640e-06, -2.5703e-06, -1.3814e-05,  1.2880e-06],\n",
       "              [-7.9315e-06, -5.1841e-07, -1.2035e-05,  2.1134e-06],\n",
       "              [-8.8919e-06, -6.9809e-07, -1.1669e-05,  3.0029e-06]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[-1.7594e-05, -1.5747e-05, -1.7130e-05, -9.8938e-06],\n",
       "              [-1.8342e-05, -1.5576e-05, -1.6894e-05, -1.1955e-05],\n",
       "              [-1.6748e-05, -1.2990e-05, -1.4661e-05, -8.8456e-06],\n",
       "              [-1.7311e-05, -1.2775e-05, -1.3899e-05, -7.4150e-06]],\n",
       "    \n",
       "             [[ 1.7916e-08,  1.5739e-08,  1.3485e-08,  3.0078e-08],\n",
       "              [ 2.2987e-08,  1.5727e-08,  8.7557e-09,  3.6350e-08],\n",
       "              [ 1.1466e-08,  1.0597e-09, -4.7295e-09,  1.5538e-08],\n",
       "              [-1.1219e-08,  1.0597e-09, -4.6682e-09,  1.5538e-08]],\n",
       "    \n",
       "             [[-1.4703e-08,  1.0276e-07, -2.0112e-08,  2.6683e-08],\n",
       "              [-1.8180e-08,  1.1380e-07, -2.3522e-08,  3.1027e-08],\n",
       "              [-1.0644e-08,  5.9613e-08, -3.4103e-09,  1.0935e-08],\n",
       "              [-1.0644e-08,  5.9613e-08, -3.4103e-09,  1.0935e-08]]]],\n",
       "           device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[5.3332e-11, 1.1612e-10, 1.3446e-10, 1.8118e-10],\n",
       "              [3.1653e-11, 1.1234e-10, 1.1105e-10, 2.2976e-10],\n",
       "              [1.8865e-11, 7.8201e-11, 1.0881e-10, 1.7764e-10],\n",
       "              [1.7913e-11, 6.2867e-11, 1.1077e-10, 1.8057e-10]],\n",
       "    \n",
       "             [[1.3110e-10, 1.7622e-10, 3.0931e-10, 7.4130e-11],\n",
       "              [7.4047e-11, 2.4239e-10, 3.2185e-10, 1.2181e-10],\n",
       "              [4.1886e-11, 7.8468e-11, 3.0472e-10, 9.8460e-11],\n",
       "              [3.0660e-11, 1.3319e-10, 3.3371e-10, 9.1739e-11]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[1.7992e-09, 2.3090e-09, 2.8570e-09, 3.5851e-09],\n",
       "              [2.0433e-09, 1.9645e-09, 2.6234e-09, 3.3929e-09],\n",
       "              [1.9913e-09, 2.4220e-09, 2.7461e-09, 3.5908e-09],\n",
       "              [2.0871e-09, 2.2002e-09, 2.6020e-09, 3.4352e-09]],\n",
       "    \n",
       "             [[1.5990e-15, 1.9772e-15, 1.9877e-16, 8.2534e-16],\n",
       "              [1.7503e-14, 1.3945e-14, 1.0792e-14, 7.0144e-15],\n",
       "              [6.2847e-15, 4.0716e-15, 1.7093e-15, 2.5770e-15],\n",
       "              [5.7284e-14, 3.9567e-15, 7.8560e-15, 2.6021e-15]],\n",
       "    \n",
       "             [[3.6585e-15, 1.4364e-14, 4.0137e-14, 3.4773e-16],\n",
       "              [1.3092e-15, 1.9360e-14, 6.0011e-14, 1.1984e-15],\n",
       "              [1.0344e-14, 3.7038e-14, 6.0011e-14, 1.1984e-15],\n",
       "              [1.3086e-15, 1.9359e-14, 6.0011e-14, 1.1984e-15]]],\n",
       "    \n",
       "    \n",
       "            [[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[7.5117e-10, 7.8782e-10, 6.0240e-10, 7.3161e-10],\n",
       "              [7.0864e-10, 6.6956e-10, 5.8451e-10, 8.2926e-10],\n",
       "              [6.8017e-10, 6.6745e-10, 5.2233e-10, 7.0917e-10],\n",
       "              [8.0317e-10, 7.6452e-10, 6.1213e-10, 7.3947e-10]],\n",
       "    \n",
       "             [[3.0615e-09, 4.1350e-09, 2.6096e-09, 2.8135e-09],\n",
       "              [2.3369e-09, 2.6978e-09, 2.2601e-09, 1.9174e-09],\n",
       "              [2.3532e-09, 3.6020e-09, 1.9086e-09, 2.2888e-09],\n",
       "              [2.2260e-09, 2.3968e-09, 1.6338e-09, 1.3945e-09]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[1.5987e-08, 1.3618e-08, 1.3080e-08, 1.2667e-08],\n",
       "              [1.4922e-08, 1.2973e-08, 1.2346e-08, 1.2579e-08],\n",
       "              [1.4928e-08, 1.1745e-08, 1.1947e-08, 1.2206e-08],\n",
       "              [1.4436e-08, 1.2160e-08, 1.1837e-08, 1.1979e-08]],\n",
       "    \n",
       "             [[2.2679e-14, 2.5310e-14, 2.0406e-14, 2.2174e-14],\n",
       "              [3.4373e-14, 3.6875e-14, 4.1569e-14, 4.7344e-14],\n",
       "              [2.9614e-14, 2.8365e-14, 3.0287e-14, 3.2106e-14],\n",
       "              [1.3159e-12, 4.5848e-14, 1.7577e-12, 5.5306e-14]],\n",
       "    \n",
       "             [[4.7390e-14, 4.7985e-14, 7.2473e-14, 8.4763e-14],\n",
       "              [8.0102e-14, 9.6990e-14, 9.5902e-14, 1.7957e-13],\n",
       "              [8.4038e-14, 8.9804e-14, 7.0207e-14, 1.7787e-13],\n",
       "              [7.6305e-14, 8.7228e-14, 7.0129e-14, 1.7576e-13]]],\n",
       "    \n",
       "    \n",
       "            [[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[1.9541e-09, 2.0468e-09, 1.4766e-09, 1.7786e-09],\n",
       "              [1.9515e-09, 1.9744e-09, 1.5406e-09, 1.6415e-09],\n",
       "              [1.9701e-09, 2.2609e-09, 1.4545e-09, 1.9848e-09],\n",
       "              [2.1377e-09, 2.1775e-09, 1.4787e-09, 1.6241e-09]],\n",
       "    \n",
       "             [[1.2059e-08, 1.4802e-08, 9.3081e-09, 7.8528e-09],\n",
       "              [1.2430e-08, 1.1979e-08, 8.9218e-09, 5.6073e-09],\n",
       "              [1.2377e-08, 1.6212e-08, 7.6455e-09, 6.4061e-09],\n",
       "              [1.0588e-08, 1.0184e-08, 5.3213e-09, 3.3310e-09]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[5.5500e-08, 4.4383e-08, 3.6561e-08, 3.2108e-08],\n",
       "              [5.1106e-08, 4.3139e-08, 3.2945e-08, 3.1292e-08],\n",
       "              [5.0868e-08, 3.6619e-08, 2.9999e-08, 2.6091e-08],\n",
       "              [4.3904e-08, 3.4780e-08, 2.5433e-08, 2.3911e-08]],\n",
       "    \n",
       "             [[7.9065e-15, 1.1736e-14, 1.8475e-14, 2.3225e-14],\n",
       "              [1.8372e-14, 2.5667e-14, 1.8910e-14, 5.3039e-14],\n",
       "              [2.8283e-14, 2.6749e-14, 2.6759e-14, 5.0388e-14],\n",
       "              [1.1655e-12, 6.0956e-14, 7.8368e-13, 6.5678e-14]],\n",
       "    \n",
       "             [[2.1438e-14, 3.0256e-14, 8.9456e-15, 3.3155e-14],\n",
       "              [3.6076e-14, 9.4251e-14, 2.2048e-14, 1.1374e-13],\n",
       "              [4.5178e-14, 8.9939e-14, 1.9281e-14, 9.6211e-14],\n",
       "              [4.0484e-14, 7.3579e-14, 1.9079e-14, 9.5683e-14]]],\n",
       "    \n",
       "    \n",
       "            ...,\n",
       "    \n",
       "    \n",
       "            [[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[3.4733e-12, 1.1287e-13, 1.4060e-13, 9.6516e-14],\n",
       "              [2.9065e-12, 6.4309e-14, 7.7581e-14, 7.0491e-14],\n",
       "              [2.9058e-12, 6.2025e-14, 7.7657e-14, 6.8506e-14],\n",
       "              [2.8983e-12, 9.9431e-14, 1.2893e-13, 1.0858e-13]],\n",
       "    \n",
       "             [[1.0733e-11, 1.2514e-11, 2.2961e-12, 1.6694e-13],\n",
       "              [1.0036e-11, 1.0253e-11, 1.6982e-12, 1.3770e-13],\n",
       "              [1.0034e-11, 1.0255e-11, 1.6909e-12, 1.1944e-13],\n",
       "              [9.9977e-12, 1.0257e-11, 1.7801e-12, 2.1242e-13]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[6.6219e-11, 4.1076e-11, 5.8177e-12, 1.4917e-12],\n",
       "              [6.3408e-11, 3.5907e-11, 4.4169e-12, 1.2903e-12],\n",
       "              [6.3390e-11, 3.5906e-11, 4.4503e-12, 1.3808e-12],\n",
       "              [6.3643e-11, 3.6230e-11, 4.6438e-12, 1.5082e-12]],\n",
       "    \n",
       "             [[9.9201e-16, 5.3247e-15, 1.8021e-14, 6.1346e-14],\n",
       "              [1.1984e-15, 1.9420e-14, 3.8640e-14, 1.4065e-13],\n",
       "              [1.2005e-15, 1.9348e-14, 3.8627e-14, 1.4065e-13],\n",
       "              [1.1978e-15, 1.9368e-14, 3.8656e-14, 1.4066e-13]],\n",
       "    \n",
       "             [[1.2395e-13, 2.4378e-15, 8.0617e-15, 2.8276e-14],\n",
       "              [1.3291e-13, 8.8118e-15, 1.7235e-14, 6.4865e-14],\n",
       "              [1.3291e-13, 8.8118e-15, 1.7235e-14, 6.4864e-14],\n",
       "              [1.3291e-13, 8.8118e-15, 1.7235e-14, 6.4864e-14]]],\n",
       "    \n",
       "    \n",
       "            [[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[2.2229e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [2.2229e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [2.2229e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [2.2229e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[5.7743e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [5.7743e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [5.7743e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [5.7743e-14, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 9.6361e-16, 9.6361e-16, 9.6361e-16],\n",
       "              [0.0000e+00, 9.6361e-16, 9.6361e-16, 9.6361e-16],\n",
       "              [0.0000e+00, 9.6361e-16, 9.6361e-16, 9.6361e-16],\n",
       "              [0.0000e+00, 9.6361e-16, 9.6361e-16, 9.6361e-16]],\n",
       "    \n",
       "             [[1.0029e-19, 2.9977e-16, 2.9977e-16, 2.9977e-16],\n",
       "              [1.0029e-19, 2.9977e-16, 2.9977e-16, 2.9977e-16],\n",
       "              [1.0029e-19, 2.9977e-16, 2.9977e-16, 2.9977e-16],\n",
       "              [1.0029e-19, 2.9977e-16, 2.9977e-16, 2.9977e-16]]],\n",
       "    \n",
       "    \n",
       "            [[[0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[1.4351e-09, 8.8011e-10, 5.2795e-10, 5.1218e-10],\n",
       "              [1.5862e-09, 7.7370e-10, 4.2965e-10, 4.1132e-10],\n",
       "              [1.7613e-09, 9.2702e-10, 3.4466e-10, 3.7797e-10],\n",
       "              [1.4906e-09, 9.7353e-10, 4.5587e-10, 3.8215e-10]],\n",
       "    \n",
       "             [[2.0749e-09, 4.3556e-09, 1.1139e-09, 1.4005e-09],\n",
       "              [2.4426e-09, 4.6243e-09, 1.2125e-09, 1.7196e-09],\n",
       "              [2.7912e-09, 5.3681e-09, 1.3197e-09, 1.7923e-09],\n",
       "              [2.9040e-09, 5.3567e-09, 1.3847e-09, 1.9023e-09]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[1.5397e-08, 1.1780e-08, 6.0801e-09, 4.9312e-09],\n",
       "              [1.7213e-08, 1.2761e-08, 6.2881e-09, 4.8276e-09],\n",
       "              [1.7616e-08, 1.2833e-08, 6.2791e-09, 4.9934e-09],\n",
       "              [1.8347e-08, 1.3992e-08, 6.1074e-09, 4.7925e-09]],\n",
       "    \n",
       "             [[3.4513e-14, 5.1840e-14, 5.1384e-14, 5.5564e-14],\n",
       "              [1.8366e-14, 1.6034e-14, 2.4504e-14, 2.9915e-14],\n",
       "              [1.9939e-14, 2.9761e-14, 6.2560e-14, 7.4408e-14],\n",
       "              [1.7665e-13, 2.8671e-14, 1.3707e-12, 8.2570e-14]],\n",
       "    \n",
       "             [[5.9745e-14, 1.1869e-13, 4.3857e-14, 1.4970e-13],\n",
       "              [9.6804e-14, 6.8885e-14, 2.0695e-14, 7.8521e-14],\n",
       "              [1.8177e-13, 8.5619e-14, 2.7486e-14, 3.1667e-14],\n",
       "              [1.8036e-13, 2.6153e-14, 2.6157e-14, 3.1666e-14]]]], device='cuda:0')},\n",
       "   3: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 3.4046e-06, -2.4894e-05, -3.5716e-05,  1.9712e-11,  4.3914e-05,\n",
       "             7.3711e-05, -7.8908e-07,  1.9725e-06,  2.6308e-06,  1.7204e-07,\n",
       "             1.4317e-05,  2.9570e-05, -4.5938e-05,  1.0529e-06,  7.3943e-06,\n",
       "             1.6177e-05,  9.2430e-08, -4.7995e-07,  6.7442e-06,  2.7674e-05,\n",
       "            -2.0069e-08,  6.9646e-06,  1.9687e-05, -2.4684e-06, -2.4450e-05,\n",
       "             4.0479e-05,  4.2916e-05,  1.2435e-05, -9.1920e-08, -1.9631e-05,\n",
       "            -2.7474e-07,  1.7435e-06,  5.0157e-05,  7.1251e-08,  1.1487e-05,\n",
       "             2.0432e-06,  4.5736e-07,  4.3372e-05, -2.7548e-05,  6.1212e-37,\n",
       "            -4.8651e-08,  7.1854e-05,  1.1838e-06, -4.2604e-06,  5.0847e-05,\n",
       "             1.1104e-07,  3.3263e-06,  5.7716e-06, -3.8348e-05, -4.9670e-06,\n",
       "            -4.2447e-07, -1.6937e-05,  1.1548e-05, -2.7800e-08,  7.3952e-07,\n",
       "             1.5254e-08,  1.6561e-30, -1.5993e-05,  5.6052e-45,  3.2814e-05,\n",
       "             1.7223e-05, -1.6107e-07,  7.4259e-11, -9.4930e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.6802e-09, 7.6527e-09, 2.9151e-08, 1.0042e-12, 3.0384e-08, 3.1214e-08,\n",
       "            9.1478e-09, 1.0408e-10, 3.4467e-10, 4.2665e-11, 9.4470e-09, 4.3269e-09,\n",
       "            4.6979e-08, 1.4971e-09, 9.0373e-10, 5.0982e-09, 3.0354e-12, 4.5795e-09,\n",
       "            1.2506e-08, 7.2092e-09, 2.8117e-10, 2.4361e-08, 1.1626e-08, 4.7912e-09,\n",
       "            4.5056e-08, 1.5226e-08, 6.7414e-09, 8.3864e-09, 2.5909e-09, 6.1148e-09,\n",
       "            4.5808e-11, 4.1163e-09, 2.0871e-08, 5.3598e-12, 2.1197e-09, 2.9338e-09,\n",
       "            1.1018e-10, 7.5202e-09, 7.2048e-09, 4.1947e-13, 4.0856e-11, 5.2501e-08,\n",
       "            2.5322e-09, 1.8706e-09, 1.6495e-08, 2.1265e-12, 1.6091e-08, 4.0075e-09,\n",
       "            2.2860e-08, 1.8436e-08, 3.3855e-11, 8.7976e-09, 2.6055e-08, 2.0153e-11,\n",
       "            1.8476e-10, 2.4828e-11, 2.5559e-14, 2.2495e-08, 4.3188e-17, 3.8449e-09,\n",
       "            8.4850e-09, 5.2711e-11, 4.1950e-13, 2.9642e-09], device='cuda:0')},\n",
       "   4: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[[[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]]],\n",
       "    \n",
       "    \n",
       "            [[[-3.1271e-09, -3.2185e-08,  2.0542e-08],\n",
       "              [-1.5040e-07, -2.7314e-07, -5.7551e-08],\n",
       "              [ 4.0705e-07, -2.4960e-07, -5.2013e-07]],\n",
       "    \n",
       "             [[ 3.5415e-06, -8.5769e-07, -5.0182e-06],\n",
       "              [ 9.3751e-06,  6.2184e-06,  3.3931e-06],\n",
       "              [ 6.8489e-06,  1.0445e-05,  9.2200e-06]],\n",
       "    \n",
       "             [[ 5.1886e-06,  9.4769e-07, -3.4130e-06],\n",
       "              [ 1.3609e-05,  6.6055e-06, -1.3858e-06],\n",
       "              [ 8.2672e-06,  6.3695e-06,  5.1503e-06]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 5.6052e-45,  1.9314e-14,  4.4793e-08],\n",
       "              [ 5.6052e-45,  2.0587e-14,  5.3112e-08],\n",
       "              [-4.5019e-36,  1.4530e-35,  1.4273e-35]],\n",
       "    \n",
       "             [[ 5.6052e-45,  0.0000e+00, -5.6052e-45],\n",
       "              [ 5.6052e-45,  5.6052e-45,  1.8258e-15],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 1.3428e-05,  1.2805e-05,  8.9257e-06],\n",
       "              [ 3.9551e-06,  6.0045e-06,  6.2337e-06],\n",
       "              [ 6.7264e-06,  7.9307e-06,  5.9948e-06]]],\n",
       "    \n",
       "    \n",
       "            [[[ 4.0436e-08,  7.9481e-10, -4.3824e-09],\n",
       "              [ 2.3238e-08,  2.8129e-09, -6.2960e-12],\n",
       "              [-5.7941e-10, -7.8026e-41, -4.2476e-41]],\n",
       "    \n",
       "             [[-5.4821e-07, -2.9851e-07, -1.0821e-07],\n",
       "              [-5.4653e-07, -2.9815e-07, -1.1738e-07],\n",
       "              [-3.9734e-07, -5.8308e-08, -2.9712e-09]],\n",
       "    \n",
       "             [[-4.8326e-07, -3.4421e-07, -7.4340e-08],\n",
       "              [-4.3812e-07, -2.6540e-07,  3.7568e-09],\n",
       "              [-1.8660e-07, -7.8106e-08, -2.9151e-09]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[-3.8432e-08,  3.8802e-09,  1.2866e-08],\n",
       "              [-5.2458e-08, -2.4067e-08, -1.8978e-08],\n",
       "              [ 5.6052e-45,  0.0000e+00,  5.6052e-45]],\n",
       "    \n",
       "             [[-6.6868e-17,  9.9492e-43,  8.8662e-18],\n",
       "              [ 5.6868e-14, -2.8911e-13,  1.5346e-13],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[-6.9372e-07, -3.2074e-07, -1.3315e-08],\n",
       "              [-6.6851e-07, -2.9329e-07, -9.0574e-10],\n",
       "              [-6.4156e-07, -5.9050e-07, -5.0467e-07]]],\n",
       "    \n",
       "    \n",
       "            ...,\n",
       "    \n",
       "    \n",
       "            [[[ 5.5143e-15,  7.5726e-42,  5.7007e-15],\n",
       "              [ 1.9105e-41,  1.2451e-41, -9.2065e-43],\n",
       "              [ 1.1967e-42,  9.0706e-42,  3.4472e-43]],\n",
       "    \n",
       "             [[ 5.9781e-14,  4.4642e-14,  1.3559e-14],\n",
       "              [ 5.9689e-14,  4.6471e-14,  1.5580e-14],\n",
       "              [ 1.2376e-40,  2.5787e-15,  1.7808e-40]],\n",
       "    \n",
       "             [[ 8.7411e-14,  3.4540e-14,  4.0556e-14],\n",
       "              [ 5.7406e-14,  1.2107e-41,  8.7245e-42],\n",
       "              [ 1.2783e-40,  4.1137e-41,  2.2522e-41]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 5.0447e-43,  4.4299e-15,  7.8753e-43],\n",
       "              [ 5.6052e-45,  1.8582e-14,  1.0902e-42],\n",
       "              [ 0.0000e+00,  0.0000e+00, -5.6052e-45]],\n",
       "    \n",
       "             [[ 1.2331e-43, -9.9492e-44,  1.5975e-43],\n",
       "              [-8.4078e-44,  8.7962e-15,  5.1147e-43],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 9.2077e-15,  4.7078e-15,  1.4005e-15],\n",
       "              [ 1.7091e-15,  1.0331e-40,  1.2263e-40],\n",
       "              [ 1.6798e-13,  1.3880e-13,  1.3895e-13]]],\n",
       "    \n",
       "    \n",
       "            [[[ 2.1014e-11, -6.8918e-11,  1.7863e-10],\n",
       "              [ 2.4748e-18, -7.7198e-11,  4.9900e-11],\n",
       "              [ 1.4073e-41,  4.4281e-42,  1.1416e-41]],\n",
       "    \n",
       "             [[ 4.4414e-10,  4.0722e-10,  9.4041e-10],\n",
       "              [ 4.8386e-10,  5.3958e-10,  1.0849e-09],\n",
       "              [-2.5978e-11, -3.7992e-11, -4.5092e-11]],\n",
       "    \n",
       "             [[ 1.1893e-09,  9.3585e-10,  1.0499e-09],\n",
       "              [ 3.8999e-10, -4.4105e-11,  3.4542e-10],\n",
       "              [-1.3636e-11, -2.7876e-11, -4.2272e-11]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 6.8423e-12,  2.1136e-11,  1.5637e-17],\n",
       "              [ 2.4054e-10,  5.6638e-11, -3.6649e-11],\n",
       "              [-5.6052e-45, -5.6052e-45,  5.6052e-45]],\n",
       "    \n",
       "             [[ 0.0000e+00,  0.0000e+00,  0.0000e+00],\n",
       "              [ 3.2971e-14, -2.5661e-13,  2.3764e-17],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[-2.0555e-11, -3.1714e-11, -2.7607e-11],\n",
       "              [-1.7175e-11, -1.4306e-11, -2.4294e-11],\n",
       "              [ 3.0680e-09,  3.1549e-09,  2.9456e-09]]],\n",
       "    \n",
       "    \n",
       "            [[[-6.6798e-09,  8.5409e-09,  4.4765e-08],\n",
       "              [ 9.8054e-08,  7.0062e-08,  1.8260e-08],\n",
       "              [ 1.1289e-07, -4.8915e-08,  4.6582e-08]],\n",
       "    \n",
       "             [[ 1.3227e-05,  1.1751e-05,  1.5490e-05],\n",
       "              [ 1.4549e-05,  1.4767e-05,  1.9531e-05],\n",
       "              [ 1.5360e-05,  1.6005e-05,  1.8069e-05]],\n",
       "    \n",
       "             [[ 1.0801e-05,  8.8887e-06,  1.4334e-05],\n",
       "              [ 1.1425e-05,  1.0912e-05,  1.6245e-05],\n",
       "              [ 1.1865e-05,  1.1345e-05,  1.3801e-05]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[ 6.3230e-36,  7.1783e-08, -1.6613e-07],\n",
       "              [-2.2052e-35,  8.7350e-08,  2.7412e-08],\n",
       "              [-1.2776e-35, -9.6536e-35, -1.0391e-34]],\n",
       "    \n",
       "             [[-5.6052e-45,  5.6052e-45,  1.7453e-41],\n",
       "              [ 0.0000e+00,  5.4230e-42,  2.1819e-12],\n",
       "              [ 0.0000e+00,  0.0000e+00,  0.0000e+00]],\n",
       "    \n",
       "             [[ 1.2621e-05,  1.1914e-05,  1.2020e-05],\n",
       "              [ 1.0196e-05,  1.0994e-05,  9.7540e-06],\n",
       "              [ 1.3330e-05,  1.3763e-05,  1.1335e-05]]]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[[[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]]],\n",
       "    \n",
       "    \n",
       "            [[[6.4514e-12, 4.9006e-12, 5.6172e-12],\n",
       "              [5.6092e-12, 6.3287e-12, 1.4225e-11],\n",
       "              [2.9050e-11, 8.2917e-12, 8.3410e-12]],\n",
       "    \n",
       "             [[9.1888e-09, 9.6192e-09, 9.6960e-09],\n",
       "              [9.1984e-09, 8.5119e-09, 8.0364e-09],\n",
       "              [7.0979e-09, 4.7679e-09, 4.9320e-09]],\n",
       "    \n",
       "             [[4.3142e-09, 4.9577e-09, 5.5589e-09],\n",
       "              [4.7464e-09, 5.4228e-09, 5.2221e-09],\n",
       "              [3.7314e-09, 2.4203e-09, 2.2036e-09]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[4.0058e-17, 7.9103e-16, 7.1908e-13],\n",
       "              [6.8077e-18, 2.7569e-16, 7.7328e-13],\n",
       "              [1.7283e-17, 4.0550e-16, 4.7781e-16]],\n",
       "    \n",
       "             [[5.1906e-18, 0.0000e+00, 7.1468e-18],\n",
       "              [8.3281e-19, 2.5135e-18, 5.7436e-15],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[4.0954e-09, 3.3476e-09, 2.8124e-09],\n",
       "              [3.5946e-09, 1.8531e-09, 1.6801e-09],\n",
       "              [4.1085e-09, 1.8749e-09, 1.7079e-09]]],\n",
       "    \n",
       "    \n",
       "            [[[8.8240e-13, 8.0718e-13, 1.1545e-12],\n",
       "              [8.2625e-13, 8.1150e-13, 1.0077e-12],\n",
       "              [7.8090e-13, 7.3686e-13, 6.9772e-13]],\n",
       "    \n",
       "             [[3.8670e-11, 3.4625e-11, 3.1673e-11],\n",
       "              [3.8567e-11, 3.5149e-11, 3.1234e-11],\n",
       "              [3.2752e-11, 3.0094e-11, 2.9201e-11]],\n",
       "    \n",
       "             [[2.3108e-11, 1.9734e-11, 1.0084e-11],\n",
       "              [1.5582e-11, 1.0594e-11, 4.7808e-12],\n",
       "              [1.0103e-11, 7.4499e-12, 5.1373e-12]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[8.5456e-13, 2.3813e-12, 5.5340e-14],\n",
       "              [9.6881e-13, 2.9408e-12, 2.6159e-13],\n",
       "              [6.9066e-24, 0.0000e+00, 1.9169e-21]],\n",
       "    \n",
       "             [[1.2797e-17, 4.7827e-18, 7.3900e-17],\n",
       "              [2.6377e-15, 5.2440e-15, 6.5193e-15],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[1.9769e-11, 1.0999e-11, 6.1212e-12],\n",
       "              [1.9619e-11, 1.1611e-11, 5.8819e-12],\n",
       "              [8.6075e-11, 8.8738e-11, 7.7869e-11]]],\n",
       "    \n",
       "    \n",
       "            ...,\n",
       "    \n",
       "    \n",
       "            [[[5.6159e-15, 3.8793e-15, 1.1528e-14],\n",
       "              [8.9063e-15, 2.1471e-14, 1.9870e-15],\n",
       "              [6.7554e-14, 2.3172e-14, 3.7355e-14]],\n",
       "    \n",
       "             [[3.3193e-12, 4.3979e-12, 1.1672e-11],\n",
       "              [5.0524e-12, 5.9622e-12, 1.5123e-11],\n",
       "              [1.0326e-11, 1.0213e-11, 2.1259e-11]],\n",
       "    \n",
       "             [[2.2243e-13, 2.3766e-13, 5.7352e-13],\n",
       "              [1.0391e-13, 2.1815e-13, 6.9349e-13],\n",
       "              [1.4126e-13, 1.1690e-13, 6.0724e-13]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[1.1228e-14, 7.5327e-15, 6.1573e-16],\n",
       "              [9.8716e-17, 2.8381e-16, 1.1719e-17],\n",
       "              [0.0000e+00, 0.0000e+00, 2.8318e-20]],\n",
       "    \n",
       "             [[2.5120e-15, 2.3784e-15, 1.2015e-16],\n",
       "              [2.7003e-18, 4.5934e-17, 1.6763e-18],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[5.0566e-13, 4.3861e-13, 9.2068e-13],\n",
       "              [6.1897e-13, 4.3297e-13, 8.5226e-13],\n",
       "              [4.8588e-13, 3.8470e-13, 8.2768e-13]]],\n",
       "    \n",
       "    \n",
       "            [[[5.0578e-13, 1.1332e-12, 9.5993e-13],\n",
       "              [1.3819e-12, 2.6914e-12, 8.9797e-13],\n",
       "              [3.2367e-13, 1.5007e-12, 5.2743e-13]],\n",
       "    \n",
       "             [[7.6056e-11, 8.5582e-11, 1.1026e-10],\n",
       "              [7.6898e-11, 1.0477e-10, 1.1764e-10],\n",
       "              [7.3399e-11, 8.2241e-11, 1.0560e-10]],\n",
       "    \n",
       "             [[4.1792e-12, 2.4111e-12, 2.7972e-12],\n",
       "              [4.7497e-12, 2.4345e-12, 7.3042e-12],\n",
       "              [3.2853e-12, 5.4821e-12, 3.4921e-12]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[6.1060e-15, 5.7683e-15, 2.1988e-15],\n",
       "              [2.6156e-14, 1.5143e-14, 6.3386e-15],\n",
       "              [1.2686e-18, 3.0175e-18, 1.0730e-18]],\n",
       "    \n",
       "             [[0.0000e+00, 0.0000e+00, 0.0000e+00],\n",
       "              [4.1485e-15, 4.9752e-15, 1.5710e-15],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[2.0355e-11, 7.4211e-12, 2.8021e-11],\n",
       "              [2.1298e-11, 5.2911e-12, 1.9484e-11],\n",
       "              [1.8703e-11, 7.3093e-12, 2.2135e-11]]],\n",
       "    \n",
       "    \n",
       "            [[[4.5044e-12, 1.0257e-11, 5.5922e-12],\n",
       "              [1.4034e-11, 2.4693e-11, 1.4754e-11],\n",
       "              [1.7160e-11, 3.7437e-11, 1.5080e-11]],\n",
       "    \n",
       "             [[3.1940e-08, 2.6287e-08, 1.9710e-08],\n",
       "              [3.0402e-08, 2.2686e-08, 1.7956e-08],\n",
       "              [2.7117e-08, 1.6237e-08, 1.3324e-08]],\n",
       "    \n",
       "             [[1.5742e-08, 1.1927e-08, 7.8933e-09],\n",
       "              [1.4860e-08, 1.0892e-08, 6.8363e-09],\n",
       "              [1.1084e-08, 6.7276e-09, 4.9776e-09]],\n",
       "    \n",
       "             ...,\n",
       "    \n",
       "             [[3.8072e-15, 1.3260e-12, 5.2559e-12],\n",
       "              [3.5970e-15, 1.3891e-12, 1.6822e-12],\n",
       "              [9.1935e-16, 2.2905e-14, 7.6390e-15]],\n",
       "    \n",
       "             [[4.5459e-16, 1.6307e-15, 1.6761e-17],\n",
       "              [0.0000e+00, 7.6884e-18, 8.6400e-16],\n",
       "              [0.0000e+00, 0.0000e+00, 0.0000e+00]],\n",
       "    \n",
       "             [[2.1832e-08, 1.3457e-08, 1.0747e-08],\n",
       "              [1.8697e-08, 1.0195e-08, 7.7079e-09],\n",
       "              [1.9114e-08, 1.0455e-08, 7.2544e-09]]]], device='cuda:0')},\n",
       "   5: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 0.0000e+00, -5.0607e-06, -5.0928e-07,  2.8738e-07,  6.4934e-07,\n",
       "             4.0119e-05,  2.9806e-05,  2.5140e-07,  1.8767e-15,  3.9691e-07,\n",
       "            -7.1814e-08,  1.1579e-05, -3.2123e-19,  1.2762e-08,  1.5985e-39,\n",
       "            -1.3302e-05,  2.5318e-06,  1.0314e-05,  5.6190e-12,  2.3877e-05,\n",
       "             1.7308e-05,  1.5084e-06,  1.3172e-42,  5.6052e-45, -9.7904e-07,\n",
       "             5.6052e-45, -1.9696e-05, -8.6464e-06, -8.0098e-07,  2.8240e-33,\n",
       "            -1.2386e-05,  1.0581e-05,  1.7430e-05,  9.9747e-13, -7.5512e-07,\n",
       "             9.6695e-12, -5.4564e-06,  2.2021e-41,  1.0246e-07,  2.1686e-06,\n",
       "             8.1922e-23,  3.1570e-05,  8.3293e-12,  3.9100e-06,  5.7484e-08,\n",
       "            -2.3146e-05, -1.4886e-07,  4.5181e-05, -9.7041e-08, -2.9992e-05,\n",
       "             2.2443e-05, -1.3391e-07,  1.6650e-06, -2.7595e-08, -3.8347e-06,\n",
       "            -4.1250e-16,  2.5310e-05,  2.3394e-07,  7.5182e-37,  3.7884e-05,\n",
       "             2.1289e-11,  1.7106e-13,  4.8411e-09,  6.4106e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([0.0000e+00, 8.4192e-09, 2.0312e-10, 2.3100e-10, 2.4071e-10, 2.1991e-09,\n",
       "            1.0957e-08, 3.6497e-08, 1.3115e-17, 8.6991e-11, 4.6561e-10, 1.0720e-08,\n",
       "            1.1182e-13, 2.3261e-10, 1.0491e-11, 8.7756e-09, 6.4292e-09, 2.1250e-08,\n",
       "            7.1463e-16, 1.0460e-08, 3.1066e-08, 6.3234e-10, 1.0017e-11, 5.0053e-13,\n",
       "            8.2454e-10, 1.1088e-12, 2.7579e-08, 1.2175e-08, 1.4989e-10, 9.5033e-17,\n",
       "            4.9843e-09, 5.9987e-09, 8.4221e-09, 1.6957e-10, 6.8323e-10, 2.5069e-11,\n",
       "            2.2093e-08, 1.1477e-12, 3.9931e-10, 2.2110e-09, 6.5927e-10, 6.1080e-09,\n",
       "            3.3802e-11, 6.8529e-09, 1.8765e-09, 2.1319e-09, 3.2201e-12, 6.4824e-09,\n",
       "            8.2501e-12, 3.4938e-08, 1.2064e-08, 6.2522e-11, 3.6793e-09, 8.7659e-10,\n",
       "            9.8081e-10, 1.3524e-09, 1.0145e-08, 8.4419e-10, 4.9391e-11, 6.3974e-09,\n",
       "            8.1275e-10, 1.0768e-11, 1.6055e-10, 3.4550e-08], device='cuda:0')},\n",
       "   6: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  3.0112e-06,\n",
       "              3.3263e-06,  3.9237e-06],\n",
       "            [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -6.1777e-07,\n",
       "             -1.3809e-06, -1.7969e-06],\n",
       "            [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  2.9166e-40,\n",
       "              2.3047e-40,  2.0404e-40],\n",
       "            ...,\n",
       "            [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  3.6935e-06,\n",
       "              5.1887e-06,  6.8432e-06],\n",
       "            [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ...,  1.9275e-12,\n",
       "              4.3726e-12,  8.5998e-12],\n",
       "            [ 0.0000e+00,  0.0000e+00,  0.0000e+00,  ..., -1.1599e-06,\n",
       "             -5.4440e-07, -2.6540e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 2.1798e-10, 1.5703e-10,\n",
       "             2.0960e-10],\n",
       "            [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.5692e-10, 1.3380e-10,\n",
       "             1.7459e-10],\n",
       "            [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.9503e-14, 1.2174e-14,\n",
       "             9.5385e-15],\n",
       "            ...,\n",
       "            [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 8.0743e-10, 7.0995e-10,\n",
       "             9.1383e-10],\n",
       "            [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 8.0705e-12, 2.2866e-12,\n",
       "             4.9647e-12],\n",
       "            [0.0000e+00, 0.0000e+00, 0.0000e+00,  ..., 1.4241e-10, 1.0923e-10,\n",
       "             1.5975e-10]], device='cuda:0')},\n",
       "   7: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.4209e-05,  1.5620e-05,  1.9985e-40,  1.4571e-05,  1.3047e-39,\n",
       "             5.4157e-31,  9.4908e-40,  1.4651e-07, -2.6674e-07,  7.5836e-40,\n",
       "            -3.5733e-06,  3.2718e-06,  1.2783e-11,  3.5235e-32,  1.4786e-06,\n",
       "             1.0741e-28, -3.4947e-06,  4.6796e-06,  1.2122e-24,  4.8464e-13,\n",
       "             5.6052e-45,  3.5217e-06,  5.6052e-45,  8.2415e-39,  7.3020e-33,\n",
       "             1.4935e-05,  4.3139e-06,  7.6539e-06,  3.6301e-13,  5.2167e-15,\n",
       "            -1.7956e-06,  1.3968e-11,  1.5620e-40,  5.8143e-40,  1.6421e-15,\n",
       "             4.5682e-43,  1.9773e-05,  7.0880e-06,  5.6052e-45,  5.6052e-45,\n",
       "            -3.2388e-05,  5.6052e-45,  5.6052e-45, -4.1535e-32,  7.0065e-45,\n",
       "             7.3239e-07,  1.2085e-41, -4.2678e-06,  5.6052e-45, -2.5439e-06,\n",
       "             2.6744e-06,  4.2986e-41,  4.4583e-30,  1.6230e-07,  9.0291e-33,\n",
       "             1.3913e-07,  6.5868e-27,  6.0899e-06,  5.6052e-45,  3.8547e-31,\n",
       "            -5.2142e-06,  0.0000e+00,  1.7758e-39,  8.0812e-39,  5.0237e-40,\n",
       "            -3.5821e-06,  1.8172e-30,  5.6052e-45,  1.4445e-31,  3.3780e-33,\n",
       "             6.6359e-06,  2.4690e-36,  7.4367e-42,  5.6052e-45,  2.5144e-31,\n",
       "             1.6174e-12,  2.4446e-07,  4.9505e-36,  2.7108e-06,  3.3653e-31,\n",
       "             4.3211e-06,  1.4522e-38,  7.4330e-06,  1.9433e-24,  5.6052e-45,\n",
       "             1.9400e-30,  5.6052e-45,  1.8789e-39,  6.4426e-33,  1.4239e-38,\n",
       "             5.6052e-45,  1.8813e-09,  5.5745e-06,  3.0501e-30,  3.5201e-42,\n",
       "             1.4550e-38,  2.1120e-05,  4.9540e-07,  3.7747e-06,  2.8051e-41,\n",
       "             2.6764e-06,  1.7662e-41,  4.1386e-17,  1.4228e-05,  5.6052e-45,\n",
       "             1.4842e-05,  5.6052e-45,  3.3358e-06,  2.8865e-19,  1.1690e-06,\n",
       "             2.3074e-40, -2.7256e-06,  4.7644e-44,  5.4084e-07,  8.1179e-06,\n",
       "             8.0241e-36,  4.5942e-07, -7.4386e-07,  5.6052e-45,  5.6052e-45,\n",
       "             5.6052e-45,  1.6079e-29,  2.1525e-07,  3.7050e-13,  5.6052e-45,\n",
       "            -3.0522e-07,  4.7361e-11, -3.6088e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([4.2157e-09, 8.5130e-10, 9.1746e-15, 2.8434e-09, 1.8301e-14, 7.0192e-13,\n",
       "            8.4333e-15, 5.4344e-10, 6.2086e-09, 1.8068e-11, 1.5445e-08, 7.0103e-10,\n",
       "            3.3552e-12, 1.3892e-11, 3.5212e-10, 4.6177e-11, 1.5777e-09, 4.6305e-09,\n",
       "            2.5063e-11, 3.2752e-11, 7.4214e-15, 4.9328e-09, 5.0922e-13, 5.1336e-15,\n",
       "            2.0344e-12, 3.2317e-09, 9.6475e-10, 1.9461e-09, 2.3556e-10, 1.8321e-10,\n",
       "            1.1914e-10, 9.2158e-12, 2.6934e-11, 5.7064e-12, 2.0159e-13, 5.5120e-11,\n",
       "            1.2516e-09, 6.3824e-09, 5.3731e-13, 5.0793e-13, 3.7988e-09, 1.2683e-14,\n",
       "            7.1529e-11, 3.2351e-12, 4.8613e-13, 7.7142e-10, 2.6375e-12, 1.4592e-09,\n",
       "            6.3960e-16, 5.3777e-09, 1.7756e-10, 1.5938e-11, 1.1433e-11, 2.3258e-09,\n",
       "            3.4295e-11, 2.2507e-09, 1.5886e-12, 1.5013e-08, 1.1169e-14, 2.6412e-11,\n",
       "            4.1261e-09, 0.0000e+00, 3.9844e-11, 1.6175e-13, 7.4521e-11, 3.3225e-09,\n",
       "            1.1720e-11, 2.2218e-11, 8.3770e-13, 1.2377e-13, 7.0902e-09, 3.1103e-11,\n",
       "            1.9751e-13, 7.4006e-16, 1.6869e-13, 1.0666e-12, 2.2345e-11, 1.1500e-10,\n",
       "            1.3019e-09, 5.2565e-11, 2.0729e-09, 4.3675e-12, 3.6592e-09, 4.9870e-13,\n",
       "            3.8666e-13, 3.5455e-11, 1.8915e-14, 1.2144e-11, 3.3216e-12, 3.2782e-11,\n",
       "            5.0112e-13, 3.4094e-11, 2.1761e-09, 1.1459e-12, 8.8494e-12, 3.4430e-11,\n",
       "            8.4919e-09, 1.9525e-10, 3.3271e-09, 1.2110e-14, 4.9849e-10, 6.4183e-14,\n",
       "            7.9696e-18, 8.7043e-09, 1.7650e-11, 1.7710e-09, 4.7002e-11, 1.9163e-08,\n",
       "            2.4980e-10, 7.1202e-10, 8.7811e-15, 1.5632e-09, 4.9588e-12, 3.0877e-10,\n",
       "            2.0794e-09, 7.1891e-13, 3.0171e-10, 1.2744e-08, 1.5328e-11, 8.4364e-15,\n",
       "            7.7571e-17, 7.2595e-13, 6.1840e-11, 1.0689e-10, 8.2654e-13, 1.7080e-09,\n",
       "            3.5817e-11, 3.4687e-10], device='cuda:0')},\n",
       "   8: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.7470e-06, -1.4826e-05, -8.1275e-43,  ..., -2.2755e-05,\n",
       "              4.7719e-12, -5.0242e-06],\n",
       "            [ 5.6052e-45, -5.6052e-45,  0.0000e+00,  ...,  5.6052e-45,\n",
       "              0.0000e+00,  5.6052e-45],\n",
       "            [-8.9779e-21, -9.0381e-21,  0.0000e+00,  ...,  1.7583e-21,\n",
       "             -5.6052e-45, -2.8152e-21],\n",
       "            ...,\n",
       "            [ 2.0012e-05, -9.0081e-06, -2.2281e-42,  ...,  4.1136e-06,\n",
       "              5.1371e-12,  2.1741e-06],\n",
       "            [-5.9435e-06, -7.7584e-06, -7.7772e-43,  ...,  2.4310e-07,\n",
       "             -4.9826e-12, -2.0825e-06],\n",
       "            [ 2.1091e-05,  1.7751e-05,  7.0345e-43,  ...,  6.8241e-05,\n",
       "             -7.6425e-13,  1.4287e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.4783e-09, 8.2182e-10, 1.3275e-19,  ..., 7.4860e-10, 1.9717e-13,\n",
       "             4.3848e-10],\n",
       "            [3.5786e-16, 9.6625e-18, 0.0000e+00,  ..., 4.5422e-16, 0.0000e+00,\n",
       "             1.1674e-16],\n",
       "            [6.7784e-13, 1.1574e-11, 0.0000e+00,  ..., 5.1650e-13, 5.1799e-18,\n",
       "             3.8571e-13],\n",
       "            ...,\n",
       "            [1.6525e-09, 1.4924e-09, 1.4682e-18,  ..., 1.5217e-09, 2.3891e-13,\n",
       "             4.1372e-10],\n",
       "            [6.1686e-10, 3.4694e-10, 1.6886e-19,  ..., 1.0457e-09, 7.2763e-14,\n",
       "             7.3603e-11],\n",
       "            [1.1087e-09, 2.5124e-08, 1.1169e-19,  ..., 1.6015e-08, 8.1770e-14,\n",
       "             1.4470e-10]], device='cuda:0')},\n",
       "   9: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.6034e-06,  5.6052e-45, -3.2007e-21, -3.5926e-06,  6.7139e-06,\n",
       "             1.9059e-07, -2.8490e-06,  2.9442e-07,  8.4899e-06, -1.1081e-07,\n",
       "             1.1518e-05,  5.1737e-07,  2.9483e-06,  7.8479e-11, -2.9157e-06,\n",
       "             1.6640e-19, -3.1374e-06,  1.4223e-06, -3.1673e-06, -6.4852e-07,\n",
       "             8.8456e-07,  3.1831e-06,  6.9649e-06, -1.0133e-07, -1.7773e-07,\n",
       "            -4.2626e-07,  1.3923e-06,  1.2081e-12,  4.7059e-07,  1.0218e-05,\n",
       "             4.0197e-07,  2.9000e-05,  1.9108e-23,  4.1582e-07,  4.5106e-07,\n",
       "             5.6052e-45,  6.5553e-08,  6.3866e-06,  1.7714e-09,  4.8671e-08,\n",
       "            -1.0239e-06, -1.4334e-06,  3.7746e-06, -5.2240e-07,  4.3528e-09,\n",
       "             1.4762e-07, -8.8816e-08,  5.8389e-07, -3.6522e-07, -3.4834e-07,\n",
       "             2.4948e-06,  7.8665e-07, -1.9550e-06,  1.2446e-06,  1.2099e-06,\n",
       "            -6.8695e-06,  3.6322e-15, -1.0563e-06, -1.7698e-07,  1.6326e-06,\n",
       "             9.4865e-06,  3.7262e-06,  5.8704e-07, -3.2563e-07,  1.8755e-06,\n",
       "             1.7645e-07,  1.8743e-06,  1.1828e-13,  2.7671e-27, -4.4037e-06,\n",
       "             1.2617e-06,  5.9899e-37,  3.8970e-42, -3.6140e-11,  2.6744e-06,\n",
       "             3.3333e-06,  7.7017e-07, -1.0761e-06,  2.8048e-06,  2.9784e-06,\n",
       "             1.9771e-07,  9.1237e-08, -1.6203e-06,  1.0504e-08,  2.7983e-07,\n",
       "            -3.4802e-07, -2.1779e-06,  2.9077e-06,  8.2912e-08, -1.7136e-06,\n",
       "            -9.4072e-11,  0.0000e+00, -1.0694e-06, -4.7299e-06, -2.9630e-07,\n",
       "            -9.1735e-07, -1.0133e-07, -9.3966e-08,  6.4603e-07, -3.3219e-05,\n",
       "             2.3283e-06,  2.1113e-06,  5.6052e-45,  1.2383e-06, -3.9332e-06,\n",
       "             0.0000e+00,  3.0581e-05,  1.8582e-06,  4.4267e-06,  8.6191e-08,\n",
       "             1.6791e-05, -2.1753e-06,  7.5018e-06,  6.1874e-07, -4.8343e-06,\n",
       "            -1.0987e-07,  1.6424e-06, -2.3197e-06,  1.2554e-05,  3.0880e-07,\n",
       "            -3.6005e-07,  2.0418e-06,  1.0129e-05, -6.9168e-07, -1.1383e-06,\n",
       "             3.4366e-06, -2.3599e-06,  1.0973e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.3506e-10, 7.1959e-16, 2.7614e-12, 2.1572e-10, 1.1364e-09, 8.6734e-11,\n",
       "            3.6484e-10, 3.3925e-11, 4.0163e-10, 2.6763e-10, 1.0934e-09, 1.4764e-11,\n",
       "            4.0968e-11, 3.1573e-13, 2.1121e-10, 7.6153e-12, 1.1148e-10, 5.9071e-10,\n",
       "            6.0199e-10, 1.5320e-10, 3.4825e-09, 1.5741e-10, 1.4129e-09, 3.4613e-11,\n",
       "            5.2367e-12, 1.8613e-10, 5.1186e-11, 2.0069e-11, 1.2649e-10, 1.7344e-09,\n",
       "            8.0152e-13, 3.0139e-08, 7.7346e-13, 1.8593e-10, 1.1265e-11, 5.6251e-11,\n",
       "            1.6166e-13, 5.0833e-10, 1.3044e-12, 5.7367e-15, 6.3117e-11, 5.9652e-11,\n",
       "            1.0617e-10, 1.7519e-10, 2.8741e-12, 7.0262e-11, 4.2595e-11, 2.5429e-10,\n",
       "            9.7824e-12, 5.4016e-11, 2.3683e-10, 3.1879e-10, 2.9136e-10, 8.2497e-11,\n",
       "            1.7103e-10, 1.4559e-10, 9.0306e-12, 1.5123e-10, 1.5997e-11, 5.4651e-11,\n",
       "            7.2197e-11, 1.8873e-10, 5.4800e-10, 2.9895e-10, 4.1857e-10, 2.5735e-10,\n",
       "            1.6310e-10, 3.8469e-12, 2.6107e-12, 3.2579e-10, 8.7283e-11, 2.1345e-11,\n",
       "            1.2843e-14, 1.4447e-12, 4.9825e-10, 1.4883e-11, 1.3293e-10, 1.7189e-10,\n",
       "            1.3052e-10, 8.8746e-10, 2.9573e-11, 1.9040e-11, 3.3124e-10, 5.1981e-13,\n",
       "            2.9448e-10, 2.0337e-11, 5.5452e-11, 4.4795e-10, 3.0752e-12, 6.6498e-11,\n",
       "            8.1085e-12, 0.0000e+00, 1.4512e-10, 9.4727e-11, 1.3615e-10, 3.8668e-11,\n",
       "            1.8424e-10, 8.5768e-11, 1.3919e-11, 6.9465e-10, 7.5718e-11, 2.3064e-11,\n",
       "            1.9869e-14, 4.0803e-11, 6.8440e-10, 0.0000e+00, 2.5914e-07, 5.5989e-11,\n",
       "            4.0997e-10, 1.6920e-11, 1.7034e-09, 1.0507e-10, 4.8088e-09, 5.7455e-12,\n",
       "            4.2083e-10, 1.2671e-11, 5.2870e-10, 7.6949e-11, 1.8963e-09, 1.6381e-10,\n",
       "            1.7740e-11, 6.4808e-12, 7.1356e-10, 2.9922e-11, 1.8788e-11, 2.9645e-10,\n",
       "            1.4807e-10, 6.9374e-10], device='cuda:0')},\n",
       "   10: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-7.1203e-07,  1.0067e-07, -2.1951e-07,  ...,  8.7245e-08,\n",
       "             -9.0804e-07,  6.6579e-07],\n",
       "            [-6.8401e-07,  4.4488e-08, -2.7317e-08,  ..., -8.5950e-08,\n",
       "             -1.4309e-06,  8.3746e-07],\n",
       "            [-1.1777e-06,  1.9639e-07,  5.5269e-08,  ..., -1.3734e-07,\n",
       "             -2.8500e-06,  1.5795e-06],\n",
       "            ...,\n",
       "            [-9.4704e-07,  2.0429e-06,  1.7008e-06,  ..., -6.8174e-07,\n",
       "             -3.6458e-06, -1.7883e-07],\n",
       "            [-1.2621e-06,  1.1046e-06,  8.4484e-07,  ...,  1.0878e-07,\n",
       "             -1.8930e-06,  2.8682e-07],\n",
       "            [ 1.1955e-06,  1.2978e-06,  1.5752e-06,  ..., -1.1426e-06,\n",
       "             -1.8765e-06, -1.4998e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.6876e-11, 4.0031e-11, 2.7445e-11,  ..., 6.8093e-11, 1.9644e-10,\n",
       "             2.1658e-11],\n",
       "            [1.7817e-11, 3.3696e-11, 2.3385e-11,  ..., 5.5544e-11, 1.6414e-10,\n",
       "             2.0323e-11],\n",
       "            [4.5298e-11, 7.9434e-11, 5.9974e-11,  ..., 1.0522e-10, 3.4823e-10,\n",
       "             3.7121e-11],\n",
       "            ...,\n",
       "            [3.0153e-11, 4.5764e-11, 3.1096e-11,  ..., 9.8972e-11, 1.9002e-10,\n",
       "             4.0683e-11],\n",
       "            [3.2839e-11, 6.2716e-11, 4.4724e-11,  ..., 8.4601e-11, 2.8058e-10,\n",
       "             2.5636e-11],\n",
       "            [2.3154e-11, 3.6844e-11, 2.8822e-11,  ..., 5.0858e-11, 1.6385e-10,\n",
       "             1.4930e-11]], device='cuda:0')},\n",
       "   11: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.8895e-07,  2.9667e-07, -1.2091e-07,  ..., -1.0643e-07,\n",
       "             -5.6391e-08, -3.3043e-07],\n",
       "            [-2.0818e-07, -1.6721e-07,  7.2130e-08,  ...,  1.0466e-07,\n",
       "              1.5936e-07,  1.6528e-07],\n",
       "            [-2.3436e-08, -8.6012e-08,  1.0954e-07,  ...,  1.8681e-08,\n",
       "              7.8989e-08,  1.6704e-08],\n",
       "            ...,\n",
       "            [-1.3320e-07,  1.1698e-08, -1.8638e-08,  ...,  2.7077e-08,\n",
       "             -1.7986e-08,  5.0296e-08],\n",
       "            [-7.8430e-08,  7.6098e-09, -1.7281e-08,  ...,  2.6596e-08,\n",
       "              3.0014e-08,  1.6860e-08],\n",
       "            [ 2.0726e-07, -3.4922e-08,  5.1511e-08,  ..., -3.6988e-08,\n",
       "              2.6094e-08, -8.7504e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.8394e-13, 3.2453e-13, 1.9326e-13,  ..., 5.0810e-13, 5.6222e-12,\n",
       "             3.2128e-12],\n",
       "            [2.4155e-13, 1.2313e-13, 6.1821e-14,  ..., 1.2689e-13, 1.9064e-12,\n",
       "             1.1916e-12],\n",
       "            [9.7236e-14, 3.2266e-14, 2.3528e-14,  ..., 6.6708e-14, 5.1028e-13,\n",
       "             2.6947e-13],\n",
       "            ...,\n",
       "            [4.6353e-14, 1.7215e-15, 2.2655e-15,  ..., 2.3895e-14, 6.3748e-15,\n",
       "             4.6246e-15],\n",
       "            [2.4828e-13, 1.9341e-14, 2.6707e-14,  ..., 2.7387e-13, 6.6318e-14,\n",
       "             4.5829e-14],\n",
       "            [8.8188e-14, 6.1035e-15, 1.0221e-14,  ..., 6.0294e-14, 2.2511e-14,\n",
       "             1.3602e-14]], device='cuda:0')},\n",
       "   12: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.3540e-08, -4.0534e-08, -4.1879e-08,  ...,  9.7230e-08,\n",
       "             -5.0316e-08, -7.4064e-08],\n",
       "            [ 5.7011e-08, -9.6502e-08, -1.0137e-07,  ...,  9.7526e-08,\n",
       "              2.6724e-07,  5.7406e-08],\n",
       "            [ 1.1563e-07,  4.0824e-08,  4.2130e-08,  ..., -8.5726e-08,\n",
       "              1.6150e-07,  9.9704e-08],\n",
       "            ...,\n",
       "            [-1.3188e-09, -1.6976e-09, -1.8147e-09,  ...,  3.5999e-08,\n",
       "              6.9851e-08, -1.4824e-08],\n",
       "            [-9.0979e-08,  5.3093e-08,  5.5606e-08,  ..., -4.3452e-08,\n",
       "             -2.1742e-07,  5.5895e-08],\n",
       "            [-1.7419e-08,  1.1756e-08,  1.2407e-08,  ..., -1.1965e-07,\n",
       "             -2.2962e-07,  1.0418e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[8.5160e-14, 1.7537e-13, 1.9199e-13,  ..., 4.4183e-13, 1.2659e-12,\n",
       "             2.6820e-14],\n",
       "            [5.0699e-14, 3.4266e-14, 3.6697e-14,  ..., 5.6151e-14, 2.3500e-13,\n",
       "             1.8679e-14],\n",
       "            [1.0062e-13, 3.1521e-13, 3.4586e-13,  ..., 8.9615e-13, 2.1492e-12,\n",
       "             3.1256e-14],\n",
       "            ...,\n",
       "            [2.3487e-14, 9.1297e-15, 9.4749e-15,  ..., 2.7163e-14, 5.1073e-14,\n",
       "             3.2874e-15],\n",
       "            [1.3713e-13, 8.3484e-14, 8.8813e-14,  ..., 2.0519e-13, 5.2882e-13,\n",
       "             2.6668e-14],\n",
       "            [4.8900e-14, 3.2642e-14, 3.5020e-14,  ..., 6.5749e-14, 2.2048e-13,\n",
       "             1.4240e-14]], device='cuda:0')},\n",
       "   13: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.6672e-06, -4.0223e-07,  3.6761e-07,  ...,  1.8302e-07,\n",
       "             -9.5496e-07,  1.1240e-06],\n",
       "            [-6.4109e-07, -6.0057e-08, -6.6596e-09,  ...,  1.2742e-07,\n",
       "             -3.8349e-07,  1.7269e-08],\n",
       "            [-2.1393e-06, -4.4098e-07,  8.1065e-07,  ...,  1.3416e-06,\n",
       "             -5.8598e-07,  2.1128e-06],\n",
       "            ...,\n",
       "            [ 7.0953e-07, -1.3583e-07,  3.2325e-07,  ..., -1.5016e-07,\n",
       "              8.1854e-07,  4.6505e-07],\n",
       "            [ 8.1797e-07,  3.5491e-07, -6.8135e-07,  ..., -6.1264e-07,\n",
       "              1.2414e-06, -5.0333e-07],\n",
       "            [-5.4995e-07,  3.6039e-07, -1.5136e-06,  ..., -1.3900e-06,\n",
       "             -9.6147e-07, -1.2610e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.3734e-11, 1.1348e-11, 1.3826e-11,  ..., 1.0730e-11, 2.8231e-11,\n",
       "             1.2729e-11],\n",
       "            [2.4176e-11, 1.1485e-11, 1.3122e-11,  ..., 1.1844e-11, 1.5535e-11,\n",
       "             8.4544e-12],\n",
       "            [4.0381e-11, 3.3244e-11, 1.7797e-11,  ..., 3.4469e-11, 2.4337e-11,\n",
       "             3.3718e-11],\n",
       "            ...,\n",
       "            [3.4269e-11, 2.9410e-11, 1.9998e-11,  ..., 3.7836e-11, 2.3120e-11,\n",
       "             2.8188e-11],\n",
       "            [2.4101e-11, 1.7326e-11, 1.5342e-11,  ..., 1.9037e-11, 1.7256e-11,\n",
       "             1.7930e-11],\n",
       "            [5.5282e-11, 1.6040e-11, 2.3443e-11,  ..., 2.1021e-11, 4.1183e-11,\n",
       "             2.4822e-11]], device='cuda:0')},\n",
       "   14: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.4768e-06,  6.6807e-07,  2.5088e-06,  1.4276e-06, -1.1526e-06,\n",
       "             2.7001e-06, -2.7080e-06, -8.1179e-07, -2.2188e-06,  3.4304e-07,\n",
       "             3.3561e-06,  7.8240e-08, -2.7833e-06, -2.7689e-06,  3.4255e-07,\n",
       "            -4.4192e-06,  3.1491e-06, -3.3548e-07, -2.9283e-06, -1.1891e-06,\n",
       "            -4.2542e-06, -4.4518e-07,  9.5927e-07, -4.0836e-06,  3.2527e-06,\n",
       "            -7.0972e-07,  1.8070e-06,  2.3264e-06, -2.9490e-06,  2.7325e-07,\n",
       "            -1.0783e-06,  1.9031e-06, -9.4045e-07, -1.1613e-06,  9.3732e-07,\n",
       "            -2.1714e-06,  3.0698e-07,  9.1362e-07,  3.7975e-07,  3.5599e-06,\n",
       "             1.0427e-06, -5.9143e-06, -2.0658e-07,  3.4609e-06, -1.5375e-06,\n",
       "            -5.6072e-07, -2.9618e-06,  2.9776e-06,  2.7588e-06, -1.7760e-06,\n",
       "            -2.3765e-06, -4.2102e-06,  6.4277e-07,  3.1275e-06, -1.3711e-06,\n",
       "             3.0470e-07,  2.2889e-06, -2.1259e-06,  4.0973e-06,  1.8063e-06,\n",
       "            -3.2543e-06,  3.8913e-07, -3.6680e-07,  1.2761e-06,  2.2830e-06,\n",
       "            -1.6823e-06,  2.1298e-06, -3.8672e-06, -3.1403e-06, -2.6975e-07,\n",
       "             1.3706e-06,  3.3746e-06, -3.4189e-06, -1.7096e-06,  2.4065e-06,\n",
       "             1.6477e-06, -7.8316e-07,  1.0266e-06, -1.2270e-06, -4.7187e-07,\n",
       "            -2.8726e-06,  6.0512e-07, -2.6122e-06,  1.0192e-06, -2.7672e-06,\n",
       "            -2.0494e-06,  2.5558e-06,  2.9901e-06,  1.5388e-06, -2.2776e-06,\n",
       "             4.4719e-06,  2.3085e-06, -1.3196e-06,  1.9123e-06, -1.0418e-06,\n",
       "             1.3062e-06,  1.8298e-06, -8.2592e-08, -4.4135e-07, -2.5900e-06,\n",
       "             1.5285e-06, -3.0658e-07,  2.9706e-07,  1.3808e-06,  1.0167e-06,\n",
       "             1.2707e-06, -1.0416e-06, -6.9646e-07, -3.5773e-06, -9.4499e-09,\n",
       "             2.0119e-06,  2.8782e-06, -1.6703e-07, -4.2823e-07,  1.3405e-06,\n",
       "            -8.3225e-07,  2.9022e-08, -1.3099e-06, -3.0983e-06, -3.7981e-06,\n",
       "            -6.3495e-07, -3.9335e-06,  2.4205e-06,  1.3016e-06, -2.7552e-06,\n",
       "            -3.9453e-07, -2.5142e-06, -9.9352e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.1944e-10, 8.7399e-11, 1.8510e-10, 1.1653e-10, 1.1505e-10, 8.3136e-11,\n",
       "            1.7414e-10, 1.9855e-10, 1.5986e-10, 1.1769e-10, 1.2718e-10, 8.1041e-11,\n",
       "            7.2903e-11, 1.0914e-10, 1.8771e-10, 1.3258e-10, 1.6566e-10, 1.1918e-10,\n",
       "            1.2728e-10, 1.0303e-10, 1.9407e-10, 1.9397e-10, 1.3228e-10, 1.7988e-10,\n",
       "            1.4697e-10, 1.2489e-10, 1.1710e-10, 2.4325e-10, 1.1480e-10, 1.4440e-10,\n",
       "            1.4679e-10, 2.4361e-10, 1.1201e-10, 1.2807e-10, 1.9879e-10, 1.5208e-10,\n",
       "            1.2661e-10, 1.9653e-10, 1.2732e-10, 9.8093e-11, 1.7374e-10, 1.9659e-10,\n",
       "            2.3591e-10, 2.0749e-10, 1.1748e-10, 1.2542e-10, 8.4485e-11, 1.1601e-10,\n",
       "            1.5310e-10, 1.0259e-10, 1.1910e-10, 1.0954e-10, 2.2393e-10, 3.7386e-10,\n",
       "            1.4030e-10, 1.3557e-10, 9.7813e-11, 1.5327e-10, 1.2581e-10, 1.2090e-10,\n",
       "            1.5935e-10, 1.9648e-10, 2.5526e-10, 1.8674e-10, 2.1979e-10, 1.9858e-10,\n",
       "            9.9566e-11, 1.7442e-10, 1.6658e-10, 1.5738e-10, 7.7105e-11, 1.8210e-10,\n",
       "            1.7202e-10, 1.6499e-10, 1.5814e-10, 1.1116e-10, 1.5157e-10, 7.0744e-11,\n",
       "            1.4328e-10, 1.7709e-10, 1.3451e-10, 1.3391e-10, 7.0569e-11, 1.4464e-10,\n",
       "            1.9403e-10, 8.5741e-11, 1.5447e-10, 1.8225e-10, 1.5820e-10, 1.1429e-10,\n",
       "            2.8574e-10, 1.4098e-10, 1.0647e-10, 1.5397e-10, 1.5341e-10, 1.3273e-10,\n",
       "            8.7937e-11, 1.0776e-10, 1.3722e-10, 1.4931e-10, 1.9796e-10, 8.1852e-11,\n",
       "            2.7200e-10, 2.4788e-10, 1.6340e-10, 1.8747e-10, 1.2095e-10, 1.7310e-10,\n",
       "            7.7456e-11, 1.2476e-10, 2.0306e-10, 3.4183e-10, 7.4396e-11, 1.1167e-10,\n",
       "            1.1231e-10, 1.1231e-10, 1.2876e-10, 9.0373e-11, 1.1189e-10, 9.6503e-11,\n",
       "            7.3216e-11, 1.2326e-10, 8.3863e-11, 1.1148e-10, 2.6092e-10, 1.7727e-10,\n",
       "            1.1923e-10, 2.0069e-10], device='cuda:0')},\n",
       "   15: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-5.9834e-07, -2.6522e-07, -3.1142e-07, -8.3996e-07,  1.2753e-06,\n",
       "             8.0946e-07, -3.6901e-07, -9.4679e-08,  6.2333e-06, -1.1957e-07,\n",
       "             5.0151e-07, -1.6187e-07, -1.9869e-07, -9.6320e-07, -5.6012e-08,\n",
       "             1.6293e-07, -1.5330e-06,  1.2892e-06, -5.1099e-08, -1.3639e-08,\n",
       "            -7.6259e-09, -2.1366e-06, -3.7625e-07, -1.1209e-07, -6.2687e-07,\n",
       "            -4.7004e-07,  1.3630e-06,  1.0546e-07,  3.8802e-07,  1.9674e-06,\n",
       "             1.1127e-07, -9.2907e-07, -3.5760e-08,  9.0386e-08,  3.3416e-07,\n",
       "             6.3866e-08,  8.3893e-07,  8.8717e-07, -4.4549e-07,  6.0282e-08,\n",
       "             2.9778e-07, -4.6047e-08, -1.1882e-06,  1.0521e-06,  4.4503e-08,\n",
       "             7.7951e-08,  6.4822e-07, -3.2097e-07, -2.2640e-07,  1.4341e-06,\n",
       "             7.1070e-08,  1.1653e-07,  1.1026e-06,  2.7846e-07,  1.3112e-06,\n",
       "            -5.8526e-07,  6.0954e-08, -2.0445e-07,  4.0913e-07,  2.5146e-06,\n",
       "             1.5830e-07,  1.5233e-06, -2.6698e-07,  2.1989e-07,  3.3367e-06,\n",
       "             3.0000e-06,  7.3315e-07, -4.7390e-07,  3.3119e-08,  1.2246e-06,\n",
       "             6.2784e-09,  4.9808e-07,  2.0483e-09, -6.0554e-07,  1.9956e-08,\n",
       "             1.8346e-06,  2.8007e-06,  1.2330e-06,  3.6430e-07, -1.2423e-06,\n",
       "            -1.7468e-08,  2.4253e-07, -1.6413e-06,  3.0875e-07, -9.4758e-07,\n",
       "            -4.8865e-07, -2.1887e-07,  1.9861e-06, -4.0994e-09, -1.4392e-07,\n",
       "            -2.8164e-07,  1.5621e-06, -5.9237e-07, -6.8638e-07,  7.0559e-07,\n",
       "            -5.5411e-07,  1.3116e-06,  2.7926e-07,  4.0321e-07,  9.5617e-08,\n",
       "             7.6224e-07, -6.9858e-07, -8.6693e-08,  4.4027e-07, -8.7799e-07,\n",
       "            -9.7546e-07, -2.2479e-07,  1.4745e-07, -4.2414e-07,  2.0462e-07,\n",
       "             6.7809e-07,  4.9843e-07, -3.2850e-08, -1.3634e-06,  3.0317e-07,\n",
       "             2.8256e-08, -2.9382e-06,  3.5428e-08,  9.2162e-06,  5.0670e-07,\n",
       "             2.4970e-07, -1.7543e-07,  2.3278e-08, -3.3578e-08,  1.5376e-07,\n",
       "             2.5014e-06, -4.1692e-07, -6.3794e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.0087e-11, 2.0394e-12, 7.4968e-12, 1.5398e-11, 4.3546e-10, 1.2859e-11,\n",
       "            9.4861e-12, 3.1078e-12, 1.4466e-10, 4.0300e-11, 1.6684e-11, 4.6095e-12,\n",
       "            7.8719e-12, 7.2058e-12, 1.1888e-12, 1.6452e-12, 1.1338e-11, 3.0391e-10,\n",
       "            1.2771e-11, 4.2932e-11, 2.3132e-11, 4.1427e-11, 1.7517e-11, 5.4736e-12,\n",
       "            7.2753e-12, 2.7949e-11, 2.6544e-12, 7.9377e-12, 7.5366e-12, 3.9486e-11,\n",
       "            6.7174e-12, 5.3548e-10, 1.0651e-11, 2.1491e-10, 5.7440e-12, 5.0289e-12,\n",
       "            6.4370e-12, 2.9397e-11, 6.0084e-12, 1.3100e-12, 2.8576e-12, 1.1700e-12,\n",
       "            1.0895e-10, 3.3142e-11, 1.5778e-12, 4.0058e-12, 5.5378e-12, 3.5854e-11,\n",
       "            1.9042e-12, 6.8958e-12, 5.5873e-12, 4.4501e-11, 1.4949e-10, 5.2398e-12,\n",
       "            4.1429e-12, 1.0302e-11, 2.3736e-12, 8.5600e-13, 1.0869e-11, 4.3942e-11,\n",
       "            1.5668e-12, 2.9727e-11, 1.9603e-11, 2.1965e-12, 1.2227e-09, 3.4304e-11,\n",
       "            1.3524e-11, 1.0184e-12, 1.3580e-12, 1.8212e-11, 1.2473e-12, 1.6775e-12,\n",
       "            1.4429e-11, 2.5479e-12, 2.0184e-11, 7.3023e-12, 5.5994e-11, 1.5763e-11,\n",
       "            6.6365e-12, 6.2175e-11, 6.2076e-12, 2.9765e-12, 1.8194e-10, 2.4455e-12,\n",
       "            1.0348e-11, 4.5767e-12, 3.2326e-12, 6.6396e-10, 1.3546e-13, 3.6739e-12,\n",
       "            3.9220e-12, 5.3925e-12, 8.1746e-11, 9.0684e-12, 2.9934e-11, 3.5726e-12,\n",
       "            6.1568e-12, 1.0779e-11, 5.2630e-12, 7.9012e-12, 7.6133e-12, 1.1054e-11,\n",
       "            1.0064e-12, 4.7635e-12, 1.5395e-10, 8.9912e-12, 7.7381e-11, 9.9853e-13,\n",
       "            6.6917e-11, 2.6103e-12, 7.5435e-11, 4.5583e-12, 4.9098e-12, 3.4356e-12,\n",
       "            1.2198e-11, 1.5696e-12, 2.3302e-10, 4.7527e-13, 1.3889e-09, 1.8114e-10,\n",
       "            1.7636e-12, 1.8104e-12, 1.0530e-12, 9.9860e-13, 6.5497e-12, 8.1655e-11,\n",
       "            1.4482e-11, 4.3263e-11], device='cuda:0')},\n",
       "   16: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 6.3514e-09,  3.8127e-09,  2.1205e-08,  ...,  1.6424e-08,\n",
       "              5.2864e-08, -5.4501e-08],\n",
       "            [ 5.6052e-45,  5.6052e-45, -5.6052e-45,  ..., -5.6052e-45,\n",
       "             -5.6052e-45,  5.6052e-45],\n",
       "            [ 2.7662e-23, -1.7987e-23, -7.5672e-24,  ..., -1.5707e-23,\n",
       "             -4.8099e-23,  3.0122e-23],\n",
       "            ...,\n",
       "            [-1.1233e-07,  2.0834e-09, -2.5163e-07,  ..., -1.8148e-07,\n",
       "             -6.8778e-07,  7.4878e-07],\n",
       "            [ 3.3334e-07, -1.9663e-07,  5.3573e-07,  ...,  4.9842e-07,\n",
       "              7.5857e-07, -5.9452e-07],\n",
       "            [-3.5196e-08,  1.2295e-08, -3.9207e-09,  ..., -2.7760e-08,\n",
       "              1.1055e-07, -4.5423e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.3303e-13, 7.4370e-14, 1.7737e-13,  ..., 1.2386e-13, 2.3083e-13,\n",
       "             9.2997e-14],\n",
       "            [2.1671e-20, 4.7620e-20, 2.1931e-20,  ..., 3.2632e-20, 4.5612e-20,\n",
       "             1.2547e-20],\n",
       "            [2.9286e-15, 1.4373e-15, 1.5438e-15,  ..., 1.1080e-15, 1.2464e-15,\n",
       "             6.3976e-16],\n",
       "            ...,\n",
       "            [1.0671e-12, 5.1686e-13, 5.4892e-13,  ..., 3.6366e-13, 3.6784e-12,\n",
       "             2.2596e-12],\n",
       "            [4.1879e-12, 8.0599e-13, 2.1216e-12,  ..., 1.1574e-12, 5.6073e-12,\n",
       "             3.9623e-12],\n",
       "            [6.1484e-14, 3.0879e-14, 1.9625e-14,  ..., 1.0758e-14, 1.7421e-13,\n",
       "             8.5903e-14]], device='cuda:0')},\n",
       "   17: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 5.0105e-08, -5.6052e-45, -8.9659e-24,  ...,  1.8744e-07,\n",
       "              3.4920e-07,  6.1025e-08],\n",
       "            [-5.6052e-45, -5.6052e-45,  0.0000e+00,  ..., -5.6052e-45,\n",
       "             -5.6052e-45,  0.0000e+00],\n",
       "            [-1.4211e-22,  0.0000e+00, -1.4844e-24,  ..., -2.3894e-22,\n",
       "             -2.9936e-22, -1.7130e-22],\n",
       "            ...,\n",
       "            [-2.0731e-06,  5.6052e-45,  9.4632e-23,  ..., -1.9433e-06,\n",
       "             -3.5059e-06, -2.3117e-06],\n",
       "            [-8.3264e-07, -5.6052e-45, -2.3805e-22,  ...,  6.4525e-07,\n",
       "              1.5838e-06,  3.1554e-06],\n",
       "            [ 2.3312e-07,  0.0000e+00, -3.2294e-23,  ...,  2.5181e-07,\n",
       "              6.2499e-07,  1.4676e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.9903e-12, 1.8459e-19, 1.8798e-14,  ..., 4.0197e-12, 1.7547e-11,\n",
       "             1.4318e-13],\n",
       "            [4.6659e-20, 2.0070e-22, 0.0000e+00,  ..., 1.1877e-20, 1.7295e-20,\n",
       "             0.0000e+00],\n",
       "            [6.0278e-14, 0.0000e+00, 5.2295e-15,  ..., 9.4271e-14, 2.1713e-13,\n",
       "             1.4402e-15],\n",
       "            ...,\n",
       "            [6.3315e-11, 8.7044e-21, 2.6978e-14,  ..., 1.4018e-10, 2.7040e-10,\n",
       "             2.6724e-11],\n",
       "            [1.4439e-10, 7.5302e-20, 3.3179e-14,  ..., 2.1984e-10, 6.5019e-10,\n",
       "             2.2161e-11],\n",
       "            [4.7948e-12, 0.0000e+00, 6.4750e-16,  ..., 9.9183e-13, 3.2327e-12,\n",
       "             7.2255e-13]], device='cuda:0')},\n",
       "   18: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.7917e-07,  1.2152e-08,  1.5995e-07,  ...,  1.2915e-07,\n",
       "              6.8455e-07, -5.8632e-08],\n",
       "            [-1.4728e-08, -1.1934e-08,  7.2796e-08,  ...,  1.5641e-08,\n",
       "              2.4965e-07, -1.1660e-07],\n",
       "            [-1.0683e-07,  4.1495e-08,  1.8492e-07,  ...,  1.3323e-07,\n",
       "              4.1788e-07, -7.9379e-08],\n",
       "            ...,\n",
       "            [-5.6563e-07, -4.0725e-07, -9.1440e-07,  ..., -6.5674e-07,\n",
       "             -2.2540e-06,  2.2146e-06],\n",
       "            [-3.4332e-07,  2.2779e-07,  2.3382e-07,  ...,  3.3615e-07,\n",
       "              5.5651e-07, -3.7910e-08],\n",
       "            [ 2.3598e-07, -1.8711e-07,  9.0546e-07,  ...,  1.0073e-06,\n",
       "              8.5978e-07, -9.3015e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.9657e-12, 1.7968e-12, 1.6507e-12,  ..., 1.0838e-12, 6.7935e-12,\n",
       "             1.4949e-12],\n",
       "            [4.1881e-13, 6.5070e-14, 8.4602e-14,  ..., 1.2006e-13, 3.6569e-13,\n",
       "             2.4699e-13],\n",
       "            [1.3539e-12, 3.9779e-13, 5.0228e-13,  ..., 3.6544e-13, 2.4535e-12,\n",
       "             1.1377e-12],\n",
       "            ...,\n",
       "            [1.4066e-11, 4.3764e-12, 7.0718e-12,  ..., 4.7246e-12, 2.2890e-11,\n",
       "             1.5474e-11],\n",
       "            [3.8450e-12, 5.2047e-13, 1.4819e-12,  ..., 8.4082e-13, 4.1658e-12,\n",
       "             2.0349e-12],\n",
       "            [2.6682e-12, 1.3075e-12, 1.4540e-12,  ..., 1.2745e-12, 2.0186e-11,\n",
       "             1.4887e-11]], device='cuda:0')},\n",
       "   19: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 3.4728e-06, -5.6052e-45, -1.3035e-24,  ..., -6.6109e-07,\n",
       "              7.7070e-07,  4.2274e-07],\n",
       "            [ 2.4804e-07,  5.6052e-45, -2.4928e-23,  ...,  4.5425e-07,\n",
       "              8.1763e-07,  6.9367e-07],\n",
       "            [ 3.1414e-06, -5.6052e-45, -5.0595e-23,  ..., -4.7863e-07,\n",
       "              5.3133e-07, -6.1743e-08],\n",
       "            ...,\n",
       "            [-1.1706e-05,  5.6052e-45,  2.6599e-22,  ..., -9.3980e-06,\n",
       "             -2.3113e-05, -1.7716e-06],\n",
       "            [ 1.4928e-06,  5.6052e-45,  4.5727e-24,  ...,  4.0466e-07,\n",
       "              1.0489e-06,  1.5537e-07],\n",
       "            [-1.4627e-06, -5.6052e-45,  2.6175e-22,  ..., -4.6199e-07,\n",
       "             -3.2257e-06,  2.1215e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.4976e-10, 1.3898e-18, 2.2536e-14,  ..., 1.6081e-11, 1.4804e-10,\n",
       "             1.2173e-11],\n",
       "            [6.7358e-12, 2.6690e-20, 1.3433e-15,  ..., 7.2318e-12, 1.8676e-11,\n",
       "             1.0590e-12],\n",
       "            [5.7949e-11, 3.7702e-20, 7.1676e-15,  ..., 3.8385e-11, 1.0737e-10,\n",
       "             9.1052e-12],\n",
       "            ...,\n",
       "            [3.9117e-10, 1.3392e-18, 3.5668e-13,  ..., 9.7659e-10, 2.1702e-09,\n",
       "             1.1506e-10],\n",
       "            [6.0209e-11, 2.9571e-20, 5.0028e-14,  ..., 4.5360e-11, 1.6375e-10,\n",
       "             1.5502e-11],\n",
       "            [1.4260e-10, 2.2374e-20, 1.3612e-14,  ..., 2.2283e-10, 4.9989e-10,\n",
       "             1.4001e-10]], device='cuda:0')},\n",
       "   20: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 4.2845e-08, -3.5003e-09,  2.9733e-08,  ...,  1.4895e-08,\n",
       "              5.0758e-08, -7.9514e-08],\n",
       "            [ 1.0843e-07, -4.3825e-07,  5.2129e-07,  ...,  6.3298e-07,\n",
       "             -3.0430e-07,  4.8915e-07],\n",
       "            [-1.3464e-07, -4.1384e-07,  2.9764e-07,  ...,  5.2496e-07,\n",
       "             -1.8129e-06,  1.6594e-06],\n",
       "            ...,\n",
       "            [ 1.7146e-07, -1.3734e-08,  4.8124e-07,  ...,  3.7320e-07,\n",
       "              8.2319e-07, -7.4996e-07],\n",
       "            [ 6.7702e-07, -2.8630e-07, -1.0321e-07,  ..., -3.5222e-07,\n",
       "             -7.6067e-07, -1.2600e-07],\n",
       "            [-1.2195e-07,  2.5604e-07, -1.4136e-06,  ..., -1.7482e-06,\n",
       "             -2.4285e-06,  7.4760e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[6.4168e-12, 7.2066e-13, 5.1083e-13,  ..., 2.6282e-12, 1.3919e-12,\n",
       "             1.0655e-12],\n",
       "            [1.6155e-11, 1.3808e-12, 1.3417e-12,  ..., 6.1037e-12, 4.2728e-12,\n",
       "             1.4820e-12],\n",
       "            [9.6558e-12, 4.8855e-12, 4.5338e-12,  ..., 5.3055e-12, 2.8883e-11,\n",
       "             1.9559e-11],\n",
       "            ...,\n",
       "            [9.3005e-11, 1.1934e-11, 2.9534e-11,  ..., 2.9576e-11, 4.8325e-11,\n",
       "             1.8704e-11],\n",
       "            [1.3548e-11, 3.7933e-12, 2.9620e-12,  ..., 6.4362e-12, 2.1866e-11,\n",
       "             1.1028e-11],\n",
       "            [1.5329e-11, 3.8856e-12, 6.5516e-12,  ..., 6.7015e-12, 9.3169e-11,\n",
       "             6.1215e-11]], device='cuda:0')},\n",
       "   21: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.5528e-08,  5.6052e-45, -1.6350e-25,  ...,  9.5508e-08,\n",
       "              5.0785e-07, -2.3621e-07],\n",
       "            [ 9.7825e-09,  5.6052e-45, -7.6504e-26,  ...,  2.1803e-08,\n",
       "              6.1475e-07, -5.9153e-07],\n",
       "            [ 2.1620e-08, -5.6052e-45, -7.3954e-24,  ..., -5.7449e-06,\n",
       "             -1.5222e-06, -7.0690e-06],\n",
       "            ...,\n",
       "            [ 6.7558e-08, -5.6052e-45, -6.1743e-23,  ..., -1.8414e-06,\n",
       "              1.5274e-06,  1.3135e-06],\n",
       "            [-2.3659e-08, -5.6052e-45,  1.8337e-23,  ..., -4.5161e-06,\n",
       "              2.6275e-06, -2.9307e-06],\n",
       "            [-1.6323e-09,  5.6052e-45, -3.6608e-23,  ..., -2.4647e-07,\n",
       "              5.7894e-06, -9.1195e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[8.9915e-13, 2.2511e-18, 6.8145e-16,  ..., 3.6232e-11, 6.9919e-11,\n",
       "             6.0485e-12],\n",
       "            [1.7440e-12, 6.5282e-19, 3.1054e-16,  ..., 1.0779e-11, 1.4667e-11,\n",
       "             3.0532e-12],\n",
       "            [2.0336e-12, 2.4250e-20, 2.0477e-15,  ..., 2.0439e-10, 2.7242e-10,\n",
       "             5.2994e-11],\n",
       "            ...,\n",
       "            [3.6685e-11, 1.2331e-17, 1.0396e-13,  ..., 4.3762e-10, 7.3522e-10,\n",
       "             5.7559e-11],\n",
       "            [1.8419e-12, 2.2771e-19, 1.4817e-14,  ..., 6.9094e-10, 1.6060e-10,\n",
       "             9.1275e-11],\n",
       "            [6.5093e-13, 3.3200e-19, 2.0056e-15,  ..., 4.5106e-10, 4.5067e-10,\n",
       "             2.9615e-10]], device='cuda:0')},\n",
       "   22: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-3.6546e-07,  1.1351e-07, -6.1246e-07, -6.8603e-07,  1.0540e-06,\n",
       "            -7.3674e-07,  3.5114e-07,  2.7201e-07,  8.7339e-07,  1.0948e-07,\n",
       "             2.8740e-07,  1.2111e-07,  1.9804e-07, -2.0582e-07, -4.7094e-07,\n",
       "             8.4193e-07,  8.8798e-07,  3.6715e-07,  1.8687e-06,  1.2875e-06,\n",
       "             6.0848e-07,  1.2010e-07, -7.9938e-07,  3.4401e-07,  2.5048e-07,\n",
       "             4.8526e-07,  6.7170e-08,  4.9188e-07,  7.3670e-09, -8.7317e-07,\n",
       "             1.0049e-07,  5.5732e-07,  6.5971e-07,  7.0174e-08,  2.9957e-07,\n",
       "            -2.1341e-07, -7.8581e-07, -1.5078e-07, -8.3500e-07,  2.2616e-07,\n",
       "            -5.7058e-09, -9.4674e-09,  1.8219e-06,  2.9001e-06,  5.0785e-08,\n",
       "             7.7686e-08, -2.8101e-07, -5.2524e-09,  5.0653e-07, -9.5569e-07,\n",
       "            -6.0888e-07, -3.9883e-06,  4.3185e-07, -5.2506e-07,  4.9201e-07,\n",
       "            -8.9467e-07,  2.4792e-07,  7.4650e-07, -1.9242e-06,  2.7740e-06,\n",
       "            -3.7356e-07,  1.3755e-06, -6.6914e-08, -2.7894e-07,  7.8624e-06,\n",
       "             1.6816e-06,  4.2174e-07, -1.2784e-07, -9.8077e-08, -1.2924e-07,\n",
       "             1.9091e-07, -3.9204e-07,  1.2717e-07,  2.3578e-06,  1.0944e-06,\n",
       "            -1.0919e-07, -2.7923e-06,  9.0290e-07,  2.3202e-06, -3.8304e-07,\n",
       "            -3.6620e-07,  2.3239e-06, -2.8175e-07,  1.6755e-07,  3.7345e-07,\n",
       "             3.9173e-07,  5.0968e-07,  5.9076e-07, -2.4400e-07,  6.1424e-08,\n",
       "             6.0382e-08,  7.7825e-07, -1.0763e-07, -2.8027e-07, -5.2011e-07,\n",
       "            -2.7326e-07,  2.0154e-07,  1.2155e-07, -5.1213e-07,  6.5766e-07,\n",
       "            -1.9865e-07,  2.9880e-07, -3.8373e-08,  3.2357e-08,  1.7085e-06,\n",
       "             8.1125e-07,  1.3837e-06,  3.7255e-07,  8.0929e-07, -1.5756e-07,\n",
       "             2.3215e-07,  1.5816e-07, -7.7865e-06,  1.5226e-07,  1.7034e-06,\n",
       "             3.9498e-07, -2.6821e-06, -9.5493e-07,  2.5818e-06, -7.3365e-07,\n",
       "             3.9025e-07,  5.4286e-07,  2.8936e-07,  3.5374e-07, -1.9187e-07,\n",
       "            -3.7537e-07,  9.1876e-07, -9.1611e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([7.6933e-11, 5.2476e-12, 1.0531e-11, 1.1368e-11, 9.7925e-11, 1.6449e-11,\n",
       "            1.5170e-11, 4.9850e-12, 1.2675e-11, 2.7500e-12, 4.6128e-11, 9.6489e-12,\n",
       "            4.7740e-12, 1.4486e-12, 7.1616e-12, 2.4401e-11, 1.5346e-11, 2.8685e-12,\n",
       "            6.0077e-11, 1.8397e-11, 4.7763e-10, 1.0508e-11, 3.2310e-10, 3.0195e-12,\n",
       "            1.4617e-11, 2.3403e-11, 6.2841e-12, 4.7189e-12, 4.0509e-12, 9.7484e-10,\n",
       "            1.9569e-12, 6.8729e-11, 2.3304e-11, 2.5796e-11, 3.2064e-12, 7.7286e-12,\n",
       "            1.2591e-11, 9.0065e-12, 9.4119e-12, 3.1494e-12, 5.0161e-12, 4.8860e-12,\n",
       "            3.9824e-11, 3.1572e-11, 9.8069e-12, 2.7105e-12, 3.2435e-11, 1.3393e-11,\n",
       "            7.5337e-12, 2.3770e-11, 5.1151e-12, 9.6714e-11, 6.8580e-11, 9.1303e-12,\n",
       "            2.0806e-11, 1.0612e-11, 6.3726e-12, 5.2889e-12, 1.9322e-11, 1.5297e-11,\n",
       "            2.3478e-11, 1.6500e-11, 1.4098e-11, 1.2668e-11, 4.2503e-10, 3.1072e-11,\n",
       "            1.1507e-11, 3.4094e-12, 2.1430e-11, 4.8705e-12, 1.6083e-11, 7.7395e-12,\n",
       "            1.4334e-12, 2.3090e-11, 1.7628e-10, 2.8683e-12, 3.1697e-11, 9.1906e-12,\n",
       "            1.1705e-10, 7.5207e-12, 6.4665e-12, 5.2927e-11, 4.5696e-11, 1.2762e-11,\n",
       "            6.5691e-12, 2.3018e-12, 2.2448e-11, 1.1532e-11, 8.8446e-12, 3.0925e-12,\n",
       "            1.2422e-11, 1.6288e-11, 1.0005e-11, 9.6246e-12, 1.6282e-11, 8.4024e-12,\n",
       "            2.1256e-11, 1.8693e-12, 4.6258e-12, 3.8071e-11, 3.2825e-12, 1.2164e-11,\n",
       "            1.0843e-12, 1.6143e-12, 4.1939e-11, 1.6602e-11, 1.6811e-10, 4.2556e-12,\n",
       "            9.4561e-12, 2.0503e-12, 3.6460e-11, 8.2325e-12, 4.1280e-09, 6.9565e-12,\n",
       "            1.5239e-11, 7.4386e-12, 2.5559e-11, 1.3007e-10, 6.4556e-10, 4.5089e-12,\n",
       "            5.2473e-12, 2.2678e-12, 1.1799e-10, 4.0391e-12, 1.7140e-11, 3.8209e-12,\n",
       "            3.9960e-11, 5.0417e-12], device='cuda:0')},\n",
       "   23: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.7422e-08, -2.0940e-08, -1.1865e-07,  ..., -1.6167e-08,\n",
       "             -3.2930e-08, -8.1635e-08],\n",
       "            [-5.3257e-08, -1.9632e-08, -1.8167e-08,  ...,  9.6012e-09,\n",
       "             -8.3756e-09, -2.4085e-08],\n",
       "            [ 3.5311e-11, -1.9856e-08, -6.5375e-08,  ..., -4.4232e-09,\n",
       "              5.5763e-09, -6.1963e-08],\n",
       "            ...,\n",
       "            [ 5.9590e-07,  7.9677e-08,  1.7644e-07,  ...,  1.7702e-07,\n",
       "              3.2099e-07,  3.2766e-08],\n",
       "            [-2.9009e-07, -2.7405e-08, -9.8267e-08,  ..., -4.0928e-08,\n",
       "             -5.2636e-08, -9.9091e-08],\n",
       "            [ 1.9672e-07, -1.3969e-08, -3.4581e-09,  ...,  2.2836e-08,\n",
       "              4.7806e-08,  2.6459e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[8.4176e-13, 2.0029e-13, 1.8162e-12,  ..., 5.7817e-14, 4.0793e-14,\n",
       "             1.2285e-12],\n",
       "            [5.7939e-13, 3.1524e-15, 2.5854e-13,  ..., 1.4480e-14, 1.1887e-15,\n",
       "             9.9832e-14],\n",
       "            [1.8191e-13, 1.3827e-14, 9.7758e-14,  ..., 4.2940e-14, 5.6322e-14,\n",
       "             7.4820e-14],\n",
       "            ...,\n",
       "            [8.6545e-12, 2.3519e-14, 1.9803e-13,  ..., 2.7548e-12, 2.9315e-12,\n",
       "             1.6124e-13],\n",
       "            [1.1565e-12, 6.3437e-14, 9.5097e-13,  ..., 8.7301e-14, 5.6471e-14,\n",
       "             3.8283e-13],\n",
       "            [9.7253e-13, 1.4373e-14, 1.0473e-13,  ..., 7.9470e-13, 1.0036e-14,\n",
       "             8.1187e-14]], device='cuda:0')},\n",
       "   24: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-6.2721e-07,  6.5437e-08, -1.0267e-07,  ...,  1.5473e-07,\n",
       "             -1.7023e-07,  1.0990e-07],\n",
       "            [-5.6840e-08, -8.5821e-09,  1.2536e-08,  ..., -2.8659e-08,\n",
       "              2.2030e-08,  7.5909e-08],\n",
       "            [-3.3677e-07, -3.8596e-08, -6.5142e-08,  ...,  2.2855e-08,\n",
       "             -3.6511e-08, -1.1692e-09],\n",
       "            ...,\n",
       "            [ 6.0552e-07,  1.7142e-07, -1.3629e-07,  ...,  4.5932e-07,\n",
       "              9.5928e-08, -4.6061e-08],\n",
       "            [-5.3607e-07, -1.5186e-07,  1.0979e-07,  ..., -2.9752e-07,\n",
       "             -8.8408e-08, -2.1719e-07],\n",
       "            [ 1.0981e-07, -7.0095e-08,  6.4358e-08,  ...,  2.5896e-07,\n",
       "              3.0070e-07,  1.1029e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0922e-11, 2.7654e-13, 3.0449e-13,  ..., 9.3390e-13, 1.0389e-12,\n",
       "             7.1418e-13],\n",
       "            [9.1487e-13, 2.6694e-13, 3.7738e-14,  ..., 1.8513e-13, 2.2293e-13,\n",
       "             7.0856e-14],\n",
       "            [6.5581e-13, 4.6495e-14, 7.2488e-14,  ..., 1.4752e-13, 1.3603e-13,\n",
       "             9.8647e-14],\n",
       "            ...,\n",
       "            [3.8291e-12, 2.5601e-13, 7.4439e-13,  ..., 6.7745e-12, 4.5721e-12,\n",
       "             4.7620e-13],\n",
       "            [2.6330e-12, 1.6310e-13, 1.5744e-13,  ..., 6.4126e-13, 1.3183e-12,\n",
       "             3.6073e-13],\n",
       "            [1.8640e-12, 9.7601e-13, 1.1245e-12,  ..., 2.6525e-12, 2.9194e-13,\n",
       "             6.2968e-12]], device='cuda:0')},\n",
       "   25: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 7.4089e-07,  1.0363e-08,  3.0529e-07,  ...,  6.4540e-08,\n",
       "              3.2498e-07,  2.6457e-08],\n",
       "            [-1.8599e-07, -2.0485e-08,  4.0827e-08,  ...,  7.7507e-08,\n",
       "             -4.5389e-08, -2.2733e-08],\n",
       "            [ 5.0778e-07,  9.1603e-08,  4.0571e-07,  ...,  2.1252e-07,\n",
       "              3.7893e-07,  2.4127e-07],\n",
       "            ...,\n",
       "            [ 4.8444e-07,  4.5350e-08, -2.6762e-08,  ...,  2.8898e-07,\n",
       "             -1.1999e-07,  4.2183e-08],\n",
       "            [-1.0865e-06,  2.2894e-09, -2.0355e-08,  ..., -6.6331e-07,\n",
       "              2.6900e-08, -1.3302e-08],\n",
       "            [ 1.1301e-06,  3.2311e-07,  3.3403e-07,  ...,  7.5926e-08,\n",
       "             -4.1178e-08,  6.0570e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[6.5471e-11, 6.5649e-12, 5.3412e-11,  ..., 4.7225e-12, 4.2099e-12,\n",
       "             5.9292e-11],\n",
       "            [1.0607e-11, 1.0200e-13, 9.0661e-13,  ..., 3.0555e-12, 4.4059e-12,\n",
       "             8.2966e-13],\n",
       "            [1.9964e-11, 5.2187e-13, 5.1045e-12,  ..., 5.6311e-12, 7.1601e-12,\n",
       "             4.4479e-12],\n",
       "            ...,\n",
       "            [4.9301e-12, 7.9613e-14, 2.0041e-12,  ..., 5.5312e-13, 6.0859e-13,\n",
       "             7.8518e-13],\n",
       "            [8.5953e-11, 1.1918e-12, 1.0852e-11,  ..., 1.2685e-11, 9.6647e-12,\n",
       "             1.7503e-12],\n",
       "            [7.9467e-12, 2.4364e-13, 1.4821e-12,  ..., 1.9400e-12, 7.5897e-13,\n",
       "             1.2073e-12]], device='cuda:0')},\n",
       "   26: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 5.9814e-07,  1.9947e-07, -6.4238e-08,  ..., -1.1051e-07,\n",
       "             -8.8332e-08, -8.3077e-07],\n",
       "            [ 4.6377e-08, -1.0894e-07, -1.6671e-07,  ..., -4.0343e-07,\n",
       "              2.5389e-07,  2.7982e-07],\n",
       "            [ 1.7966e-06, -5.3939e-08,  3.1269e-07,  ...,  2.1607e-07,\n",
       "              4.6360e-07, -4.1746e-07],\n",
       "            ...,\n",
       "            [ 3.4966e-07,  1.4082e-07, -4.3671e-07,  ...,  8.3471e-07,\n",
       "              2.3422e-07,  2.4306e-06],\n",
       "            [-1.0115e-06, -2.4858e-07,  1.4703e-07,  ..., -2.0236e-06,\n",
       "             -6.7444e-07, -4.8871e-06],\n",
       "            [ 1.2152e-06,  7.2075e-07, -8.1543e-07,  ...,  9.5387e-07,\n",
       "             -2.7965e-07,  1.1716e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.2173e-10, 1.5315e-11, 1.0186e-11,  ..., 5.0946e-11, 4.1740e-11,\n",
       "             2.1519e-11],\n",
       "            [7.8078e-12, 3.7177e-13, 1.2016e-12,  ..., 5.7802e-12, 5.0534e-12,\n",
       "             5.5164e-13],\n",
       "            [3.3078e-11, 1.1028e-12, 2.3882e-12,  ..., 1.6343e-11, 1.1582e-11,\n",
       "             1.8608e-12],\n",
       "            ...,\n",
       "            [6.9696e-12, 1.0556e-12, 4.3661e-13,  ..., 2.8747e-12, 3.1535e-12,\n",
       "             1.8915e-12],\n",
       "            [2.2617e-11, 1.2775e-12, 3.7482e-12,  ..., 2.6595e-11, 2.5824e-11,\n",
       "             1.4674e-11],\n",
       "            [9.7938e-12, 6.8537e-13, 5.8318e-13,  ..., 3.2238e-12, 4.2430e-12,\n",
       "             8.0998e-12]], device='cuda:0')},\n",
       "   27: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.5753e-07,  7.2275e-08,  2.3891e-08,  ..., -2.7300e-07,\n",
       "             -8.6559e-08, -1.2565e-07],\n",
       "            [ 3.6641e-06,  3.5006e-07,  1.1588e-06,  ...,  1.5094e-06,\n",
       "              6.5654e-07,  8.7336e-07],\n",
       "            [ 6.9427e-08,  1.4062e-07,  4.8311e-07,  ..., -6.3655e-08,\n",
       "             -1.2669e-07,  2.9312e-07],\n",
       "            ...,\n",
       "            [-3.2566e-07,  1.5846e-07,  3.9368e-07,  ..., -4.6434e-08,\n",
       "             -1.7661e-07,  1.9759e-07],\n",
       "            [ 9.4685e-08,  7.6239e-08,  4.6975e-08,  ...,  4.6649e-08,\n",
       "              8.1491e-08,  4.5348e-08],\n",
       "            [ 1.3176e-06,  5.6070e-07,  1.1872e-06,  ...,  8.0040e-09,\n",
       "             -1.1681e-08,  1.1368e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.4057e-10, 3.3137e-12, 2.1360e-11,  ..., 2.0604e-11, 2.3348e-12,\n",
       "             2.6192e-11],\n",
       "            [8.5070e-11, 2.0120e-12, 2.9415e-11,  ..., 5.6540e-12, 3.3478e-12,\n",
       "             2.0000e-11],\n",
       "            [5.0472e-11, 2.8927e-12, 2.8992e-11,  ..., 4.4048e-12, 1.1324e-12,\n",
       "             1.9334e-11],\n",
       "            ...,\n",
       "            [1.3882e-10, 1.2453e-12, 6.3160e-11,  ..., 1.9399e-12, 3.2629e-12,\n",
       "             2.3335e-11],\n",
       "            [1.1158e-10, 3.1949e-12, 1.9611e-11,  ..., 1.1200e-11, 9.4793e-13,\n",
       "             4.9561e-12],\n",
       "            [7.5389e-11, 1.0573e-11, 6.9215e-11,  ..., 5.5007e-12, 1.0240e-12,\n",
       "             6.0846e-11]], device='cuda:0')},\n",
       "   28: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-3.1301e-07, -1.0574e-07,  2.6602e-08,  ..., -2.5456e-08,\n",
       "             -7.2140e-07,  1.0274e-06],\n",
       "            [ 4.8339e-06,  8.3367e-07, -2.3735e-07,  ...,  2.8768e-06,\n",
       "              2.3555e-07,  2.3006e-06],\n",
       "            [ 1.3920e-06,  2.9060e-08,  1.0171e-07,  ...,  7.1480e-09,\n",
       "             -1.9156e-07, -2.8985e-09],\n",
       "            ...,\n",
       "            [ 2.7449e-07, -9.0804e-08,  6.6156e-08,  ..., -6.9802e-08,\n",
       "              3.7014e-08, -9.5979e-08],\n",
       "            [ 3.2335e-07,  2.4783e-08,  5.2449e-08,  ...,  3.0969e-07,\n",
       "              7.2168e-08, -4.6767e-08],\n",
       "            [ 4.3456e-06,  3.5520e-07, -2.4830e-07,  ...,  1.0901e-06,\n",
       "             -1.7856e-07, -1.1413e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.7434e-11, 4.3990e-12, 5.3307e-13,  ..., 2.2854e-11, 7.0280e-12,\n",
       "             5.9821e-12],\n",
       "            [6.4072e-11, 3.9073e-12, 8.9778e-13,  ..., 5.9152e-12, 6.1266e-12,\n",
       "             1.7667e-12],\n",
       "            [6.0580e-11, 4.6966e-12, 7.5583e-13,  ..., 1.3598e-12, 4.4286e-12,\n",
       "             4.0820e-13],\n",
       "            ...,\n",
       "            [6.4149e-11, 5.3797e-12, 6.6885e-13,  ..., 1.0947e-12, 5.4934e-12,\n",
       "             8.5466e-13],\n",
       "            [1.4116e-11, 5.3506e-13, 3.0513e-13,  ..., 6.6705e-13, 2.7073e-12,\n",
       "             2.5330e-13],\n",
       "            [2.0406e-10, 7.0740e-12, 3.1878e-12,  ..., 2.9034e-12, 3.5934e-12,\n",
       "             1.5642e-12]], device='cuda:0')},\n",
       "   29: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.0905e-07, -2.3639e-09,  1.0537e-08, -2.4264e-08, -1.5438e-07,\n",
       "            -1.5087e-08,  3.7578e-08, -2.0570e-08, -5.1295e-08, -2.8366e-08,\n",
       "            -2.2184e-07,  3.3274e-08,  5.9136e-08, -4.3907e-09,  2.1953e-08,\n",
       "             1.1834e-08, -5.6173e-08, -6.1205e-08, -2.7938e-08, -1.5597e-07,\n",
       "            -1.9345e-07, -2.0819e-07, -4.0091e-08,  1.4759e-08, -2.3906e-08,\n",
       "            -2.7278e-08,  2.1594e-08,  3.3668e-09, -1.5492e-08, -2.7657e-07,\n",
       "             1.6119e-08,  2.8043e-07,  6.1456e-08, -8.5495e-08,  6.3599e-08,\n",
       "             2.4836e-09, -2.5336e-08,  1.9328e-08,  1.9020e-09,  2.7256e-08,\n",
       "            -2.7313e-08, -5.9515e-08, -3.0543e-08,  7.9750e-08, -3.4532e-09,\n",
       "            -1.7767e-07,  6.4163e-08, -4.2217e-09, -4.1310e-08,  6.6253e-08,\n",
       "             5.6960e-09,  3.7995e-08, -1.6657e-07, -7.1223e-08,  3.3255e-08,\n",
       "            -3.1977e-08,  5.4240e-08,  6.2268e-09, -1.9995e-08, -1.0491e-07,\n",
       "            -6.4948e-08, -7.5262e-08,  1.4978e-07,  3.9315e-09,  2.7908e-07,\n",
       "             1.7351e-08,  1.3970e-08,  5.9032e-08,  2.5428e-09,  1.0242e-08,\n",
       "            -1.7080e-08,  7.9890e-09, -2.5779e-08,  1.3029e-08,  2.3338e-08,\n",
       "             6.8376e-09,  1.6850e-07,  3.0945e-08, -4.0436e-08, -1.9578e-07,\n",
       "            -8.5132e-09,  5.3279e-08, -1.6182e-07,  3.9423e-08,  8.3283e-08,\n",
       "             7.1230e-09,  1.3963e-08, -1.1087e-07,  3.3097e-08, -1.3456e-07,\n",
       "             5.3509e-08, -2.1268e-08, -2.8530e-07, -2.7083e-08, -4.5494e-08,\n",
       "            -8.6210e-08,  8.0141e-08, -7.1313e-09, -2.0825e-08, -1.0574e-08,\n",
       "             2.2512e-08,  3.1154e-08,  2.5911e-08, -7.2224e-08, -5.5484e-09,\n",
       "             2.7435e-09,  4.6520e-07, -2.4206e-08, -5.5140e-08, -3.6633e-08,\n",
       "            -2.7921e-08,  3.2812e-08,  1.1160e-07, -5.7615e-08,  3.1630e-08,\n",
       "            -2.7829e-08, -1.5230e-07,  1.4237e-07, -3.3097e-08,  8.9684e-08,\n",
       "            -9.5436e-08,  3.1542e-08,  6.4138e-08,  5.4031e-08, -5.3003e-08,\n",
       "             3.3191e-08, -2.6022e-07,  4.8905e-08], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([5.6552e-14, 4.9912e-14, 8.2297e-14, 1.3483e-13, 1.4844e-13, 2.3219e-14,\n",
       "            1.9259e-14, 1.5154e-13, 6.9224e-14, 4.2686e-14, 7.0556e-13, 3.9842e-14,\n",
       "            3.5832e-14, 8.6441e-14, 3.5168e-14, 1.2642e-13, 1.2460e-14, 3.7249e-13,\n",
       "            1.3018e-13, 2.8579e-13, 7.0544e-12, 6.8821e-13, 2.0930e-13, 1.3950e-14,\n",
       "            1.6083e-14, 4.2780e-14, 4.6578e-14, 7.5646e-14, 7.0745e-14, 2.1892e-12,\n",
       "            3.0660e-14, 3.7587e-13, 1.2434e-13, 5.3543e-14, 1.8469e-13, 5.5922e-14,\n",
       "            1.2771e-13, 6.0515e-14, 1.8006e-14, 7.3179e-14, 4.2444e-14, 2.8204e-14,\n",
       "            6.9690e-14, 1.5736e-14, 2.0147e-14, 2.6077e-14, 1.7963e-14, 1.5312e-14,\n",
       "            4.5606e-14, 2.5565e-14, 2.5219e-14, 1.8894e-14, 1.1453e-12, 4.0803e-14,\n",
       "            1.0958e-13, 8.6553e-15, 1.3574e-13, 1.5755e-14, 2.1253e-14, 3.6701e-14,\n",
       "            3.7999e-14, 2.1891e-14, 2.7075e-14, 3.5744e-14, 1.7118e-13, 7.0588e-14,\n",
       "            3.9426e-14, 1.7249e-14, 2.8362e-14, 2.2303e-14, 1.2630e-14, 9.2816e-14,\n",
       "            1.0528e-13, 8.0837e-14, 5.3506e-15, 3.2735e-14, 1.7678e-12, 1.4336e-14,\n",
       "            2.8260e-14, 9.5990e-14, 1.5213e-14, 1.2382e-13, 1.5328e-12, 3.2895e-14,\n",
       "            2.0795e-14, 1.2844e-13, 4.6160e-14, 2.3829e-13, 4.8606e-14, 1.6026e-13,\n",
       "            8.7361e-14, 5.8660e-14, 1.9610e-12, 1.0205e-14, 1.1364e-14, 1.9961e-14,\n",
       "            2.4813e-14, 3.9371e-14, 6.7710e-14, 1.0300e-14, 2.5152e-14, 1.6743e-14,\n",
       "            9.9839e-14, 2.2599e-14, 5.8677e-14, 3.8132e-14, 3.9861e-12, 5.0648e-15,\n",
       "            1.1220e-14, 8.6463e-14, 1.3522e-12, 5.7457e-13, 4.6162e-13, 1.4156e-14,\n",
       "            1.7943e-14, 3.8948e-14, 7.3496e-13, 6.1167e-14, 8.5688e-13, 8.6530e-15,\n",
       "            3.3163e-14, 5.6763e-14, 5.9343e-13, 8.6603e-14, 2.4679e-14, 1.6074e-13,\n",
       "            8.8860e-13, 1.7979e-14], device='cuda:0')},\n",
       "   30: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.5644e-07,  2.5212e-08, -2.1675e-10,  1.9461e-07,  8.5343e-08,\n",
       "             2.9616e-08, -1.3970e-07,  7.6025e-08, -5.3694e-08,  4.3998e-08,\n",
       "            -1.5591e-08, -5.1638e-08, -2.2294e-08,  2.3565e-08,  1.9713e-07,\n",
       "            -5.3351e-10, -1.1089e-07, -7.8696e-08, -1.0771e-07, -7.6155e-08,\n",
       "             4.5680e-08, -7.5150e-08, -1.9834e-08, -1.5266e-08,  4.7319e-08,\n",
       "             2.2504e-08,  2.0878e-08,  6.2852e-09, -1.2707e-08, -8.2129e-08,\n",
       "            -1.6899e-08, -1.7395e-07, -7.6334e-08,  7.8612e-08, -9.3047e-08,\n",
       "             1.5640e-08,  4.5406e-08, -1.3456e-07, -4.2791e-09, -3.4581e-08,\n",
       "             1.3462e-08, -6.6655e-08, -2.4196e-08,  7.3593e-09,  6.0250e-09,\n",
       "             1.5271e-07, -1.3826e-07,  7.1587e-08,  7.4004e-08, -8.9564e-08,\n",
       "            -1.8119e-09, -2.4735e-08,  3.3485e-08,  3.7205e-08, -2.6917e-08,\n",
       "            -7.3116e-08, -7.2091e-08, -8.0131e-08,  4.3341e-08,  5.9564e-08,\n",
       "             1.0335e-07,  1.8045e-08, -9.8093e-09,  1.2046e-08,  9.5205e-08,\n",
       "             4.6979e-09, -2.3848e-08, -8.4468e-08,  3.8720e-10, -1.4929e-07,\n",
       "            -2.0796e-08, -3.4478e-09,  6.4755e-08, -1.4779e-08,  6.0083e-08,\n",
       "            -1.4560e-08,  4.8194e-08,  2.8912e-08,  2.6477e-08, -3.7333e-08,\n",
       "             1.8883e-08, -5.2224e-08,  6.8143e-09, -6.0131e-08, -9.3527e-08,\n",
       "            -8.1388e-09, -3.4556e-08, -4.5599e-08, -4.3715e-08,  2.0055e-10,\n",
       "            -7.9452e-08,  3.1939e-08, -1.0573e-07,  3.4629e-08, -2.9133e-08,\n",
       "             4.6005e-08,  8.9923e-08,  2.2446e-08,  1.6038e-07,  8.3697e-10,\n",
       "            -2.5586e-08, -3.6462e-08, -2.1453e-08,  1.0982e-07,  2.4944e-08,\n",
       "             2.9795e-09,  1.8264e-08,  3.9926e-08, -5.1175e-08,  1.5941e-08,\n",
       "            -1.8900e-08, -7.7399e-09,  8.2126e-08,  9.3533e-08, -1.0304e-07,\n",
       "             5.6172e-08, -5.2814e-08, -2.9358e-09,  1.4145e-08, -1.2613e-07,\n",
       "             4.9221e-08, -5.6286e-08, -3.3167e-08,  2.8342e-08,  4.4366e-08,\n",
       "             6.0318e-08, -3.5930e-08, -2.5009e-08], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.5377e-13, 1.3401e-13, 2.2232e-13, 1.7482e-13, 1.3821e-13, 5.6705e-14,\n",
       "            1.1271e-13, 4.4662e-13, 9.9235e-14, 1.5998e-13, 7.8215e-13, 1.0748e-13,\n",
       "            1.1247e-13, 2.3196e-13, 6.2996e-14, 3.4025e-13, 3.7742e-14, 2.8933e-13,\n",
       "            1.1830e-13, 1.6542e-13, 1.0086e-12, 2.7147e-13, 1.9138e-13, 3.7778e-14,\n",
       "            4.3109e-14, 1.0545e-13, 1.3308e-13, 2.0476e-13, 2.0191e-13, 3.6265e-13,\n",
       "            8.2378e-14, 3.6255e-13, 3.3624e-13, 1.1365e-13, 4.9656e-13, 1.5075e-13,\n",
       "            3.7155e-13, 8.1863e-14, 4.9529e-14, 2.0140e-13, 1.5545e-13, 6.9497e-14,\n",
       "            9.2874e-14, 5.0022e-14, 5.4200e-14, 6.8615e-14, 5.4213e-14, 7.0394e-14,\n",
       "            1.3752e-13, 1.2085e-13, 1.3973e-13, 5.4323e-14, 3.0446e-13, 1.2755e-13,\n",
       "            1.5030e-13, 2.8403e-14, 3.6514e-13, 9.5113e-14, 5.7667e-14, 6.6464e-14,\n",
       "            1.3825e-13, 5.7194e-14, 1.5641e-13, 1.1577e-13, 2.1673e-14, 2.2204e-13,\n",
       "            1.0750e-13, 4.6812e-14, 7.7462e-14, 1.0907e-13, 3.4299e-14, 2.5003e-13,\n",
       "            2.8260e-13, 2.2354e-13, 1.9006e-14, 9.2727e-14, 2.9698e-13, 6.0393e-14,\n",
       "            6.0242e-14, 6.5107e-14, 4.8786e-14, 3.3219e-13, 4.1460e-13, 8.9656e-14,\n",
       "            6.4724e-14, 3.6015e-13, 1.3345e-13, 1.0714e-13, 1.3047e-13, 4.9797e-13,\n",
       "            2.4380e-13, 1.6112e-13, 3.6104e-13, 7.2953e-14, 1.0383e-13, 6.1203e-14,\n",
       "            1.8878e-14, 1.0532e-13, 2.2244e-13, 4.8176e-14, 6.7510e-14, 4.5371e-14,\n",
       "            2.7149e-13, 1.3957e-13, 4.7016e-14, 1.0450e-13, 5.3325e-13, 4.8319e-14,\n",
       "            2.3551e-14, 2.5879e-13, 4.2840e-13, 2.6204e-13, 7.2021e-14, 4.3231e-14,\n",
       "            1.0195e-13, 1.0691e-13, 1.3855e-13, 2.0860e-13, 4.9653e-13, 3.6804e-14,\n",
       "            1.0108e-13, 1.5710e-13, 3.6732e-13, 2.5649e-13, 7.1259e-14, 1.8301e-13,\n",
       "            3.1719e-13, 3.4322e-13], device='cuda:0')},\n",
       "   31: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-5.6445e-07, -1.3265e-06,  5.4596e-07, -5.0047e-07,  8.6621e-07,\n",
       "             1.2086e-07, -7.0630e-07,  6.2380e-08,  1.9921e-06,  1.2286e-07,\n",
       "            -1.2429e-06,  3.1481e-07, -2.2554e-07, -9.4923e-07, -7.8609e-07,\n",
       "             8.2046e-08,  3.9425e-07, -9.6465e-07, -3.5933e-06, -1.3572e-06,\n",
       "            -7.2426e-06, -3.7586e-07, -8.5347e-07,  1.7989e-07, -1.0689e-06,\n",
       "             4.9116e-07, -1.4042e-06, -2.9499e-07, -5.2982e-07,  3.4883e-06,\n",
       "            -3.7883e-07, -1.4706e-06, -5.4803e-07, -4.8463e-08, -3.0054e-07,\n",
       "            -1.1593e-07, -1.1145e-06, -3.4615e-07, -6.3871e-07, -1.6058e-07,\n",
       "             3.7605e-07,  5.6351e-07,  8.0331e-07,  3.0278e-09,  7.7852e-08,\n",
       "            -3.6487e-07, -1.6024e-07, -2.9986e-07,  6.6184e-08, -2.2171e-08,\n",
       "            -3.6675e-07,  4.0683e-07, -2.9734e-06,  7.1759e-07, -2.8361e-09,\n",
       "             2.4626e-06, -7.9664e-07, -7.1834e-07, -7.0477e-07, -3.0028e-07,\n",
       "             7.9948e-07, -1.3171e-07,  1.3860e-06,  1.5971e-07, -3.8837e-07,\n",
       "             9.3720e-07,  4.3862e-07, -7.4732e-07, -5.9756e-08, -1.1123e-06,\n",
       "            -4.5780e-07, -1.1347e-07, -1.5032e-07, -1.3201e-07, -3.1760e-07,\n",
       "            -1.0894e-06, -1.3785e-06,  1.7376e-07, -8.3978e-07,  4.3385e-07,\n",
       "             1.3704e-09, -1.8497e-06, -2.9442e-06,  4.0147e-07, -4.8729e-08,\n",
       "            -6.4267e-07,  3.4938e-07,  8.5292e-07,  3.7951e-07,  1.0545e-06,\n",
       "             9.5480e-07, -1.7320e-06, -4.2312e-06, -2.1312e-07,  2.4519e-07,\n",
       "             1.0615e-06,  2.4488e-07, -6.6989e-07, -2.1593e-07,  3.5269e-07,\n",
       "            -4.4870e-07,  8.1405e-08, -4.8166e-07,  6.8200e-07,  6.1869e-08,\n",
       "            -1.4935e-07, -2.2992e-06, -4.4084e-07,  1.6376e-06, -1.6170e-06,\n",
       "            -3.5497e-06, -9.0493e-07, -1.6758e-06,  3.0162e-07,  5.1379e-07,\n",
       "            -2.6333e-08, -2.8369e-06, -4.6076e-07,  5.4724e-08, -1.2397e-07,\n",
       "            -4.7322e-07, -2.3452e-08, -1.2041e-07,  8.7902e-07, -7.3354e-07,\n",
       "             2.3512e-07, -2.7392e-06,  4.0732e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.6563e-11, 6.4142e-12, 4.9542e-12, 5.6526e-11, 3.2336e-10, 1.3883e-11,\n",
       "            1.0866e-11, 7.0284e-12, 4.9426e-11, 5.6006e-11, 5.1359e-11, 1.9286e-11,\n",
       "            1.9962e-12, 3.4789e-11, 2.7094e-11, 5.0003e-12, 5.6281e-12, 4.1878e-11,\n",
       "            1.0742e-10, 1.7201e-11, 5.8171e-10, 5.1817e-11, 1.5152e-10, 4.4114e-12,\n",
       "            1.0145e-11, 2.6655e-11, 1.1600e-11, 1.2461e-11, 9.9057e-12, 7.3591e-10,\n",
       "            2.4325e-11, 2.9142e-10, 7.1492e-12, 2.0975e-11, 1.1605e-11, 3.4351e-12,\n",
       "            3.3938e-11, 7.0699e-11, 1.2614e-11, 1.3626e-11, 4.4966e-12, 8.0360e-12,\n",
       "            2.7691e-11, 5.2885e-12, 1.0372e-11, 1.7964e-11, 2.8492e-11, 2.3332e-11,\n",
       "            1.1508e-11, 7.8379e-12, 6.1047e-12, 5.5282e-11, 1.0908e-10, 4.5621e-11,\n",
       "            1.6025e-11, 1.6361e-11, 6.3131e-12, 8.3768e-11, 2.6713e-12, 1.3335e-11,\n",
       "            1.3617e-11, 8.1517e-12, 1.4223e-11, 4.4957e-12, 4.5732e-11, 1.3364e-11,\n",
       "            6.8039e-11, 5.0123e-12, 3.5483e-12, 9.3170e-12, 4.8072e-12, 4.5483e-12,\n",
       "            4.0107e-12, 1.0623e-12, 1.6002e-11, 3.1292e-11, 9.3735e-11, 1.3726e-11,\n",
       "            6.5797e-12, 2.0380e-10, 4.8347e-12, 7.8569e-12, 2.4205e-10, 1.5183e-11,\n",
       "            8.5050e-12, 1.8591e-11, 2.6565e-12, 4.7487e-11, 5.3059e-12, 2.8091e-11,\n",
       "            2.5054e-12, 7.0273e-12, 1.9767e-10, 1.0256e-11, 1.1037e-11, 5.9897e-12,\n",
       "            2.1710e-11, 9.2375e-12, 9.8051e-12, 7.4732e-12, 5.1975e-12, 2.0668e-11,\n",
       "            7.9477e-12, 3.9469e-12, 3.6079e-11, 1.1554e-11, 3.9894e-10, 9.0078e-12,\n",
       "            6.8598e-11, 4.1531e-12, 6.9610e-11, 1.8531e-11, 6.0218e-10, 1.5876e-11,\n",
       "            4.5500e-11, 8.7856e-12, 3.1942e-11, 1.8483e-11, 8.7707e-11, 1.2202e-11,\n",
       "            3.6631e-12, 1.1676e-11, 2.1785e-11, 5.6801e-12, 6.3510e-12, 1.6609e-11,\n",
       "            2.4205e-11, 1.0033e-11], device='cuda:0')},\n",
       "   32: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-9.0236e-07,  2.5872e-06, -1.1202e-06, -3.8210e-07, -1.0684e-06,\n",
       "             8.0398e-07,  1.0352e-06,  9.5598e-07, -7.5436e-07,  4.6655e-07,\n",
       "             1.7241e-06, -2.1879e-06,  6.3420e-07,  1.4908e-06, -3.5603e-07,\n",
       "            -5.4165e-07,  1.3527e-07,  3.6613e-08, -1.1138e-06,  1.1898e-06,\n",
       "            -1.3303e-06, -3.5988e-07,  2.4201e-06,  3.1890e-07,  1.2014e-06,\n",
       "            -5.2624e-07,  1.5255e-06,  9.2879e-07, -2.0222e-06,  1.1040e-06,\n",
       "             7.7882e-07, -8.6800e-07,  2.0769e-06,  1.3281e-06, -1.9067e-07,\n",
       "            -2.9314e-07,  1.6869e-06,  1.4372e-06,  1.3347e-06,  3.8376e-08,\n",
       "            -7.9333e-07, -2.3524e-06,  2.0417e-06,  9.9553e-07,  1.4983e-07,\n",
       "            -5.4994e-07,  4.9268e-08,  6.2096e-07, -6.4524e-07, -1.5143e-06,\n",
       "            -7.7790e-07, -2.5734e-07, -1.5700e-06, -8.3200e-07, -5.8334e-09,\n",
       "             7.8935e-07,  9.5604e-07,  1.3814e-06,  2.0499e-06, -9.7522e-07,\n",
       "             2.0042e-06,  4.0626e-08,  2.8432e-07, -4.7464e-07, -1.9493e-06,\n",
       "             1.3334e-07, -7.2419e-08,  1.5351e-06,  1.2619e-07, -1.6117e-06,\n",
       "            -1.6355e-06, -4.3803e-07,  9.6860e-07,  2.0520e-06, -1.1412e-06,\n",
       "             7.4487e-07, -6.9873e-07,  3.7602e-09,  4.3998e-08, -7.3673e-07,\n",
       "            -3.2241e-07,  2.7165e-06,  1.1296e-06, -8.1250e-07, -5.7824e-08,\n",
       "             7.4872e-07, -8.3793e-07,  1.8671e-06, -8.6687e-07, -3.9861e-07,\n",
       "            -1.5812e-06,  2.5191e-06, -2.7585e-07, -1.2845e-06,  1.9982e-06,\n",
       "            -1.4672e-06,  4.5091e-07,  5.1136e-07,  3.6394e-07, -8.0294e-07,\n",
       "             7.3456e-08, -3.5867e-07, -4.0847e-08, -1.5037e-06,  1.1174e-07,\n",
       "             2.6678e-07, -6.8120e-07,  1.4985e-06, -2.0488e-08, -4.6523e-07,\n",
       "            -1.2985e-06, -2.2020e-07,  1.3269e-06, -1.8413e-07, -1.1264e-06,\n",
       "            -2.2571e-06,  5.9869e-07,  1.1985e-07, -1.7258e-06,  4.3042e-07,\n",
       "             1.0743e-06,  1.2485e-06,  2.2246e-06, -2.1875e-06,  1.1044e-06,\n",
       "             6.3456e-07,  4.3282e-07,  9.3803e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.5268e-11, 2.9683e-11, 2.3873e-11, 2.1682e-11, 8.0890e-11, 3.5952e-11,\n",
       "            4.2179e-11, 5.2781e-11, 6.4632e-11, 3.2889e-11, 5.5428e-11, 4.5351e-11,\n",
       "            2.2582e-11, 3.1750e-11, 4.5024e-11, 2.8213e-11, 1.7886e-11, 3.1766e-11,\n",
       "            6.4389e-11, 2.5306e-11, 6.6675e-11, 5.3570e-11, 6.2632e-11, 3.7977e-11,\n",
       "            1.4808e-11, 7.9549e-11, 4.6272e-11, 3.7423e-11, 8.0230e-11, 6.1369e-11,\n",
       "            2.8424e-11, 8.4446e-11, 3.5524e-11, 3.8047e-11, 1.9432e-11, 5.6928e-11,\n",
       "            3.8456e-11, 6.4429e-11, 5.1069e-11, 3.3593e-11, 2.3047e-11, 2.5881e-11,\n",
       "            3.9105e-11, 2.9273e-11, 2.4505e-11, 3.5851e-11, 3.0821e-11, 3.1837e-11,\n",
       "            3.4630e-11, 1.6148e-11, 5.7530e-11, 4.8657e-11, 2.1363e-11, 3.5982e-11,\n",
       "            2.5473e-11, 2.2541e-11, 1.6026e-11, 4.7310e-11, 3.7899e-11, 1.8465e-11,\n",
       "            4.0753e-11, 2.1289e-11, 2.6343e-11, 1.8655e-11, 4.0130e-11, 5.3525e-11,\n",
       "            5.7152e-11, 3.4160e-11, 1.3336e-11, 3.8586e-11, 3.1896e-11, 2.2322e-11,\n",
       "            1.8354e-11, 2.2722e-11, 8.9431e-11, 4.2053e-11, 4.4057e-11, 3.4371e-11,\n",
       "            2.5955e-11, 6.5771e-11, 1.8609e-11, 2.2961e-11, 6.0494e-11, 3.4343e-11,\n",
       "            4.2326e-11, 2.1462e-11, 1.6929e-11, 3.2552e-11, 2.1667e-11, 8.2390e-11,\n",
       "            2.8622e-11, 5.0755e-11, 7.0406e-11, 3.4547e-11, 4.9516e-11, 2.2283e-11,\n",
       "            4.2065e-11, 2.6613e-11, 2.1101e-11, 2.6624e-11, 3.5877e-11, 4.2810e-11,\n",
       "            2.8893e-11, 4.3205e-11, 6.0558e-11, 2.1840e-11, 5.7041e-11, 1.8772e-11,\n",
       "            2.8005e-11, 5.7963e-11, 4.6297e-11, 5.5248e-11, 9.8170e-11, 3.0971e-11,\n",
       "            5.3299e-11, 2.2202e-11, 4.0140e-11, 3.7217e-11, 2.9005e-11, 4.9086e-11,\n",
       "            1.7492e-11, 2.3966e-11, 2.4861e-11, 1.8919e-11, 2.1459e-11, 5.6423e-11,\n",
       "            7.4277e-11, 2.3413e-11], device='cuda:0')},\n",
       "   33: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-6.2471e-07,  9.0263e-07,  8.8827e-07,  7.6952e-07, -3.5239e-07,\n",
       "            -3.4872e-07, -1.3925e-07,  5.5730e-07, -1.1044e-06, -5.0471e-07,\n",
       "             2.0591e-07, -3.9618e-07,  1.5218e-07,  4.1880e-07, -6.1573e-08,\n",
       "             1.4425e-06,  1.4985e-07,  1.0054e-06, -1.2585e-06,  9.3774e-07,\n",
       "            -1.1985e-06, -1.3731e-06,  3.1919e-07,  2.9941e-07,  5.7187e-07,\n",
       "             2.9462e-07, -1.4959e-07, -1.4393e-07, -4.4209e-07, -1.4680e-06,\n",
       "            -7.9821e-08, -2.5873e-06,  2.3725e-07,  2.8272e-08, -2.5222e-07,\n",
       "             5.1491e-07,  1.0820e-07, -7.1533e-07,  3.2155e-07, -2.5191e-07,\n",
       "            -3.7737e-08,  1.1710e-06,  2.0159e-07,  6.4803e-07,  5.1863e-07,\n",
       "             6.7611e-07,  1.8010e-07, -2.0938e-07, -2.9459e-07,  1.3106e-06,\n",
       "             2.7753e-07,  5.0095e-07,  2.5160e-06,  2.0457e-07,  2.9884e-07,\n",
       "             3.5352e-07,  5.6762e-07, -1.8276e-07,  1.1175e-07,  9.0973e-07,\n",
       "            -3.7890e-07,  7.3788e-07, -1.1068e-07,  7.6319e-08,  7.3364e-07,\n",
       "            -1.0278e-07, -4.1347e-07,  9.3728e-08, -1.4032e-07,  6.7029e-07,\n",
       "             9.0509e-07,  1.1853e-06,  1.8554e-07, -3.1377e-07,  1.1198e-06,\n",
       "             3.1612e-08,  8.4808e-08, -8.0072e-07,  1.9508e-06, -1.4429e-07,\n",
       "            -3.9800e-07,  5.5840e-07,  2.4041e-07, -4.3844e-07,  3.4246e-07,\n",
       "            -1.9673e-07,  4.8032e-07,  2.6270e-07, -1.6800e-07,  7.6115e-07,\n",
       "             2.2599e-07, -9.0256e-09, -5.9660e-07, -7.2349e-07,  5.1013e-07,\n",
       "            -3.3036e-07,  2.7456e-07,  1.0221e-07,  9.0145e-08, -1.0551e-07,\n",
       "            -2.6699e-07,  2.9441e-07, -6.1568e-08, -1.0607e-06,  9.3056e-07,\n",
       "             1.7935e-07, -3.4007e-06, -7.1721e-07,  2.6113e-07, -2.3291e-07,\n",
       "             2.4023e-07, -4.8546e-07,  3.6168e-07,  7.1105e-08, -1.7895e-07,\n",
       "             4.3756e-08,  3.2703e-07, -9.4942e-07,  2.9975e-07,  2.9443e-07,\n",
       "            -2.3562e-07,  1.4950e-07, -2.0418e-07,  2.0085e-07, -4.5085e-08,\n",
       "             5.2220e-07,  2.6795e-09, -5.3400e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([7.2695e-12, 1.3149e-11, 6.5429e-12, 2.2724e-11, 2.5121e-11, 7.9256e-12,\n",
       "            5.3688e-12, 8.1248e-12, 6.9316e-12, 7.7513e-12, 5.2424e-12, 1.4650e-11,\n",
       "            1.8131e-11, 5.3748e-12, 7.0733e-12, 3.1257e-11, 2.0148e-12, 4.8395e-11,\n",
       "            1.8400e-11, 4.0206e-11, 2.0136e-10, 9.4938e-12, 9.4884e-12, 1.2827e-11,\n",
       "            7.6970e-12, 8.8927e-12, 1.9785e-12, 1.1138e-12, 2.8763e-12, 6.3489e-11,\n",
       "            7.9864e-13, 1.5106e-10, 2.4933e-12, 1.8381e-12, 4.4493e-12, 5.9764e-12,\n",
       "            6.9883e-12, 2.4317e-11, 8.2334e-12, 1.5245e-11, 1.7224e-12, 6.5086e-12,\n",
       "            2.7336e-12, 1.2597e-12, 6.3390e-12, 4.3921e-12, 9.4094e-12, 5.7738e-12,\n",
       "            5.9134e-12, 1.2550e-11, 5.6504e-12, 4.8183e-12, 1.3654e-10, 2.9526e-12,\n",
       "            1.0750e-11, 9.4063e-12, 1.8452e-11, 3.8790e-12, 1.0655e-11, 8.5668e-12,\n",
       "            2.2256e-11, 3.2225e-12, 3.3061e-12, 5.3792e-12, 1.9210e-10, 1.8080e-12,\n",
       "            7.1819e-12, 1.3157e-11, 6.8908e-12, 2.9517e-12, 6.9931e-12, 1.2707e-11,\n",
       "            3.4317e-11, 6.7738e-12, 6.4948e-12, 1.5812e-11, 4.1857e-11, 3.8363e-12,\n",
       "            1.2566e-11, 9.5575e-12, 2.3826e-11, 7.8426e-12, 1.9240e-11, 1.8231e-11,\n",
       "            1.0830e-11, 3.2466e-11, 2.7191e-11, 2.1625e-11, 2.3022e-11, 1.3867e-11,\n",
       "            1.2675e-11, 1.6021e-11, 5.4589e-11, 4.0553e-12, 6.3652e-12, 1.3620e-12,\n",
       "            4.9161e-12, 1.2336e-11, 7.0025e-12, 4.2144e-12, 4.3436e-12, 1.8035e-12,\n",
       "            2.1698e-12, 1.0620e-11, 3.7314e-11, 2.0425e-12, 1.2475e-10, 7.2491e-12,\n",
       "            2.2204e-11, 1.0018e-11, 1.9669e-11, 3.0800e-11, 1.5316e-10, 6.0270e-12,\n",
       "            2.0179e-11, 1.0572e-11, 6.5932e-11, 1.8757e-11, 8.3139e-11, 8.1981e-12,\n",
       "            2.0676e-12, 3.3144e-12, 6.2171e-12, 2.2105e-11, 6.1398e-12, 1.2802e-11,\n",
       "            4.7399e-11, 3.8993e-12], device='cuda:0')},\n",
       "   34: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.1923e-06, -1.2164e-06, -1.3119e-06,  1.6889e-06, -4.5041e-07,\n",
       "             6.7979e-07, -2.4325e-07, -9.4123e-07, -1.1468e-06,  7.6411e-07,\n",
       "             9.9165e-08,  1.5851e-07, -1.7860e-07, -1.0631e-06, -1.0745e-06,\n",
       "            -8.7731e-07, -6.4083e-07, -6.8021e-07, -4.2795e-07,  1.7881e-06,\n",
       "             1.0060e-06,  1.0431e-06, -2.2602e-06, -1.2155e-06, -1.5343e-06,\n",
       "            -4.4073e-07, -8.2249e-07,  7.0832e-07,  7.5373e-07, -9.0634e-07,\n",
       "             1.5548e-07, -4.6050e-07, -6.3455e-07,  3.2322e-08,  5.2915e-07,\n",
       "            -1.0748e-06, -2.2412e-07, -1.1934e-06, -5.6755e-07,  3.6251e-07,\n",
       "            -4.6793e-07, -2.5320e-07,  5.8142e-07,  1.3207e-06, -7.0080e-07,\n",
       "            -1.4215e-06, -9.1558e-07,  2.1892e-06,  4.8544e-07, -1.8081e-06,\n",
       "            -1.3039e-06, -1.0081e-06,  1.3559e-06, -2.0861e-07,  2.8057e-07,\n",
       "            -1.2317e-06, -7.4186e-07,  3.3898e-07, -1.4263e-07,  9.2079e-07,\n",
       "             3.3448e-07,  1.3529e-07, -6.2740e-07, -2.7899e-07,  6.4085e-07,\n",
       "            -1.1260e-07, -3.2423e-07, -3.7979e-07,  2.2887e-07, -1.0540e-06,\n",
       "            -2.5335e-07, -1.4774e-06,  8.6323e-10, -2.9727e-07,  2.7468e-08,\n",
       "             9.2352e-08, -7.5771e-08, -1.0260e-06,  4.9921e-07, -7.6957e-07,\n",
       "             3.5297e-07, -3.2127e-07, -2.1314e-07, -7.2985e-07, -3.6340e-07,\n",
       "            -1.4590e-09, -5.9895e-07, -5.8269e-07,  9.5926e-08, -9.3522e-07,\n",
       "            -3.3841e-07,  4.8558e-08,  4.5405e-07,  4.5297e-07, -1.1714e-06,\n",
       "            -9.3914e-08, -8.9309e-08, -3.2091e-07, -3.8250e-07,  3.1260e-07,\n",
       "            -1.9833e-07, -7.5465e-07, -1.6933e-07, -8.9391e-07,  1.1216e-06,\n",
       "            -6.7388e-07, -9.9519e-07, -9.1451e-07, -6.5186e-07,  6.9564e-07,\n",
       "             3.5077e-07, -2.0859e-07,  8.8770e-07, -4.3464e-07,  3.3730e-07,\n",
       "             2.5453e-08,  4.6977e-07,  1.5827e-07,  3.2774e-07,  9.2711e-07,\n",
       "             6.6079e-07, -7.2352e-07,  5.4349e-07, -4.4370e-07, -5.1899e-07,\n",
       "             7.6225e-07,  3.1785e-07,  2.8223e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.7024e-11, 2.1874e-11, 1.4637e-11, 4.4897e-11, 1.4923e-11, 3.6166e-11,\n",
       "            4.8688e-11, 3.0829e-11, 2.7210e-11, 1.3358e-11, 1.0696e-11, 2.3902e-11,\n",
       "            3.3936e-11, 2.6530e-11, 1.5560e-11, 3.6932e-11, 1.3050e-11, 3.1805e-11,\n",
       "            2.0207e-11, 5.7240e-11, 9.3875e-11, 4.1477e-11, 1.5201e-11, 3.0137e-11,\n",
       "            3.3184e-11, 3.8404e-11, 2.8714e-11, 1.1228e-11, 3.4902e-11, 1.7663e-11,\n",
       "            1.0094e-11, 5.9922e-11, 2.1332e-11, 1.2709e-11, 2.6547e-11, 2.7386e-11,\n",
       "            2.8176e-11, 2.5106e-11, 2.5303e-11, 4.5983e-11, 1.0615e-11, 1.6411e-11,\n",
       "            1.5229e-11, 9.1377e-12, 1.5607e-11, 1.5337e-11, 2.2956e-11, 1.7596e-11,\n",
       "            1.8990e-11, 3.0340e-11, 2.1250e-11, 2.5714e-11, 6.1760e-11, 1.5921e-11,\n",
       "            2.1564e-11, 2.0544e-11, 3.7655e-11, 2.1945e-11, 2.2664e-11, 3.4226e-11,\n",
       "            4.5586e-11, 2.1743e-11, 2.7088e-11, 1.1913e-11, 6.1798e-11, 1.6757e-11,\n",
       "            2.1843e-11, 1.9409e-11, 1.1183e-11, 1.1627e-11, 2.4897e-11, 1.8123e-11,\n",
       "            4.5104e-11, 1.8837e-11, 3.5359e-11, 3.9335e-11, 1.9518e-11, 2.1183e-11,\n",
       "            1.0993e-10, 2.0788e-11, 2.6761e-11, 1.4353e-11, 2.8692e-11, 4.8758e-11,\n",
       "            1.9444e-11, 3.8116e-11, 2.4451e-11, 5.0647e-11, 1.6314e-11, 1.9783e-11,\n",
       "            1.7690e-11, 2.7386e-11, 3.9316e-11, 1.4121e-11, 3.2129e-11, 1.9147e-11,\n",
       "            7.3610e-12, 2.5073e-11, 3.0797e-11, 2.1256e-11, 5.1786e-11, 3.0522e-11,\n",
       "            2.1731e-11, 1.7283e-11, 3.0673e-11, 1.7568e-11, 3.3271e-11, 1.4372e-11,\n",
       "            2.1134e-11, 4.0613e-11, 1.8448e-11, 2.0845e-11, 3.6688e-11, 3.2381e-11,\n",
       "            3.2836e-11, 2.1499e-11, 2.2116e-11, 5.5028e-11, 2.4108e-11, 1.4239e-11,\n",
       "            1.9495e-11, 2.6668e-11, 1.2767e-11, 2.7711e-11, 1.3041e-11, 1.9096e-11,\n",
       "            1.4698e-11, 1.7064e-11], device='cuda:0')},\n",
       "   35: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 8.6092e-08, -1.0449e-07,  1.6811e-06,  ..., -1.8977e-07,\n",
       "              1.1962e-06, -4.9310e-07],\n",
       "            [-9.3481e-08, -1.8433e-07,  9.7631e-08,  ..., -3.8728e-07,\n",
       "              1.8653e-07,  2.8331e-07],\n",
       "            [-1.5287e-06,  3.3850e-07,  1.0236e-07,  ...,  1.0453e-06,\n",
       "              2.7389e-08, -3.1906e-07],\n",
       "            ...,\n",
       "            [ 4.3897e-07, -4.2274e-07,  2.6312e-07,  ..., -1.5048e-08,\n",
       "              1.8543e-06, -5.4263e-07],\n",
       "            [ 4.7535e-08,  2.6229e-07,  1.7957e-06,  ..., -3.8179e-07,\n",
       "              1.8137e-06,  8.5396e-07],\n",
       "            [-2.2989e-06,  3.3106e-07, -3.6084e-07,  ...,  5.5159e-07,\n",
       "             -8.0787e-08,  6.5852e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.3277e-11, 2.7022e-11, 1.7733e-11,  ..., 3.0613e-11, 3.8585e-11,\n",
       "             2.7745e-11],\n",
       "            [1.0631e-10, 2.0519e-11, 1.9723e-12,  ..., 1.0818e-11, 3.0939e-11,\n",
       "             1.9152e-11],\n",
       "            [1.5325e-10, 1.9395e-11, 2.3233e-11,  ..., 3.0492e-11, 1.0537e-10,\n",
       "             9.9368e-11],\n",
       "            ...,\n",
       "            [7.7284e-11, 2.0593e-11, 3.6124e-11,  ..., 2.7800e-11, 4.4406e-11,\n",
       "             4.2472e-11],\n",
       "            [2.6471e-11, 5.6989e-11, 2.1674e-11,  ..., 1.9591e-11, 3.5481e-11,\n",
       "             2.1927e-11],\n",
       "            [2.4210e-10, 3.0373e-11, 1.2925e-11,  ..., 3.3588e-11, 7.7482e-11,\n",
       "             5.5155e-11]], device='cuda:0')},\n",
       "   36: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-9.3058e-07,  9.2441e-08, -1.2115e-06,  6.0107e-07, -3.1932e-06,\n",
       "            -1.2596e-06,  2.6088e-07, -4.1345e-08, -5.5713e-07,  1.3313e-06,\n",
       "             2.4300e-08,  6.4059e-07, -5.5853e-07,  2.3386e-06,  1.0972e-06,\n",
       "            -8.7109e-07, -1.3308e-06,  1.4708e-06,  1.0684e-09, -3.0888e-06,\n",
       "             3.7960e-07, -1.9292e-06,  4.4923e-07, -4.8892e-07, -2.0909e-08,\n",
       "            -1.4712e-06, -3.7737e-06,  2.8818e-06,  3.4007e-06,  1.0203e-06,\n",
       "             8.7240e-08,  1.1936e-06, -6.2670e-08,  5.8437e-07,  3.7415e-07,\n",
       "             1.4722e-06,  9.5548e-07, -1.0046e-06,  1.0068e-07,  1.5348e-06,\n",
       "            -9.6756e-07,  1.0888e-42,  3.1732e-06, -1.1415e-07,  1.1487e-08,\n",
       "             1.6192e-06,  2.9069e-06, -1.2795e-22,  2.9456e-06,  4.2686e-07,\n",
       "            -2.7462e-07,  1.2510e-06,  5.3995e-07, -1.6688e-06, -3.4668e-06,\n",
       "            -1.0293e-06,  1.8823e-06,  2.7209e-07, -2.5497e-06,  1.1552e-06,\n",
       "            -1.2976e-06, -5.9287e-07,  1.4951e-08, -8.3561e-07, -6.7920e-08,\n",
       "             3.8984e-06, -8.6844e-07, -2.0125e-07, -9.8065e-07,  1.6394e-07,\n",
       "            -1.4980e-06, -1.1219e-06,  2.0512e-40,  3.4557e-06,  1.9455e-07,\n",
       "            -3.6125e-06,  1.0017e-07,  1.1439e-06,  3.1618e-10,  1.2319e-06,\n",
       "            -1.3521e-06,  3.4943e-09, -6.4543e-07,  3.2658e-06,  2.3697e-07,\n",
       "            -5.9722e-07, -6.3726e-08,  1.6594e-06,  1.6459e-06, -3.5697e-06,\n",
       "            -9.8534e-08, -3.4585e-07,  1.8936e-06, -3.2912e-06, -1.4494e-06,\n",
       "            -1.0694e-06, -5.6671e-07, -2.7210e-06, -1.1963e-06,  6.1941e-07,\n",
       "             2.7824e-06, -4.4143e-07,  8.4854e-08,  6.4165e-07, -5.6236e-07,\n",
       "            -1.6011e-06,  4.2699e-07,  4.3511e-09, -2.1263e-06, -1.5466e-06,\n",
       "            -1.2032e-06, -1.3244e-06,  2.4879e-06, -1.7355e-06, -6.5766e-06,\n",
       "            -2.0980e-06,  2.0124e-07, -2.2358e-06, -1.0225e-06,  1.2365e-06,\n",
       "            -2.2167e-06,  1.7687e-06, -2.4282e-08, -1.0356e-06,  3.4629e-06,\n",
       "             2.4346e-07, -1.7112e-06, -6.7486e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([9.2866e-11, 4.0301e-11, 1.1733e-10, 7.5242e-11, 6.8178e-11, 1.0355e-10,\n",
       "            8.3969e-12, 5.6926e-11, 1.8107e-10, 3.5689e-11, 1.0195e-11, 1.1080e-10,\n",
       "            3.3829e-11, 9.7041e-11, 1.8061e-10, 1.0174e-10, 1.1361e-10, 1.1231e-10,\n",
       "            8.6667e-12, 1.8799e-10, 1.3075e-10, 1.6902e-10, 3.7884e-11, 3.9801e-11,\n",
       "            7.0515e-11, 1.5902e-10, 1.8642e-10, 1.1162e-10, 3.4159e-11, 7.2028e-11,\n",
       "            9.4110e-11, 8.0840e-11, 9.8924e-11, 6.0553e-11, 5.3730e-11, 1.1548e-10,\n",
       "            9.4581e-11, 3.6036e-11, 9.9808e-12, 4.0480e-11, 1.1013e-10, 7.0692e-14,\n",
       "            9.9226e-11, 1.6888e-10, 1.3860e-10, 1.7157e-10, 1.3408e-10, 6.0271e-12,\n",
       "            1.8779e-10, 1.0380e-10, 4.3952e-11, 1.3898e-10, 2.7299e-10, 1.8051e-10,\n",
       "            1.0073e-10, 1.4763e-10, 8.7456e-11, 8.4866e-11, 7.9107e-11, 3.2042e-11,\n",
       "            1.7510e-10, 3.9689e-11, 1.6308e-15, 2.4277e-10, 9.6389e-11, 8.1606e-11,\n",
       "            1.3363e-10, 6.2715e-11, 4.8762e-11, 1.7269e-11, 8.7341e-11, 9.5428e-11,\n",
       "            9.5967e-13, 3.4348e-11, 7.5036e-11, 1.3786e-10, 7.5585e-14, 1.5032e-11,\n",
       "            4.3964e-17, 4.0427e-11, 1.3521e-10, 1.0230e-14, 1.7003e-10, 1.1483e-10,\n",
       "            3.6846e-13, 5.3845e-11, 1.0302e-10, 9.4644e-12, 3.1076e-10, 5.2682e-11,\n",
       "            9.2335e-11, 7.6702e-11, 9.7976e-11, 7.8608e-11, 8.9368e-11, 1.3291e-10,\n",
       "            8.0905e-11, 8.0851e-11, 9.8847e-12, 4.4114e-11, 1.3290e-10, 2.9904e-10,\n",
       "            1.1082e-10, 1.2231e-10, 6.7654e-11, 1.1204e-10, 4.2757e-11, 1.9010e-14,\n",
       "            1.5805e-10, 1.0100e-10, 1.7218e-10, 3.1528e-11, 9.8831e-11, 3.3336e-11,\n",
       "            9.3437e-11, 4.7514e-11, 3.1091e-10, 1.1676e-10, 6.3334e-11, 3.2181e-12,\n",
       "            8.3864e-11, 3.5154e-11, 9.8255e-13, 1.1766e-10, 1.4044e-10, 1.2549e-10,\n",
       "            1.0782e-10, 1.0930e-10], device='cuda:0')},\n",
       "   37: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.6641e-06, -5.3788e-07,  1.8895e-07,  ...,  8.7746e-07,\n",
       "              1.1123e-06,  4.5009e-07],\n",
       "            [ 2.7210e-06,  1.3372e-07,  1.7028e-06,  ..., -3.9815e-06,\n",
       "             -1.8163e-06, -6.3740e-07],\n",
       "            [ 1.2648e-06, -3.6229e-07,  3.0906e-07,  ..., -1.8302e-06,\n",
       "             -4.5924e-07,  1.7066e-08],\n",
       "            ...,\n",
       "            [ 3.8321e-06,  1.2620e-06,  3.4465e-08,  ..., -2.7304e-06,\n",
       "             -2.4941e-06, -8.9052e-07],\n",
       "            [-6.0805e-06,  8.5927e-08,  4.2757e-07,  ...,  7.0470e-06,\n",
       "              5.3967e-06,  1.0334e-06],\n",
       "            [ 3.1120e-06,  8.4267e-08, -7.2671e-07,  ..., -2.7807e-06,\n",
       "             -3.6570e-06, -7.8825e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.1271e-11, 7.9586e-11, 3.3462e-11,  ..., 1.6178e-10, 5.0836e-11,\n",
       "             8.1600e-11],\n",
       "            [1.4968e-11, 2.4943e-11, 1.3821e-11,  ..., 7.1194e-11, 4.0116e-11,\n",
       "             3.1952e-11],\n",
       "            [2.8350e-11, 4.1651e-11, 1.6076e-11,  ..., 1.1661e-10, 4.5458e-11,\n",
       "             4.9452e-11],\n",
       "            ...,\n",
       "            [4.7955e-11, 1.0867e-10, 8.1891e-11,  ..., 2.0877e-10, 7.3594e-11,\n",
       "             1.0822e-10],\n",
       "            [8.8282e-11, 5.2108e-11, 8.8021e-11,  ..., 3.1362e-10, 9.2886e-11,\n",
       "             7.7779e-11],\n",
       "            [5.9361e-11, 1.1573e-10, 8.1476e-11,  ..., 2.7208e-10, 6.4343e-11,\n",
       "             1.2128e-10]], device='cuda:0')},\n",
       "   38: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 3.4473e-07, -4.0725e-08, -3.1845e-08,  ..., -2.0618e-07,\n",
       "             -8.7025e-08,  1.6256e-08],\n",
       "            [ 2.3119e-09, -3.5277e-08, -1.3552e-08,  ..., -1.5034e-08,\n",
       "              1.3430e-08, -4.8775e-09],\n",
       "            [-2.4243e-07, -2.9755e-08, -8.4756e-08,  ...,  1.3923e-07,\n",
       "              8.4384e-09, -5.9433e-08],\n",
       "            ...,\n",
       "            [-1.0791e-06, -3.0518e-07,  3.0981e-07,  ...,  6.1943e-07,\n",
       "              6.3164e-08,  2.3375e-07],\n",
       "            [ 8.9055e-07,  2.2985e-07, -4.2219e-07,  ..., -4.7104e-07,\n",
       "             -1.6049e-07, -2.1377e-07],\n",
       "            [-1.2943e-06, -4.1221e-07,  5.1342e-07,  ...,  7.3490e-07,\n",
       "              1.2883e-07,  3.0161e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.0467e-13, 3.7084e-14, 8.2314e-14,  ..., 8.6953e-14, 5.6384e-14,\n",
       "             3.3334e-14],\n",
       "            [3.1824e-13, 4.8206e-14, 9.0042e-14,  ..., 1.7684e-13, 5.2854e-14,\n",
       "             2.6761e-14],\n",
       "            [1.8181e-13, 2.5126e-14, 4.8686e-14,  ..., 9.2232e-14, 3.1817e-14,\n",
       "             1.8653e-14],\n",
       "            ...,\n",
       "            [1.2097e-11, 2.3044e-12, 2.8228e-12,  ..., 5.5851e-12, 2.1601e-12,\n",
       "             1.2389e-12],\n",
       "            [6.5020e-12, 1.3801e-12, 2.1085e-12,  ..., 2.9649e-12, 1.5609e-12,\n",
       "             6.8107e-13],\n",
       "            [1.8302e-11, 3.6170e-12, 4.7224e-12,  ..., 8.3561e-12, 3.8166e-12,\n",
       "             1.9120e-12]], device='cuda:0')},\n",
       "   39: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.1198e-09, -5.8408e-09,  4.7719e-08,  ..., -5.2781e-08,\n",
       "              2.9639e-08, -1.4190e-07],\n",
       "            [-2.9928e-07, -2.3273e-08, -1.0470e-07,  ...,  1.8585e-07,\n",
       "              2.0938e-08, -2.3280e-07],\n",
       "            [-9.4496e-08, -2.0204e-08, -3.9867e-08,  ...,  6.2368e-08,\n",
       "              5.3730e-09, -4.5151e-08],\n",
       "            ...,\n",
       "            [-6.8820e-07,  2.2143e-08, -1.2349e-07,  ...,  9.4994e-08,\n",
       "             -6.0415e-08,  1.5536e-08],\n",
       "            [ 1.0513e-06, -9.9875e-09,  2.0324e-07,  ..., -2.2983e-07,\n",
       "              9.1720e-08,  1.0859e-07],\n",
       "            [-1.0648e-06,  1.5443e-08, -2.1058e-07,  ...,  2.6124e-07,\n",
       "             -8.3068e-08, -1.5356e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.9617e-14, 2.1771e-14, 1.5502e-14,  ..., 3.1072e-14, 2.6862e-14,\n",
       "             2.6133e-14],\n",
       "            [4.7664e-13, 6.3984e-14, 1.1002e-13,  ..., 3.0144e-13, 1.4905e-13,\n",
       "             1.9081e-13],\n",
       "            [2.8092e-14, 3.5696e-15, 7.6128e-15,  ..., 1.7778e-14, 1.0740e-14,\n",
       "             1.1085e-14],\n",
       "            ...,\n",
       "            [1.7546e-12, 1.2972e-13, 5.5635e-13,  ..., 7.7135e-13, 6.2550e-13,\n",
       "             3.2701e-13],\n",
       "            [5.6442e-12, 1.3983e-13, 8.4224e-13,  ..., 2.0964e-12, 8.9362e-13,\n",
       "             4.5991e-13],\n",
       "            [4.9849e-12, 1.3775e-13, 1.1985e-12,  ..., 1.8369e-12, 1.2378e-12,\n",
       "             5.9304e-13]], device='cuda:0')},\n",
       "   40: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.2969e-08,  7.1506e-08, -3.4954e-07,  ..., -1.5553e-06,\n",
       "              2.7789e-06, -2.8525e-06],\n",
       "            [ 2.8173e-06, -3.3500e-07,  1.3819e-06,  ..., -8.9135e-06,\n",
       "              4.1354e-06, -1.1619e-05],\n",
       "            [ 3.1142e-06, -8.2735e-07,  1.0606e-06,  ..., -1.4477e-05,\n",
       "              7.1460e-06, -1.6675e-05],\n",
       "            ...,\n",
       "            [ 3.9753e-06, -2.0047e-06,  1.8882e-06,  ..., -1.1733e-05,\n",
       "              5.4671e-06, -1.3585e-05],\n",
       "            [ 1.0675e-06, -8.9584e-07,  6.2762e-07,  ..., -1.3815e-06,\n",
       "              1.8972e-06, -2.0579e-06],\n",
       "            [-4.8926e-07,  2.6526e-07, -7.3232e-08,  ..., -1.1213e-07,\n",
       "             -8.6788e-07, -6.6021e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.1931e-11, 3.6412e-11, 2.0444e-11,  ..., 4.5147e-11, 2.2973e-11,\n",
       "             1.3308e-10],\n",
       "            [4.0438e-11, 9.1257e-11, 1.2718e-10,  ..., 1.0435e-10, 7.4335e-11,\n",
       "             2.3983e-10],\n",
       "            [8.4966e-11, 6.7571e-11, 6.6058e-11,  ..., 1.5706e-10, 1.9600e-10,\n",
       "             3.9490e-10],\n",
       "            ...,\n",
       "            [1.1422e-10, 9.3792e-11, 6.6882e-11,  ..., 2.6173e-10, 1.5093e-10,\n",
       "             7.2864e-10],\n",
       "            [3.6097e-11, 5.2292e-11, 3.9414e-11,  ..., 4.9223e-11, 2.5329e-11,\n",
       "             1.4059e-10],\n",
       "            [3.6104e-11, 4.1320e-11, 3.8176e-11,  ..., 7.3453e-11, 9.0729e-11,\n",
       "             1.9391e-10]], device='cuda:0')},\n",
       "   41: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 9.7310e-07, -8.8279e-06, -1.0950e-05,  3.0376e-06,  2.1410e-06,\n",
       "             1.0153e-06,  2.0295e-06, -5.5877e-06,  2.6037e-07, -6.0716e-06,\n",
       "             1.7537e-07,  7.6747e-06, -1.5702e-06, -2.9119e-06,  7.2449e-06,\n",
       "             3.6960e-06, -1.0774e-05,  7.3688e-06, -7.8047e-08, -1.3984e-06,\n",
       "            -1.4925e-06, -1.0020e-05,  1.1314e-05, -4.8551e-07, -5.6821e-06,\n",
       "             9.4403e-06, -6.7253e-06,  6.3668e-06, -8.8266e-07, -4.5799e-06,\n",
       "            -1.6731e-06,  2.7884e-06, -5.4903e-06, -2.3122e-07, -5.7061e-08,\n",
       "            -1.7711e-06, -6.6392e-07, -1.5057e-07,  5.8507e-07,  5.7490e-06,\n",
       "             8.0774e-06,  4.9157e-06, -3.0762e-06, -5.7196e-06, -3.2426e-06,\n",
       "             8.4280e-06, -8.4454e-06, -3.8467e-06, -7.9081e-06, -2.9979e-06,\n",
       "             8.8746e-06, -3.2262e-06, -2.1607e-06, -4.3218e-06,  5.8094e-07,\n",
       "             5.3314e-06, -7.3794e-06,  5.0066e-06,  7.0423e-06,  4.0486e-06,\n",
       "            -2.6549e-06, -6.3340e-06, -2.0876e-06, -4.3360e-06, -3.4209e-06,\n",
       "            -6.3645e-06,  2.8922e-06, -4.8085e-07, -3.5534e-06,  2.9929e-06,\n",
       "            -1.4999e-06, -7.0612e-06, -2.2132e-06, -4.7874e-06, -3.2070e-06,\n",
       "            -2.2414e-07, -5.4030e-06,  4.6060e-06,  1.6573e-06,  3.9001e-10,\n",
       "             4.6828e-06,  6.3932e-06,  7.0072e-07,  3.0631e-06, -4.3388e-06,\n",
       "             3.0847e-06,  5.4981e-08,  7.7510e-07,  5.1089e-06, -3.1816e-06,\n",
       "            -1.9398e-06, -2.1384e-06,  5.0958e-07,  2.0342e-06,  2.6132e-06,\n",
       "            -1.4367e-06,  5.2009e-08, -1.0993e-06,  1.4513e-06, -2.4590e-06,\n",
       "            -7.1869e-06,  2.9731e-06,  2.0221e-06,  1.0187e-06, -6.9270e-06,\n",
       "            -3.5024e-06, -8.3382e-06, -3.7679e-07, -6.2175e-06,  2.8933e-06,\n",
       "            -5.9252e-06, -1.1537e-06,  1.9347e-06,  1.8217e-06, -2.9863e-06,\n",
       "            -3.7659e-06, -1.0089e-05, -9.2384e-06,  3.4959e-07, -8.7385e-07,\n",
       "            -3.5783e-06,  5.6084e-06,  1.0382e-06,  2.5434e-06,  2.1555e-06,\n",
       "            -1.0118e-05, -5.1790e-07, -7.4651e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.8228e-10, 4.8697e-10, 7.6686e-10, 4.6317e-10, 2.8120e-10, 5.2681e-10,\n",
       "            3.1067e-10, 4.5861e-10, 4.9888e-10, 3.7839e-10, 4.1710e-10, 5.1711e-10,\n",
       "            7.9144e-10, 6.2940e-10, 4.3163e-10, 2.6103e-10, 1.3272e-09, 4.8820e-10,\n",
       "            3.2673e-10, 3.3959e-10, 4.3617e-10, 2.9450e-10, 5.9758e-10, 1.9704e-10,\n",
       "            3.3647e-10, 5.6134e-10, 3.8834e-10, 5.1618e-10, 5.0952e-10, 2.1266e-10,\n",
       "            3.4712e-10, 2.9543e-10, 6.9290e-10, 5.8371e-10, 6.9579e-10, 2.7300e-10,\n",
       "            2.5711e-10, 3.8144e-10, 2.6579e-10, 2.3356e-10, 3.4820e-10, 4.2650e-10,\n",
       "            2.9517e-10, 4.1597e-10, 3.9237e-10, 8.8550e-10, 1.0280e-09, 4.6719e-10,\n",
       "            3.7965e-10, 4.2731e-10, 6.2279e-10, 4.5942e-10, 2.6476e-10, 2.6870e-10,\n",
       "            8.6653e-10, 2.3822e-10, 3.5274e-10, 2.6586e-10, 5.4696e-10, 2.8785e-10,\n",
       "            4.2403e-10, 3.5007e-10, 6.9341e-10, 4.5948e-10, 4.0914e-10, 9.3536e-10,\n",
       "            4.3688e-10, 3.8188e-10, 2.3228e-10, 3.8965e-10, 1.8424e-10, 2.8856e-10,\n",
       "            3.2015e-10, 8.6345e-10, 2.1693e-10, 3.1684e-10, 5.4423e-10, 3.1828e-10,\n",
       "            3.4495e-10, 1.9313e-10, 3.2446e-10, 2.6782e-10, 5.2080e-10, 8.3053e-10,\n",
       "            6.4668e-10, 2.6153e-10, 3.6822e-10, 3.1392e-10, 4.2858e-10, 4.5635e-10,\n",
       "            3.8917e-10, 2.4237e-10, 3.8311e-10, 3.9919e-10, 8.8788e-10, 6.2314e-10,\n",
       "            2.6780e-10, 2.6730e-10, 2.8341e-10, 6.2562e-10, 5.5459e-10, 4.8606e-10,\n",
       "            3.0951e-10, 2.3214e-10, 7.7814e-10, 9.2476e-10, 2.1508e-09, 2.7165e-10,\n",
       "            2.9382e-10, 3.0849e-10, 3.3613e-10, 3.5976e-10, 4.9761e-10, 2.9341e-10,\n",
       "            4.0735e-10, 4.8266e-10, 6.4856e-10, 8.8158e-10, 5.7130e-10, 2.7905e-10,\n",
       "            5.5692e-10, 3.7386e-10, 1.6086e-10, 1.7926e-10, 4.1958e-10, 8.8300e-10,\n",
       "            2.0486e-10, 3.6807e-10], device='cuda:0')},\n",
       "   42: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 9.8526e-07, -1.2036e-07,  4.9844e-07,  4.2497e-07,  5.5844e-06,\n",
       "            -2.5853e-06, -1.4106e-06,  1.5300e-06,  1.3921e-05,  4.8644e-08,\n",
       "             2.6507e-06,  2.4478e-06,  7.7273e-08,  3.1753e-07,  2.1169e-06,\n",
       "            -5.5046e-07, -4.9614e-07,  6.5928e-07,  2.2124e-06, -6.8685e-08,\n",
       "             5.9166e-06, -6.5634e-07, -2.5050e-06, -4.6608e-07, -2.0344e-07,\n",
       "            -1.9428e-06, -3.6221e-07,  1.6760e-07,  7.8898e-07, -4.0181e-06,\n",
       "             9.2945e-07,  1.0805e-05,  1.1527e-06, -1.3873e-06,  3.1704e-06,\n",
       "            -3.6016e-07, -9.0836e-07,  1.2284e-06,  2.3322e-06, -1.8375e-07,\n",
       "            -3.1588e-07,  6.9525e-07, -2.6622e-07,  3.2751e-07,  4.7328e-07,\n",
       "            -3.4993e-07,  1.1790e-06, -6.2998e-07, -3.8950e-08, -3.8945e-07,\n",
       "            -3.5006e-06,  3.3467e-07,  1.8158e-06, -2.5744e-08,  6.8137e-07,\n",
       "             2.8868e-07, -3.1038e-07, -9.4195e-07,  1.4863e-06,  1.3985e-06,\n",
       "             1.3595e-06, -1.5258e-07, -5.2880e-07,  4.0142e-06,  1.7388e-06,\n",
       "             4.9881e-07, -1.4901e-07, -1.1775e-06,  4.9230e-08, -3.3012e-07,\n",
       "            -8.5894e-08,  6.0206e-08,  9.9365e-07,  1.2538e-06, -3.8108e-08,\n",
       "             5.4589e-07, -3.4608e-07, -5.8467e-07,  1.9818e-06, -4.3227e-06,\n",
       "            -6.1494e-07,  3.9677e-08,  3.3098e-06, -8.6213e-08,  2.2891e-06,\n",
       "             1.0677e-06,  9.8511e-07, -9.2484e-07,  1.0566e-06,  4.1141e-07,\n",
       "            -1.3151e-07, -4.3191e-07,  3.3344e-07,  1.0895e-06,  5.4113e-07,\n",
       "             5.4646e-08,  6.3791e-07, -5.7382e-07, -1.2802e-06,  4.1604e-07,\n",
       "             2.7723e-07, -2.3748e-07, -4.1368e-07,  6.4342e-07,  6.4819e-07,\n",
       "             3.9696e-09,  3.8486e-06, -1.3045e-06,  3.0369e-07, -3.5902e-06,\n",
       "             6.3115e-06, -2.0252e-07, -4.8601e-06, -3.1228e-08,  2.9336e-07,\n",
       "             1.1125e-06,  8.5312e-07,  9.1385e-08,  4.1397e-06,  2.1190e-07,\n",
       "             2.4888e-06, -4.8657e-07,  8.8214e-07, -1.6592e-06, -3.4165e-07,\n",
       "            -3.0021e-07,  8.7388e-08, -5.9745e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.0191e-10, 9.9767e-11, 8.9383e-12, 2.2735e-11, 3.4826e-10, 2.1852e-11,\n",
       "            7.8217e-12, 2.6078e-10, 1.7525e-10, 1.8368e-11, 3.2768e-10, 7.0608e-11,\n",
       "            1.1862e-11, 1.2001e-11, 1.9394e-11, 4.7434e-11, 8.4745e-12, 7.1406e-11,\n",
       "            2.6171e-11, 2.6846e-12, 2.4983e-09, 4.1270e-11, 1.8307e-10, 8.6813e-12,\n",
       "            8.8710e-12, 2.7792e-11, 1.2579e-11, 3.2143e-12, 3.6457e-11, 1.0159e-09,\n",
       "            1.3871e-11, 9.8863e-10, 2.8005e-11, 3.4967e-11, 4.9082e-11, 2.9749e-11,\n",
       "            3.8445e-12, 4.1523e-11, 1.3250e-11, 1.8777e-11, 4.7646e-11, 1.6676e-11,\n",
       "            2.4833e-11, 5.5612e-11, 3.1225e-11, 3.1652e-12, 1.3677e-11, 1.7168e-11,\n",
       "            1.5478e-11, 1.0244e-11, 2.1723e-10, 1.2168e-11, 1.7551e-11, 8.9005e-12,\n",
       "            1.8345e-11, 1.3583e-11, 6.5709e-12, 1.8161e-10, 1.5515e-11, 5.8170e-11,\n",
       "            5.3660e-12, 3.1611e-11, 2.3095e-11, 4.8794e-11, 5.2683e-10, 1.5367e-10,\n",
       "            2.0360e-11, 3.3859e-11, 1.9669e-11, 1.1358e-11, 2.6123e-11, 1.4723e-11,\n",
       "            1.0583e-11, 2.5102e-11, 7.0362e-11, 3.9866e-11, 1.3340e-11, 1.1555e-11,\n",
       "            1.5655e-10, 8.2997e-11, 1.2308e-11, 1.6625e-11, 9.2189e-11, 5.9131e-12,\n",
       "            2.0587e-11, 2.2155e-11, 2.5668e-11, 4.0185e-11, 3.8949e-11, 1.0779e-11,\n",
       "            4.7020e-12, 7.0251e-12, 2.0265e-11, 8.8993e-12, 3.1669e-11, 5.6282e-12,\n",
       "            1.5793e-11, 9.7426e-12, 5.1132e-11, 2.3913e-11, 8.3267e-12, 2.9893e-12,\n",
       "            6.2259e-12, 4.2719e-11, 8.0891e-11, 8.6834e-12, 2.3778e-09, 1.5306e-11,\n",
       "            1.1934e-11, 1.1062e-10, 1.9146e-10, 2.4493e-11, 1.4593e-09, 1.9514e-11,\n",
       "            1.4914e-11, 4.1987e-11, 6.9192e-11, 1.5933e-11, 9.4220e-11, 6.4296e-12,\n",
       "            2.8080e-11, 2.4125e-11, 2.1433e-11, 4.1101e-11, 2.8865e-12, 3.8027e-11,\n",
       "            2.7048e-11, 1.0992e-11], device='cuda:0')},\n",
       "   43: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-7.7851e-08,  7.2582e-08, -2.5609e-08,  ...,  3.0194e-09,\n",
       "             -3.0540e-08,  4.3474e-08],\n",
       "            [-5.7157e-08,  3.3389e-08,  7.1094e-08,  ...,  7.0018e-08,\n",
       "              3.7013e-08, -5.0216e-08],\n",
       "            [-2.1221e-07,  2.6430e-07,  3.0814e-07,  ...,  1.6411e-07,\n",
       "              1.3842e-07, -6.5850e-08],\n",
       "            ...,\n",
       "            [-4.9747e-07,  4.7315e-07,  3.1145e-07,  ...,  4.7358e-07,\n",
       "              3.3969e-07, -7.9144e-08],\n",
       "            [-1.5300e-07,  2.3386e-07, -7.1588e-08,  ..., -6.2068e-09,\n",
       "              3.1355e-08,  2.6548e-07],\n",
       "            [ 5.8664e-07, -5.5814e-07, -3.4151e-07,  ..., -2.9449e-07,\n",
       "             -3.1067e-07, -6.9152e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.0755e-13, 2.8241e-13, 7.0024e-14,  ..., 5.9170e-14, 4.9870e-14,\n",
       "             2.1699e-13],\n",
       "            [1.5485e-13, 1.8320e-13, 3.2862e-14,  ..., 3.9491e-14, 3.3807e-14,\n",
       "             9.9377e-14],\n",
       "            [2.2688e-13, 2.0010e-13, 5.7376e-14,  ..., 5.0170e-14, 6.4008e-14,\n",
       "             6.7821e-14],\n",
       "            ...,\n",
       "            [1.8663e-12, 2.2979e-12, 5.8350e-13,  ..., 3.1938e-13, 2.1450e-13,\n",
       "             2.0346e-12],\n",
       "            [5.0557e-13, 5.7189e-13, 1.7595e-13,  ..., 1.4497e-13, 1.2672e-13,\n",
       "             4.2433e-13],\n",
       "            [3.4529e-13, 3.0808e-13, 9.2420e-14,  ..., 1.0250e-13, 1.2607e-13,\n",
       "             2.2176e-13]], device='cuda:0')},\n",
       "   44: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.3798e-07,  2.8209e-08, -9.1720e-08,  ...,  8.7705e-08,\n",
       "             -8.7748e-08,  8.6288e-08],\n",
       "            [-3.9778e-08, -9.4185e-09, -3.0249e-08,  ...,  6.1038e-08,\n",
       "              1.1791e-08,  3.9343e-07],\n",
       "            [-1.8640e-07,  3.4795e-08,  2.8075e-07,  ...,  2.7688e-07,\n",
       "              2.7650e-07, -5.5887e-07],\n",
       "            ...,\n",
       "            [ 7.3694e-07,  1.4932e-07,  1.2798e-07,  ...,  2.7620e-07,\n",
       "              2.3261e-07, -9.2202e-07],\n",
       "            [-4.2001e-07,  3.7718e-07, -3.9345e-07,  ...,  1.1448e-06,\n",
       "             -2.8533e-07,  4.8815e-07],\n",
       "            [ 4.0680e-07, -3.4267e-07,  1.5158e-07,  ..., -9.5875e-07,\n",
       "             -4.9661e-07, -1.8120e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.2752e-12, 1.5715e-13, 2.8083e-13,  ..., 8.3800e-13, 2.5723e-13,\n",
       "             1.7271e-13],\n",
       "            [4.6478e-13, 4.2565e-13, 7.2687e-14,  ..., 1.0081e-12, 1.2947e-13,\n",
       "             3.6420e-13],\n",
       "            [8.2784e-13, 1.9517e-13, 1.8622e-13,  ..., 8.3684e-13, 1.8741e-13,\n",
       "             3.6088e-13],\n",
       "            ...,\n",
       "            [5.5032e-12, 2.5805e-12, 1.0952e-12,  ..., 9.5320e-12, 1.0232e-12,\n",
       "             1.7147e-12],\n",
       "            [1.8863e-12, 1.2588e-12, 5.9072e-13,  ..., 6.6607e-12, 1.1756e-12,\n",
       "             1.2628e-12],\n",
       "            [1.8055e-12, 5.2072e-13, 4.0340e-13,  ..., 2.2970e-12, 4.5073e-13,\n",
       "             3.6952e-12]], device='cuda:0')},\n",
       "   45: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 8.4611e-07, -6.6790e-07,  4.4470e-07,  ..., -1.0436e-07,\n",
       "             -1.8088e-07, -4.0116e-07],\n",
       "            [ 1.8792e-08,  2.3290e-07,  2.2099e-07,  ..., -1.4276e-07,\n",
       "              3.4063e-07,  1.3964e-07],\n",
       "            [ 2.4489e-08, -1.7470e-07,  8.6543e-07,  ...,  7.4673e-07,\n",
       "              2.3109e-07, -8.7503e-07],\n",
       "            ...,\n",
       "            [-1.1421e-09, -1.0979e-07, -6.3122e-07,  ..., -3.5436e-07,\n",
       "              2.4397e-08,  1.2084e-07],\n",
       "            [-4.1150e-07,  4.0672e-07,  1.0152e-06,  ...,  9.3397e-07,\n",
       "              6.0713e-07, -3.4137e-07],\n",
       "            [-3.9272e-07,  5.2308e-07, -1.1839e-06,  ..., -7.3108e-07,\n",
       "             -8.7337e-09,  1.1030e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.4803e-11, 2.6361e-11, 3.6690e-12,  ..., 3.5585e-12, 4.9748e-12,\n",
       "             1.3784e-11],\n",
       "            [2.7307e-11, 3.2489e-11, 8.5903e-12,  ..., 5.6006e-12, 3.8201e-12,\n",
       "             2.3843e-11],\n",
       "            [1.1121e-12, 5.9170e-13, 6.3284e-13,  ..., 2.0842e-13, 7.1488e-13,\n",
       "             1.3958e-12],\n",
       "            ...,\n",
       "            [6.7390e-12, 8.8574e-12, 1.9287e-12,  ..., 1.8539e-12, 2.3262e-12,\n",
       "             5.2225e-12],\n",
       "            [1.0954e-11, 1.0883e-11, 2.9587e-12,  ..., 2.1002e-12, 1.7735e-12,\n",
       "             4.2186e-12],\n",
       "            [2.2406e-12, 2.5043e-12, 1.0204e-12,  ..., 6.2573e-13, 7.2357e-13,\n",
       "             2.1358e-12]], device='cuda:0')},\n",
       "   46: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.4960e-06, -7.4870e-09,  5.8636e-07,  ...,  2.4545e-07,\n",
       "              3.1060e-07, -1.4704e-06],\n",
       "            [-1.9892e-06, -2.2646e-07,  4.9659e-07,  ...,  5.1468e-07,\n",
       "              9.8776e-07, -1.3783e-06],\n",
       "            [-1.3228e-06, -7.0753e-08,  5.7818e-07,  ..., -5.9731e-07,\n",
       "              7.3153e-07, -5.2306e-07],\n",
       "            ...,\n",
       "            [ 3.8650e-06, -3.3015e-07,  5.2762e-07,  ..., -1.8947e-06,\n",
       "              5.2170e-07,  3.6012e-06],\n",
       "            [ 1.8058e-06, -5.3722e-08,  5.5285e-07,  ..., -1.6056e-06,\n",
       "              3.6865e-07, -1.4008e-06],\n",
       "            [ 1.3703e-06,  4.5261e-07, -1.2067e-06,  ...,  2.2695e-07,\n",
       "             -1.1430e-06,  4.9399e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[6.9874e-10, 8.1145e-12, 4.3132e-11,  ..., 9.3752e-11, 2.4664e-11,\n",
       "             1.3718e-11],\n",
       "            [5.5467e-11, 4.4412e-11, 1.3782e-11,  ..., 1.3978e-10, 1.7041e-11,\n",
       "             1.2343e-11],\n",
       "            [2.6421e-12, 1.1172e-12, 1.0085e-12,  ..., 2.6239e-12, 1.0534e-12,\n",
       "             7.7254e-13],\n",
       "            ...,\n",
       "            [1.0215e-10, 8.3785e-12, 1.1517e-11,  ..., 4.3609e-11, 9.8847e-12,\n",
       "             1.7662e-11],\n",
       "            [2.7687e-11, 8.8360e-12, 5.4637e-12,  ..., 3.8608e-11, 7.4919e-12,\n",
       "             1.9993e-11],\n",
       "            [1.0180e-11, 4.4754e-12, 4.4173e-12,  ..., 1.7367e-11, 4.7580e-12,\n",
       "             2.0750e-11]], device='cuda:0')},\n",
       "   47: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.6888e-07, -3.9870e-08, -1.1848e-06,  ..., -8.5449e-07,\n",
       "             -4.8494e-07,  1.1573e-06],\n",
       "            [-4.7463e-07,  6.6912e-07, -2.0049e-06,  ..., -1.3576e-06,\n",
       "             -7.5507e-07,  2.3865e-06],\n",
       "            [-3.5353e-07,  2.5339e-07,  2.5219e-07,  ...,  3.6911e-07,\n",
       "              1.5137e-07, -1.3169e-07],\n",
       "            ...,\n",
       "            [-9.9050e-07,  9.4146e-07,  1.4023e-06,  ...,  1.3610e-06,\n",
       "              8.3161e-07, -5.5105e-07],\n",
       "            [ 1.0664e-06, -1.0802e-06, -9.9739e-07,  ..., -6.0424e-07,\n",
       "             -1.3142e-06, -1.1477e-07],\n",
       "            [ 6.8702e-07, -5.9013e-07, -1.7947e-06,  ..., -1.0583e-06,\n",
       "             -6.7517e-07,  8.8046e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.3745e-11, 1.7922e-11, 4.4300e-12,  ..., 3.4318e-12, 1.4404e-11,\n",
       "             1.2476e-11],\n",
       "            [1.0561e-10, 9.1132e-11, 2.4262e-11,  ..., 2.2489e-11, 2.1780e-11,\n",
       "             3.8644e-11],\n",
       "            [1.8375e-11, 5.2783e-12, 1.2149e-12,  ..., 9.6452e-13, 1.1591e-11,\n",
       "             4.3720e-12],\n",
       "            ...,\n",
       "            [2.6664e-11, 2.0685e-11, 4.9490e-12,  ..., 3.6721e-12, 1.0005e-11,\n",
       "             1.5636e-11],\n",
       "            [4.0057e-11, 2.4550e-11, 7.3505e-12,  ..., 4.0454e-12, 3.8210e-11,\n",
       "             2.5562e-11],\n",
       "            [5.0074e-11, 3.1899e-11, 5.3677e-12,  ..., 7.0994e-12, 2.6985e-11,\n",
       "             1.7469e-11]], device='cuda:0')},\n",
       "   48: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 5.1197e-08,  2.0191e-07, -9.7063e-07,  ...,  1.6783e-06,\n",
       "             -4.2103e-07,  1.3576e-07],\n",
       "            [ 1.7759e-07,  6.0830e-08, -3.2906e-06,  ...,  3.5801e-06,\n",
       "             -2.9142e-06,  1.0398e-07],\n",
       "            [-8.7764e-09,  3.4153e-08, -2.9064e-07,  ..., -8.5743e-08,\n",
       "             -2.9406e-07, -1.2632e-07],\n",
       "            ...,\n",
       "            [-1.7522e-07, -1.3176e-08,  5.3482e-07,  ...,  1.3185e-06,\n",
       "              7.8093e-07, -4.6347e-07],\n",
       "            [ 8.5757e-08,  4.5921e-08,  8.5761e-07,  ...,  1.2060e-07,\n",
       "              6.1750e-07, -3.1379e-08],\n",
       "            [ 2.1524e-07, -7.7340e-08, -1.0363e-07,  ..., -1.5720e-06,\n",
       "             -8.1858e-07,  2.5396e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.6789e-12, 2.0588e-12, 3.2199e-12,  ..., 1.1240e-11, 2.5546e-12,\n",
       "             2.1377e-12],\n",
       "            [5.5528e-12, 3.8997e-12, 3.0808e-11,  ..., 2.5027e-10, 2.6250e-11,\n",
       "             6.9083e-12],\n",
       "            [1.3586e-12, 1.5714e-12, 1.7868e-12,  ..., 7.7477e-12, 1.3314e-12,\n",
       "             1.2910e-12],\n",
       "            ...,\n",
       "            [1.8407e-12, 1.0955e-12, 7.8090e-12,  ..., 6.5015e-11, 3.4995e-12,\n",
       "             3.6683e-12],\n",
       "            [4.0644e-12, 2.5143e-12, 1.8742e-11,  ..., 4.6121e-11, 1.1694e-11,\n",
       "             5.0241e-12],\n",
       "            [7.1239e-12, 2.9050e-12, 7.9695e-12,  ..., 4.1562e-11, 3.3007e-12,\n",
       "             1.1234e-11]], device='cuda:0')},\n",
       "   49: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 8.3567e-07, -1.3682e-06, -6.1147e-07,  1.0851e-07,  1.6355e-06,\n",
       "            -3.6779e-07, -6.1728e-07, -1.8393e-06,  8.8883e-07, -1.2490e-06,\n",
       "             1.6186e-06,  3.8247e-06,  7.7757e-07,  7.9761e-07,  2.6371e-08,\n",
       "            -3.2830e-08,  5.6798e-07, -9.6259e-08, -8.1332e-07, -1.3134e-06,\n",
       "             1.3967e-06, -1.0091e-06, -4.7166e-08, -1.9017e-06, -2.8552e-06,\n",
       "            -2.1657e-06,  3.8265e-08,  1.8520e-06,  6.0787e-07,  3.4137e-06,\n",
       "            -8.5759e-07,  1.3929e-06,  1.1770e-06,  4.3824e-07,  1.5920e-06,\n",
       "            -1.6985e-06, -6.0359e-07, -1.9536e-07, -2.6122e-06, -8.2180e-07,\n",
       "            -2.0316e-08, -1.0156e-06, -1.5211e-07,  3.5206e-06,  5.1531e-07,\n",
       "            -2.0969e-06,  4.6615e-07,  1.9696e-07,  1.9020e-06, -3.2254e-07,\n",
       "            -5.2663e-07, -2.2773e-07,  2.2033e-07,  3.7820e-06,  6.5095e-07,\n",
       "             8.7486e-07, -6.1698e-08,  4.1074e-06, -9.6800e-08,  3.4289e-07,\n",
       "            -6.8108e-07, -1.5490e-06,  1.5616e-07, -1.0247e-06,  4.2098e-07,\n",
       "            -1.4968e-07,  9.6624e-07,  1.4977e-06, -1.2347e-06,  1.0707e-06,\n",
       "             3.8351e-07, -5.3385e-07, -1.4372e-06,  1.3370e-06,  1.3770e-06,\n",
       "             1.1529e-06,  9.3217e-07,  1.8645e-06,  7.8343e-07, -4.5510e-06,\n",
       "             2.0849e-07,  1.1019e-06,  6.2096e-07, -1.6285e-07, -1.2109e-06,\n",
       "             5.2394e-07,  1.3804e-06, -6.9842e-07,  1.1972e-06,  7.7652e-07,\n",
       "            -1.2188e-06, -1.1550e-06,  1.9938e-06,  2.1936e-07, -4.4792e-07,\n",
       "            -2.3955e-06,  1.6550e-06,  1.0521e-06, -1.1248e-06, -3.5262e-07,\n",
       "             5.5811e-07,  2.5820e-06, -1.0727e-06,  2.0227e-07, -5.4144e-07,\n",
       "             1.3353e-06, -1.0446e-05,  2.7818e-08, -1.6186e-06, -1.2562e-06,\n",
       "             1.0293e-05, -1.4746e-06, -6.2986e-06, -2.4973e-07,  1.4682e-06,\n",
       "             1.9916e-06,  1.8652e-07, -4.1098e-07, -5.4567e-07,  2.0917e-07,\n",
       "            -1.9847e-07, -1.0215e-06, -7.0175e-08, -4.9539e-07,  1.9875e-07,\n",
       "             7.6074e-07,  1.1377e-06, -5.4140e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.8183e-11, 2.3078e-11, 1.9213e-11, 3.2822e-11, 7.4957e-11, 8.2416e-12,\n",
       "            1.6500e-11, 4.3122e-11, 1.5583e-10, 1.4905e-10, 1.6770e-11, 7.5409e-10,\n",
       "            3.1118e-11, 2.7177e-11, 9.5047e-12, 1.5268e-11, 9.2166e-12, 2.7912e-11,\n",
       "            5.7183e-11, 1.7035e-11, 3.3200e-10, 3.6131e-11, 2.2428e-11, 4.5726e-11,\n",
       "            1.5854e-11, 7.2392e-11, 2.5865e-11, 2.9970e-11, 7.6865e-12, 1.2025e-09,\n",
       "            1.7648e-11, 8.2024e-10, 9.1875e-11, 3.7990e-11, 4.5619e-11, 3.7132e-11,\n",
       "            1.7174e-11, 6.2614e-12, 1.9153e-11, 2.6975e-11, 1.9705e-11, 8.5898e-12,\n",
       "            2.2083e-11, 8.2597e-11, 1.4632e-11, 4.9829e-11, 2.3309e-11, 1.8508e-11,\n",
       "            2.9413e-11, 1.7153e-11, 1.9416e-11, 7.0156e-11, 2.4940e-11, 9.1549e-11,\n",
       "            1.1302e-11, 2.1240e-11, 5.7416e-12, 1.4454e-10, 2.9701e-12, 4.5426e-11,\n",
       "            2.5707e-11, 8.5578e-11, 1.9042e-11, 5.8674e-11, 1.5848e-10, 3.0728e-11,\n",
       "            2.0871e-11, 7.1073e-11, 1.1972e-10, 5.1797e-11, 6.1914e-11, 5.7809e-11,\n",
       "            1.8984e-11, 1.8678e-11, 5.7466e-11, 2.7375e-11, 5.5536e-11, 2.1070e-10,\n",
       "            3.8198e-11, 1.4890e-10, 2.8419e-11, 5.4961e-11, 3.6593e-11, 3.1936e-11,\n",
       "            1.7533e-11, 4.6124e-11, 4.1761e-11, 1.0618e-10, 1.4458e-11, 1.5762e-11,\n",
       "            1.9280e-11, 7.4682e-12, 3.0558e-11, 6.7726e-12, 4.1655e-11, 2.4028e-11,\n",
       "            1.1228e-11, 1.1316e-10, 1.7737e-11, 5.3931e-12, 6.7151e-11, 7.3840e-11,\n",
       "            2.0106e-11, 2.0216e-11, 2.2152e-11, 1.7579e-11, 2.5494e-09, 1.5772e-11,\n",
       "            7.2978e-11, 5.4603e-11, 5.5678e-10, 4.1202e-11, 4.1132e-10, 3.4486e-11,\n",
       "            1.6523e-11, 4.4455e-11, 2.0365e-11, 1.1469e-11, 4.5411e-11, 1.2271e-11,\n",
       "            2.5349e-11, 5.6739e-11, 1.8583e-11, 9.9741e-12, 2.3682e-11, 2.5578e-11,\n",
       "            5.3749e-11, 3.2655e-11], device='cuda:0')},\n",
       "   50: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.9490e-08,  2.0560e-15, -2.9234e-08,  ...,  9.2643e-08,\n",
       "              2.7093e-08, -7.5612e-08],\n",
       "            [ 1.4137e-08,  5.7953e-16, -1.1108e-07,  ...,  2.0520e-09,\n",
       "              1.2918e-08,  2.2080e-08],\n",
       "            [-3.8897e-08, -5.2966e-15,  9.2299e-10,  ...,  3.8348e-09,\n",
       "             -6.1684e-08, -1.9091e-07],\n",
       "            ...,\n",
       "            [-5.4707e-08,  2.7223e-15, -5.1968e-08,  ...,  6.5197e-08,\n",
       "             -6.3557e-08,  1.2613e-07],\n",
       "            [-9.9028e-09,  1.0277e-14,  2.7257e-07,  ...,  6.3673e-08,\n",
       "              2.1524e-07,  1.4310e-07],\n",
       "            [ 9.3535e-09,  3.3814e-16,  1.8862e-07,  ...,  7.1737e-08,\n",
       "              1.1520e-07,  2.1365e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.7547e-13, 7.8456e-18, 5.5970e-12,  ..., 5.2509e-14, 3.2714e-12,\n",
       "             3.2281e-12],\n",
       "            [3.2516e-13, 9.3247e-19, 4.7674e-13,  ..., 7.2535e-15, 2.8538e-13,\n",
       "             3.8705e-13],\n",
       "            [2.1062e-13, 1.4251e-17, 1.6570e-12,  ..., 1.2955e-14, 9.5585e-13,\n",
       "             1.0165e-12],\n",
       "            ...,\n",
       "            [2.4323e-12, 9.5818e-19, 2.7589e-13,  ..., 1.0433e-13, 6.4759e-14,\n",
       "             2.3853e-12],\n",
       "            [3.4973e-13, 9.0449e-18, 2.4605e-13,  ..., 7.2141e-15, 1.3267e-13,\n",
       "             1.1196e-12],\n",
       "            [4.6176e-13, 4.7964e-19, 1.0130e-12,  ..., 1.7601e-14, 5.4627e-13,\n",
       "             6.5272e-13]], device='cuda:0')},\n",
       "   51: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 8.3142e-08,  7.9091e-08,  9.3612e-08,  ...,  2.1525e-08,\n",
       "              1.4770e-07,  3.6844e-08],\n",
       "            [ 3.3652e-08,  1.5511e-07,  2.6852e-08,  ...,  8.3573e-08,\n",
       "             -2.5377e-07, -8.4883e-08],\n",
       "            [ 1.0632e-07, -7.7458e-08,  3.2013e-08,  ..., -3.3371e-07,\n",
       "              2.5360e-07, -7.3406e-08],\n",
       "            ...,\n",
       "            [-3.5156e-07, -1.8468e-07,  7.5079e-08,  ...,  2.1140e-07,\n",
       "              3.3523e-08,  4.1439e-07],\n",
       "            [ 1.8473e-07, -7.1749e-08,  6.4273e-08,  ...,  1.0650e-08,\n",
       "              1.1959e-07,  1.8326e-08],\n",
       "            [ 5.6848e-08, -5.7042e-09,  2.9105e-08,  ...,  1.1083e-07,\n",
       "              7.4573e-08,  3.5467e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.5612e-12, 4.1889e-13, 1.5175e-12,  ..., 6.0827e-13, 5.0035e-13,\n",
       "             6.9921e-13],\n",
       "            [4.1351e-13, 8.8080e-14, 1.2831e-13,  ..., 7.5678e-14, 6.7311e-14,\n",
       "             5.0739e-14],\n",
       "            [1.4615e-12, 1.9914e-13, 6.3889e-13,  ..., 3.3919e-13, 2.8519e-13,\n",
       "             4.8879e-13],\n",
       "            ...,\n",
       "            [1.0050e-12, 8.0774e-13, 3.0340e-13,  ..., 2.3083e-12, 1.3157e-12,\n",
       "             6.8577e-13],\n",
       "            [1.0921e-12, 2.9177e-13, 3.7673e-13,  ..., 2.0001e-12, 1.3466e-12,\n",
       "             6.9446e-13],\n",
       "            [7.7551e-13, 8.6759e-14, 1.8911e-13,  ..., 1.3682e-13, 1.1058e-13,\n",
       "             3.9069e-13]], device='cuda:0')},\n",
       "   52: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.2419e-08, -6.3603e-15, -9.7880e-07,  ...,  6.8010e-08,\n",
       "             -9.7021e-07, -9.5368e-07],\n",
       "            [-4.1210e-08,  1.2511e-14,  3.8095e-07,  ...,  1.7282e-07,\n",
       "              3.5489e-07,  1.0301e-06],\n",
       "            [ 9.1387e-08,  2.3468e-15, -9.0215e-07,  ..., -3.5219e-07,\n",
       "             -9.6775e-07, -5.2562e-07],\n",
       "            ...,\n",
       "            [ 6.0457e-08, -4.7275e-15, -1.0380e-06,  ..., -1.8251e-09,\n",
       "             -8.7203e-07, -8.2723e-07],\n",
       "            [ 1.8970e-08, -3.0841e-14, -1.0843e-06,  ..., -2.1737e-07,\n",
       "             -9.1250e-07, -1.3605e-06],\n",
       "            [ 6.5371e-08, -2.1879e-15,  5.8215e-07,  ..., -3.9991e-07,\n",
       "              5.9951e-07,  5.4318e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.1186e-12, 1.3486e-17, 9.8541e-11,  ..., 9.7482e-13, 4.7613e-11,\n",
       "             3.8272e-11],\n",
       "            [3.9998e-13, 1.8289e-17, 3.1992e-12,  ..., 1.9817e-13, 2.0733e-12,\n",
       "             7.4011e-12],\n",
       "            [2.4883e-12, 6.2629e-17, 1.7493e-11,  ..., 7.8236e-14, 8.3909e-12,\n",
       "             1.0114e-11],\n",
       "            ...,\n",
       "            [3.0156e-11, 1.5822e-17, 1.7809e-11,  ..., 7.4842e-14, 9.9976e-12,\n",
       "             2.0030e-11],\n",
       "            [3.2404e-11, 9.2833e-16, 5.8048e-12,  ..., 1.6486e-13, 3.5226e-12,\n",
       "             1.3503e-11],\n",
       "            [5.2698e-12, 1.6971e-17, 6.4523e-11,  ..., 7.9654e-13, 2.8692e-11,\n",
       "             2.6191e-11]], device='cuda:0')},\n",
       "   53: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.0169e-06,  2.7707e-07, -7.8089e-07,  ..., -1.0245e-07,\n",
       "              2.6286e-07, -8.3751e-08],\n",
       "            [-6.8189e-07, -2.1007e-07,  1.0814e-06,  ...,  9.2439e-07,\n",
       "              7.4027e-07, -1.2937e-07],\n",
       "            [-1.1763e-06,  4.3953e-08,  4.8927e-08,  ...,  1.2644e-06,\n",
       "              4.3203e-07, -1.8211e-07],\n",
       "            ...,\n",
       "            [-8.6519e-07,  3.3103e-07, -8.4394e-07,  ..., -1.3113e-07,\n",
       "             -3.1536e-07,  2.5010e-08],\n",
       "            [-5.8152e-07,  8.3472e-10, -1.9351e-07,  ..., -2.2242e-07,\n",
       "             -1.3010e-07, -5.3914e-07],\n",
       "            [ 7.1334e-07, -8.9983e-09,  8.7389e-08,  ..., -3.2399e-07,\n",
       "             -4.8351e-07, -6.9869e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.5844e-11, 2.6550e-12, 1.5942e-11,  ..., 2.3444e-12, 3.4569e-12,\n",
       "             2.8743e-12],\n",
       "            [7.9886e-12, 8.7887e-13, 2.2871e-12,  ..., 1.4341e-11, 4.5198e-12,\n",
       "             8.9489e-13],\n",
       "            [1.5151e-11, 2.1575e-12, 4.8851e-12,  ..., 6.6689e-12, 3.7206e-12,\n",
       "             4.6228e-12],\n",
       "            ...,\n",
       "            [1.6420e-11, 1.1972e-12, 4.0452e-12,  ..., 3.8443e-12, 2.7435e-12,\n",
       "             1.8975e-12],\n",
       "            [1.5384e-11, 2.3611e-12, 6.5857e-12,  ..., 2.5002e-11, 1.0758e-11,\n",
       "             3.6540e-12],\n",
       "            [4.7050e-11, 3.7386e-12, 1.2139e-11,  ..., 6.6314e-12, 4.4301e-12,\n",
       "             3.6806e-11]], device='cuda:0')},\n",
       "   54: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.7703e-09,  6.9861e-15,  8.9906e-07,  ..., -7.5513e-08,\n",
       "              6.8070e-07,  1.5084e-06],\n",
       "            [ 4.6626e-07, -6.8045e-15, -2.2560e-07,  ...,  8.0295e-07,\n",
       "             -1.0509e-07,  1.9015e-07],\n",
       "            [ 5.9041e-07, -8.5063e-15,  2.3629e-06,  ...,  7.7444e-07,\n",
       "              1.8596e-06,  1.9998e-06],\n",
       "            ...,\n",
       "            [-1.6885e-07,  4.7165e-14,  7.4953e-07,  ..., -1.5141e-07,\n",
       "              6.3441e-07,  4.3350e-07],\n",
       "            [ 4.1612e-07, -9.9593e-15, -7.0920e-06,  ..., -6.0661e-07,\n",
       "             -6.3640e-06, -6.0886e-06],\n",
       "            [-1.3888e-07, -2.7940e-14,  2.2091e-06,  ...,  1.7334e-07,\n",
       "              5.3948e-07, -3.7760e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.8856e-10, 4.0375e-16, 6.8354e-11,  ..., 9.6582e-13, 4.2597e-11,\n",
       "             9.6751e-11],\n",
       "            [1.4424e-10, 2.4129e-15, 3.1968e-10,  ..., 1.4341e-12, 1.3638e-10,\n",
       "             1.8567e-10],\n",
       "            [5.0311e-10, 2.1150e-16, 4.5535e-11,  ..., 1.0472e-12, 2.1188e-11,\n",
       "             1.5059e-10],\n",
       "            ...,\n",
       "            [2.8999e-10, 2.3491e-16, 1.2248e-11,  ..., 4.0570e-13, 8.5444e-12,\n",
       "             1.3786e-10],\n",
       "            [1.2862e-10, 6.4331e-17, 1.4001e-10,  ..., 2.7389e-12, 1.0127e-10,\n",
       "             1.0500e-10],\n",
       "            [1.3033e-10, 1.7856e-15, 2.1416e-10,  ..., 4.5469e-13, 9.7672e-11,\n",
       "             1.1696e-10]], device='cuda:0')},\n",
       "   55: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.0752e-07, -3.4364e-07,  1.0478e-06,  ..., -1.0884e-07,\n",
       "              3.1358e-07,  1.8102e-07],\n",
       "            [ 1.7638e-07,  2.1039e-07, -4.1281e-07,  ...,  1.2887e-07,\n",
       "             -4.7175e-07,  8.8709e-10],\n",
       "            [ 9.6627e-07, -6.9298e-07,  4.7456e-07,  ...,  4.7815e-07,\n",
       "              8.2263e-07,  7.4092e-07],\n",
       "            ...,\n",
       "            [ 2.9710e-07, -1.6570e-07, -3.6393e-08,  ...,  7.4276e-09,\n",
       "              2.3317e-07,  1.2111e-06],\n",
       "            [-7.8875e-07,  1.1794e-06, -9.6972e-07,  ...,  4.3369e-07,\n",
       "             -4.1280e-07,  1.1128e-06],\n",
       "            [ 9.0134e-07, -1.2891e-06,  3.6552e-07,  ...,  1.0990e-06,\n",
       "              1.9661e-06, -2.9420e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[9.4327e-12, 1.5988e-12, 7.3634e-12,  ..., 7.9675e-12, 1.8343e-11,\n",
       "             5.3860e-12],\n",
       "            [3.2600e-11, 3.3872e-12, 6.2793e-12,  ..., 6.5264e-12, 2.5246e-11,\n",
       "             8.0656e-12],\n",
       "            [1.7128e-11, 3.0962e-12, 1.0613e-11,  ..., 1.1197e-11, 2.9229e-11,\n",
       "             5.8514e-11],\n",
       "            ...,\n",
       "            [5.6980e-12, 8.8800e-13, 1.5636e-11,  ..., 3.3826e-12, 2.7536e-11,\n",
       "             2.7406e-11],\n",
       "            [1.9937e-11, 3.0462e-12, 3.6755e-12,  ..., 4.2563e-12, 3.1405e-12,\n",
       "             3.6296e-12],\n",
       "            [3.1084e-11, 3.7516e-12, 7.2712e-12,  ..., 7.6553e-12, 1.8531e-11,\n",
       "             2.7117e-11]], device='cuda:0')},\n",
       "   56: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.9080e-07, -6.3554e-08,  1.7197e-07,  2.2116e-07,  1.2592e-06,\n",
       "             3.2984e-08, -5.0324e-07,  6.6359e-09,  1.7706e-07, -2.3976e-07,\n",
       "            -2.4704e-06, -7.6887e-08, -3.2388e-07,  1.3455e-07,  6.0219e-08,\n",
       "            -6.6341e-08,  9.5729e-08, -2.4152e-07,  3.0348e-07, -1.1392e-07,\n",
       "             9.2387e-07,  2.1507e-07, -3.6099e-07, -2.1829e-07,  2.2171e-07,\n",
       "            -4.1234e-07,  2.4390e-07,  2.6796e-07, -7.4996e-08,  1.7001e-06,\n",
       "             2.7607e-07, -2.8183e-06,  7.9054e-08,  1.7128e-07, -7.5120e-09,\n",
       "             1.7505e-07, -4.8173e-07,  9.2829e-08, -6.3380e-07,  8.6659e-08,\n",
       "            -2.4978e-07, -5.0969e-08,  8.0150e-08, -7.0090e-07,  5.1674e-08,\n",
       "            -1.9465e-07, -1.4055e-08,  2.3240e-07, -2.5432e-07, -1.4354e-07,\n",
       "             1.5207e-07,  8.2102e-08,  4.7798e-07, -7.2489e-07,  1.1106e-07,\n",
       "             7.1560e-08,  4.2851e-08,  1.4815e-07, -5.7615e-09, -5.4014e-08,\n",
       "            -1.4643e-07,  2.7623e-07, -4.9221e-07,  8.8098e-08,  3.5956e-07,\n",
       "            -2.4046e-08, -1.7948e-07,  2.3811e-07,  2.0511e-08,  5.0800e-08,\n",
       "            -3.0836e-07,  5.7808e-08, -3.3767e-07, -2.0907e-07, -3.2750e-07,\n",
       "            -1.9845e-07, -1.2291e-07, -2.9936e-07,  2.0698e-07,  6.6999e-07,\n",
       "             5.4884e-08, -4.2935e-08,  5.1966e-07, -4.1906e-07,  1.2921e-07,\n",
       "            -3.2055e-07, -2.5434e-08, -9.4986e-07,  1.9838e-07, -5.3118e-08,\n",
       "            -1.5601e-07, -2.0233e-07, -3.9732e-08, -7.4110e-07, -5.0885e-07,\n",
       "            -4.7192e-07, -1.0938e-07, -2.2248e-07, -3.2742e-07, -5.1173e-07,\n",
       "             3.3607e-08,  1.9310e-07, -4.1656e-07,  1.5368e-07,  4.2762e-07,\n",
       "            -2.0041e-07,  1.9180e-06,  1.2711e-07, -2.1879e-07,  3.1185e-08,\n",
       "             1.1757e-06,  9.7705e-08,  3.1700e-06, -7.3323e-08, -1.1961e-06,\n",
       "            -4.5166e-07, -1.1790e-07, -1.9639e-07, -1.8581e-06, -2.5156e-07,\n",
       "            -7.3195e-08,  4.7172e-08,  6.6584e-08,  1.9077e-08,  6.2990e-08,\n",
       "             1.3292e-07, -3.9800e-08, -1.3344e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.3589e-12, 2.4325e-13, 6.9717e-13, 3.6182e-12, 4.2153e-11, 2.4840e-13,\n",
       "            7.5751e-13, 2.1284e-14, 6.7955e-12, 7.3620e-13, 7.2597e-11, 8.7216e-13,\n",
       "            8.8930e-13, 6.8830e-12, 8.2982e-13, 5.7286e-12, 3.9960e-12, 1.2957e-11,\n",
       "            3.3256e-12, 7.1666e-12, 8.1745e-11, 1.6055e-12, 1.8030e-12, 2.1613e-13,\n",
       "            1.9981e-12, 1.7550e-12, 8.8675e-13, 5.6506e-13, 4.8219e-13, 9.4643e-11,\n",
       "            1.8916e-12, 1.4052e-10, 9.5280e-14, 4.0880e-13, 4.0941e-12, 4.4784e-12,\n",
       "            4.2653e-12, 2.2739e-13, 6.0117e-12, 6.6065e-14, 5.9128e-13, 2.2815e-13,\n",
       "            1.1118e-13, 1.2693e-12, 3.6338e-12, 4.1244e-13, 3.4077e-13, 6.4514e-14,\n",
       "            4.5538e-13, 6.6719e-14, 1.8432e-13, 4.9506e-14, 9.5075e-13, 2.1168e-12,\n",
       "            4.9598e-13, 3.0660e-13, 3.0946e-13, 7.6181e-14, 5.3946e-14, 1.6730e-12,\n",
       "            1.3725e-12, 2.1683e-12, 6.0159e-12, 3.5088e-12, 8.5173e-13, 3.4790e-12,\n",
       "            3.0809e-12, 9.2439e-14, 4.1931e-13, 2.6932e-13, 4.2043e-13, 3.4986e-12,\n",
       "            3.6648e-13, 4.1236e-13, 1.2047e-12, 1.9619e-12, 3.2522e-13, 6.0346e-12,\n",
       "            1.3109e-12, 1.3725e-11, 1.1501e-13, 1.1050e-13, 9.8194e-12, 1.5522e-12,\n",
       "            2.2752e-13, 7.2943e-12, 6.7596e-13, 3.2690e-12, 8.8970e-13, 3.3125e-12,\n",
       "            8.5967e-14, 1.5290e-12, 1.5174e-12, 3.0967e-12, 1.2575e-12, 2.0631e-12,\n",
       "            1.6648e-12, 2.7990e-12, 3.1165e-13, 2.9288e-13, 1.4788e-14, 4.0656e-13,\n",
       "            1.0356e-12, 8.7542e-14, 1.2282e-11, 3.8529e-13, 4.2624e-10, 2.4703e-13,\n",
       "            1.9752e-12, 4.9693e-14, 3.2227e-11, 1.2518e-12, 3.1865e-10, 1.4026e-12,\n",
       "            4.1285e-12, 1.4879e-12, 3.6193e-13, 1.3494e-12, 7.3180e-11, 2.5924e-12,\n",
       "            2.0360e-12, 5.3696e-13, 1.2508e-12, 6.7993e-14, 1.1068e-13, 3.4538e-13,\n",
       "            1.9415e-13, 6.0745e-12], device='cuda:0')},\n",
       "   57: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 3.4387e-08,  1.3032e-07, -6.4480e-08,  6.9902e-08,  4.0675e-07,\n",
       "            -1.1893e-07, -1.1378e-07, -1.3077e-07,  2.1287e-08, -3.7132e-08,\n",
       "            -3.8284e-07,  1.9042e-07,  4.2146e-07, -1.9357e-07,  2.1382e-08,\n",
       "            -1.3397e-07,  1.4077e-07,  5.3080e-09, -3.2507e-08, -5.1374e-08,\n",
       "             2.9307e-08, -1.9446e-07, -2.6434e-07, -2.3537e-07, -1.5145e-07,\n",
       "            -5.7055e-08, -1.5447e-07, -3.6517e-07, -1.7055e-07,  1.2286e-07,\n",
       "            -2.9682e-07,  3.2749e-07,  4.3778e-08,  1.2706e-07, -2.0579e-07,\n",
       "            -1.8586e-07,  1.7059e-07, -3.0354e-07, -1.7912e-07,  6.0100e-08,\n",
       "             6.7085e-08, -1.9633e-07,  7.7943e-08,  1.0971e-07, -4.3278e-09,\n",
       "            -1.0011e-07,  2.8336e-08,  4.2386e-08, -9.2196e-08,  3.5823e-08,\n",
       "            -1.3774e-07,  2.1235e-07,  5.4632e-08,  7.7759e-08, -3.1712e-08,\n",
       "             1.5517e-07,  7.1945e-08, -7.5779e-08,  1.3666e-07, -1.3775e-07,\n",
       "             3.3235e-07,  2.4363e-07,  1.8088e-07, -2.6100e-07, -1.6164e-08,\n",
       "             3.1870e-08,  1.0063e-07, -2.8717e-07,  5.5685e-08, -1.0445e-07,\n",
       "            -1.0660e-07, -2.4378e-08,  9.2829e-08,  2.3635e-07, -2.1883e-07,\n",
       "             6.7133e-08,  2.8170e-07,  1.3702e-07,  5.9888e-08, -1.7607e-08,\n",
       "            -2.7541e-07,  3.5159e-08, -1.2942e-07,  2.6769e-07, -4.8062e-08,\n",
       "            -2.2226e-07, -2.2687e-07, -3.1150e-07,  8.8150e-08,  1.1116e-08,\n",
       "             2.4236e-07,  1.4382e-07, -8.5476e-08,  5.9363e-08, -2.8109e-07,\n",
       "             6.6520e-08, -1.1373e-07, -1.9301e-07,  3.0386e-07, -3.1393e-07,\n",
       "             3.4306e-08, -4.1211e-08,  2.7819e-07, -2.6195e-07,  3.2310e-07,\n",
       "             9.8303e-08, -2.1894e-07,  1.4922e-07,  1.6900e-07, -1.8896e-07,\n",
       "             2.6417e-08, -1.7639e-07,  2.2053e-07,  1.6917e-07,  3.5420e-07,\n",
       "             4.4590e-07,  3.2109e-07,  3.0190e-07,  3.5455e-07,  4.8191e-08,\n",
       "            -2.8325e-07,  6.6039e-09, -1.3052e-07,  1.0730e-07, -1.7499e-07,\n",
       "            -7.9876e-08, -9.1058e-08, -2.1006e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.4354e-12, 7.7230e-12, 2.3632e-12, 8.3833e-12, 1.1911e-11, 6.9906e-13,\n",
       "            6.7519e-12, 1.9315e-13, 5.1405e-12, 7.3127e-12, 3.5828e-11, 1.7570e-12,\n",
       "            1.1406e-11, 1.6292e-11, 1.5567e-12, 4.1450e-11, 9.5113e-12, 9.7258e-12,\n",
       "            3.8886e-12, 1.2794e-11, 1.3132e-11, 4.0788e-12, 2.1132e-12, 1.8786e-12,\n",
       "            5.1075e-12, 3.3605e-12, 3.9475e-12, 2.4836e-12, 1.6010e-12, 6.4310e-12,\n",
       "            2.1098e-11, 3.8683e-11, 7.1977e-13, 4.2204e-12, 1.3863e-11, 2.2077e-11,\n",
       "            2.2646e-11, 1.1567e-12, 3.2472e-11, 4.3299e-13, 5.7903e-12, 6.5060e-13,\n",
       "            5.8711e-13, 1.3436e-11, 7.8179e-12, 2.4833e-12, 2.4457e-12, 3.6480e-13,\n",
       "            5.1639e-12, 7.3569e-13, 1.5876e-12, 2.3152e-13, 1.8201e-12, 1.3296e-11,\n",
       "            3.4368e-12, 1.1166e-12, 1.6341e-12, 2.4022e-13, 6.8531e-13, 3.8823e-12,\n",
       "            2.2516e-12, 3.3151e-12, 9.2933e-12, 2.1203e-11, 2.1994e-12, 8.3836e-12,\n",
       "            1.1025e-11, 3.6540e-13, 1.1104e-12, 1.6537e-12, 2.9353e-12, 6.5385e-12,\n",
       "            3.0517e-12, 7.8724e-12, 4.6458e-12, 7.1942e-12, 2.9864e-12, 1.1763e-11,\n",
       "            3.3268e-12, 2.7898e-12, 3.3184e-13, 2.5869e-12, 2.1029e-12, 6.2534e-12,\n",
       "            1.5337e-12, 1.8744e-11, 9.3265e-12, 6.1643e-13, 1.0028e-12, 6.6441e-12,\n",
       "            4.2737e-13, 5.4160e-12, 7.2369e-12, 1.4167e-11, 1.0428e-11, 8.6415e-12,\n",
       "            1.2674e-11, 1.2357e-11, 1.5544e-12, 4.2628e-12, 2.7446e-13, 2.0584e-12,\n",
       "            5.0846e-12, 3.3936e-13, 2.0369e-11, 2.4255e-12, 2.1522e-11, 5.8895e-13,\n",
       "            1.2022e-11, 3.0359e-13, 5.2589e-12, 6.1203e-12, 3.7308e-11, 3.2127e-12,\n",
       "            1.5356e-11, 1.3508e-11, 5.8555e-12, 3.4163e-12, 3.4568e-11, 1.0138e-11,\n",
       "            1.5580e-11, 1.0407e-12, 1.0107e-12, 1.3129e-12, 3.3237e-13, 5.5792e-13,\n",
       "            7.0196e-13, 3.1044e-11], device='cuda:0')},\n",
       "   58: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.1105e-07, -4.0429e-07,  2.6400e-07,  4.7670e-08, -4.2955e-07,\n",
       "             2.3109e-07,  5.9972e-07,  5.4765e-07,  1.8279e-06,  1.0309e-07,\n",
       "             1.4263e-07, -1.0886e-06, -4.5944e-07, -7.5094e-07, -1.3493e-06,\n",
       "             6.6559e-07, -1.0540e-07,  1.2270e-06, -4.1794e-07, -3.5116e-07,\n",
       "             1.7352e-06,  9.6516e-07, -6.0861e-07, -2.3975e-07, -4.2683e-07,\n",
       "             3.1333e-07,  1.3311e-06, -1.7282e-06,  6.0247e-07,  3.5579e-06,\n",
       "             4.1732e-07,  1.4287e-06,  4.5393e-07,  9.8145e-08, -7.8210e-08,\n",
       "             9.9198e-07,  1.1288e-07, -6.2910e-08, -1.5384e-07,  2.9598e-07,\n",
       "             4.1643e-07,  1.6222e-06, -3.3690e-07,  6.6575e-07,  2.7200e-07,\n",
       "             1.5379e-06, -6.3608e-07,  1.6621e-06,  5.0447e-07, -1.6637e-06,\n",
       "            -2.1837e-06,  8.7566e-07, -5.2265e-07,  3.9057e-07,  7.6869e-07,\n",
       "             7.2964e-07,  9.5209e-08, -6.0924e-07, -4.4908e-07,  3.0228e-07,\n",
       "            -2.2268e-07, -1.0153e-06, -9.6314e-07, -6.1844e-07, -1.1402e-07,\n",
       "            -1.5645e-07, -5.8647e-07, -4.1978e-07,  1.6308e-07, -6.4204e-07,\n",
       "            -2.1250e-08, -2.2656e-07, -2.1707e-07, -1.9898e-07, -2.1844e-07,\n",
       "            -1.0517e-08, -1.3540e-06, -1.5559e-06,  1.3830e-06,  1.7971e-06,\n",
       "            -1.9660e-07, -2.2247e-07, -1.2343e-06,  6.1038e-08,  6.1222e-07,\n",
       "             9.4479e-07,  5.2405e-07, -1.8776e-06,  1.0232e-07,  1.8163e-07,\n",
       "             4.0787e-07,  6.5612e-07, -1.4165e-07, -4.4464e-07, -1.1052e-06,\n",
       "             3.5602e-07,  8.0771e-07, -1.1766e-06,  3.9862e-07,  5.5995e-07,\n",
       "             9.6633e-07,  1.3142e-06,  5.5560e-07,  3.1153e-07,  4.6158e-07,\n",
       "             2.4511e-07,  3.7601e-06,  9.5102e-07, -4.6860e-07, -8.0717e-07,\n",
       "            -4.7665e-06, -1.0721e-06,  6.7929e-06,  6.0518e-07,  1.1643e-06,\n",
       "            -9.2961e-08,  2.9404e-07,  1.8173e-06, -1.6681e-06, -1.2235e-06,\n",
       "            -4.2154e-07, -4.3246e-07, -7.0820e-08,  1.6706e-06,  6.5942e-07,\n",
       "             3.9875e-07,  5.4702e-07,  5.0074e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.4904e-10, 1.3164e-11, 1.7557e-11, 4.9110e-11, 1.5654e-10, 2.7400e-11,\n",
       "            2.7648e-11, 5.4358e-11, 7.9499e-11, 4.1393e-11, 3.6258e-11, 4.0821e-11,\n",
       "            3.9333e-11, 8.2324e-11, 2.7956e-11, 3.6447e-11, 2.0762e-11, 4.0947e-11,\n",
       "            2.3333e-11, 1.0846e-10, 1.0125e-10, 1.0756e-11, 3.9267e-11, 9.3742e-11,\n",
       "            4.4911e-11, 6.6561e-11, 3.4505e-11, 6.2235e-11, 3.0076e-11, 6.3265e-10,\n",
       "            9.1512e-12, 1.5612e-09, 3.0149e-11, 7.8826e-11, 6.5271e-11, 3.1694e-11,\n",
       "            1.7884e-11, 4.7866e-11, 1.2124e-11, 4.3213e-11, 1.5808e-11, 2.3343e-11,\n",
       "            2.8625e-11, 3.0937e-11, 1.9728e-11, 2.7953e-11, 4.2114e-11, 7.5693e-11,\n",
       "            4.6221e-11, 2.4085e-11, 4.4406e-11, 1.9271e-11, 2.5092e-11, 1.2816e-10,\n",
       "            6.5643e-11, 1.0295e-10, 1.7269e-11, 6.2951e-11, 3.0340e-11, 6.6822e-11,\n",
       "            1.1507e-11, 8.7979e-11, 9.7986e-12, 2.4151e-11, 7.8110e-11, 5.8301e-11,\n",
       "            3.9678e-11, 1.2389e-10, 2.1149e-11, 6.0829e-11, 1.3162e-10, 6.5832e-11,\n",
       "            1.0979e-11, 1.5931e-11, 1.8705e-11, 7.9269e-12, 6.3453e-11, 9.5790e-11,\n",
       "            1.1875e-11, 7.9587e-11, 2.7525e-11, 2.1776e-11, 2.9141e-11, 1.6744e-11,\n",
       "            8.9636e-12, 2.0605e-11, 1.7674e-11, 1.1194e-10, 1.3698e-10, 3.5971e-11,\n",
       "            6.9081e-11, 1.0522e-11, 8.0993e-11, 1.6095e-11, 2.3518e-11, 3.2975e-11,\n",
       "            1.8416e-11, 5.3360e-11, 5.4795e-11, 2.5454e-11, 1.4300e-11, 2.1748e-11,\n",
       "            5.4488e-11, 4.0313e-11, 7.6094e-11, 1.5728e-11, 2.7432e-09, 2.0755e-11,\n",
       "            2.4342e-11, 9.8582e-11, 9.4588e-11, 5.3511e-11, 9.1664e-10, 2.4662e-11,\n",
       "            5.0993e-11, 5.4845e-11, 3.6858e-11, 2.4980e-11, 8.8666e-11, 1.9966e-11,\n",
       "            2.1626e-11, 5.7974e-11, 3.4465e-11, 5.0109e-11, 8.0204e-11, 1.6963e-11,\n",
       "            4.9854e-11, 5.7904e-11], device='cuda:0')},\n",
       "   59: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 4.1274e-07,  4.2614e-08, -1.1625e-06, -8.8075e-07,  1.4423e-06,\n",
       "            -1.0707e-06,  7.5566e-08,  7.6766e-07, -5.1737e-07, -7.0721e-07,\n",
       "             2.1279e-06,  8.6453e-07,  1.0914e-06,  3.1871e-07,  2.0352e-07,\n",
       "             1.0143e-06,  4.7847e-07, -1.4416e-06,  3.1886e-07,  9.9818e-07,\n",
       "             4.9137e-07,  1.2685e-06,  1.5884e-06, -1.0783e-06,  2.3362e-07,\n",
       "             3.8086e-07, -1.7077e-06, -1.4734e-07,  1.7621e-07,  1.9017e-06,\n",
       "            -1.7927e-06, -1.3236e-06,  1.7836e-06, -1.4405e-06,  7.7938e-07,\n",
       "            -1.6220e-06,  3.3733e-06,  7.5303e-07, -2.6592e-06,  4.9614e-07,\n",
       "             1.2529e-07, -2.0524e-06, -3.8903e-07, -1.7354e-07, -2.1759e-07,\n",
       "            -1.4953e-06, -2.3454e-06, -3.4786e-07,  1.3168e-06, -4.4350e-07,\n",
       "             7.9980e-07,  1.1068e-06,  1.2346e-06,  8.0288e-07, -3.6268e-07,\n",
       "             1.4431e-06,  2.1640e-07,  5.5688e-07, -1.9305e-06, -4.3721e-07,\n",
       "             2.7127e-06,  8.4036e-07,  4.7099e-07, -1.2955e-06, -1.8832e-06,\n",
       "            -1.7655e-06, -6.9551e-07,  9.3296e-07, -1.4863e-06, -1.3001e-06,\n",
       "            -9.9760e-07, -9.5505e-07, -1.3540e-06,  9.8048e-07,  2.6878e-07,\n",
       "             4.5064e-07,  6.1298e-07,  2.2902e-06,  1.0276e-06, -4.4583e-07,\n",
       "            -1.3110e-06, -7.2702e-08, -6.3228e-07, -1.4895e-06, -1.4226e-06,\n",
       "             1.2191e-06,  7.7478e-07,  2.2330e-07, -1.0442e-06, -3.2427e-08,\n",
       "            -3.6190e-07, -4.1055e-07, -1.0393e-06, -2.8605e-06,  3.8100e-07,\n",
       "             1.0798e-06,  2.7190e-07,  9.1391e-07, -1.9491e-06,  1.0618e-06,\n",
       "             2.4661e-06, -1.4404e-06,  2.7132e-07,  7.7372e-07, -2.8906e-07,\n",
       "             1.0228e-06, -4.7121e-07, -6.6028e-07,  3.7013e-07,  6.0037e-07,\n",
       "             5.2156e-09,  2.2779e-06,  1.0712e-06,  1.7006e-06, -1.5107e-06,\n",
       "             3.6034e-07, -9.0025e-08, -1.7063e-06,  5.7147e-08,  3.0758e-06,\n",
       "             1.6886e-07, -6.6853e-07, -1.0953e-06, -2.9643e-06, -4.5830e-07,\n",
       "             9.7840e-07, -1.2338e-06, -8.5622e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.5520e-10, 8.1506e-11, 3.6888e-11, 6.3021e-11, 6.3176e-11, 3.1348e-11,\n",
       "            6.6872e-11, 6.2045e-11, 4.8903e-11, 4.6652e-11, 6.0037e-11, 1.0219e-10,\n",
       "            4.6882e-11, 6.0677e-11, 7.9310e-11, 7.6470e-11, 5.9878e-11, 4.3443e-11,\n",
       "            4.0272e-11, 8.8623e-11, 5.9086e-11, 5.8051e-11, 4.2474e-11, 8.0666e-11,\n",
       "            5.5937e-11, 4.4388e-11, 6.5650e-11, 7.1593e-11, 6.3863e-11, 6.9161e-11,\n",
       "            3.5539e-11, 1.6335e-10, 8.3214e-11, 7.4924e-11, 9.2614e-11, 5.4039e-11,\n",
       "            6.8079e-11, 8.9147e-11, 6.8523e-11, 8.3871e-11, 5.4909e-11, 3.2403e-11,\n",
       "            5.4308e-11, 6.8037e-11, 4.8861e-11, 7.8237e-11, 7.2056e-11, 8.7376e-11,\n",
       "            6.9944e-11, 1.0800e-10, 1.0425e-10, 6.3862e-11, 5.3100e-11, 7.9356e-11,\n",
       "            9.3469e-11, 5.8709e-11, 6.7780e-11, 8.6726e-11, 4.1073e-11, 5.2145e-11,\n",
       "            4.4599e-11, 6.3271e-11, 5.9937e-11, 1.0083e-10, 6.1532e-11, 9.1060e-11,\n",
       "            3.0712e-11, 9.2814e-11, 5.9980e-11, 7.9748e-11, 9.3216e-11, 3.8608e-11,\n",
       "            6.0279e-11, 3.8708e-11, 3.9846e-11, 7.3304e-11, 8.5265e-11, 5.6301e-11,\n",
       "            4.8876e-11, 6.4565e-11, 5.3168e-11, 1.0336e-10, 6.3527e-11, 4.3504e-11,\n",
       "            5.4239e-11, 5.1196e-11, 7.7287e-11, 9.2936e-11, 1.6037e-10, 7.7835e-11,\n",
       "            7.4813e-11, 6.5413e-11, 1.0868e-10, 5.5298e-11, 9.1086e-11, 6.3775e-11,\n",
       "            4.8172e-11, 8.7408e-11, 7.7005e-11, 5.4538e-11, 5.5977e-11, 8.2648e-11,\n",
       "            7.4531e-11, 6.7289e-11, 4.6092e-11, 9.0451e-11, 2.5145e-10, 3.5926e-11,\n",
       "            9.8200e-11, 9.4591e-11, 3.4267e-11, 4.8358e-11, 7.3085e-11, 3.6486e-11,\n",
       "            9.6480e-11, 5.9821e-11, 4.6049e-11, 4.9432e-11, 5.1699e-11, 4.9356e-11,\n",
       "            6.4382e-11, 4.6068e-11, 8.2572e-11, 6.2408e-11, 7.4271e-11, 5.0224e-11,\n",
       "            8.0512e-11, 8.0555e-11], device='cuda:0')},\n",
       "   60: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-8.4513e-07, -2.8369e-07,  1.9807e-08,  1.1739e-08,  6.2741e-06,\n",
       "             4.1566e-06,  7.1049e-08, -3.1070e-06, -4.8417e-06, -5.3781e-06,\n",
       "            -5.4711e-07,  3.6485e-06,  3.6521e-07,  4.0672e-06,  1.3266e-06,\n",
       "             1.8932e-06, -2.0294e-06, -1.5023e-05, -2.8892e-06,  4.5879e-06,\n",
       "            -1.8778e-06, -4.5498e-06, -6.3875e-06, -6.8675e-09, -1.2783e-06,\n",
       "            -3.5317e-06, -2.4818e-06, -1.3078e-06, -5.1401e-06, -1.9511e-06,\n",
       "             9.7698e-07, -2.8123e-05, -1.4963e-06, -3.5359e-06,  3.4428e-06,\n",
       "            -1.7424e-06,  1.7575e-06, -6.1542e-07,  1.8009e-06, -1.0845e-08,\n",
       "             3.0512e-07, -4.1915e-07, -3.6239e-07,  2.1951e-08, -1.0569e-06,\n",
       "             5.4575e-07, -6.6144e-06,  1.1774e-06, -6.5275e-07, -5.2453e-07,\n",
       "            -6.5921e-07, -1.0219e-07, -2.4881e-06,  3.3743e-06,  2.8227e-06,\n",
       "            -1.7855e-06, -6.0592e-07,  2.1929e-07, -1.2232e-06, -1.5066e-06,\n",
       "             2.3440e-07, -2.1105e-06,  2.9469e-06, -4.0277e-07, -4.7755e-07,\n",
       "             4.9327e-06, -2.8487e-07, -7.9247e-08,  1.1435e-08,  5.1561e-07,\n",
       "            -2.3211e-06, -2.1632e-06,  1.5526e-06,  3.3687e-07, -4.3069e-06,\n",
       "            -9.5412e-08, -2.0932e-06,  1.8025e-06,  5.0629e-06,  6.4707e-07,\n",
       "             1.3106e-06,  1.0020e-06,  5.2984e-06, -1.1408e-06, -1.1718e-06,\n",
       "            -5.7764e-06, -4.9698e-08, -6.8811e-06, -4.8640e-06, -2.1800e-06,\n",
       "            -8.9795e-06,  1.8282e-06, -5.2330e-06, -4.4479e-06, -2.8934e-06,\n",
       "             1.0649e-06, -1.5146e-06, -5.2184e-06, -1.1037e-06,  7.4137e-07,\n",
       "             1.5671e-06, -7.2388e-07, -4.1800e-07, -3.3098e-06,  4.7688e-07,\n",
       "            -6.8015e-07, -4.4936e-05, -8.9906e-06, -4.1874e-06, -2.3018e-06,\n",
       "             9.2273e-07, -2.9879e-06, -2.9162e-06, -5.4564e-07, -3.8452e-06,\n",
       "            -3.6217e-06,  6.1779e-07,  2.7255e-07,  4.1438e-07, -2.8187e-07,\n",
       "             2.8790e-07, -5.1984e-07,  4.6456e-07,  4.5299e-07,  4.5253e-06,\n",
       "            -1.1052e-06, -7.8733e-07, -4.9106e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([8.7693e-12, 8.9789e-12, 1.0403e-11, 4.3490e-11, 4.7041e-10, 4.4849e-11,\n",
       "            1.5462e-11, 2.4080e-11, 4.2387e-11, 7.5737e-11, 1.8478e-11, 3.7558e-10,\n",
       "            1.7544e-11, 1.7301e-10, 1.1870e-10, 1.3786e-10, 1.0459e-11, 7.3136e-10,\n",
       "            4.9604e-11, 2.0511e-10, 8.0923e-11, 2.1922e-10, 1.4390e-10, 4.5523e-11,\n",
       "            1.3441e-11, 6.6097e-11, 2.6734e-11, 4.7749e-12, 1.2052e-10, 3.4723e-10,\n",
       "            3.1252e-11, 1.2091e-09, 1.3863e-11, 1.0295e-10, 7.0881e-11, 3.3444e-11,\n",
       "            2.5938e-11, 5.8688e-12, 2.4305e-11, 6.8220e-12, 2.7262e-12, 1.7820e-11,\n",
       "            1.3381e-11, 1.2860e-11, 2.9622e-11, 3.2964e-12, 1.0142e-10, 7.5300e-11,\n",
       "            7.1148e-12, 7.1101e-11, 7.8601e-12, 5.0551e-12, 2.4527e-11, 1.7769e-10,\n",
       "            4.7095e-11, 1.3342e-11, 1.2587e-11, 8.3326e-12, 1.6342e-11, 2.4797e-11,\n",
       "            1.2641e-11, 1.7130e-11, 3.0462e-10, 2.4060e-11, 2.2108e-11, 1.8220e-10,\n",
       "            2.8442e-11, 1.1407e-11, 2.7299e-11, 1.7849e-11, 9.3236e-12, 3.3244e-11,\n",
       "            2.1718e-11, 1.6347e-11, 2.9901e-11, 1.7789e-11, 5.7574e-11, 7.9808e-11,\n",
       "            1.5573e-10, 1.3823e-11, 6.1071e-11, 4.1947e-11, 1.6102e-09, 3.6179e-11,\n",
       "            3.3775e-11, 1.9569e-10, 1.8415e-10, 2.7858e-10, 1.0839e-10, 8.3859e-11,\n",
       "            1.2008e-10, 2.8406e-11, 5.8213e-11, 5.7958e-11, 2.0164e-11, 2.5997e-11,\n",
       "            4.4324e-11, 8.6602e-11, 5.2505e-11, 1.9622e-11, 3.2795e-11, 6.5473e-12,\n",
       "            1.6479e-11, 4.8893e-11, 2.6572e-11, 2.8475e-11, 6.2978e-09, 2.0919e-10,\n",
       "            1.0862e-10, 5.6863e-11, 2.0712e-10, 4.0946e-11, 7.9052e-09, 3.0297e-12,\n",
       "            1.9990e-10, 9.4191e-11, 3.5038e-10, 1.6105e-11, 3.1451e-10, 7.5592e-12,\n",
       "            2.2289e-11, 1.1334e-11, 6.7676e-11, 4.8665e-11, 7.4936e-11, 4.5559e-11,\n",
       "            1.9701e-11, 1.7074e-11], device='cuda:0')},\n",
       "   61: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.4146e-06, -1.3763e-06, -6.1751e-07, -9.0039e-07, -1.5113e-06,\n",
       "            -4.7710e-06,  2.9510e-06, -6.3209e-06, -1.2885e-06,  7.0136e-06,\n",
       "             2.2652e-06, -2.6372e-06,  2.7617e-06, -3.5862e-06,  2.8387e-07,\n",
       "             5.1756e-06,  1.4015e-06, -5.7679e-06, -1.7123e-06, -1.9154e-06,\n",
       "             1.3016e-06,  4.0478e-07,  4.2951e-06,  9.4339e-07,  1.0421e-06,\n",
       "            -3.6026e-06,  3.6902e-06, -5.7428e-06, -3.4600e-06, -2.0564e-07,\n",
       "             6.3372e-07, -6.9220e-06, -4.3336e-06, -4.0319e-06, -6.8248e-06,\n",
       "             1.0534e-06, -4.3109e-06,  7.7121e-07, -2.9111e-06, -1.2910e-06,\n",
       "            -2.3413e-06,  4.9618e-07,  1.8780e-06,  1.2791e-06,  5.9144e-07,\n",
       "            -1.3604e-06, -1.0356e-05, -4.3848e-06,  3.5342e-06,  1.4053e-06,\n",
       "            -3.9384e-06,  6.4475e-07, -2.0995e-07, -4.6080e-06, -2.1237e-06,\n",
       "             2.7700e-06, -9.9296e-07, -6.7299e-07,  1.0939e-06,  2.3668e-06,\n",
       "            -4.8345e-07,  3.4225e-06, -3.4676e-06,  1.1732e-06, -7.0626e-07,\n",
       "            -2.8712e-06,  3.4693e-06, -6.9592e-07, -3.0537e-07, -1.2873e-06,\n",
       "            -3.8081e-06,  2.1882e-06,  1.8672e-06,  4.5522e-07,  4.5827e-06,\n",
       "            -4.4663e-07,  4.4857e-06, -6.6733e-06, -3.2207e-06,  1.3797e-06,\n",
       "             2.1992e-06,  3.8884e-06,  1.0028e-05, -2.1914e-07,  3.2691e-06,\n",
       "             4.1588e-06, -2.7894e-06,  5.9295e-06,  3.2244e-06,  1.5037e-06,\n",
       "             7.1728e-06, -2.1786e-06,  3.9163e-06,  1.8428e-06,  5.2467e-06,\n",
       "             3.2631e-06,  5.8015e-06,  7.2941e-06,  2.5639e-06,  1.8535e-06,\n",
       "             1.3532e-06, -7.3647e-07, -2.3053e-06, -1.0251e-06, -8.6806e-07,\n",
       "            -4.5373e-07, -8.1196e-06,  5.1617e-06, -3.3200e-06, -1.1781e-06,\n",
       "             1.5770e-06, -3.5045e-06,  7.8014e-06, -3.1996e-07, -2.5953e-06,\n",
       "            -3.6111e-06,  5.1215e-06,  5.5731e-07,  6.3045e-07,  1.0340e-06,\n",
       "             8.0543e-07,  1.3078e-06,  1.8040e-06,  2.0341e-06,  5.4548e-06,\n",
       "            -1.8197e-06, -2.6594e-06, -3.0026e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.8633e-11, 7.7880e-11, 2.9785e-11, 6.4614e-11, 2.9091e-10, 5.1880e-10,\n",
       "            9.4284e-11, 1.2257e-10, 6.8812e-11, 2.2817e-10, 7.8904e-11, 4.3941e-10,\n",
       "            2.8457e-10, 4.1768e-10, 1.0646e-10, 3.1397e-10, 3.0384e-11, 4.2125e-10,\n",
       "            2.1147e-10, 1.2046e-10, 7.1298e-11, 2.0136e-10, 2.3109e-10, 2.5573e-10,\n",
       "            6.5002e-11, 1.0469e-10, 5.6966e-11, 1.3070e-10, 2.1237e-10, 6.1322e-11,\n",
       "            1.0394e-10, 2.8561e-10, 2.1076e-10, 5.3871e-10, 2.6052e-10, 6.4843e-11,\n",
       "            2.0305e-10, 4.8829e-11, 9.5624e-11, 1.7995e-10, 1.0866e-10, 7.6336e-11,\n",
       "            1.2383e-10, 1.1553e-10, 5.1816e-11, 5.0634e-11, 7.7998e-10, 6.1394e-10,\n",
       "            1.9486e-10, 2.0779e-10, 6.2640e-11, 1.2730e-10, 1.0003e-10, 3.5152e-10,\n",
       "            9.4513e-11, 3.9533e-11, 2.8816e-11, 6.6333e-11, 1.1811e-10, 7.8608e-11,\n",
       "            4.2392e-11, 3.9549e-11, 3.7012e-10, 1.1419e-10, 4.8278e-11, 2.6528e-10,\n",
       "            1.2726e-10, 5.1457e-11, 6.0435e-11, 8.3150e-11, 8.2760e-11, 5.8079e-11,\n",
       "            1.8112e-10, 1.9974e-10, 5.7011e-11, 5.3552e-11, 3.0192e-10, 1.3108e-10,\n",
       "            2.7029e-10, 2.2855e-11, 1.8017e-10, 2.2875e-10, 6.5884e-10, 7.7921e-11,\n",
       "            9.4344e-11, 1.2875e-10, 1.5150e-10, 3.7832e-10, 7.1957e-11, 5.2105e-11,\n",
       "            3.1840e-10, 4.8791e-11, 2.1390e-10, 2.6065e-10, 1.5459e-10, 1.3948e-10,\n",
       "            2.0089e-10, 5.6166e-10, 1.7174e-10, 1.6098e-10, 6.6762e-11, 3.0191e-11,\n",
       "            1.1520e-10, 5.5055e-11, 4.3665e-11, 1.5144e-10, 7.4603e-10, 1.6250e-10,\n",
       "            1.3767e-10, 9.3498e-11, 1.5954e-10, 1.2525e-10, 6.9173e-10, 3.4143e-11,\n",
       "            1.6357e-10, 1.4393e-10, 2.9434e-10, 5.6772e-11, 9.7547e-11, 4.0675e-11,\n",
       "            1.7758e-10, 8.2965e-11, 8.1614e-11, 6.5570e-11, 1.4616e-10, 4.6248e-11,\n",
       "            5.9787e-11, 1.4672e-10], device='cuda:0')},\n",
       "   62: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.8548e-09,  6.6833e-07, -5.1176e-07,  ..., -3.1868e-07,\n",
       "             -2.6064e-07,  2.5492e-07],\n",
       "            [-1.2122e-11,  7.1237e-12, -1.0721e-11,  ...,  4.8401e-12,\n",
       "             -1.4359e-11,  1.6166e-12],\n",
       "            [ 5.2257e-06, -2.0736e-06,  2.8664e-06,  ..., -1.1659e-06,\n",
       "             -1.0146e-08,  1.2127e-07],\n",
       "            ...,\n",
       "            [ 1.8330e-07, -8.2569e-07,  2.9603e-07,  ...,  7.7791e-07,\n",
       "              4.8290e-07,  2.7776e-06],\n",
       "            [-9.1761e-07,  5.0295e-07, -6.6369e-07,  ...,  1.3992e-07,\n",
       "             -9.7518e-08, -3.0608e-07],\n",
       "            [ 9.2450e-08,  5.8449e-07,  1.3062e-07,  ...,  2.7874e-07,\n",
       "              8.3735e-07,  2.7515e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.2436e-10, 4.5093e-11, 5.6250e-11,  ..., 1.2960e-10, 2.4102e-10,\n",
       "             1.6174e-10],\n",
       "            [1.3334e-13, 3.2197e-14, 5.0125e-14,  ..., 1.1698e-14, 8.5325e-14,\n",
       "             5.5445e-16],\n",
       "            [3.5245e-10, 1.0067e-10, 9.2869e-11,  ..., 8.6084e-11, 5.4815e-11,\n",
       "             5.1673e-11],\n",
       "            ...,\n",
       "            [6.7992e-12, 1.0436e-12, 2.5344e-12,  ..., 8.9158e-13, 5.2891e-13,\n",
       "             1.2806e-11],\n",
       "            [2.1616e-10, 2.0480e-11, 4.0525e-11,  ..., 1.0315e-11, 1.1450e-11,\n",
       "             1.2900e-11],\n",
       "            [2.4656e-10, 4.7074e-11, 6.9421e-11,  ..., 1.1179e-10, 1.7994e-10,\n",
       "             9.1059e-11]], device='cuda:0')},\n",
       "   63: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-7.2898e-07,  1.5335e-11,  2.5519e-06, -1.2747e-06, -4.4764e-06,\n",
       "            -2.6400e-06, -1.3549e-06,  3.6356e-07,  2.7875e-06,  7.4211e-08,\n",
       "             3.1558e-06, -4.0199e-06, -2.2382e-06, -9.1064e-07,  1.2299e-07,\n",
       "            -1.6380e-06, -1.3750e-06,  1.3293e-06,  2.8345e-06,  9.4247e-07,\n",
       "             1.3579e-06, -5.0942e-07, -1.6741e-07,  7.9655e-06,  3.8134e-06,\n",
       "            -2.0301e-06, -1.4624e-06, -5.7048e-07,  1.0939e-06, -2.2192e-06,\n",
       "             5.0576e-07,  3.6273e-07,  1.9542e-25,  1.5033e-06, -1.5215e-06,\n",
       "             2.7913e-06, -1.3164e-06,  9.5302e-07,  2.2230e-06,  2.4896e-06,\n",
       "             1.4589e-06, -8.7119e-07,  3.7807e-06, -1.3426e-06,  3.0466e-06,\n",
       "            -1.7122e-06,  5.3763e-06,  1.4912e-06,  2.7330e-06, -1.1012e-06,\n",
       "            -9.1335e-07, -2.4811e-06, -1.9244e-06,  1.1560e-06,  3.5120e-06,\n",
       "             1.4490e-06,  3.5258e-12,  1.0302e-06, -5.6783e-07, -1.3465e-06,\n",
       "             1.1513e-06, -1.9287e-06,  2.1986e-06,  1.0242e-06, -2.8717e-06,\n",
       "             1.0101e-06, -5.6884e-07, -3.8032e-07,  2.0601e-07, -1.1834e-08,\n",
       "             5.2151e-07,  4.8350e-07,  3.4937e-06,  1.8646e-06,  5.5397e-07,\n",
       "            -1.0223e-07,  1.1950e-06,  1.4638e-07, -6.9849e-07,  1.3757e-07,\n",
       "             6.9147e-08, -3.5883e-06,  2.4372e-06,  6.3478e-07, -2.9712e-07,\n",
       "            -2.5686e-06,  1.4051e-06,  1.9631e-06,  2.6397e-06, -3.0895e-06,\n",
       "             1.9970e-06,  1.3708e-06, -2.2533e-06,  5.8215e-07,  1.7285e-07,\n",
       "             1.1380e-06,  2.4282e-06,  8.4758e-07,  1.2107e-06, -2.0346e-07,\n",
       "             1.6171e-07,  6.8678e-07,  3.6944e-06,  9.5307e-07, -6.0400e-07,\n",
       "            -5.4781e-07, -8.7355e-07, -1.3514e-06, -2.4295e-06, -1.7048e-06,\n",
       "             4.1518e-06,  1.4579e-06,  1.9453e-06,  1.6260e-06, -1.5960e-06,\n",
       "             2.4390e-06, -7.5084e-07, -2.3975e-07, -5.0991e-06, -1.4252e-06,\n",
       "            -1.4377e-06, -1.5532e-06, -5.4534e-07,  2.0762e-06, -1.1314e-06,\n",
       "             2.3644e-06, -5.8328e-07, -1.5018e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.1415e-10, 1.3141e-13, 2.0913e-10, 1.3750e-10, 1.0835e-09, 4.3296e-10,\n",
       "            2.1181e-10, 4.7355e-11, 1.6953e-10, 5.5550e-11, 2.5670e-10, 9.1466e-11,\n",
       "            6.2612e-11, 7.7931e-11, 6.1373e-11, 4.6812e-11, 5.0334e-11, 9.5259e-11,\n",
       "            2.3745e-10, 1.9024e-10, 9.5220e-11, 2.8459e-10, 2.6719e-10, 4.7615e-10,\n",
       "            7.7523e-12, 1.2830e-10, 1.3441e-11, 9.8771e-11, 4.9554e-11, 1.6306e-10,\n",
       "            2.8601e-10, 2.8132e-10, 6.8735e-11, 4.3996e-11, 2.8947e-10, 3.8845e-10,\n",
       "            1.7936e-10, 1.9981e-10, 1.8272e-10, 5.8735e-10, 5.4265e-11, 5.2749e-11,\n",
       "            1.9893e-10, 1.1212e-10, 2.1857e-11, 7.7837e-11, 6.4494e-10, 2.1537e-10,\n",
       "            1.7760e-10, 1.3401e-10, 1.7144e-10, 3.6324e-10, 1.3500e-10, 2.3310e-11,\n",
       "            1.4487e-10, 2.0969e-10, 2.3659e-15, 2.3791e-10, 4.2152e-11, 6.9331e-11,\n",
       "            1.5740e-10, 2.5129e-10, 2.7444e-10, 1.0187e-10, 3.7241e-10, 3.9690e-11,\n",
       "            2.6803e-10, 4.6426e-10, 1.0583e-10, 3.8037e-10, 2.1941e-10, 3.8501e-10,\n",
       "            7.1740e-11, 5.4579e-11, 1.0956e-10, 1.0283e-10, 1.4909e-10, 1.0621e-10,\n",
       "            1.8494e-10, 2.7853e-10, 6.7930e-11, 1.9410e-10, 1.3061e-10, 4.8503e-11,\n",
       "            1.7497e-10, 2.0020e-10, 8.8204e-11, 2.2057e-10, 5.2575e-11, 2.4156e-10,\n",
       "            4.9850e-10, 7.8375e-11, 1.3709e-10, 1.7991e-11, 7.4554e-11, 1.8004e-10,\n",
       "            7.1693e-11, 1.8017e-10, 2.3632e-10, 1.5655e-10, 6.2017e-12, 3.3631e-10,\n",
       "            9.7518e-11, 1.9036e-10, 1.4556e-10, 2.3032e-10, 3.8209e-10, 2.7061e-10,\n",
       "            2.0211e-10, 3.7809e-10, 1.9907e-10, 9.2818e-11, 1.0096e-10, 7.5619e-11,\n",
       "            3.4956e-10, 1.9132e-10, 2.4492e-10, 2.9449e-10, 8.5409e-10, 2.3814e-11,\n",
       "            5.6105e-11, 5.6855e-10, 3.7084e-11, 1.7396e-10, 4.9297e-11, 3.2140e-12,\n",
       "            6.9250e-11, 2.1001e-10], device='cuda:0')},\n",
       "   64: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.8213e-08,  3.0624e-06,  1.7592e-06,  ..., -2.1431e-06,\n",
       "             -1.7596e-06, -2.6533e-06],\n",
       "            [-2.1323e-06,  1.7002e-06, -2.4765e-07,  ...,  1.3210e-07,\n",
       "             -2.5978e-06, -2.8268e-06],\n",
       "            [-4.1140e-06, -1.2977e-07, -3.0336e-06,  ...,  3.2996e-06,\n",
       "             -7.8779e-07, -1.1640e-06],\n",
       "            ...,\n",
       "            [-4.0325e-06, -1.5404e-06, -1.2310e-06,  ...,  4.1125e-06,\n",
       "              1.3661e-06,  9.5258e-07],\n",
       "            [-3.2927e-06,  9.0701e-07, -1.2573e-07,  ...,  1.0487e-06,\n",
       "             -2.9632e-07, -2.2262e-06],\n",
       "            [-4.0067e-06,  4.7038e-06, -1.7944e-06,  ..., -2.9591e-06,\n",
       "             -8.0598e-06, -6.8231e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.8971e-10, 1.7340e-10, 1.9906e-10,  ..., 4.7256e-10, 2.5198e-10,\n",
       "             4.5927e-10],\n",
       "            [1.7761e-10, 1.8342e-10, 2.3917e-10,  ..., 5.4776e-10, 3.7940e-10,\n",
       "             6.0053e-10],\n",
       "            [3.6575e-10, 4.3124e-10, 4.3123e-10,  ..., 1.1052e-09, 5.3909e-10,\n",
       "             9.4799e-10],\n",
       "            ...,\n",
       "            [9.2281e-11, 1.5228e-10, 2.5463e-10,  ..., 4.4072e-10, 3.4197e-10,\n",
       "             5.7801e-10],\n",
       "            [1.0943e-10, 1.2641e-10, 1.0732e-10,  ..., 2.7858e-10, 1.1704e-10,\n",
       "             2.6605e-10],\n",
       "            [2.2687e-10, 3.6172e-10, 3.1729e-10,  ..., 8.3020e-10, 5.6292e-10,\n",
       "             9.5321e-10]], device='cuda:0')},\n",
       "   65: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 4.3948e-07, -2.3961e-07,  4.9219e-07,  ..., -5.2232e-07,\n",
       "              2.8083e-07,  2.1144e-07],\n",
       "            [-3.1620e-07, -8.3541e-08,  8.7900e-08,  ...,  2.5114e-07,\n",
       "             -1.6142e-07, -2.1996e-07],\n",
       "            [ 2.2362e-07,  3.6898e-07, -8.5708e-07,  ...,  2.4011e-07,\n",
       "             -7.9631e-08,  1.2482e-07],\n",
       "            ...,\n",
       "            [ 1.0182e-08, -5.0154e-08,  1.6438e-07,  ...,  5.7096e-08,\n",
       "             -1.5997e-07, -1.1395e-07],\n",
       "            [-3.4612e-07, -5.4432e-07,  1.5930e-06,  ...,  4.8719e-07,\n",
       "             -6.1094e-08, -5.2290e-07],\n",
       "            [-5.8339e-08,  3.8584e-07, -1.0742e-06,  ...,  3.0820e-08,\n",
       "             -1.8450e-07, -6.2857e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.6506e-12, 7.5019e-13, 2.3199e-12,  ..., 1.3687e-12, 1.9528e-12,\n",
       "             1.5335e-12],\n",
       "            [4.9881e-13, 6.4734e-14, 1.7705e-13,  ..., 8.9607e-14, 1.4240e-13,\n",
       "             1.1679e-13],\n",
       "            [1.8524e-11, 2.0344e-12, 6.2863e-12,  ..., 4.1743e-12, 3.8911e-12,\n",
       "             4.9108e-12],\n",
       "            ...,\n",
       "            [9.4243e-13, 1.1554e-13, 3.8196e-13,  ..., 2.2766e-13, 3.8093e-13,\n",
       "             2.6409e-13],\n",
       "            [6.8281e-12, 1.7849e-12, 3.5335e-12,  ..., 1.1519e-12, 3.6550e-12,\n",
       "             2.5318e-12],\n",
       "            [3.8493e-12, 1.0477e-12, 1.8366e-12,  ..., 7.4528e-13, 2.4239e-12,\n",
       "             1.3590e-12]], device='cuda:0')},\n",
       "   66: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 3.4531e-07,  2.6611e-09,  2.2175e-07,  ..., -1.2196e-07,\n",
       "              1.4578e-07,  1.6506e-07],\n",
       "            [-1.0567e-07, -6.3562e-08, -8.6744e-08,  ...,  1.0099e-07,\n",
       "             -2.8624e-08, -5.8412e-08],\n",
       "            [-2.9764e-07,  4.6917e-07, -4.4493e-07,  ..., -7.1024e-08,\n",
       "             -5.8546e-07, -4.6418e-07],\n",
       "            ...,\n",
       "            [-1.0950e-08, -8.8813e-08,  3.5776e-08,  ..., -6.5527e-09,\n",
       "              1.2146e-07, -1.3502e-07],\n",
       "            [ 3.9007e-07, -6.5063e-08,  4.5874e-07,  ...,  2.0667e-07,\n",
       "              4.0521e-07,  2.1611e-07],\n",
       "            [ 8.2279e-08,  1.1669e-07, -1.1920e-07,  ..., -4.1990e-07,\n",
       "             -3.2528e-07,  2.0304e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.1672e-12, 1.2399e-13, 4.3465e-13,  ..., 2.7104e-13, 3.8733e-13,\n",
       "             3.6605e-13],\n",
       "            [1.0250e-12, 9.7704e-14, 3.3791e-13,  ..., 2.2842e-13, 3.3674e-13,\n",
       "             2.7813e-13],\n",
       "            [4.8507e-12, 9.7874e-13, 2.3173e-12,  ..., 1.1974e-12, 1.1698e-12,\n",
       "             1.7990e-12],\n",
       "            ...,\n",
       "            [3.0897e-12, 4.8877e-13, 1.3354e-12,  ..., 8.3099e-13, 7.9002e-13,\n",
       "             8.6048e-13],\n",
       "            [1.2439e-12, 2.0687e-13, 5.5431e-13,  ..., 3.5983e-13, 3.0354e-13,\n",
       "             3.3452e-13],\n",
       "            [2.4247e-12, 3.2952e-13, 9.3827e-13,  ..., 7.1012e-13, 6.9797e-13,\n",
       "             5.8434e-13]], device='cuda:0')},\n",
       "   67: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 5.5386e-06,  5.4986e-06,  2.7127e-06,  ...,  6.8738e-07,\n",
       "              5.3942e-06,  7.5024e-06],\n",
       "            [-2.1025e-06, -9.5267e-07,  6.1502e-07,  ...,  4.7642e-07,\n",
       "             -1.2934e-06, -4.2819e-07],\n",
       "            [ 3.6801e-06,  1.7180e-06,  5.7729e-07,  ...,  9.0796e-07,\n",
       "              1.1810e-06,  1.3168e-06],\n",
       "            ...,\n",
       "            [ 2.5037e-06, -1.9071e-06, -1.7681e-06,  ..., -7.0809e-07,\n",
       "             -2.7384e-06, -2.9215e-08],\n",
       "            [-7.5352e-07, -2.9792e-07,  2.3035e-06,  ...,  1.2189e-07,\n",
       "             -2.0438e-07, -1.9990e-06],\n",
       "            [-4.3859e-06, -5.0232e-06, -3.5670e-06,  ...,  3.2051e-06,\n",
       "             -2.4313e-06, -4.4240e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.1708e-10, 7.9534e-10, 2.9517e-10,  ..., 5.4552e-10, 4.5698e-10,\n",
       "             5.6947e-10],\n",
       "            [2.5563e-10, 4.4376e-10, 1.5128e-10,  ..., 3.1437e-10, 3.3288e-10,\n",
       "             3.3571e-10],\n",
       "            [4.1210e-10, 6.2553e-10, 1.9711e-10,  ..., 4.6946e-10, 4.2062e-10,\n",
       "             4.1436e-10],\n",
       "            ...,\n",
       "            [5.3592e-10, 6.7951e-10, 3.1057e-10,  ..., 4.6568e-10, 4.8317e-10,\n",
       "             6.2005e-10],\n",
       "            [1.8661e-10, 3.1728e-10, 1.2782e-10,  ..., 2.4006e-10, 2.7240e-10,\n",
       "             2.6879e-10],\n",
       "            [4.2392e-10, 6.6507e-10, 1.6771e-10,  ..., 5.0610e-10, 3.9192e-10,\n",
       "             5.8248e-10]], device='cuda:0')},\n",
       "   68: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.2727e-05,  3.4544e-06, -3.5262e-06, -6.7182e-06, -6.8626e-06,\n",
       "             6.1099e-06,  1.9466e-06, -2.1942e-06, -1.1876e-06, -5.5196e-06,\n",
       "             4.7888e-06, -5.0257e-06,  4.1278e-06,  1.6685e-07,  1.0322e-05,\n",
       "            -6.6603e-06, -6.5967e-06,  4.3275e-06, -8.5963e-06, -2.4791e-06,\n",
       "            -2.1939e-06,  2.0921e-06, -3.4232e-06, -3.5801e-06, -3.0390e-06,\n",
       "             4.7383e-06,  1.7060e-06, -4.6290e-06,  2.6048e-06, -3.1736e-06,\n",
       "            -3.2097e-06,  4.4254e-06,  1.4189e-05,  8.3035e-06,  3.6846e-07,\n",
       "             3.3898e-08,  4.5805e-06, -7.9436e-06, -5.0415e-06,  5.2290e-06,\n",
       "            -4.3088e-06, -3.0817e-06,  2.5406e-06,  3.2966e-06, -5.0190e-06,\n",
       "             1.7019e-05, -4.8820e-06, -3.4383e-06, -1.1243e-05,  1.5778e-07,\n",
       "            -7.8655e-06,  4.2918e-06,  3.5727e-06,  4.5904e-06,  2.0781e-06,\n",
       "             3.1882e-06, -2.7439e-06,  2.4528e-06,  3.8682e-06,  1.1048e-06,\n",
       "             4.6346e-06,  1.4363e-06,  2.5567e-06, -5.6418e-07,  6.1473e-06,\n",
       "             9.9884e-07, -5.3409e-06,  4.8591e-06, -5.8751e-06,  3.8990e-06,\n",
       "             5.3738e-06,  8.4301e-07,  3.4756e-06,  2.0278e-06,  4.3311e-06,\n",
       "            -8.0570e-06, -2.3950e-06, -1.9264e-06, -1.3075e-05, -3.8105e-07,\n",
       "             5.3142e-06,  1.0132e-06, -2.7369e-08,  2.7969e-06, -9.6961e-06,\n",
       "             4.7937e-06,  1.0552e-06,  7.0999e-06,  2.7394e-06, -5.7535e-06,\n",
       "            -3.2344e-06,  9.7347e-06,  6.0501e-06,  6.0128e-06,  1.3741e-07,\n",
       "             2.3523e-06, -1.8390e-06,  5.3581e-06,  2.4885e-06, -4.2207e-07,\n",
       "            -4.4049e-06,  1.1171e-05, -1.8025e-06, -4.2802e-06,  1.1690e-06,\n",
       "             8.4185e-06, -1.4269e-05,  2.4430e-06, -1.1591e-05, -9.5020e-06,\n",
       "             1.4181e-06, -5.7125e-06,  1.2446e-06,  1.1145e-06,  3.8414e-06,\n",
       "             2.5511e-06, -4.9488e-06, -4.5852e-06,  1.6608e-06,  4.2709e-06,\n",
       "             4.4952e-06, -3.9320e-07,  7.4913e-06, -1.3285e-05, -4.1764e-07,\n",
       "             4.8719e-06, -4.1237e-07,  8.0772e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.3362e-09, 1.0885e-09, 1.6635e-09, 5.4057e-09, 1.0185e-09, 9.1823e-10,\n",
       "            1.2945e-09, 1.0639e-09, 8.7408e-10, 1.5233e-09, 1.5405e-09, 2.3985e-09,\n",
       "            1.2601e-09, 2.3820e-09, 2.5807e-09, 2.7180e-09, 1.5952e-09, 7.3843e-10,\n",
       "            1.1159e-09, 1.1927e-09, 2.0519e-09, 1.3423e-09, 5.7131e-10, 2.7777e-09,\n",
       "            1.9448e-09, 1.0883e-09, 2.3310e-09, 2.6970e-09, 1.7730e-09, 7.0473e-10,\n",
       "            3.7490e-09, 6.4610e-10, 8.8048e-10, 3.7404e-09, 2.7024e-09, 5.5665e-09,\n",
       "            2.5220e-09, 2.9143e-09, 3.6306e-09, 1.0137e-09, 4.1007e-09, 2.1753e-09,\n",
       "            9.7709e-10, 1.1057e-09, 1.7503e-09, 1.2250e-09, 1.5576e-09, 3.3533e-09,\n",
       "            2.3208e-09, 1.5552e-09, 1.9537e-09, 1.5012e-09, 1.3824e-09, 3.8465e-09,\n",
       "            8.4187e-10, 1.6964e-09, 2.2215e-09, 1.1612e-09, 2.1041e-09, 1.3942e-09,\n",
       "            2.4847e-09, 1.4531e-09, 2.1578e-09, 8.3443e-10, 1.3858e-09, 1.8162e-09,\n",
       "            7.9013e-10, 8.7818e-10, 1.3937e-09, 1.7819e-09, 1.4546e-09, 1.1012e-09,\n",
       "            2.3484e-09, 1.0383e-09, 1.1904e-09, 2.1196e-09, 1.4817e-09, 1.2019e-09,\n",
       "            1.5553e-09, 1.3871e-09, 1.3514e-09, 1.5267e-09, 1.3247e-09, 1.3686e-09,\n",
       "            3.0556e-09, 2.4951e-09, 2.4406e-09, 1.4801e-09, 1.3001e-09, 1.4695e-09,\n",
       "            1.8475e-09, 1.7800e-09, 2.3904e-09, 1.5823e-09, 1.2196e-09, 2.1804e-09,\n",
       "            3.5470e-09, 9.2150e-10, 2.7669e-09, 1.5284e-09, 1.8160e-09, 1.2650e-09,\n",
       "            1.1497e-09, 1.5881e-09, 1.4989e-09, 2.5625e-09, 1.9509e-09, 7.9451e-10,\n",
       "            2.1737e-09, 1.3111e-09, 1.1103e-09, 1.4233e-09, 1.0442e-09, 5.3518e-10,\n",
       "            9.0016e-10, 1.1034e-09, 8.2506e-10, 1.2477e-09, 2.0655e-09, 2.8066e-09,\n",
       "            9.2298e-10, 7.8444e-10, 3.5266e-09, 1.2009e-09, 1.4670e-09, 1.8931e-09,\n",
       "            8.1198e-10, 1.7654e-09], device='cuda:0')},\n",
       "   69: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 3.0414e-07,  2.4735e-06,  9.0666e-07, -1.1533e-06,  7.4785e-06,\n",
       "            -4.7129e-07,  5.8018e-06,  3.0705e-06,  2.6528e-07, -3.8119e-06,\n",
       "             6.8742e-06,  2.4467e-08, -1.3444e-07,  5.7057e-06, -5.5796e-07,\n",
       "            -5.1164e-06,  5.8296e-07, -5.3564e-07, -1.2979e-06,  7.3187e-07,\n",
       "            -3.3304e-07,  2.0132e-07,  1.4326e-06,  2.1715e-07,  7.6133e-06,\n",
       "             2.0502e-08, -9.1905e-07, -9.4436e-06,  2.8899e-06,  4.1386e-06,\n",
       "             4.8827e-06,  9.8414e-07,  4.3379e-07, -2.1667e-06, -1.2154e-06,\n",
       "             6.4886e-07,  3.1070e-06,  5.6529e-07,  1.9485e-06,  1.3784e-06,\n",
       "            -2.8053e-07,  8.7433e-07, -1.4910e-06,  8.9697e-07, -4.9409e-06,\n",
       "            -3.4728e-07,  3.4956e-06, -1.4048e-06,  2.0230e-06,  2.1060e-07,\n",
       "            -2.7263e-06, -1.8683e-06,  2.3260e-06, -5.6439e-06, -1.0752e-06,\n",
       "            -1.7649e-06, -6.7873e-06, -5.9223e-07,  1.5183e-06,  1.5356e-06,\n",
       "            -2.1498e-06, -1.0481e-06, -2.0287e-06, -2.8038e-07, -8.8156e-06,\n",
       "            -2.1451e-06, -1.5305e-07, -1.1342e-06, -1.2319e-06, -7.2502e-08,\n",
       "            -1.4962e-06,  4.4676e-07, -3.6554e-06, -7.0475e-07,  1.5022e-06,\n",
       "             8.5515e-07, -1.2332e-06,  8.6862e-08, -3.2178e-06, -2.5854e-06,\n",
       "             1.1793e-06, -3.9553e-06,  2.0753e-06,  6.0098e-06, -7.0541e-07,\n",
       "             8.7461e-07, -1.4345e-06,  1.2911e-06,  1.0873e-06,  1.7613e-06,\n",
       "             1.4438e-06,  1.9589e-06,  6.3181e-06,  2.4277e-06, -2.4628e-06,\n",
       "            -7.3986e-07, -2.3477e-06, -6.6988e-07, -8.9123e-06, -1.1790e-06,\n",
       "             2.1646e-06, -4.5932e-06, -9.8646e-07,  8.8838e-08, -1.4375e-06,\n",
       "            -3.2579e-07,  1.1307e-05,  2.6817e-06, -3.3936e-07,  2.5150e-06,\n",
       "             3.1594e-07,  1.0967e-06, -1.1415e-05, -1.9837e-06,  2.2664e-06,\n",
       "            -2.4589e-07, -8.1723e-07, -2.8268e-06,  2.9499e-06, -4.5542e-08,\n",
       "             1.4359e-06, -1.4516e-06,  6.5572e-07, -2.0964e-07, -2.3581e-06,\n",
       "             1.1638e-06, -2.3831e-06,  1.0393e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([7.3493e-10, 2.9405e-11, 4.9232e-11, 2.9149e-11, 3.2091e-10, 3.6357e-11,\n",
       "            3.1400e-10, 2.3480e-10, 8.2908e-11, 4.1832e-11, 3.8669e-11, 2.0438e-10,\n",
       "            6.7986e-11, 2.4630e-10, 2.3744e-11, 2.2685e-10, 4.9883e-11, 3.3751e-11,\n",
       "            1.4111e-10, 9.5417e-11, 1.0990e-09, 6.2190e-11, 1.0954e-10, 1.3164e-11,\n",
       "            2.6970e-10, 6.8708e-11, 4.2353e-11, 8.4720e-10, 7.8765e-11, 2.2679e-10,\n",
       "            3.0920e-10, 2.4631e-09, 3.2360e-10, 6.4828e-11, 2.0202e-11, 1.2339e-11,\n",
       "            9.3951e-11, 6.1159e-11, 3.5066e-11, 2.7447e-10, 1.7601e-11, 1.9400e-11,\n",
       "            1.0273e-10, 3.5021e-11, 3.2337e-10, 5.2064e-11, 5.7300e-10, 1.6986e-11,\n",
       "            3.8965e-11, 5.2323e-11, 2.9028e-11, 3.6408e-11, 3.5574e-11, 2.0320e-10,\n",
       "            2.3380e-11, 3.2337e-11, 4.3973e-11, 7.8330e-11, 1.2000e-10, 1.2260e-10,\n",
       "            2.8700e-11, 3.0052e-11, 2.1684e-11, 3.6201e-11, 7.2983e-10, 4.8243e-11,\n",
       "            4.6399e-11, 7.8443e-11, 2.1884e-10, 1.7811e-11, 8.2163e-11, 4.0283e-11,\n",
       "            9.5731e-11, 4.5727e-11, 2.4263e-11, 1.5368e-11, 1.5044e-11, 1.5257e-11,\n",
       "            1.4438e-10, 1.2474e-10, 4.5676e-11, 3.0870e-10, 3.3449e-11, 2.3509e-10,\n",
       "            2.0976e-11, 6.5450e-11, 1.8457e-10, 5.7193e-11, 6.0681e-11, 4.6930e-11,\n",
       "            1.3580e-10, 7.7633e-11, 5.8743e-10, 5.4732e-11, 8.3264e-11, 1.4476e-11,\n",
       "            1.0490e-10, 2.3484e-11, 1.2760e-10, 6.3991e-11, 8.0376e-11, 1.4381e-10,\n",
       "            6.0082e-11, 4.4501e-11, 2.5560e-11, 4.7356e-11, 2.2084e-08, 1.1934e-10,\n",
       "            5.5120e-11, 2.3041e-11, 3.3394e-10, 8.9187e-11, 1.0240e-09, 4.9779e-11,\n",
       "            4.1569e-11, 8.3093e-12, 1.1182e-10, 2.5346e-10, 5.9891e-11, 2.4492e-11,\n",
       "            3.8562e-11, 9.2365e-11, 2.8180e-11, 2.5311e-11, 4.9746e-11, 4.8314e-11,\n",
       "            6.6762e-11, 1.0961e-10], device='cuda:0')},\n",
       "   70: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 7.7142e-07,  8.3660e-08,  1.4252e-06,  ...,  1.5045e-06,\n",
       "              8.0246e-07,  5.7018e-07],\n",
       "            [-1.8560e-07, -1.2941e-07, -2.2247e-07,  ..., -3.1669e-07,\n",
       "             -3.0609e-07, -6.6707e-08],\n",
       "            [ 2.1745e-07,  3.9520e-07,  5.6055e-08,  ...,  2.5458e-08,\n",
       "              2.0465e-07, -2.1138e-07],\n",
       "            ...,\n",
       "            [-4.1687e-07, -2.8848e-07, -3.2213e-07,  ..., -6.5557e-07,\n",
       "             -3.6094e-07, -1.1228e-07],\n",
       "            [ 1.2330e-07,  2.6166e-09, -5.0716e-08,  ...,  1.4040e-07,\n",
       "             -4.8316e-08, -9.4597e-09],\n",
       "            [-7.4005e-08, -3.0288e-07,  2.1926e-07,  ..., -1.7917e-07,\n",
       "              1.5312e-07,  2.7354e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[8.7437e-13, 2.1105e-12, 3.3072e-12,  ..., 3.3045e-12, 2.5349e-12,\n",
       "             1.6767e-12],\n",
       "            [5.9991e-14, 2.3734e-13, 2.6101e-13,  ..., 1.4655e-13, 1.5822e-13,\n",
       "             1.2950e-13],\n",
       "            [9.6998e-14, 7.3279e-13, 3.3845e-13,  ..., 2.3870e-13, 3.4975e-13,\n",
       "             1.1246e-13],\n",
       "            ...,\n",
       "            [4.5870e-13, 4.1321e-13, 1.0665e-12,  ..., 1.4320e-12, 7.7402e-13,\n",
       "             3.6869e-13],\n",
       "            [2.0061e-13, 9.8903e-13, 8.2654e-13,  ..., 7.8368e-13, 5.4897e-13,\n",
       "             3.2755e-13],\n",
       "            [1.3299e-13, 2.0509e-12, 6.0303e-13,  ..., 3.2499e-13, 7.9496e-13,\n",
       "             2.1438e-13]], device='cuda:0')},\n",
       "   71: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 9.1572e-07,  5.9211e-07,  4.5069e-07,  ..., -1.1328e-06,\n",
       "              1.3390e-07,  4.9820e-07],\n",
       "            [ 1.5122e-07, -1.2950e-07, -1.2628e-07,  ...,  5.4066e-08,\n",
       "              1.5382e-07, -1.2514e-08],\n",
       "            [ 3.5951e-07, -3.5166e-07,  3.1455e-07,  ..., -9.9586e-08,\n",
       "              2.9834e-07,  2.5034e-07],\n",
       "            ...,\n",
       "            [-5.4948e-07, -1.1136e-07, -2.9070e-07,  ...,  4.7493e-07,\n",
       "              2.0953e-07, -4.4179e-07],\n",
       "            [-4.1228e-07, -6.4023e-08, -2.7025e-07,  ...,  3.9085e-07,\n",
       "              9.6129e-08, -2.0419e-08],\n",
       "            [-3.0534e-07,  2.1278e-07, -1.0995e-07,  ...,  1.4443e-08,\n",
       "             -4.5276e-08, -2.1644e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.4066e-11, 1.5669e-12, 4.0357e-12,  ..., 6.2892e-12, 3.7322e-12,\n",
       "             3.5910e-12],\n",
       "            [7.4692e-13, 3.3409e-13, 3.9885e-13,  ..., 4.7242e-13, 3.9637e-13,\n",
       "             2.5880e-13],\n",
       "            [4.0753e-12, 3.2229e-13, 1.3975e-12,  ..., 8.4508e-13, 1.2668e-12,\n",
       "             1.4397e-12],\n",
       "            ...,\n",
       "            [2.7766e-12, 3.8747e-13, 1.0493e-12,  ..., 1.7973e-12, 8.6633e-13,\n",
       "             7.1384e-13],\n",
       "            [4.7530e-12, 7.0087e-13, 1.8139e-12,  ..., 2.1593e-12, 2.8087e-12,\n",
       "             1.4828e-12],\n",
       "            [1.3474e-11, 5.1309e-13, 2.1975e-12,  ..., 1.7929e-12, 4.3649e-12,\n",
       "             4.1868e-12]], device='cuda:0')},\n",
       "   72: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.7265e-10, -1.7339e-06,  3.4240e-07,  ...,  1.3385e-08,\n",
       "             -1.3593e-06, -7.8825e-07],\n",
       "            [ 9.7528e-07, -2.5704e-07,  1.9504e-06,  ...,  2.4137e-06,\n",
       "              5.0341e-07,  8.5641e-07],\n",
       "            [-1.4525e-06, -1.0151e-06, -5.1993e-07,  ..., -1.1221e-06,\n",
       "             -6.6799e-07,  2.3942e-07],\n",
       "            ...,\n",
       "            [ 7.9882e-07,  2.6671e-07,  1.0380e-06,  ...,  1.3111e-06,\n",
       "              1.3200e-06,  5.7920e-07],\n",
       "            [-7.4487e-08,  7.8260e-07, -9.5384e-07,  ...,  4.0090e-07,\n",
       "             -1.2995e-06, -1.1864e-06],\n",
       "            [-2.1817e-07, -5.2465e-07,  5.2589e-07,  ..., -1.7606e-07,\n",
       "             -2.8760e-07,  1.1472e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.2578e-11, 1.5386e-10, 4.4266e-11,  ..., 2.3037e-11, 6.5917e-11,\n",
       "             1.4693e-11],\n",
       "            [1.1859e-12, 4.3407e-12, 3.7992e-12,  ..., 3.5833e-12, 4.8769e-12,\n",
       "             3.0056e-12],\n",
       "            [2.5286e-12, 3.4703e-12, 9.5751e-12,  ..., 9.1643e-12, 5.8474e-12,\n",
       "             3.2846e-12],\n",
       "            ...,\n",
       "            [2.3660e-12, 4.6817e-12, 7.3966e-12,  ..., 9.5646e-12, 6.1540e-12,\n",
       "             1.9646e-12],\n",
       "            [8.2836e-13, 3.3826e-12, 7.8671e-12,  ..., 3.2276e-12, 2.9555e-12,\n",
       "             1.8056e-12],\n",
       "            [2.1910e-12, 1.9805e-11, 7.7851e-12,  ..., 5.7075e-12, 1.0195e-11,\n",
       "             2.9581e-12]], device='cuda:0')},\n",
       "   73: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-6.5107e-07,  5.1478e-07, -2.1598e-06,  ...,  1.1043e-06,\n",
       "             -1.7345e-06, -6.3766e-07],\n",
       "            [ 1.4162e-06,  9.5360e-07,  6.0040e-07,  ..., -1.4731e-06,\n",
       "             -1.0408e-06,  1.4062e-07],\n",
       "            [-1.8860e-07, -1.3850e-06,  1.3318e-07,  ..., -1.8573e-07,\n",
       "              9.4403e-07, -2.4163e-06],\n",
       "            ...,\n",
       "            [ 2.1901e-06, -4.7666e-08,  1.0266e-06,  ..., -1.7174e-06,\n",
       "             -7.2697e-07,  6.6309e-08],\n",
       "            [-8.1959e-07, -5.7630e-07, -1.0923e-07,  ...,  6.7183e-07,\n",
       "              1.2083e-06,  2.2560e-07],\n",
       "            [-1.2300e-06,  3.2386e-08, -4.9400e-07,  ...,  4.4031e-07,\n",
       "             -2.1380e-07, -1.9459e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0598e-09, 2.8304e-11, 1.8994e-10,  ..., 1.3493e-10, 3.3813e-10,\n",
       "             2.8238e-10],\n",
       "            [2.5774e-11, 1.8864e-12, 7.0978e-12,  ..., 1.0671e-11, 8.0330e-12,\n",
       "             5.9502e-12],\n",
       "            [1.7421e-11, 3.3103e-12, 9.0282e-12,  ..., 1.2093e-11, 7.5609e-12,\n",
       "             4.6719e-12],\n",
       "            ...,\n",
       "            [3.5512e-11, 4.5760e-12, 1.3527e-11,  ..., 1.5902e-11, 1.2972e-11,\n",
       "             1.2990e-11],\n",
       "            [1.3758e-11, 5.0054e-12, 7.1540e-12,  ..., 7.5044e-12, 8.7502e-12,\n",
       "             4.0406e-12],\n",
       "            [1.3867e-10, 7.5317e-12, 3.0924e-11,  ..., 2.4767e-11, 4.8914e-11,\n",
       "             4.7235e-11]], device='cuda:0')},\n",
       "   74: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.2514e-06,  3.8497e-06, -6.6758e-07,  ..., -1.3436e-06,\n",
       "              4.1301e-06,  1.0856e-06],\n",
       "            [ 6.9404e-06, -1.9150e-06,  1.2228e-05,  ...,  1.1054e-05,\n",
       "              8.3057e-06,  7.0833e-06],\n",
       "            [-2.1705e-06,  5.7942e-07, -3.5930e-06,  ..., -2.9129e-06,\n",
       "             -2.0344e-06, -1.4141e-06],\n",
       "            ...,\n",
       "            [-3.6003e-07,  6.3622e-07, -3.4196e-06,  ..., -1.9166e-06,\n",
       "             -2.3465e-06, -1.9348e-06],\n",
       "            [ 7.4080e-07, -4.2452e-07,  1.1648e-06,  ..., -7.5818e-07,\n",
       "              2.1489e-06,  1.5326e-06],\n",
       "            [-1.5303e-06, -2.3958e-07, -1.9952e-06,  ..., -2.6468e-06,\n",
       "             -1.1080e-06, -5.0803e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.9643e-11, 4.0995e-10, 2.7675e-10,  ..., 2.2783e-10, 1.6482e-10,\n",
       "             6.3432e-11],\n",
       "            [1.0357e-10, 7.3056e-11, 3.0210e-10,  ..., 3.4415e-10, 1.9943e-10,\n",
       "             1.0417e-10],\n",
       "            [6.6330e-12, 6.8708e-12, 2.8184e-11,  ..., 1.3311e-11, 8.5667e-12,\n",
       "             4.8478e-12],\n",
       "            ...,\n",
       "            [2.5593e-11, 5.3935e-11, 1.2344e-10,  ..., 7.4763e-11, 6.4153e-11,\n",
       "             3.4810e-11],\n",
       "            [1.4366e-11, 3.6981e-11, 1.1053e-10,  ..., 3.7746e-11, 2.8473e-11,\n",
       "             2.1918e-11],\n",
       "            [1.5469e-11, 8.8598e-11, 7.8788e-11,  ..., 3.7601e-11, 6.2973e-11,\n",
       "             2.8479e-11]], device='cuda:0')},\n",
       "   75: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.9357e-06, -3.9062e-07,  1.2581e-06,  ...,  2.4067e-06,\n",
       "              6.9449e-07,  4.5779e-07],\n",
       "            [ 4.2179e-06,  2.5042e-06,  4.6962e-06,  ..., -9.4932e-06,\n",
       "             -5.1132e-06,  2.6994e-07],\n",
       "            [ 1.5170e-07, -4.0269e-07, -6.3424e-07,  ...,  1.3535e-06,\n",
       "              1.4922e-06,  1.7038e-07],\n",
       "            ...,\n",
       "            [ 2.4246e-06, -1.5816e-06,  9.1613e-07,  ...,  1.6268e-06,\n",
       "              1.4779e-06,  1.0427e-06],\n",
       "            [-8.3127e-07,  6.5475e-07, -7.4707e-07,  ..., -5.5784e-07,\n",
       "             -7.9472e-07, -3.2220e-07],\n",
       "            [-2.3355e-07, -6.7552e-07,  2.2816e-07,  ...,  6.9632e-07,\n",
       "              6.4776e-07,  3.1414e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.3889e-09, 2.8257e-11, 2.0636e-10,  ..., 1.2027e-10, 1.4112e-10,\n",
       "             1.2585e-10],\n",
       "            [7.4507e-11, 1.2331e-11, 1.5410e-10,  ..., 2.4077e-10, 8.2202e-11,\n",
       "             5.6347e-12],\n",
       "            [1.7232e-11, 1.4790e-12, 5.3749e-12,  ..., 6.5254e-12, 2.6444e-12,\n",
       "             1.5421e-12],\n",
       "            ...,\n",
       "            [2.0880e-10, 1.2750e-11, 5.7550e-11,  ..., 3.8494e-11, 1.6121e-11,\n",
       "             1.6460e-11],\n",
       "            [5.9318e-11, 7.3301e-12, 2.7808e-11,  ..., 2.4244e-11, 1.4557e-11,\n",
       "             4.5391e-12],\n",
       "            [3.6069e-10, 8.5239e-12, 5.2349e-11,  ..., 4.5803e-11, 2.3381e-11,\n",
       "             3.1882e-11]], device='cuda:0')},\n",
       "   76: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 3.0471e-07, -1.7484e-06,  6.2967e-06, -1.7857e-06,  2.0020e-06,\n",
       "             1.2673e-06,  2.8688e-07, -2.1300e-06, -1.6032e-06, -3.6668e-08,\n",
       "            -2.4635e-06,  1.1419e-06,  3.2020e-07,  7.4717e-07, -1.6616e-06,\n",
       "            -1.0506e-06, -1.2553e-06,  1.4342e-06,  5.0693e-07, -1.1592e-06,\n",
       "            -7.2101e-06, -8.8488e-07,  5.6561e-07,  1.3101e-07, -8.8017e-08,\n",
       "             1.8856e-06,  1.2328e-06, -1.2022e-06, -2.3917e-06,  1.0767e-06,\n",
       "             2.6717e-06,  6.7225e-06,  5.0222e-06,  6.7639e-07, -8.3402e-06,\n",
       "            -8.5425e-07, -2.3024e-06,  2.4337e-06,  2.7740e-07,  2.4383e-07,\n",
       "            -7.5644e-07, -1.8444e-06,  2.9056e-06, -7.3502e-08, -6.3073e-07,\n",
       "            -1.4852e-06,  9.9977e-07, -5.1280e-07, -2.8206e-06,  1.5593e-06,\n",
       "            -1.5110e-06,  6.8791e-06, -5.6585e-07,  5.6557e-06, -3.0524e-07,\n",
       "            -2.3395e-06, -1.0995e-07, -1.6149e-06, -1.7320e-06, -1.3326e-07,\n",
       "            -9.5037e-08,  1.1314e-06,  1.3743e-06,  1.3631e-06,  4.7412e-06,\n",
       "             6.8270e-07, -7.5564e-07, -4.5576e-06, -3.9556e-06, -4.4613e-06,\n",
       "             1.6276e-07, -2.0084e-06,  1.4660e-06, -1.1566e-06,  9.2606e-06,\n",
       "             6.3072e-07,  6.7583e-08,  4.8629e-06,  2.0285e-06,  1.6484e-06,\n",
       "            -4.3607e-07, -3.5998e-06, -8.6680e-07,  1.2240e-06,  5.7402e-06,\n",
       "             1.8131e-05,  8.0296e-06, -1.4285e-06,  1.0997e-05, -9.2854e-08,\n",
       "             8.1620e-07,  1.9262e-06,  2.5984e-06,  8.7931e-07, -6.3379e-07,\n",
       "             1.6496e-06, -4.7906e-07, -2.3250e-06,  2.9314e-06,  9.7050e-07,\n",
       "            -2.6184e-07, -1.0443e-06, -1.6587e-06,  1.0077e-06, -2.7536e-06,\n",
       "             6.7644e-06, -4.9770e-06,  1.0807e-06,  7.3464e-07, -3.2289e-06,\n",
       "             1.2337e-06, -2.9472e-07,  3.7074e-06, -3.9696e-06, -5.7933e-07,\n",
       "            -3.2944e-07, -7.7089e-07,  3.9138e-06, -5.4779e-08,  1.6328e-06,\n",
       "             5.0745e-07, -1.7306e-07, -1.3904e-07,  1.9395e-06, -1.7301e-06,\n",
       "            -1.1450e-06, -1.3182e-06, -5.0293e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([4.4954e-10, 4.1223e-11, 2.3048e-10, 3.5527e-11, 2.4368e-10, 1.2570e-10,\n",
       "            3.4411e-11, 2.4964e-11, 1.4644e-10, 1.5004e-11, 2.2517e-10, 8.3766e-11,\n",
       "            1.1145e-10, 1.5029e-10, 6.0436e-11, 1.1408e-10, 2.0484e-11, 4.4244e-11,\n",
       "            1.0771e-10, 6.1132e-11, 2.4127e-09, 4.5805e-10, 1.8217e-11, 8.1460e-11,\n",
       "            7.0635e-11, 1.0522e-10, 1.1987e-10, 6.7438e-11, 5.2321e-11, 1.0295e-10,\n",
       "            6.9059e-11, 1.1884e-09, 2.5598e-10, 3.2900e-11, 1.5309e-10, 3.0576e-11,\n",
       "            1.4655e-10, 1.0369e-10, 9.3875e-11, 7.8517e-11, 7.9905e-11, 2.3978e-10,\n",
       "            4.8502e-10, 1.7339e-11, 7.7389e-11, 1.7967e-11, 7.8331e-11, 7.4766e-11,\n",
       "            1.1293e-10, 1.8676e-11, 1.0981e-10, 1.5965e-10, 1.6718e-11, 1.5156e-10,\n",
       "            2.2953e-11, 3.6573e-11, 2.4177e-11, 5.0037e-11, 1.3469e-10, 2.0769e-11,\n",
       "            1.6677e-10, 5.8193e-11, 1.5599e-11, 7.8943e-11, 3.0144e-09, 1.4753e-10,\n",
       "            3.9396e-11, 1.4087e-10, 9.4403e-10, 1.7703e-10, 7.5796e-11, 2.0559e-10,\n",
       "            6.2955e-11, 4.0780e-10, 2.5672e-10, 1.0915e-10, 4.8878e-11, 1.5282e-10,\n",
       "            1.6939e-10, 6.6051e-11, 6.4496e-11, 1.0783e-10, 2.6959e-11, 7.3328e-10,\n",
       "            8.4289e-10, 1.8648e-09, 1.6497e-10, 9.3987e-11, 3.3330e-10, 2.6522e-11,\n",
       "            5.2251e-11, 2.4162e-10, 1.2143e-10, 3.6997e-11, 2.4555e-11, 4.2535e-11,\n",
       "            1.2491e-10, 1.5862e-10, 1.0310e-10, 9.0310e-11, 9.9477e-11, 5.9827e-11,\n",
       "            7.8582e-11, 2.9896e-10, 1.3150e-10, 2.9746e-10, 1.4619e-08, 3.1077e-11,\n",
       "            6.8370e-11, 5.5786e-11, 2.4214e-10, 3.9817e-11, 5.6544e-10, 1.1837e-10,\n",
       "            3.3491e-11, 1.6356e-11, 3.1592e-11, 2.0283e-10, 8.0390e-11, 4.7823e-11,\n",
       "            1.3010e-10, 2.3361e-11, 8.5458e-11, 1.9514e-10, 5.5418e-10, 9.6248e-11,\n",
       "            5.8656e-11, 4.2783e-11], device='cuda:0')},\n",
       "   77: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.0194e-08,  2.7288e-08,  6.1854e-07,  ...,  7.6121e-08,\n",
       "              1.4312e-07,  2.3538e-07],\n",
       "            [ 8.3954e-08,  2.5452e-08, -3.6454e-07,  ..., -5.4773e-08,\n",
       "             -2.5708e-08,  9.1852e-08],\n",
       "            [ 2.2125e-08,  8.1579e-08, -9.3496e-08,  ..., -5.4246e-09,\n",
       "             -3.6881e-08,  1.9275e-07],\n",
       "            ...,\n",
       "            [ 8.2780e-09,  1.8319e-08,  2.6455e-07,  ...,  6.9840e-08,\n",
       "              1.9441e-07,  1.3713e-07],\n",
       "            [-4.6329e-08, -4.2773e-08, -2.5091e-08,  ..., -3.4091e-08,\n",
       "             -5.8260e-08, -3.8749e-08],\n",
       "            [ 5.6568e-08, -5.3281e-08,  7.3539e-08,  ..., -1.1533e-08,\n",
       "             -1.8979e-08, -2.0187e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.7674e-14, 4.9427e-14, 5.9789e-13,  ..., 3.9155e-14, 4.7049e-13,\n",
       "             2.2722e-13],\n",
       "            [1.3825e-13, 2.9293e-14, 2.8858e-13,  ..., 3.9670e-14, 1.0003e-12,\n",
       "             1.8606e-13],\n",
       "            [3.0333e-14, 2.4211e-14, 3.4432e-14,  ..., 1.8011e-14, 4.8043e-13,\n",
       "             9.3358e-14],\n",
       "            ...,\n",
       "            [1.6157e-14, 3.6391e-15, 2.7534e-13,  ..., 8.6594e-15, 4.8961e-13,\n",
       "             3.8173e-13],\n",
       "            [3.2105e-14, 1.4787e-14, 6.6975e-14,  ..., 2.1782e-14, 4.8757e-13,\n",
       "             1.6182e-13],\n",
       "            [2.1998e-14, 6.2989e-15, 3.2010e-14,  ..., 5.6166e-15, 6.5853e-14,\n",
       "             4.6395e-13]], device='cuda:0')},\n",
       "   78: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.0420e-07, -2.6389e-07,  9.7955e-07,  ...,  1.5538e-07,\n",
       "              7.4012e-07, -1.1449e-07],\n",
       "            [ 2.3085e-07, -2.7219e-07,  6.9858e-09,  ..., -2.0769e-08,\n",
       "             -7.2376e-08, -5.3724e-08],\n",
       "            [ 2.0450e-07,  8.2108e-09, -1.0787e-07,  ...,  1.3001e-07,\n",
       "              1.3945e-07,  4.7555e-07],\n",
       "            ...,\n",
       "            [-3.9670e-07,  2.0890e-07,  6.8439e-07,  ...,  5.0372e-07,\n",
       "              3.1726e-07,  2.3522e-07],\n",
       "            [-3.4749e-07,  1.7131e-07, -7.5853e-09,  ...,  2.4788e-07,\n",
       "             -7.4802e-08,  2.5102e-07],\n",
       "            [-5.4210e-08,  3.8201e-08,  6.1049e-08,  ..., -3.5569e-08,\n",
       "              1.6175e-07, -2.2563e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[6.9704e-12, 1.4587e-12, 1.1243e-12,  ..., 1.8790e-12, 2.3477e-12,\n",
       "             1.4091e-12],\n",
       "            [4.5651e-12, 2.8682e-12, 1.5017e-12,  ..., 1.2683e-12, 2.2901e-12,\n",
       "             1.4919e-12],\n",
       "            [1.3807e-12, 7.2738e-13, 1.4908e-12,  ..., 4.4402e-13, 1.2072e-12,\n",
       "             4.5398e-13],\n",
       "            ...,\n",
       "            [1.1386e-12, 5.7322e-13, 3.0960e-13,  ..., 7.9831e-13, 4.3271e-13,\n",
       "             2.4404e-13],\n",
       "            [1.6300e-12, 6.0097e-13, 1.1892e-12,  ..., 5.4345e-13, 1.3857e-12,\n",
       "             3.3469e-13],\n",
       "            [4.1949e-13, 1.8498e-13, 1.3859e-13,  ..., 1.3817e-13, 1.8943e-13,\n",
       "             2.5054e-13]], device='cuda:0')},\n",
       "   79: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.0021e-07, -2.8292e-07, -3.2747e-07,  ...,  3.9334e-08,\n",
       "             -2.8962e-07, -5.5127e-07],\n",
       "            [ 1.3905e-08, -3.7967e-08,  1.3667e-06,  ...,  8.0813e-08,\n",
       "              1.2606e-07, -7.7990e-09],\n",
       "            [ 9.8439e-08,  1.1965e-07,  3.3369e-07,  ...,  3.1027e-08,\n",
       "             -1.3850e-06, -2.9304e-08],\n",
       "            ...,\n",
       "            [ 6.5382e-08, -1.1355e-07, -4.0876e-08,  ...,  1.3954e-07,\n",
       "             -1.9046e-07, -3.8834e-07],\n",
       "            [-6.7722e-08, -2.1505e-07,  1.3356e-06,  ...,  8.4131e-08,\n",
       "              1.7241e-07, -5.7055e-08],\n",
       "            [-2.8518e-09, -3.6838e-08, -9.0191e-07,  ..., -2.8010e-07,\n",
       "             -3.4553e-07,  6.6360e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.2963e-12, 2.1935e-12, 5.9325e-12,  ..., 2.6869e-12, 1.2213e-11,\n",
       "             2.9129e-12],\n",
       "            [2.9748e-13, 4.3310e-14, 5.5230e-12,  ..., 5.8880e-14, 1.7229e-12,\n",
       "             3.1372e-13],\n",
       "            [2.8065e-12, 9.5975e-13, 2.7231e-12,  ..., 1.1296e-12, 2.0352e-11,\n",
       "             3.8279e-12],\n",
       "            ...,\n",
       "            [3.6558e-12, 2.3145e-13, 1.0130e-11,  ..., 4.8806e-13, 5.7836e-12,\n",
       "             9.1458e-12],\n",
       "            [5.4417e-13, 2.8196e-13, 6.8291e-12,  ..., 9.9959e-14, 8.6793e-12,\n",
       "             1.5275e-12],\n",
       "            [1.2427e-13, 1.5780e-13, 1.1527e-12,  ..., 2.0042e-13, 1.2563e-11,\n",
       "             2.4418e-12]], device='cuda:0')},\n",
       "   80: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.2642e-06, -3.0127e-07, -2.7808e-06,  ...,  2.7072e-06,\n",
       "             -1.3661e-06,  7.2766e-07],\n",
       "            [-1.8727e-06,  8.1158e-08,  5.6215e-07,  ...,  1.0496e-06,\n",
       "              4.5334e-07,  1.4375e-06],\n",
       "            [-9.6933e-08, -2.0026e-06, -5.8215e-06,  ..., -4.1993e-06,\n",
       "             -3.2740e-06,  1.5218e-06],\n",
       "            ...,\n",
       "            [-2.0839e-07,  7.9298e-07,  1.0368e-08,  ...,  8.4300e-08,\n",
       "             -5.0082e-07,  1.9375e-06],\n",
       "            [-1.2834e-06,  6.9744e-07,  5.8988e-07,  ...,  1.2530e-06,\n",
       "              1.0449e-06,  2.5212e-08],\n",
       "            [ 2.4723e-07, -3.0719e-07, -9.3866e-07,  ...,  7.9440e-07,\n",
       "             -4.6920e-07,  7.1744e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.8406e-10, 5.6573e-11, 8.7117e-11,  ..., 3.4239e-11, 1.5838e-10,\n",
       "             4.0920e-11],\n",
       "            [2.4310e-11, 5.6061e-12, 2.9902e-12,  ..., 9.5346e-12, 4.6053e-12,\n",
       "             2.2987e-12],\n",
       "            [5.6689e-11, 2.9801e-11, 5.9512e-11,  ..., 2.0395e-11, 4.8168e-11,\n",
       "             1.3489e-11],\n",
       "            ...,\n",
       "            [5.6210e-11, 1.4524e-11, 4.9782e-12,  ..., 2.5051e-11, 1.5532e-11,\n",
       "             9.5826e-12],\n",
       "            [2.5092e-11, 8.4915e-12, 7.5662e-12,  ..., 1.0995e-11, 8.1282e-12,\n",
       "             5.8052e-12],\n",
       "            [1.1576e-11, 8.4622e-12, 1.0141e-11,  ..., 7.6379e-12, 7.9568e-12,\n",
       "             2.6849e-12]], device='cuda:0')},\n",
       "   81: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 3.1746e-07,  4.0279e-07, -1.0203e-06,  ..., -2.1711e-07,\n",
       "             -1.8662e-06, -3.1507e-07],\n",
       "            [-1.4821e-07,  2.5752e-07,  3.7357e-07,  ...,  4.9646e-07,\n",
       "              7.2778e-07, -5.0608e-07],\n",
       "            [-1.4155e-07, -1.3341e-07,  2.3647e-06,  ...,  6.0216e-07,\n",
       "              6.2633e-07, -8.1591e-07],\n",
       "            ...,\n",
       "            [ 2.0149e-08, -2.0678e-07,  1.5612e-08,  ..., -1.7451e-07,\n",
       "              1.3198e-07, -1.0525e-07],\n",
       "            [ 4.4462e-07,  1.2652e-06, -1.1582e-06,  ..., -3.1265e-07,\n",
       "             -1.2985e-06,  5.1930e-06],\n",
       "            [-1.5502e-07, -4.1502e-07,  2.4026e-06,  ..., -1.5735e-06,\n",
       "             -2.8658e-06, -1.9841e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.1132e-11, 4.4953e-12, 2.1634e-11,  ..., 2.4617e-12, 1.2279e-10,\n",
       "             8.7501e-11],\n",
       "            [4.7152e-11, 1.2506e-11, 2.8575e-11,  ..., 1.1890e-11, 1.3421e-10,\n",
       "             1.1865e-10],\n",
       "            [1.8719e-11, 5.0902e-13, 9.2743e-12,  ..., 1.0745e-12, 4.8465e-11,\n",
       "             2.6940e-11],\n",
       "            ...,\n",
       "            [3.5696e-11, 2.9064e-12, 1.1374e-11,  ..., 4.5480e-12, 3.3373e-11,\n",
       "             5.4304e-11],\n",
       "            [2.0088e-12, 6.9343e-12, 4.7290e-12,  ..., 1.0748e-12, 3.0889e-11,\n",
       "             1.5447e-10],\n",
       "            [7.1785e-11, 1.1532e-11, 1.6423e-10,  ..., 1.9262e-11, 1.7421e-10,\n",
       "             2.2096e-10]], device='cuda:0')},\n",
       "   82: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 9.6324e-07, -1.4253e-06, -6.5026e-07,  ..., -2.0942e-07,\n",
       "              6.5442e-07,  9.4177e-07],\n",
       "            [ 1.5414e-06,  8.4648e-07,  1.4393e-06,  ..., -5.6109e-07,\n",
       "             -2.8950e-07, -1.0967e-07],\n",
       "            [-8.0559e-07,  5.0520e-07, -3.0417e-07,  ..., -2.3213e-07,\n",
       "             -7.1828e-07,  1.1814e-07],\n",
       "            ...,\n",
       "            [ 2.5239e-07,  4.7997e-07,  1.7105e-07,  ...,  1.1309e-07,\n",
       "             -1.3920e-07, -8.5111e-09],\n",
       "            [ 2.3163e-06, -2.6617e-06, -3.2504e-07,  ...,  1.4029e-07,\n",
       "              2.8489e-06,  2.4476e-06],\n",
       "            [-3.4091e-06,  2.1391e-06,  5.2965e-07,  ...,  1.3658e-06,\n",
       "              3.5357e-06,  1.0106e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.6224e-10, 3.3784e-11, 1.2800e-11,  ..., 4.5182e-11, 9.8838e-11,\n",
       "             8.5882e-12],\n",
       "            [2.8381e-10, 5.0247e-11, 6.4098e-11,  ..., 4.1989e-11, 3.6929e-11,\n",
       "             1.1300e-11],\n",
       "            [1.6019e-11, 1.0017e-11, 1.6406e-12,  ..., 1.2819e-11, 5.4842e-12,\n",
       "             2.2266e-12],\n",
       "            ...,\n",
       "            [5.0846e-11, 8.8967e-12, 1.2468e-11,  ..., 1.1566e-11, 6.5200e-12,\n",
       "             3.7772e-12],\n",
       "            [3.9824e-11, 1.1998e-11, 7.4402e-12,  ..., 2.2129e-11, 2.1029e-11,\n",
       "             1.3835e-11],\n",
       "            [2.1920e-10, 6.4084e-11, 6.6623e-11,  ..., 1.2991e-10, 5.1714e-11,\n",
       "             1.3049e-11]], device='cuda:0')},\n",
       "   83: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.9829e-06, -5.9525e-08,  1.3214e-06, -1.1816e-07, -3.0310e-06,\n",
       "            -1.5564e-06, -9.6572e-07, -5.8609e-07, -1.1531e-07, -5.1736e-07,\n",
       "            -3.8504e-07,  1.0303e-06, -1.9550e-07, -4.5050e-07,  1.6176e-06,\n",
       "             4.5759e-08,  8.9554e-07, -3.7766e-07,  5.7137e-07, -5.1097e-07,\n",
       "            -8.7606e-07, -4.8532e-07,  1.2791e-06, -1.0884e-07,  3.3753e-07,\n",
       "            -1.4081e-06, -1.4588e-06, -9.6525e-07, -9.0132e-08, -2.1520e-06,\n",
       "            -2.1051e-06,  8.4768e-07, -9.6111e-07, -5.2123e-07,  9.5889e-07,\n",
       "            -4.1550e-09, -1.2439e-07, -8.0034e-07, -7.2263e-07, -1.5878e-07,\n",
       "             6.1302e-07,  4.0937e-07, -1.0797e-06,  4.8119e-07, -1.8042e-07,\n",
       "             1.4609e-06,  1.3201e-06,  1.1061e-06, -5.3654e-07,  1.7402e-07,\n",
       "            -4.1155e-07, -1.9969e-06,  6.1174e-07, -6.0333e-08, -7.2209e-07,\n",
       "            -3.7063e-07, -6.1559e-07,  7.0626e-08, -1.6609e-07,  6.4881e-07,\n",
       "            -4.6200e-07, -1.5506e-06, -4.1096e-08,  4.5711e-07, -5.0917e-07,\n",
       "            -2.7606e-07, -2.0460e-06,  3.4138e-07, -6.9012e-07, -6.1948e-07,\n",
       "            -7.7085e-07, -7.4150e-07, -4.7064e-07, -5.8544e-07, -1.1175e-07,\n",
       "             4.4252e-07,  6.5031e-07, -3.8552e-07,  1.8711e-07, -1.8247e-06,\n",
       "             5.5100e-07,  2.0545e-08,  4.8667e-07, -3.2917e-08,  3.6045e-07,\n",
       "            -3.4929e-07, -3.9778e-07, -1.1596e-06,  3.7293e-08, -9.2117e-07,\n",
       "             4.6654e-07, -4.8663e-07,  1.7646e-06,  4.1996e-09, -4.6323e-07,\n",
       "             9.3146e-07, -3.4386e-07,  1.1625e-06, -3.6879e-07, -4.0410e-07,\n",
       "             3.0523e-07, -1.5994e-06,  4.7905e-08, -9.9345e-07, -5.8031e-07,\n",
       "            -5.8484e-07,  1.6877e-05,  6.2748e-08,  6.5516e-08,  1.0733e-06,\n",
       "             2.1439e-06, -3.5328e-07,  7.9557e-06,  9.6283e-09,  1.1124e-06,\n",
       "            -1.0239e-06, -3.2952e-06, -1.2909e-06, -3.1658e-07, -6.9099e-07,\n",
       "            -1.4105e-07, -6.3269e-07,  1.2963e-06, -4.6105e-07, -7.0128e-07,\n",
       "            -8.2912e-07,  2.3416e-07,  2.1199e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.2091e-11, 9.6444e-12, 8.2941e-12, 4.9420e-12, 6.4864e-11, 2.0472e-11,\n",
       "            7.5500e-12, 1.7882e-12, 3.6317e-12, 6.0647e-13, 2.4893e-12, 3.0371e-12,\n",
       "            1.2482e-11, 2.0212e-12, 3.9651e-11, 8.5020e-13, 8.1905e-12, 6.4558e-12,\n",
       "            1.8775e-12, 1.1956e-11, 5.6324e-11, 5.5795e-12, 5.3542e-11, 8.8287e-13,\n",
       "            4.9903e-11, 4.3262e-12, 3.1769e-11, 1.6185e-11, 4.2663e-12, 2.6665e-11,\n",
       "            1.8264e-11, 1.1139e-09, 5.3516e-12, 4.6172e-12, 2.8597e-12, 1.4095e-12,\n",
       "            8.5127e-13, 1.7765e-11, 2.5294e-12, 1.2615e-11, 1.3870e-12, 1.3916e-11,\n",
       "            2.7989e-12, 1.1739e-11, 3.6295e-12, 4.9380e-12, 1.2268e-11, 2.9166e-12,\n",
       "            3.8946e-12, 1.4659e-12, 6.2621e-13, 2.5758e-11, 1.1531e-11, 8.0912e-12,\n",
       "            3.9489e-12, 5.8081e-12, 7.7909e-13, 1.8213e-11, 3.5744e-12, 1.3858e-11,\n",
       "            2.6529e-12, 9.9972e-12, 6.2804e-12, 1.4417e-12, 6.5041e-12, 1.9709e-12,\n",
       "            5.1732e-12, 6.1904e-12, 1.1152e-12, 2.5416e-12, 1.2088e-12, 4.9383e-12,\n",
       "            1.1143e-12, 1.2678e-12, 1.2907e-12, 1.0050e-11, 3.7410e-12, 1.1262e-11,\n",
       "            1.5192e-12, 4.1869e-12, 2.6097e-12, 4.2719e-12, 1.0741e-11, 5.8172e-12,\n",
       "            6.0803e-12, 5.2267e-12, 5.4529e-12, 5.8282e-11, 3.2803e-12, 3.2349e-12,\n",
       "            5.3715e-12, 6.8798e-13, 1.1958e-11, 4.8067e-12, 7.3755e-13, 7.8178e-12,\n",
       "            2.8251e-12, 1.6792e-12, 2.7494e-12, 1.9527e-12, 4.6218e-12, 5.1544e-12,\n",
       "            4.6242e-12, 1.3275e-11, 6.2618e-12, 7.5979e-12, 3.5355e-09, 1.5830e-11,\n",
       "            1.0519e-12, 1.1943e-11, 3.2721e-11, 1.2742e-11, 3.0291e-10, 9.4657e-12,\n",
       "            8.0984e-12, 3.9415e-12, 1.5568e-11, 4.5629e-12, 4.4490e-11, 1.2696e-11,\n",
       "            3.7383e-12, 2.0682e-12, 1.3975e-11, 6.9895e-12, 2.2279e-11, 2.3936e-12,\n",
       "            1.8893e-12, 2.3127e-12], device='cuda:0')},\n",
       "   84: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 2.5768e-07, -6.0501e-07, -7.3297e-07, -2.1879e-07, -3.8471e-07,\n",
       "             1.3479e-07,  4.3946e-07, -2.3226e-07,  4.8968e-07,  9.3374e-08,\n",
       "            -1.1492e-07, -1.2230e-07, -2.4187e-07, -4.3589e-07, -1.4515e-06,\n",
       "             2.4675e-07, -1.3669e-07,  3.2275e-07, -5.0921e-07,  8.2181e-08,\n",
       "             3.1284e-07,  4.3946e-08, -6.9357e-07, -2.0160e-07, -5.5740e-07,\n",
       "            -1.0935e-08, -1.4397e-07, -1.6394e-07,  2.3168e-07, -5.8377e-07,\n",
       "             3.3323e-07,  1.0073e-06, -2.6194e-07, -1.9124e-07,  4.8748e-08,\n",
       "             1.7390e-07,  1.2022e-07, -2.6999e-07,  7.2561e-08, -7.6655e-07,\n",
       "             4.2055e-08,  1.7986e-08,  1.8119e-07,  6.2975e-07, -7.2163e-09,\n",
       "             5.7701e-07,  5.9498e-07, -2.8923e-07, -3.4098e-07, -9.6362e-08,\n",
       "            -7.1685e-07, -1.0469e-08, -5.7421e-07, -5.2349e-07,  3.1085e-07,\n",
       "             3.9034e-07, -2.4128e-07, -4.3481e-07, -6.8107e-08,  9.9082e-08,\n",
       "            -1.0631e-06,  2.7521e-07,  5.1262e-07, -4.2944e-07, -7.4797e-08,\n",
       "            -5.0044e-08, -2.2981e-07,  4.1510e-07, -5.1245e-07, -1.9813e-07,\n",
       "            -1.3794e-07,  1.5642e-08, -9.6509e-08,  2.1123e-07,  1.9503e-07,\n",
       "            -4.5822e-07, -7.1853e-07,  5.5817e-07, -1.9977e-07,  1.0167e-07,\n",
       "             1.7302e-07,  2.7431e-07, -1.6481e-07, -4.3052e-07, -3.0996e-07,\n",
       "             3.6394e-07,  6.8116e-08,  6.9084e-07, -4.2871e-07, -2.5124e-07,\n",
       "            -3.5668e-07, -3.7609e-07, -5.3230e-07, -1.9797e-07, -1.2458e-07,\n",
       "            -3.9846e-07, -2.3015e-07, -5.6799e-07, -5.1733e-09, -8.3595e-08,\n",
       "            -2.9661e-07,  1.5410e-07,  5.3850e-07,  1.4715e-07, -1.0722e-07,\n",
       "            -2.0283e-08,  1.1453e-06, -1.3559e-07, -7.0297e-08, -6.7696e-07,\n",
       "            -3.5046e-07,  5.8197e-08,  1.6635e-07, -2.5825e-07,  1.0356e-06,\n",
       "             4.0309e-07,  1.4138e-07,  2.0225e-07,  5.2993e-07, -5.5778e-08,\n",
       "             2.3419e-07, -2.6552e-07,  6.8106e-07, -1.6415e-07,  1.3865e-07,\n",
       "             1.7756e-07,  1.6304e-07, -2.1016e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([6.8194e-12, 7.0636e-11, 3.0286e-11, 1.3323e-11, 2.2819e-11, 3.9262e-11,\n",
       "            1.2855e-11, 3.9533e-12, 1.7168e-11, 2.6730e-12, 6.3554e-12, 1.8981e-11,\n",
       "            5.5469e-11, 1.5981e-11, 1.1324e-10, 3.0638e-12, 2.0004e-11, 2.3730e-11,\n",
       "            1.4258e-11, 1.8777e-11, 1.9718e-11, 3.4582e-11, 8.1492e-11, 9.8781e-12,\n",
       "            5.4786e-11, 6.2765e-12, 8.7223e-11, 1.9276e-11, 6.8401e-12, 9.4649e-12,\n",
       "            3.3193e-11, 1.7460e-10, 1.5415e-11, 7.5088e-12, 1.3110e-11, 1.3109e-11,\n",
       "            4.7000e-12, 4.4856e-11, 1.7857e-11, 2.4619e-11, 7.0990e-12, 2.2059e-11,\n",
       "            2.0101e-11, 6.9970e-11, 6.7759e-12, 2.7242e-11, 4.4530e-11, 1.4480e-11,\n",
       "            7.6280e-12, 1.3071e-11, 1.4946e-11, 9.5155e-11, 1.9417e-11, 8.2523e-12,\n",
       "            5.3382e-12, 1.5996e-11, 2.4224e-12, 2.6528e-11, 2.5524e-11, 1.1526e-11,\n",
       "            3.5672e-11, 8.6927e-12, 2.6036e-11, 3.3128e-12, 1.3853e-11, 1.0960e-11,\n",
       "            6.1792e-12, 9.1685e-12, 4.8509e-12, 1.3554e-11, 5.2559e-12, 8.5271e-12,\n",
       "            1.5494e-11, 6.4088e-12, 7.6288e-12, 3.6412e-11, 1.9274e-11, 3.0724e-11,\n",
       "            1.4200e-11, 7.1455e-12, 4.1357e-12, 2.7110e-11, 2.9981e-11, 5.0028e-12,\n",
       "            3.0981e-11, 2.5299e-11, 9.6591e-12, 6.3455e-12, 3.4996e-12, 1.1652e-11,\n",
       "            1.6293e-11, 2.5035e-12, 2.4881e-11, 9.0799e-12, 5.0619e-12, 3.3542e-11,\n",
       "            1.8511e-11, 1.0970e-11, 1.7252e-11, 1.0341e-11, 1.0801e-11, 1.3120e-11,\n",
       "            5.4477e-11, 2.8354e-11, 1.7243e-11, 2.1345e-11, 9.0016e-11, 3.2717e-11,\n",
       "            1.2035e-11, 9.4018e-11, 5.8363e-12, 2.7695e-11, 5.8451e-11, 3.0364e-11,\n",
       "            1.8399e-11, 8.2624e-12, 2.0842e-11, 1.2059e-11, 3.6891e-11, 2.5501e-11,\n",
       "            2.7804e-11, 2.2082e-11, 2.8441e-11, 8.3629e-12, 4.1199e-11, 5.2852e-12,\n",
       "            4.4334e-12, 1.0252e-11], device='cuda:0')},\n",
       "   85: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-4.2957e-06,  2.5442e-07,  2.0960e-06,  1.9252e-06, -1.1787e-06,\n",
       "            -4.3374e-06, -3.2830e-06,  3.6935e-06,  9.4312e-07, -3.5953e-06,\n",
       "             4.1561e-07,  2.0337e-06,  1.3328e-06, -3.3332e-06,  1.5944e-06,\n",
       "             9.4336e-07, -2.1372e-06,  9.7018e-07,  1.5562e-06, -2.0318e-06,\n",
       "            -1.6545e-06, -2.1527e-07, -1.0321e-06,  7.1670e-07, -6.1753e-06,\n",
       "            -2.0009e-06, -1.8091e-06, -2.5047e-06,  1.7534e-06, -9.9026e-06,\n",
       "            -1.7951e-06,  1.2491e-05, -1.3434e-06, -2.9676e-06, -3.3862e-06,\n",
       "            -1.4274e-06, -1.1132e-06,  1.1274e-06, -2.2608e-06,  6.5186e-06,\n",
       "            -1.5017e-06,  2.5559e-06, -1.7756e-06,  6.0749e-09,  1.4030e-06,\n",
       "            -8.3685e-07,  5.0938e-06,  1.6605e-06,  1.4575e-06,  6.6061e-07,\n",
       "            -1.3501e-06,  1.6680e-06,  8.4265e-07, -2.4316e-06,  1.5270e-06,\n",
       "            -1.1480e-06,  3.4974e-06, -8.8553e-07, -1.1765e-06,  7.3243e-07,\n",
       "             6.2060e-07,  7.4803e-07, -5.0626e-07,  1.3141e-06,  4.6540e-07,\n",
       "            -5.5531e-07,  3.2335e-06, -2.5297e-07, -1.5484e-06,  7.8292e-07,\n",
       "             3.2131e-06, -2.5809e-06,  1.4668e-06,  3.9242e-06,  9.3053e-07,\n",
       "            -8.1183e-08,  1.7225e-06,  3.9825e-06, -2.1165e-06, -7.8887e-07,\n",
       "             2.4143e-06,  1.1953e-06, -1.9857e-06, -4.0152e-06, -2.2800e-07,\n",
       "            -3.9138e-06, -2.1565e-06,  1.0621e-07,  2.0453e-06, -8.5019e-07,\n",
       "            -9.6733e-07, -3.0198e-07,  2.7979e-06, -9.0755e-07,  5.4404e-06,\n",
       "             3.2857e-06, -2.5824e-06, -2.2657e-07,  5.4443e-06,  3.5383e-07,\n",
       "            -9.2898e-07, -1.3781e-07,  3.9103e-06, -1.3341e-06, -4.2925e-07,\n",
       "            -1.9885e-06, -1.8352e-05, -3.0788e-06,  8.0820e-07, -7.4141e-07,\n",
       "            -1.6393e-06, -2.2407e-07,  6.5841e-06,  1.1890e-06, -6.2367e-07,\n",
       "             2.1620e-06,  1.8105e-06,  3.3117e-08, -4.8088e-06, -4.0000e-07,\n",
       "            -2.1985e-06,  3.2025e-06, -9.1652e-07,  2.5621e-06, -3.3530e-06,\n",
       "             9.7920e-07,  6.7202e-07,  2.3395e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([5.4670e-10, 5.0280e-11, 1.3220e-10, 7.5634e-11, 1.2466e-10, 2.1494e-10,\n",
       "            1.7365e-10, 1.4391e-10, 1.3399e-10, 7.5701e-11, 5.4162e-11, 1.0537e-10,\n",
       "            1.2606e-10, 1.3291e-10, 9.1597e-11, 2.5748e-10, 3.5453e-11, 1.2431e-10,\n",
       "            1.5492e-10, 1.9899e-10, 7.8254e-10, 8.6632e-11, 3.4131e-10, 2.8420e-10,\n",
       "            4.4209e-10, 6.0954e-11, 8.8534e-11, 2.2043e-10, 1.4068e-10, 1.6028e-09,\n",
       "            2.0388e-10, 1.6885e-09, 9.5349e-11, 8.3507e-11, 8.8132e-11, 8.2224e-11,\n",
       "            1.3370e-10, 1.0535e-10, 6.4749e-11, 1.1513e-10, 9.5128e-11, 1.4251e-10,\n",
       "            1.4324e-10, 9.1724e-11, 4.6285e-10, 8.7759e-11, 1.5269e-10, 3.1323e-10,\n",
       "            1.6554e-10, 1.2361e-10, 3.3078e-11, 1.0814e-10, 7.7919e-11, 2.4747e-10,\n",
       "            3.5213e-10, 6.5044e-11, 2.0085e-10, 1.1954e-10, 5.9931e-11, 3.6384e-11,\n",
       "            3.5906e-10, 4.0377e-10, 9.9903e-11, 7.1495e-11, 4.2268e-10, 6.0523e-11,\n",
       "            4.2894e-10, 3.3286e-11, 1.2106e-10, 5.3276e-11, 2.0251e-10, 2.8588e-10,\n",
       "            1.1784e-10, 1.3058e-10, 7.2748e-11, 1.3499e-10, 2.3667e-10, 5.7822e-11,\n",
       "            5.6699e-11, 1.3184e-10, 1.6526e-10, 1.2960e-10, 1.2049e-10, 2.0727e-10,\n",
       "            1.3186e-10, 1.5981e-10, 4.0154e-10, 5.2077e-10, 6.6301e-11, 9.0807e-11,\n",
       "            1.6983e-10, 7.3757e-11, 3.0738e-10, 7.9331e-11, 4.1828e-10, 1.9453e-10,\n",
       "            1.2464e-10, 3.4058e-10, 2.6337e-10, 1.0973e-10, 1.0149e-09, 1.6615e-10,\n",
       "            4.8225e-11, 3.4246e-10, 2.8128e-10, 6.3120e-11, 2.1120e-08, 1.1310e-10,\n",
       "            6.3483e-11, 3.8327e-11, 1.8187e-10, 1.2131e-10, 3.9806e-09, 1.1907e-10,\n",
       "            6.4225e-11, 9.8192e-11, 1.4052e-10, 1.2892e-10, 2.6979e-10, 7.1166e-11,\n",
       "            2.1209e-10, 6.7072e-11, 7.1367e-11, 1.0520e-10, 1.2880e-10, 5.3611e-11,\n",
       "            2.3832e-10, 2.4895e-11], device='cuda:0')},\n",
       "   86: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 6.9455e-06, -5.1973e-08, -2.7738e-06,  1.2569e-06,  1.4441e-06,\n",
       "            -1.1682e-06,  1.3151e-07,  1.1137e-06,  9.4706e-07,  3.7439e-06,\n",
       "            -4.7147e-06,  1.2070e-06, -3.5795e-06,  3.6624e-06,  4.2580e-07,\n",
       "             2.7118e-06, -1.3768e-06, -2.0206e-06,  2.0165e-06,  1.7004e-06,\n",
       "            -6.7522e-07,  7.6072e-07, -1.8145e-06, -7.1831e-06,  4.3133e-06,\n",
       "            -3.1148e-06,  1.6263e-06,  1.5022e-06,  2.3981e-06, -7.2761e-06,\n",
       "             2.5367e-07, -2.9154e-06,  1.7707e-06, -1.7208e-07, -4.5013e-06,\n",
       "             2.5474e-06,  4.5781e-07,  2.2288e-06, -7.0280e-07, -2.6262e-06,\n",
       "             1.2700e-07, -6.6230e-07, -3.3010e-06,  4.6527e-06,  2.4599e-06,\n",
       "            -2.4782e-06, -2.5324e-06, -1.1070e-06,  1.1928e-06, -4.5581e-06,\n",
       "            -1.1240e-06,  3.4170e-06, -2.9116e-06, -4.6651e-07, -2.4870e-06,\n",
       "            -1.4317e-07, -4.5508e-06, -3.7097e-06, -3.1976e-06, -1.8627e-06,\n",
       "             2.3189e-06, -2.6876e-06, -2.9102e-06, -6.2815e-07, -5.3538e-06,\n",
       "            -2.9622e-06, -9.3108e-08,  4.7462e-07,  3.7131e-06,  2.1525e-06,\n",
       "             3.9170e-07,  3.3810e-06, -4.0090e-06,  1.8808e-06, -3.8743e-07,\n",
       "            -1.7085e-06,  2.8215e-08, -2.4272e-06,  1.1119e-06,  1.7324e-06,\n",
       "             2.9135e-06, -1.5871e-06,  6.9507e-07, -3.6176e-07,  6.9058e-07,\n",
       "            -8.2998e-07,  4.9951e-06, -1.0206e-06,  1.2984e-06, -2.7748e-06,\n",
       "             2.2034e-06,  2.6517e-06,  1.0200e-06,  1.1378e-06, -1.7724e-06,\n",
       "            -1.7568e-06,  5.6245e-07,  2.6288e-06,  1.1952e-06,  2.0680e-06,\n",
       "             1.0077e-06, -8.6583e-07,  3.3785e-06, -1.5275e-06, -4.8108e-07,\n",
       "             2.6156e-06, -2.1999e-06, -5.7719e-06, -1.2711e-06,  1.4603e-06,\n",
       "             2.0199e-06, -3.8445e-06,  4.7981e-06, -1.8898e-06, -2.6790e-06,\n",
       "            -1.8857e-06,  1.7031e-07,  5.5428e-06, -4.1280e-06, -2.2248e-06,\n",
       "            -2.7257e-06, -4.5745e-06, -8.3484e-07, -1.5770e-06, -6.2223e-06,\n",
       "             3.6310e-06,  3.8956e-06,  1.1124e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([6.8825e-10, 1.4933e-10, 2.2466e-10, 1.6801e-10, 2.2913e-10, 2.6099e-10,\n",
       "            3.4563e-10, 2.7225e-10, 1.4081e-10, 3.0147e-10, 1.7188e-10, 3.4826e-10,\n",
       "            4.7141e-10, 2.1834e-10, 2.3737e-10, 2.0202e-10, 1.3399e-10, 1.9630e-10,\n",
       "            1.8672e-10, 2.2033e-10, 4.2868e-10, 2.3800e-10, 3.3963e-10, 2.2699e-10,\n",
       "            2.7292e-10, 1.5337e-10, 1.1780e-10, 3.5555e-10, 1.6409e-10, 5.2405e-10,\n",
       "            2.3362e-10, 4.7204e-10, 3.6538e-10, 9.2946e-11, 3.8637e-10, 3.1583e-10,\n",
       "            2.6304e-10, 1.5582e-10, 2.3395e-10, 4.1287e-10, 1.6727e-10, 1.5127e-10,\n",
       "            2.2961e-10, 1.6911e-10, 3.2030e-10, 2.0242e-10, 2.7052e-10, 1.4557e-10,\n",
       "            2.0779e-10, 4.5386e-10, 1.0356e-10, 1.9636e-10, 2.1821e-10, 5.1720e-10,\n",
       "            1.7398e-10, 1.6917e-10, 6.1562e-10, 2.5635e-10, 3.1582e-10, 1.9445e-10,\n",
       "            2.7935e-10, 4.2380e-10, 1.4661e-10, 2.0007e-10, 2.0001e-10, 1.9134e-10,\n",
       "            3.1898e-10, 1.0959e-10, 2.7166e-10, 1.6721e-10, 2.7099e-10, 2.2317e-10,\n",
       "            2.2362e-10, 3.3845e-10, 2.8398e-10, 2.1290e-10, 3.4928e-10, 2.4069e-10,\n",
       "            1.4832e-10, 2.6572e-10, 1.9081e-10, 4.0849e-10, 2.6305e-10, 3.0855e-10,\n",
       "            3.6849e-10, 3.5264e-10, 2.4028e-10, 2.1082e-10, 1.4786e-10, 2.0896e-10,\n",
       "            1.7260e-10, 1.8262e-10, 3.9845e-10, 1.4653e-10, 6.0502e-10, 1.8431e-10,\n",
       "            1.8184e-10, 2.3271e-10, 3.2654e-10, 2.2083e-10, 3.1985e-10, 1.9424e-10,\n",
       "            3.2624e-10, 4.1086e-10, 3.8760e-10, 1.8538e-10, 9.7436e-10, 3.2685e-10,\n",
       "            1.6089e-10, 3.1314e-10, 2.4132e-10, 2.4266e-10, 5.3861e-10, 1.5243e-10,\n",
       "            1.3789e-10, 2.1215e-10, 3.8348e-10, 5.9577e-10, 2.8350e-10, 1.2235e-10,\n",
       "            1.4091e-10, 1.3460e-10, 2.3864e-10, 1.8567e-10, 2.7180e-10, 1.9379e-10,\n",
       "            4.0526e-10, 1.0334e-10], device='cuda:0')},\n",
       "   87: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.1092e-05,  4.1467e-06, -3.7841e-06,  8.0629e-06,  3.4795e-08,\n",
       "             4.2373e-06,  3.9888e-06,  1.0389e-06, -3.0489e-06, -1.0183e-05,\n",
       "            -3.8567e-06, -7.0695e-07,  7.0369e-06, -6.6140e-06, -1.3715e-06,\n",
       "             5.4784e-06, -5.5871e-06, -1.4024e-05, -1.1465e-06, -1.2249e-06,\n",
       "             9.7128e-06, -9.4666e-06, -1.2610e-05, -2.9117e-06, -3.6245e-06,\n",
       "             6.2504e-06, -1.3283e-06, -1.4705e-07, -3.6190e-06,  3.2495e-06,\n",
       "            -2.4260e-06, -6.7009e-06, -6.8270e-07, -5.9340e-06, -1.5945e-05,\n",
       "             1.0332e-06, -1.7163e-06,  1.9028e-06, -1.8571e-07,  1.8232e-06,\n",
       "            -1.9272e-06,  1.2965e-06,  1.1368e-06, -9.8641e-06,  1.1413e-05,\n",
       "             1.6539e-06, -3.0860e-06,  5.9264e-06,  2.3715e-06,  2.7766e-06,\n",
       "            -1.0122e-07,  3.9217e-07,  7.2578e-07, -4.0933e-07,  9.2936e-07,\n",
       "             3.7778e-07,  5.6669e-08, -6.7478e-06, -1.2559e-06,  7.3374e-06,\n",
       "             9.7831e-07,  1.1264e-05,  4.0295e-06, -1.0223e-07,  2.4930e-05,\n",
       "             8.7906e-06, -7.0301e-06, -3.8401e-06,  2.2655e-06, -7.4368e-06,\n",
       "             1.4311e-06, -5.5555e-06, -3.1200e-06,  7.1738e-06, -2.7350e-06,\n",
       "            -1.2420e-05, -8.3397e-06, -1.2001e-07, -2.7518e-07, -2.1798e-06,\n",
       "             5.7231e-06,  9.3651e-06,  6.4597e-06, -7.3508e-06,  9.9621e-07,\n",
       "             3.6584e-06, -2.1382e-06, -2.6194e-06, -1.0001e-06, -2.9864e-06,\n",
       "            -7.7161e-06, -2.8843e-06, -1.7198e-06, -2.2843e-06,  1.9775e-06,\n",
       "            -6.2964e-07,  1.6042e-06, -7.7649e-06, -2.7251e-06,  1.2376e-06,\n",
       "             1.0654e-06,  2.3870e-06, -3.5465e-07, -1.0594e-06,  3.7481e-06,\n",
       "             3.0348e-07,  2.2290e-06, -3.7269e-06,  3.1374e-06,  1.0245e-06,\n",
       "             3.5631e-06,  9.3424e-07, -6.0475e-05,  3.0867e-06, -3.7923e-06,\n",
       "            -4.1894e-06, -6.8866e-07,  2.5519e-07,  3.8031e-06,  8.1248e-06,\n",
       "             1.1353e-06,  7.5941e-07,  4.4469e-06,  2.2818e-06,  1.1590e-06,\n",
       "             5.1668e-06,  8.7628e-06,  1.3081e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.4495e-10, 3.3435e-10, 2.9601e-10, 4.1523e-10, 2.2639e-10, 9.2859e-11,\n",
       "            3.1118e-10, 4.2987e-10, 1.6540e-10, 1.9802e-10, 8.7986e-11, 9.7509e-11,\n",
       "            5.8828e-10, 1.1820e-09, 2.0837e-10, 6.2504e-10, 1.6792e-10, 1.1411e-09,\n",
       "            1.7644e-10, 5.3360e-10, 3.1391e-09, 3.1446e-10, 9.5031e-10, 3.9785e-11,\n",
       "            2.2107e-10, 1.2842e-10, 8.9361e-11, 2.0792e-10, 8.3763e-10, 3.6414e-10,\n",
       "            6.6165e-11, 4.6305e-09, 8.7956e-11, 2.4359e-10, 3.1595e-10, 8.8090e-11,\n",
       "            4.0708e-11, 3.1342e-10, 2.1116e-11, 2.8124e-10, 6.2782e-11, 1.3382e-10,\n",
       "            2.2653e-10, 1.6132e-10, 2.7073e-10, 1.1072e-10, 1.6584e-10, 1.1366e-10,\n",
       "            6.0332e-11, 8.7263e-11, 1.4251e-11, 1.9120e-11, 4.5070e-11, 7.6700e-10,\n",
       "            1.7643e-10, 8.9656e-11, 6.1981e-11, 2.1095e-10, 4.7628e-11, 2.5870e-10,\n",
       "            4.7821e-11, 3.6413e-10, 2.1347e-10, 6.8574e-11, 2.6028e-09, 2.8542e-10,\n",
       "            1.1633e-10, 2.9671e-10, 6.8940e-11, 2.3725e-10, 4.2545e-11, 1.3251e-10,\n",
       "            8.9519e-11, 3.7023e-10, 1.4559e-09, 8.4358e-10, 1.1792e-09, 3.5922e-10,\n",
       "            3.0539e-10, 2.2746e-10, 3.9445e-10, 6.1126e-10, 4.6676e-10, 2.7624e-10,\n",
       "            2.3840e-10, 4.2263e-10, 1.1457e-09, 9.1370e-10, 9.4360e-10, 3.6614e-11,\n",
       "            1.6441e-10, 6.1493e-11, 2.8328e-10, 1.8466e-10, 1.1076e-10, 7.7047e-11,\n",
       "            1.0566e-10, 4.4070e-10, 8.9103e-11, 9.9810e-11, 1.3815e-10, 1.2191e-10,\n",
       "            1.4714e-10, 7.3495e-11, 2.3401e-10, 3.9271e-11, 8.4267e-09, 9.4430e-11,\n",
       "            9.4442e-11, 7.6617e-10, 5.4337e-10, 5.5111e-11, 3.4546e-08, 1.8313e-10,\n",
       "            8.4196e-10, 2.1188e-10, 3.1539e-11, 9.9712e-11, 3.3954e-09, 2.4865e-09,\n",
       "            2.5125e-10, 2.9008e-10, 1.0835e-09, 3.7801e-11, 3.6056e-10, 3.4681e-10,\n",
       "            8.1457e-10, 1.5420e-10], device='cuda:0')},\n",
       "   88: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 5.1601e-06, -1.8504e-06,  2.0993e-06, -1.5238e-06,  1.2281e-06,\n",
       "             8.1251e-08, -6.4059e-06, -5.5198e-07, -2.5944e-06,  1.2186e-06,\n",
       "            -1.9810e-06, -8.3379e-07,  3.0008e-06, -1.1036e-05, -2.0397e-06,\n",
       "             6.0211e-06,  5.1216e-06, -1.6490e-06,  3.7674e-07,  3.2439e-07,\n",
       "            -7.9795e-07, -2.7684e-06,  2.7588e-06, -1.2962e-06,  3.1614e-07,\n",
       "            -2.2934e-06, -1.1267e-06, -5.9811e-07, -5.3867e-07,  1.6039e-06,\n",
       "            -2.2910e-06, -2.4896e-06,  7.9009e-07, -1.5703e-06,  2.9886e-07,\n",
       "            -6.5552e-06, -3.2083e-06, -3.4389e-06, -5.9990e-08,  8.4958e-07,\n",
       "             3.0574e-08, -2.0022e-06, -1.1819e-07, -1.5515e-07,  3.6025e-06,\n",
       "            -1.8281e-06, -2.2901e-06,  2.1632e-07,  1.9085e-06, -1.2369e-06,\n",
       "             1.0864e-06,  5.5832e-07,  1.0292e-06, -1.5778e-06, -9.6317e-07,\n",
       "            -5.2129e-07, -3.0511e-06,  4.6856e-07,  2.5583e-06,  1.4413e-06,\n",
       "            -1.0956e-06,  3.8443e-06, -5.1322e-06, -1.2918e-07,  5.1102e-06,\n",
       "            -3.4690e-06,  6.1179e-07, -1.5116e-06, -6.2678e-07,  3.5562e-06,\n",
       "            -1.0201e-06, -9.8369e-08, -3.0325e-06, -8.4996e-07,  3.8509e-06,\n",
       "            -6.9422e-06,  2.2858e-06, -2.8891e-06, -2.7214e-06, -2.2033e-06,\n",
       "             1.4415e-06,  5.9564e-06,  1.9707e-06,  8.7718e-07,  3.8877e-06,\n",
       "            -4.2340e-06, -1.4362e-06, -1.9092e-06, -2.5270e-06, -3.7773e-07,\n",
       "            -3.7745e-06,  1.7836e-06, -2.5407e-08,  1.7368e-06,  2.6402e-06,\n",
       "             5.0833e-06,  2.5518e-07, -1.3421e-07,  3.6476e-06,  1.2064e-06,\n",
       "            -6.2478e-08, -8.4437e-07, -2.0266e-06, -5.4152e-06,  4.0378e-07,\n",
       "            -8.1047e-07, -5.4197e-07,  2.0109e-06,  2.6296e-06,  2.8446e-06,\n",
       "            -3.4716e-06, -2.9010e-06,  1.4872e-06,  2.0611e-06, -1.9188e-06,\n",
       "            -2.9075e-06, -3.5827e-06,  1.5817e-07, -2.6968e-06,  1.6351e-06,\n",
       "            -2.7951e-06, -6.2735e-08,  2.5418e-06, -3.9514e-06,  2.3017e-06,\n",
       "             1.4381e-07,  3.7346e-06,  2.9023e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.6591e-09, 5.3944e-10, 1.6051e-10, 3.8528e-10, 4.2417e-10, 2.0311e-09,\n",
       "            5.3582e-10, 1.0371e-09, 4.6120e-10, 4.4528e-10, 6.3376e-10, 3.5013e-10,\n",
       "            2.2891e-09, 7.8067e-09, 2.8493e-10, 1.7438e-09, 1.2394e-09, 2.5994e-09,\n",
       "            3.7003e-10, 2.3791e-10, 2.0567e-09, 7.7197e-10, 9.5308e-10, 3.2405e-10,\n",
       "            9.0415e-10, 1.4664e-10, 4.1112e-10, 4.3618e-10, 2.9877e-09, 2.4898e-10,\n",
       "            9.6045e-10, 1.9127e-09, 3.3855e-10, 4.8630e-10, 1.5270e-09, 1.5837e-09,\n",
       "            1.3932e-09, 1.1990e-09, 1.4604e-10, 1.1795e-09, 2.5934e-10, 3.0100e-10,\n",
       "            7.6450e-10, 9.1037e-10, 1.9180e-09, 1.2704e-10, 2.5892e-09, 3.3022e-10,\n",
       "            5.7091e-10, 4.1667e-10, 8.5132e-11, 3.2076e-10, 2.2229e-10, 1.8138e-09,\n",
       "            2.9355e-10, 2.0240e-10, 2.1889e-10, 2.5067e-10, 1.1228e-10, 3.6569e-10,\n",
       "            1.2145e-10, 1.7375e-09, 2.8412e-09, 2.2657e-10, 4.7846e-10, 2.8820e-09,\n",
       "            1.5015e-10, 6.8926e-10, 5.5327e-10, 2.8375e-09, 4.6205e-10, 3.9133e-10,\n",
       "            7.4507e-10, 7.9023e-10, 2.8420e-09, 3.5441e-09, 2.5898e-09, 8.3441e-10,\n",
       "            1.2174e-09, 2.4866e-10, 3.9740e-10, 2.7396e-09, 2.3800e-10, 1.1427e-09,\n",
       "            4.9328e-10, 1.0642e-09, 4.8625e-10, 5.4768e-10, 9.0549e-10, 2.0622e-10,\n",
       "            3.9433e-10, 8.8568e-10, 2.0755e-09, 6.8502e-10, 5.4926e-10, 6.3949e-10,\n",
       "            2.7909e-10, 8.5809e-10, 4.6874e-10, 3.1296e-10, 1.6825e-10, 1.8348e-10,\n",
       "            3.6840e-10, 1.0862e-09, 3.7129e-10, 4.5684e-10, 1.0133e-09, 2.6472e-10,\n",
       "            1.0056e-10, 1.5123e-09, 7.4418e-10, 1.5771e-10, 3.5292e-09, 2.3347e-10,\n",
       "            1.9524e-09, 4.4308e-10, 9.1068e-10, 2.8264e-10, 2.1612e-09, 2.0021e-09,\n",
       "            5.1487e-10, 4.0411e-10, 3.1829e-10, 1.1443e-09, 2.0736e-10, 6.1911e-10,\n",
       "            7.2543e-10, 2.1333e-10], device='cuda:0')},\n",
       "   89: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.1573e-06,  1.9190e-06, -3.6484e-07,  ..., -1.0493e-07,\n",
       "             -2.2009e-06, -1.1197e-06],\n",
       "            [-8.5216e-07, -2.0526e-07, -2.1304e-07,  ...,  9.0106e-07,\n",
       "              6.5007e-07, -8.8682e-07],\n",
       "            [-1.5039e-06,  1.0647e-06,  5.7469e-07,  ..., -6.4237e-07,\n",
       "              1.8652e-06,  2.5381e-06],\n",
       "            ...,\n",
       "            [-1.2241e-05,  5.5379e-06,  2.5612e-06,  ...,  4.7500e-06,\n",
       "              3.0951e-06,  2.2296e-06],\n",
       "            [-8.7492e-08,  3.8446e-06,  8.2926e-06,  ...,  3.4226e-06,\n",
       "              7.3672e-06,  2.7139e-06],\n",
       "            [ 3.1977e-06, -2.6775e-06, -9.3858e-07,  ..., -4.0498e-07,\n",
       "              3.3800e-06,  1.3089e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.4509e-10, 1.1840e-10, 7.7385e-11,  ..., 4.9052e-11, 1.1438e-10,\n",
       "             1.0914e-10],\n",
       "            [5.4891e-11, 1.1768e-11, 3.5007e-11,  ..., 9.6039e-12, 4.5533e-11,\n",
       "             1.3573e-11],\n",
       "            [4.6348e-10, 4.9408e-11, 3.3514e-11,  ..., 1.3908e-10, 9.4994e-11,\n",
       "             6.9220e-11],\n",
       "            ...,\n",
       "            [1.5968e-10, 3.9254e-11, 6.5696e-11,  ..., 4.0811e-11, 8.4277e-11,\n",
       "             4.2185e-11],\n",
       "            [1.0078e-09, 4.0995e-10, 5.1350e-10,  ..., 1.7483e-10, 6.6478e-10,\n",
       "             2.0015e-10],\n",
       "            [9.0116e-11, 6.3719e-11, 5.7261e-11,  ..., 8.3726e-11, 1.0571e-10,\n",
       "             3.7073e-11]], device='cuda:0')},\n",
       "   90: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.1105e-06, -4.6157e-08,  1.4145e-06, -1.5869e-06,  4.5587e-06,\n",
       "            -1.1865e-06,  8.8461e-07,  5.6225e-06,  3.5707e-06,  2.0944e-06,\n",
       "             3.6012e-06,  7.0634e-06, -1.5463e-06,  3.8156e-06,  3.7687e-06,\n",
       "            -1.6270e-06, -9.4802e-07,  1.5006e-06,  3.0676e-06,  3.8173e-06,\n",
       "            -4.1537e-06, -6.4498e-06, -6.7699e-06,  8.7711e-06,  3.5982e-06,\n",
       "             1.3446e-07,  9.7394e-06, -7.3289e-07,  6.1425e-06,  1.1415e-06,\n",
       "            -1.0486e-06, -3.7987e-06,  1.7318e-06, -1.0312e-05, -4.2254e-06,\n",
       "            -2.1790e-06,  1.2484e-06, -7.2899e-06,  4.4963e-06, -1.6244e-05,\n",
       "            -1.2375e-07,  6.5476e-07,  5.3680e-06,  3.5576e-06,  6.9065e-10,\n",
       "             3.9794e-06,  5.5869e-06,  6.2833e-08,  3.4633e-06,  1.4132e-06,\n",
       "             1.3560e-06, -4.3532e-06,  2.8660e-06,  1.7518e-06, -3.4412e-06,\n",
       "             5.4666e-06, -1.0279e-05,  1.3822e-05, -1.1456e-06,  4.4687e-06,\n",
       "            -2.1436e-06, -1.0843e-06, -3.1055e-07,  2.9509e-06, -7.2383e-06,\n",
       "             8.7249e-07, -3.7618e-06, -7.1084e-07,  4.9948e-06, -1.6012e-06,\n",
       "            -7.0622e-06, -1.3104e-06,  6.9840e-06,  4.8001e-06, -9.6009e-06,\n",
       "            -3.6394e-06, -6.4053e-06, -5.1364e-06, -1.5169e-06, -1.4693e-06,\n",
       "            -2.2926e-07, -6.0494e-07, -9.3597e-07,  1.6490e-06,  6.2535e-08,\n",
       "            -1.0605e-06, -2.3810e-06, -3.1904e-06,  2.4874e-06, -3.5253e-06,\n",
       "            -8.0954e-06, -1.0678e-07,  8.4488e-07, -1.2401e-05,  1.2588e-05,\n",
       "             8.7709e-07,  1.3322e-05,  4.7786e-07,  6.8607e-06, -8.2029e-07,\n",
       "             5.2024e-06, -1.2011e-07,  4.4886e-06,  5.9814e-07, -1.2583e-06,\n",
       "            -5.6369e-06,  3.0697e-07, -9.1811e-09, -2.7901e-06, -5.2771e-06,\n",
       "            -3.3166e-06,  1.8818e-06, -4.6840e-06,  8.7347e-06,  1.4745e-06,\n",
       "             2.0669e-06,  2.8081e-06,  2.0223e-06,  1.0991e-06,  8.7013e-06,\n",
       "            -3.0651e-06,  2.9629e-06,  1.3307e-06, -8.7653e-06,  8.5879e-06,\n",
       "             1.0220e-05,  7.7342e-06,  3.7662e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.0087e-10, 5.2949e-11, 4.1285e-10, 3.0093e-10, 3.1904e-10, 1.5557e-09,\n",
       "            3.9782e-10, 7.6988e-10, 1.1112e-09, 4.3593e-10, 3.6836e-10, 3.4132e-10,\n",
       "            5.3221e-11, 8.9425e-10, 3.0643e-10, 5.8840e-10, 5.9857e-10, 3.8104e-10,\n",
       "            3.1369e-10, 5.5540e-10, 8.5720e-10, 9.0268e-10, 1.4209e-09, 3.1728e-10,\n",
       "            5.2005e-10, 2.4361e-10, 5.5837e-10, 2.3825e-10, 1.5989e-09, 2.9348e-11,\n",
       "            4.8987e-10, 1.4808e-10, 8.0073e-10, 5.2875e-10, 1.4366e-09, 5.6043e-10,\n",
       "            1.3387e-10, 1.0515e-09, 1.6647e-09, 2.0434e-09, 7.0888e-10, 1.0409e-09,\n",
       "            1.2629e-09, 5.2835e-10, 1.5144e-13, 1.8179e-09, 8.9381e-10, 3.2906e-10,\n",
       "            1.5188e-09, 2.5520e-10, 5.4006e-11, 1.8045e-09, 2.6027e-10, 9.5024e-10,\n",
       "            8.2765e-10, 3.7104e-10, 1.3812e-09, 1.3722e-09, 2.0282e-10, 5.2522e-10,\n",
       "            8.9711e-10, 8.4708e-10, 1.7595e-11, 3.9363e-10, 7.9194e-10, 3.4600e-11,\n",
       "            2.5456e-10, 6.4634e-10, 8.3715e-10, 4.3621e-10, 5.6540e-10, 4.4681e-11,\n",
       "            1.4205e-09, 5.3608e-10, 1.4720e-09, 7.8587e-10, 1.0327e-09, 4.1294e-10,\n",
       "            2.0719e-10, 1.0756e-09, 8.1067e-10, 6.6766e-10, 5.0288e-10, 6.0967e-10,\n",
       "            3.0791e-11, 7.7034e-10, 8.8862e-10, 4.7635e-10, 6.9760e-10, 1.2554e-09,\n",
       "            2.5423e-09, 1.9213e-09, 1.3852e-09, 1.3806e-09, 3.7347e-10, 9.7413e-10,\n",
       "            1.2115e-09, 8.8966e-10, 1.3007e-09, 4.2842e-10, 1.7431e-10, 1.0196e-09,\n",
       "            7.6261e-10, 2.2546e-11, 3.1093e-10, 1.8489e-09, 1.5924e-10, 1.1631e-10,\n",
       "            1.0655e-10, 4.4160e-10, 1.9535e-09, 6.0378e-11, 4.8961e-10, 8.7576e-10,\n",
       "            3.6288e-10, 1.0163e-10, 9.9803e-11, 7.4815e-10, 8.7448e-10, 1.7687e-09,\n",
       "            1.7594e-09, 8.3071e-10, 2.4846e-10, 8.3433e-10, 4.4963e-10, 1.3643e-10,\n",
       "            9.1600e-10, 2.5027e-10], device='cuda:0')},\n",
       "   91: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.9700e-06, -1.7439e-07,  3.4821e-06,  ..., -1.2865e-06,\n",
       "             -1.7818e-06, -2.0677e-06],\n",
       "            [ 6.0522e-06,  6.1543e-06,  7.6643e-07,  ..., -7.2530e-06,\n",
       "             -3.0387e-06, -2.6023e-06],\n",
       "            [-4.4979e-07,  1.6270e-06,  4.1357e-06,  ..., -3.4532e-06,\n",
       "             -2.4793e-06, -2.2818e-06],\n",
       "            ...,\n",
       "            [ 1.2974e-05, -1.9933e-06, -2.2928e-06,  ..., -2.6724e-06,\n",
       "              6.6412e-06,  8.6163e-06],\n",
       "            [-3.2700e-05, -1.4415e-06, -6.7108e-06,  ...,  1.4733e-05,\n",
       "              4.1560e-06,  5.9641e-06],\n",
       "            [ 5.1429e-06,  4.0022e-06, -1.9834e-06,  ..., -1.8973e-06,\n",
       "              2.2569e-06,  2.9559e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[8.4078e-10, 2.4532e-10, 3.1348e-10,  ..., 1.1288e-09, 3.2536e-10,\n",
       "             4.7502e-10],\n",
       "            [1.3145e-09, 2.8993e-10, 5.3363e-10,  ..., 1.7604e-09, 5.4400e-10,\n",
       "             1.0492e-09],\n",
       "            [1.1969e-09, 2.2359e-10, 1.7830e-10,  ..., 1.2451e-09, 2.7394e-10,\n",
       "             3.7803e-10],\n",
       "            ...,\n",
       "            [1.6182e-09, 4.2366e-10, 3.1074e-10,  ..., 1.7867e-09, 3.3191e-10,\n",
       "             5.3437e-10],\n",
       "            [1.1274e-08, 1.8547e-09, 4.1639e-09,  ..., 7.2755e-09, 1.4489e-09,\n",
       "             1.6856e-09],\n",
       "            [1.3888e-09, 3.8428e-10, 1.1285e-09,  ..., 1.7388e-09, 6.7571e-10,\n",
       "             1.1960e-09]], device='cuda:0')},\n",
       "   92: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.8438e-07,  1.2736e-07,  1.5794e-07,  ...,  4.0989e-08,\n",
       "              1.0878e-07,  4.4812e-08],\n",
       "            [-4.3412e-08, -2.0641e-07,  6.7451e-08,  ...,  2.3796e-07,\n",
       "             -1.0111e-07, -4.1942e-08],\n",
       "            [-2.5854e-07, -1.0865e-06,  8.3561e-07,  ...,  5.2976e-07,\n",
       "              3.9186e-07,  1.3137e-07],\n",
       "            ...,\n",
       "            [ 3.5258e-06,  8.8263e-06, -4.5716e-06,  ...,  2.3751e-06,\n",
       "              7.1308e-06,  5.7033e-06],\n",
       "            [ 8.5529e-06,  1.1888e-05, -5.0108e-06,  ...,  5.0571e-06,\n",
       "              1.4159e-05,  1.0182e-05],\n",
       "            [ 5.4397e-07, -9.4084e-07, -4.4412e-07,  ...,  4.8065e-07,\n",
       "              1.1318e-06,  8.0409e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.4586e-12, 1.6335e-12, 4.4042e-12,  ..., 4.6322e-13, 6.3035e-12,\n",
       "             3.9019e-12],\n",
       "            [2.0255e-12, 1.1806e-12, 4.9289e-12,  ..., 4.1036e-13, 4.3952e-12,\n",
       "             2.5556e-12],\n",
       "            [4.7498e-12, 2.6876e-12, 8.5862e-12,  ..., 9.6571e-13, 9.7130e-12,\n",
       "             5.2127e-12],\n",
       "            ...,\n",
       "            [2.8597e-10, 3.2902e-10, 2.2701e-10,  ..., 2.3972e-10, 1.2460e-09,\n",
       "             4.3905e-10],\n",
       "            [7.1512e-10, 7.2315e-10, 5.7910e-10,  ..., 5.6734e-10, 3.0719e-09,\n",
       "             1.1747e-09],\n",
       "            [7.6822e-12, 1.2877e-11, 8.4036e-12,  ..., 6.0960e-12, 3.6324e-11,\n",
       "             1.5058e-11]], device='cuda:0')},\n",
       "   93: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.7744e-07,  2.0983e-07, -6.6379e-09,  ...,  3.2736e-07,\n",
       "             -9.5964e-08, -1.5856e-07],\n",
       "            [ 4.2501e-07, -9.7019e-08,  2.2579e-07,  ..., -3.0413e-07,\n",
       "             -1.0349e-07, -2.4382e-07],\n",
       "            [ 1.0007e-07,  2.0040e-07,  1.2722e-07,  ..., -5.8266e-08,\n",
       "              1.5011e-07, -5.1208e-07],\n",
       "            ...,\n",
       "            [ 4.4365e-07,  1.1578e-06, -8.3715e-08,  ..., -3.5269e-07,\n",
       "             -2.3183e-06, -8.5342e-07],\n",
       "            [ 1.5680e-06,  1.2148e-07, -1.4927e-07,  ..., -2.5273e-07,\n",
       "             -9.5030e-07,  7.2881e-07],\n",
       "            [-1.9293e-07, -7.2421e-07, -1.4772e-07,  ...,  7.6982e-07,\n",
       "              1.3192e-06,  3.9637e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[6.3796e-13, 3.4295e-13, 4.2579e-13,  ..., 3.0906e-13, 1.2242e-12,\n",
       "             6.1313e-13],\n",
       "            [3.4374e-12, 3.1735e-12, 2.9387e-12,  ..., 3.1271e-12, 8.4679e-12,\n",
       "             3.4553e-12],\n",
       "            [1.6698e-12, 1.1366e-12, 1.1697e-12,  ..., 8.0051e-13, 3.5974e-12,\n",
       "             1.4079e-12],\n",
       "            ...,\n",
       "            [1.5747e-11, 1.3981e-11, 1.1871e-11,  ..., 8.4931e-12, 3.0869e-11,\n",
       "             1.8689e-11],\n",
       "            [1.1991e-11, 7.0326e-12, 5.3542e-12,  ..., 6.0693e-12, 1.1629e-11,\n",
       "             1.1775e-11],\n",
       "            [5.6174e-11, 4.0528e-11, 3.8056e-11,  ..., 3.5122e-11, 1.2167e-10,\n",
       "             6.3819e-11]], device='cuda:0')},\n",
       "   94: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 6.8675e-07, -3.0643e-07,  8.0555e-06,  ..., -4.8851e-06,\n",
       "              3.9598e-07, -3.2098e-08],\n",
       "            [ 3.2739e-06, -2.3842e-06, -6.2689e-06,  ...,  1.6985e-05,\n",
       "             -4.3786e-06, -2.8556e-06],\n",
       "            [ 4.8444e-07,  1.8440e-06, -1.1462e-05,  ...,  2.0213e-05,\n",
       "             -5.3247e-06, -6.0826e-07],\n",
       "            ...,\n",
       "            [ 3.4570e-07,  7.6674e-07, -5.2239e-07,  ...,  8.2629e-06,\n",
       "             -4.8220e-06,  2.5014e-06],\n",
       "            [ 1.7250e-06, -6.2858e-06, -5.2606e-06,  ...,  1.3637e-05,\n",
       "             -1.3793e-06, -3.2135e-06],\n",
       "            [ 1.7209e-07, -9.0601e-07, -7.8458e-06,  ...,  1.9587e-05,\n",
       "             -2.5458e-06, -5.5120e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.3083e-10, 7.0276e-10, 1.1697e-09,  ..., 2.3236e-09, 5.8605e-10,\n",
       "             1.8115e-09],\n",
       "            [1.3851e-10, 9.4769e-10, 1.0773e-09,  ..., 2.0391e-09, 5.2233e-10,\n",
       "             1.9152e-09],\n",
       "            [2.3827e-10, 1.1964e-09, 1.2323e-09,  ..., 2.3346e-09, 4.4782e-10,\n",
       "             2.4719e-09],\n",
       "            ...,\n",
       "            [1.9147e-10, 6.1074e-10, 1.1348e-09,  ..., 1.7339e-09, 2.6155e-10,\n",
       "             1.6692e-09],\n",
       "            [2.9578e-10, 8.5781e-10, 1.4029e-09,  ..., 2.4665e-09, 4.1149e-10,\n",
       "             2.0302e-09],\n",
       "            [4.0389e-10, 1.7172e-09, 1.4165e-09,  ..., 1.9918e-09, 3.9239e-10,\n",
       "             3.1089e-09]], device='cuda:0')},\n",
       "   95: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-2.3765e-06,  1.0656e-05,  2.8161e-06,  5.8191e-06, -4.2929e-06,\n",
       "            -7.6526e-06,  5.1514e-07,  2.0763e-05, -6.0513e-06, -3.7974e-06,\n",
       "            -1.0086e-05,  4.1075e-06, -7.1670e-06, -5.3359e-06, -3.4414e-06,\n",
       "            -1.1167e-05, -2.1367e-06, -5.7402e-08,  4.1470e-06, -1.1357e-06,\n",
       "            -9.2369e-06, -2.2544e-06,  1.6228e-05,  9.0980e-06,  2.0790e-05,\n",
       "             1.3120e-05, -1.1845e-07, -1.5168e-05, -9.0049e-06,  1.1763e-05,\n",
       "             1.0608e-05, -1.0632e-05, -2.1494e-05,  8.2843e-06, -1.1385e-05,\n",
       "            -1.4998e-05,  5.8336e-06,  7.3326e-06, -1.6162e-05,  2.3305e-05,\n",
       "             1.9112e-05, -3.9913e-06, -1.6901e-05,  3.0469e-06,  2.7591e-05,\n",
       "            -3.2948e-06,  1.4155e-05, -1.4632e-06,  1.4030e-05, -4.6102e-06,\n",
       "            -2.0690e-05, -8.2693e-06, -2.0230e-05,  1.0063e-05,  1.0852e-05,\n",
       "             3.5271e-07, -1.4235e-05, -4.2765e-07,  2.3864e-06,  1.0394e-05,\n",
       "             1.2843e-05, -6.6545e-06,  1.3991e-06,  1.5409e-06, -2.9899e-07,\n",
       "            -1.5831e-06,  1.0593e-05,  3.8749e-08, -1.0503e-05,  1.0867e-05,\n",
       "             3.0070e-06,  1.7633e-05,  3.9682e-06, -3.9302e-06, -4.0856e-08,\n",
       "            -1.2109e-06, -2.2896e-06, -2.1921e-05, -1.0422e-05, -1.0362e-05,\n",
       "             1.3143e-05, -1.8251e-06,  1.7119e-05, -4.0557e-06,  3.3738e-06,\n",
       "             1.0180e-06, -1.1762e-05, -3.0273e-06,  8.7398e-06, -6.3526e-06,\n",
       "             1.2185e-05,  7.3791e-06, -1.4045e-05,  8.9580e-07, -1.5290e-05,\n",
       "            -6.8933e-06,  4.1871e-06,  8.7432e-07, -3.3426e-06,  2.1687e-06,\n",
       "            -8.2286e-06, -3.8571e-06,  5.7064e-06, -1.4110e-05, -1.9114e-05,\n",
       "             4.2164e-06,  2.0681e-05,  1.2973e-06, -2.4586e-06, -1.7755e-05,\n",
       "             1.9089e-06,  3.6211e-05,  1.4660e-05,  1.9120e-05,  1.2637e-05,\n",
       "            -7.0614e-06,  2.8750e-06, -9.9336e-06, -2.0848e-05,  1.9132e-05,\n",
       "             1.7755e-06, -8.7355e-06, -1.1574e-06,  2.1634e-05,  5.6632e-06,\n",
       "             1.6300e-06,  2.0847e-05,  8.0621e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.8080e-09, 4.1232e-09, 4.7481e-09, 2.3776e-09, 8.6246e-09, 3.1884e-09,\n",
       "            8.0677e-09, 1.0682e-08, 6.1364e-09, 6.1471e-09, 1.3070e-08, 2.7842e-09,\n",
       "            4.9870e-09, 4.4738e-09, 7.2872e-09, 3.2344e-09, 3.4412e-09, 1.1915e-08,\n",
       "            2.8757e-09, 5.0312e-09, 5.5559e-09, 3.4133e-09, 4.2461e-09, 3.7552e-09,\n",
       "            8.0227e-09, 7.4275e-09, 5.7117e-09, 7.7810e-09, 3.3770e-09, 5.7519e-09,\n",
       "            2.7958e-09, 4.3306e-09, 6.8072e-09, 3.2503e-09, 3.4122e-09, 5.7454e-09,\n",
       "            4.3710e-09, 5.0324e-09, 9.5180e-09, 6.2150e-09, 3.5605e-09, 2.4038e-09,\n",
       "            5.1617e-09, 6.3950e-09, 5.3457e-09, 8.2101e-09, 1.1376e-08, 7.5021e-09,\n",
       "            8.4079e-09, 5.4733e-09, 8.3924e-09, 5.6203e-09, 9.5246e-09, 6.7438e-09,\n",
       "            2.4165e-09, 8.3287e-09, 4.7237e-09, 5.4184e-09, 3.6551e-09, 4.6505e-09,\n",
       "            1.2292e-08, 2.7891e-09, 3.6625e-09, 4.4976e-09, 4.6424e-09, 6.2477e-09,\n",
       "            6.4634e-09, 5.9859e-09, 4.3632e-09, 6.0722e-09, 2.2226e-09, 6.8441e-09,\n",
       "            7.2318e-09, 3.0527e-09, 3.6357e-09, 7.6770e-09, 3.1696e-09, 5.2137e-09,\n",
       "            6.0525e-09, 8.7437e-09, 7.9073e-09, 6.2927e-09, 7.6298e-09, 5.6178e-09,\n",
       "            5.2602e-09, 3.6493e-09, 8.1372e-09, 4.7791e-09, 5.3866e-09, 6.5510e-09,\n",
       "            4.6755e-09, 4.3687e-09, 3.8460e-09, 7.1051e-09, 5.8911e-09, 3.0534e-09,\n",
       "            4.7448e-09, 5.0665e-09, 2.9174e-09, 3.8559e-09, 8.5805e-09, 5.6732e-09,\n",
       "            6.5683e-09, 6.6508e-09, 6.8547e-09, 7.8716e-09, 6.4862e-09, 1.6880e-08,\n",
       "            5.0923e-09, 6.5282e-09, 1.0912e-08, 6.3397e-09, 6.1062e-09, 3.8329e-09,\n",
       "            6.8001e-09, 6.4816e-09, 2.8883e-09, 2.9116e-09, 4.9864e-09, 3.8618e-09,\n",
       "            7.4149e-09, 9.3937e-09, 5.0305e-09, 8.8059e-09, 5.6974e-09, 3.3833e-09,\n",
       "            4.1002e-09, 5.2448e-09], device='cuda:0')},\n",
       "   96: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 5.5398e-06, -9.6896e-07,  3.0827e-06, -1.9900e-06,  9.0392e-06,\n",
       "             3.2291e-06, -1.3576e-06, -4.4058e-06,  3.2596e-07, -1.5140e-07,\n",
       "            -1.1494e-06, -6.0690e-06, -1.4507e-06,  3.6956e-06, -5.0429e-06,\n",
       "             2.1993e-06, -3.5812e-06, -9.6710e-07, -8.3718e-07,  2.6780e-06,\n",
       "            -2.1652e-06,  1.1146e-06, -1.3142e-06, -2.0607e-06, -1.7484e-06,\n",
       "            -7.1906e-07, -1.9597e-06,  9.9763e-07,  5.2785e-06, -2.4969e-07,\n",
       "             1.4505e-06,  2.2699e-05,  3.2970e-06,  2.5160e-06, -5.5625e-06,\n",
       "            -8.8017e-08,  2.1871e-06,  2.6803e-06, -1.1714e-06,  1.1678e-06,\n",
       "             4.9537e-07, -3.5108e-06,  2.5458e-06,  5.8968e-06, -2.4688e-06,\n",
       "            -2.0859e-06,  2.9909e-07,  1.6243e-06, -1.1008e-06,  2.0515e-06,\n",
       "            -9.4027e-07, -5.0339e-06,  1.7695e-06, -5.3513e-06,  2.1677e-07,\n",
       "            -7.0823e-06,  1.4706e-05, -1.1451e-07,  8.9735e-07,  2.2768e-06,\n",
       "            -2.4340e-06, -9.1984e-07,  2.2122e-06, -1.8119e-05,  1.4162e-05,\n",
       "             7.1214e-08, -2.6963e-07,  3.0035e-06, -6.2256e-07,  4.1643e-06,\n",
       "             5.9719e-06,  1.1877e-06, -2.8550e-06,  2.2672e-08, -4.3019e-06,\n",
       "            -1.3732e-06, -1.9512e-06, -1.6757e-06, -4.9361e-06, -9.8542e-07,\n",
       "             1.3753e-06, -2.2522e-06,  1.3129e-06, -6.7050e-06, -5.0584e-06,\n",
       "             4.7654e-08,  1.3448e-06,  3.4259e-06, -6.8009e-06, -2.1925e-06,\n",
       "            -3.5069e-07, -2.1088e-06,  5.8758e-06,  9.0050e-07,  2.5238e-06,\n",
       "             1.0533e-06, -7.2932e-07,  5.3904e-07, -3.5250e-07, -8.1976e-06,\n",
       "             4.5163e-07,  2.1976e-06,  4.8512e-07,  9.1166e-07,  2.0818e-06,\n",
       "             1.5418e-06, -3.9780e-05, -4.6584e-06,  2.5665e-06,  3.3371e-06,\n",
       "             2.1702e-06, -8.9331e-07,  9.1327e-06,  9.6226e-07, -9.8040e-08,\n",
       "             4.9608e-07, -3.9743e-06, -5.3368e-06,  1.0698e-06, -1.5146e-05,\n",
       "             2.7430e-06,  2.0684e-06,  3.6649e-06,  3.9929e-06,  7.2055e-06,\n",
       "            -2.0890e-06,  5.5462e-07,  3.8843e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.8541e-10, 9.9620e-11, 1.0066e-10, 1.3718e-10, 3.4736e-10, 5.9956e-11,\n",
       "            5.5160e-11, 6.7060e-10, 5.9193e-11, 1.1495e-10, 1.0711e-10, 2.3989e-10,\n",
       "            3.0270e-10, 4.4610e-10, 3.1249e-10, 7.5339e-11, 3.0483e-10, 3.5163e-10,\n",
       "            2.7529e-10, 3.3996e-10, 7.5731e-10, 1.4978e-10, 1.2764e-10, 2.3572e-11,\n",
       "            1.4921e-10, 3.0495e-10, 9.9501e-11, 7.6072e-11, 6.0023e-11, 1.2516e-10,\n",
       "            2.1520e-10, 5.3188e-09, 1.7161e-10, 4.8161e-11, 1.0214e-10, 1.1506e-10,\n",
       "            2.3486e-10, 2.9760e-10, 4.8544e-11, 1.6813e-10, 7.5237e-11, 1.3844e-10,\n",
       "            1.2741e-10, 1.7997e-10, 5.9745e-10, 4.1571e-10, 2.1053e-10, 1.3474e-10,\n",
       "            4.0985e-11, 1.6383e-10, 3.7452e-10, 4.3118e-10, 1.6267e-10, 4.7794e-10,\n",
       "            3.7734e-10, 9.3065e-11, 9.0809e-10, 6.5853e-11, 8.9747e-11, 1.6756e-10,\n",
       "            7.5618e-11, 3.7520e-10, 1.1841e-10, 1.4696e-09, 1.6932e-09, 6.5022e-11,\n",
       "            5.5285e-10, 7.7336e-11, 3.3448e-10, 1.1183e-10, 5.7217e-10, 1.0452e-10,\n",
       "            1.2647e-10, 1.1276e-10, 3.5963e-10, 7.6258e-11, 4.7383e-11, 1.1413e-10,\n",
       "            2.4198e-10, 9.6392e-11, 1.5488e-10, 4.3939e-10, 1.5228e-10, 2.4383e-10,\n",
       "            2.0055e-10, 6.7115e-11, 6.1539e-11, 4.6661e-10, 1.1093e-09, 1.7993e-10,\n",
       "            6.4808e-11, 1.5590e-10, 1.9397e-09, 8.4257e-11, 6.0125e-11, 2.6122e-10,\n",
       "            1.3257e-10, 1.7747e-10, 5.9246e-10, 9.9252e-10, 6.3641e-11, 5.1205e-11,\n",
       "            8.7856e-11, 1.4096e-10, 6.3279e-10, 3.7903e-11, 6.5185e-08, 1.1124e-10,\n",
       "            2.5172e-10, 1.4157e-10, 9.8766e-11, 2.9039e-10, 1.5433e-09, 1.6868e-10,\n",
       "            3.7110e-10, 1.8176e-10, 1.4367e-10, 1.0579e-10, 1.2098e-10, 5.0698e-10,\n",
       "            5.0110e-10, 1.2581e-10, 2.3011e-10, 3.7583e-10, 7.0371e-10, 1.2589e-10,\n",
       "            2.3497e-10, 1.0286e-10], device='cuda:0')},\n",
       "   97: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.3212e-07, -2.0588e-07,  2.1669e-08,  ..., -2.8689e-07,\n",
       "              9.1866e-08, -2.6684e-07],\n",
       "            [ 2.5908e-07,  5.9946e-07,  1.1517e-06,  ..., -5.6776e-07,\n",
       "              3.5054e-07,  5.8891e-08],\n",
       "            [-3.7786e-07, -3.7313e-07, -1.8978e-07,  ..., -3.7301e-07,\n",
       "              8.6749e-08, -2.3617e-07],\n",
       "            ...,\n",
       "            [ 9.1321e-08, -2.3913e-08, -8.6000e-08,  ..., -1.3276e-08,\n",
       "              6.6104e-08,  8.6693e-08],\n",
       "            [-8.4717e-08,  3.4059e-07,  5.2043e-07,  ..., -6.1078e-07,\n",
       "             -9.9639e-08, -1.2290e-07],\n",
       "            [-2.0993e-07, -2.9974e-07, -4.5185e-07,  ..., -1.5505e-09,\n",
       "             -1.9878e-07, -1.5996e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.5363e-12, 6.3126e-12, 4.2867e-12,  ..., 4.6358e-12, 1.2548e-12,\n",
       "             9.7527e-13],\n",
       "            [1.7882e-12, 1.9708e-12, 2.0249e-12,  ..., 2.0936e-12, 8.0405e-13,\n",
       "             5.0819e-13],\n",
       "            [4.8214e-12, 3.5768e-12, 3.5531e-12,  ..., 2.0347e-12, 1.1845e-12,\n",
       "             6.6338e-13],\n",
       "            ...,\n",
       "            [1.1308e-12, 1.2022e-12, 1.2272e-12,  ..., 4.6878e-13, 1.9543e-13,\n",
       "             1.3746e-13],\n",
       "            [4.4692e-12, 4.2375e-12, 4.1003e-12,  ..., 2.7023e-12, 9.1699e-13,\n",
       "             7.7188e-13],\n",
       "            [4.2328e-12, 3.7478e-12, 2.6132e-12,  ..., 3.1386e-12, 1.0883e-12,\n",
       "             6.9658e-13]], device='cuda:0')},\n",
       "   98: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.9622e-07,  8.7830e-07,  9.5954e-08,  ..., -1.5022e-07,\n",
       "             -1.0005e-06, -1.1554e-06],\n",
       "            [ 4.7679e-07, -6.0033e-07,  3.9145e-08,  ...,  3.4760e-08,\n",
       "             -3.1335e-07,  2.0453e-07],\n",
       "            [-2.2917e-07, -3.5091e-07,  2.8825e-07,  ...,  2.7025e-07,\n",
       "             -9.7370e-08, -1.2943e-07],\n",
       "            ...,\n",
       "            [-9.0702e-08,  9.9982e-08, -1.0080e-07,  ...,  2.1574e-07,\n",
       "              4.9061e-07, -9.6707e-08],\n",
       "            [-2.9003e-07,  2.0958e-07, -4.4102e-07,  ..., -2.5041e-07,\n",
       "             -1.4462e-06,  7.4145e-08],\n",
       "            [-5.3507e-07, -2.4844e-07,  5.6063e-08,  ...,  2.2346e-07,\n",
       "             -1.5106e-08, -1.2432e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.2750e-11, 5.0850e-12, 6.6249e-12,  ..., 1.8819e-12, 1.3173e-11,\n",
       "             1.2672e-11],\n",
       "            [2.3434e-12, 3.3820e-12, 1.7683e-12,  ..., 1.1723e-12, 3.7645e-12,\n",
       "             2.9919e-12],\n",
       "            [5.4779e-12, 2.0756e-12, 3.4764e-12,  ..., 1.1931e-12, 4.3910e-12,\n",
       "             5.4004e-12],\n",
       "            ...,\n",
       "            [1.0131e-12, 6.1062e-13, 4.7714e-13,  ..., 7.3730e-13, 1.0920e-12,\n",
       "             7.9561e-13],\n",
       "            [4.7269e-12, 2.8255e-12, 2.2010e-12,  ..., 1.9342e-12, 6.5134e-12,\n",
       "             5.0468e-12],\n",
       "            [6.0218e-12, 2.4286e-12, 2.8490e-12,  ..., 1.3715e-12, 6.5619e-12,\n",
       "             6.7467e-12]], device='cuda:0')},\n",
       "   99: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.8396e-06,  1.7274e-06,  1.1346e-06,  ...,  1.2662e-06,\n",
       "             -8.1503e-07,  7.2667e-07],\n",
       "            [-5.3470e-07, -5.4226e-07, -1.1826e-06,  ..., -6.8198e-07,\n",
       "             -4.7861e-07,  4.9095e-07],\n",
       "            [ 2.5468e-06,  2.4365e-06,  2.7387e-06,  ...,  6.7166e-07,\n",
       "              1.6881e-07,  1.4890e-06],\n",
       "            ...,\n",
       "            [-1.4233e-06, -1.5959e-06, -1.5409e-06,  ..., -3.0367e-07,\n",
       "              4.4500e-08, -7.6770e-07],\n",
       "            [ 1.7972e-06,  1.0568e-06,  2.9575e-06,  ...,  6.2804e-07,\n",
       "              8.8130e-07,  7.7313e-07],\n",
       "            [-2.4195e-07,  1.9386e-07,  2.0702e-07,  ...,  7.4043e-09,\n",
       "             -2.2091e-07, -1.4007e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.7341e-11, 4.2195e-11, 3.2011e-11,  ..., 2.2578e-11, 8.8285e-12,\n",
       "             7.6565e-12],\n",
       "            [1.5794e-11, 1.2921e-11, 2.2366e-11,  ..., 1.5454e-11, 5.1362e-12,\n",
       "             4.1563e-12],\n",
       "            [3.0811e-11, 1.8893e-11, 2.0546e-11,  ..., 9.6979e-12, 1.5796e-12,\n",
       "             5.2449e-12],\n",
       "            ...,\n",
       "            [2.3066e-11, 1.6061e-11, 1.2635e-11,  ..., 1.5133e-11, 4.1198e-12,\n",
       "             4.8309e-12],\n",
       "            [2.0756e-11, 2.4785e-11, 5.1915e-11,  ..., 3.6334e-11, 1.5150e-11,\n",
       "             1.2048e-11],\n",
       "            [1.5887e-11, 1.3934e-11, 2.4562e-11,  ..., 1.3173e-11, 5.4867e-12,\n",
       "             4.4924e-12]], device='cuda:0')},\n",
       "   100: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-3.1151e-06, -2.1213e-06, -5.4524e-07,  ..., -8.6366e-07,\n",
       "             -6.6455e-07,  1.7383e-06],\n",
       "            [-8.0826e-07,  2.2927e-07,  1.4981e-06,  ..., -1.2744e-06,\n",
       "              1.5507e-06, -2.8607e-08],\n",
       "            [ 2.4224e-06, -1.1866e-06, -6.5666e-07,  ..., -3.6995e-07,\n",
       "             -5.2694e-07,  1.9630e-06],\n",
       "            ...,\n",
       "            [-1.5798e-06,  8.7878e-07,  1.3929e-06,  ...,  7.1257e-07,\n",
       "              7.8344e-08, -1.2625e-06],\n",
       "            [ 3.3465e-06, -4.1120e-07,  6.8916e-07,  ..., -4.4547e-08,\n",
       "              1.1006e-06, -1.3328e-06],\n",
       "            [ 3.2478e-07,  2.3756e-09, -4.4367e-07,  ...,  4.4822e-07,\n",
       "              2.0924e-08,  1.5359e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.8571e-11, 1.0101e-10, 5.1760e-11,  ..., 2.5088e-11, 9.5143e-11,\n",
       "             1.2604e-10],\n",
       "            [1.4789e-11, 1.5401e-11, 6.6669e-12,  ..., 1.2328e-11, 2.5417e-11,\n",
       "             1.3636e-11],\n",
       "            [3.7956e-11, 9.3568e-12, 1.3855e-11,  ..., 8.3565e-12, 3.5304e-11,\n",
       "             2.3447e-11],\n",
       "            ...,\n",
       "            [2.7222e-11, 8.3852e-12, 1.0212e-11,  ..., 5.1333e-12, 2.7717e-11,\n",
       "             2.2603e-11],\n",
       "            [4.0100e-11, 4.1652e-11, 2.4055e-11,  ..., 2.5779e-11, 5.3101e-11,\n",
       "             4.3351e-11],\n",
       "            [1.4327e-11, 1.9996e-11, 8.7113e-12,  ..., 1.3772e-11, 2.5794e-11,\n",
       "             1.1986e-11]], device='cuda:0')},\n",
       "   101: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 6.0348e-06,  2.4621e-06,  6.0660e-06,  ...,  1.9530e-06,\n",
       "              1.3080e-06,  2.7746e-06],\n",
       "            [ 1.5462e-06, -1.7361e-06, -2.7892e-06,  ...,  1.9125e-06,\n",
       "             -2.5350e-06,  2.3970e-06],\n",
       "            [-5.6129e-06, -6.9052e-06, -5.4328e-06,  ..., -1.8943e-06,\n",
       "              1.8395e-06, -2.6054e-06],\n",
       "            ...,\n",
       "            [ 1.8786e-06,  2.1938e-06,  2.5740e-06,  ..., -6.6125e-07,\n",
       "              6.3976e-07,  1.4846e-06],\n",
       "            [ 8.8650e-06,  1.1288e-05,  1.7055e-05,  ..., -2.5062e-06,\n",
       "              1.3875e-06,  3.7701e-06],\n",
       "            [ 1.2670e-06,  5.4701e-07,  1.2365e-06,  ...,  1.1609e-06,\n",
       "              3.8793e-07,  1.4884e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.9873e-10, 5.3962e-10, 5.3040e-10,  ..., 7.7733e-10, 2.3691e-10,\n",
       "             2.8915e-10],\n",
       "            [6.4774e-10, 5.0549e-10, 4.3158e-10,  ..., 3.8141e-10, 1.1103e-10,\n",
       "             8.6672e-11],\n",
       "            [3.3411e-10, 2.3937e-10, 1.9044e-10,  ..., 2.8554e-10, 1.2425e-10,\n",
       "             7.2744e-11],\n",
       "            ...,\n",
       "            [2.5901e-10, 8.5186e-11, 6.5739e-11,  ..., 2.5800e-10, 6.2971e-11,\n",
       "             6.9910e-11],\n",
       "            [7.4397e-10, 6.6956e-10, 5.1785e-10,  ..., 6.9870e-10, 1.8663e-10,\n",
       "             1.8113e-10],\n",
       "            [1.1354e-10, 7.8755e-11, 6.8819e-11,  ..., 9.7346e-11, 4.1525e-11,\n",
       "             2.2050e-11]], device='cuda:0')},\n",
       "   102: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.2755e-06, -4.5786e-07,  1.4911e-06,  ..., -6.0934e-07,\n",
       "              2.5927e-06, -2.5742e-07],\n",
       "            [ 3.5008e-06, -2.3570e-06, -2.4344e-06,  ...,  5.6941e-07,\n",
       "              4.9269e-06,  1.1122e-06],\n",
       "            [-1.7994e-06, -6.6600e-07, -6.0302e-07,  ...,  2.0083e-06,\n",
       "              3.1327e-06,  1.0797e-06],\n",
       "            ...,\n",
       "            [ 1.8399e-06,  3.1231e-07, -4.4291e-07,  ...,  1.2431e-06,\n",
       "              3.8893e-07, -1.9037e-07],\n",
       "            [ 2.1287e-06,  1.4839e-07,  1.1497e-06,  ..., -2.8014e-06,\n",
       "             -2.8015e-06, -9.3831e-07],\n",
       "            [-2.4147e-07, -1.3325e-06,  9.0318e-08,  ..., -9.5247e-07,\n",
       "              1.3150e-07,  1.6478e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.6466e-10, 2.0251e-10, 1.5252e-10,  ..., 9.2251e-11, 3.5837e-10,\n",
       "             1.0363e-10],\n",
       "            [1.8415e-10, 6.1756e-11, 6.8133e-11,  ..., 3.1467e-11, 2.7311e-10,\n",
       "             3.8459e-11],\n",
       "            [9.0744e-11, 6.3537e-11, 4.2633e-11,  ..., 2.8675e-11, 1.0470e-10,\n",
       "             2.9271e-11],\n",
       "            ...,\n",
       "            [4.2604e-11, 4.6635e-11, 3.7154e-11,  ..., 9.6729e-12, 6.5781e-11,\n",
       "             1.5803e-11],\n",
       "            [2.3124e-10, 1.8931e-10, 1.5897e-10,  ..., 6.0720e-11, 3.1163e-10,\n",
       "             1.0184e-10],\n",
       "            [3.0642e-11, 3.5342e-11, 2.2263e-11,  ..., 1.1684e-11, 5.9286e-11,\n",
       "             1.4205e-11]], device='cuda:0')},\n",
       "   103: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 7.7429e-06,  1.9041e-06, -3.1945e-06,  5.8613e-06, -2.3150e-06,\n",
       "             1.4617e-06,  1.2891e-06, -5.6589e-06, -3.2696e-06,  5.3530e-06,\n",
       "             8.5696e-07,  2.6433e-06, -1.4702e-06, -1.7338e-06,  5.5572e-06,\n",
       "            -5.4707e-07,  1.4109e-07, -4.0497e-06,  1.6934e-06, -1.5811e-06,\n",
       "             8.4742e-06,  2.4187e-06,  1.5589e-07, -4.3260e-06,  6.1246e-06,\n",
       "             1.0526e-05, -1.3914e-06,  2.4462e-06, -3.4019e-07,  7.0825e-06,\n",
       "             1.2677e-05,  1.0361e-05, -4.6875e-06,  1.7054e-06,  3.5723e-06,\n",
       "            -7.8456e-06, -1.3388e-06,  8.9635e-07,  1.9693e-06, -1.5672e-06,\n",
       "            -1.1862e-06, -1.6459e-05, -4.0021e-08, -6.4417e-07,  4.9173e-07,\n",
       "             1.0409e-06, -3.7822e-06, -1.2094e-05,  1.0236e-05,  8.1707e-06,\n",
       "            -1.9643e-06, -2.5821e-06, -1.1760e-06,  7.4522e-06,  2.8578e-06,\n",
       "             7.8542e-07,  3.9999e-07, -1.1970e-06,  2.5008e-06,  1.0177e-05,\n",
       "            -5.3742e-06,  2.2148e-06,  1.2573e-05,  3.5957e-07, -1.4993e-06,\n",
       "            -2.5969e-06,  1.1743e-06,  3.0516e-06, -4.7072e-06,  3.1155e-06,\n",
       "             2.6922e-06,  9.3900e-07, -3.0316e-06, -5.6102e-06, -3.1390e-06,\n",
       "            -1.7885e-06,  1.1958e-06,  2.0114e-06,  3.5823e-06, -1.5159e-06,\n",
       "            -1.7829e-06,  5.0000e-06, -1.5495e-06,  1.3667e-05,  7.2130e-06,\n",
       "            -3.3562e-06,  4.0001e-06, -5.5261e-06,  4.0253e-06,  9.8134e-06,\n",
       "             2.8996e-06, -2.7343e-06,  4.1341e-06, -1.7560e-07, -2.1821e-06,\n",
       "            -1.1105e-06, -7.1344e-06, -3.8468e-07,  1.2783e-05,  5.9797e-06,\n",
       "             1.3403e-06, -9.7137e-06, -1.5712e-06, -5.7574e-06,  1.5548e-06,\n",
       "            -1.2671e-06, -1.3639e-05, -5.7415e-06, -6.6359e-06, -2.1921e-07,\n",
       "             1.3363e-06, -9.1788e-07,  5.5009e-06,  2.0683e-07, -7.2127e-07,\n",
       "             4.5408e-06, -2.8789e-06,  1.8792e-06, -1.3836e-06,  2.6526e-06,\n",
       "             8.2477e-07,  5.3529e-06,  3.7842e-06,  9.5021e-06, -3.4021e-06,\n",
       "             9.4993e-06, -4.3129e-06, -1.6651e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.1969e-10, 4.8692e-10, 1.6828e-10, 2.6141e-10, 3.0233e-10, 7.0039e-10,\n",
       "            2.6132e-10, 5.4907e-10, 1.9558e-10, 2.2635e-10, 4.4814e-10, 2.7719e-10,\n",
       "            7.3206e-10, 3.8754e-10, 8.7693e-10, 1.0234e-10, 2.7443e-10, 1.8100e-09,\n",
       "            1.1266e-09, 2.7935e-10, 5.4759e-10, 3.5314e-10, 2.4033e-10, 6.8730e-10,\n",
       "            1.2430e-10, 7.3296e-10, 1.5169e-10, 4.9916e-10, 2.8798e-10, 6.0776e-10,\n",
       "            1.0451e-09, 1.1070e-09, 5.9966e-10, 4.4869e-10, 6.2910e-10, 4.2564e-10,\n",
       "            2.5793e-10, 2.9500e-10, 3.3935e-10, 6.9564e-10, 1.7005e-10, 1.6284e-09,\n",
       "            1.6431e-10, 8.6937e-11, 2.3421e-10, 2.9994e-10, 1.8973e-10, 3.0588e-10,\n",
       "            6.3313e-10, 3.3443e-10, 7.7726e-10, 1.0253e-10, 1.6386e-10, 1.4870e-09,\n",
       "            4.2685e-10, 3.7758e-10, 3.4017e-10, 4.5599e-10, 2.2807e-10, 7.0111e-10,\n",
       "            5.8025e-10, 4.0055e-10, 2.8016e-09, 1.1119e-10, 2.5519e-09, 4.9962e-10,\n",
       "            1.2031e-10, 8.9600e-10, 3.9830e-10, 2.1808e-10, 5.7423e-10, 2.1634e-10,\n",
       "            9.1420e-11, 5.5680e-10, 2.3420e-10, 1.0510e-10, 5.5134e-10, 1.2096e-10,\n",
       "            5.8719e-10, 1.3628e-10, 1.9545e-10, 7.5075e-10, 2.2028e-10, 8.9471e-10,\n",
       "            8.9608e-10, 1.9229e-10, 2.4022e-10, 3.8444e-10, 1.2832e-09, 1.4536e-09,\n",
       "            6.4315e-10, 2.3261e-10, 4.2391e-10, 2.5698e-10, 1.4435e-10, 1.5782e-10,\n",
       "            6.9989e-10, 9.5928e-10, 4.6455e-09, 5.1302e-10, 5.7292e-10, 6.1237e-10,\n",
       "            3.9680e-10, 9.7232e-11, 3.0350e-10, 1.0690e-10, 2.6071e-08, 3.4049e-10,\n",
       "            6.7418e-10, 2.0736e-10, 2.4627e-10, 1.7638e-10, 1.3200e-09, 8.3575e-10,\n",
       "            1.1987e-10, 3.7368e-10, 1.1684e-09, 2.8863e-10, 1.3124e-09, 2.0683e-10,\n",
       "            3.6452e-10, 2.6882e-10, 5.9821e-10, 2.3375e-09, 4.5946e-10, 4.5002e-10,\n",
       "            2.0761e-10, 1.5410e-10], device='cuda:0')},\n",
       "   104: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.9963e-06, -5.8550e-07, -1.3031e-06,  ..., -5.7716e-07,\n",
       "             -2.1003e-08,  4.4115e-07],\n",
       "            [-3.5255e-07,  7.0891e-08, -9.6700e-08,  ...,  1.5686e-07,\n",
       "              1.1967e-07,  8.9078e-08],\n",
       "            [-1.3165e-06, -1.5395e-07, -7.4084e-07,  ...,  9.2801e-09,\n",
       "              9.8679e-08, -2.7423e-08],\n",
       "            ...,\n",
       "            [ 7.4226e-08,  1.5169e-07,  1.7229e-07,  ...,  2.8299e-07,\n",
       "              7.3927e-08,  4.9019e-07],\n",
       "            [ 1.7992e-06,  4.0824e-07,  1.2532e-06,  ...,  9.5975e-07,\n",
       "              1.7227e-07, -1.0058e-06],\n",
       "            [ 3.4280e-07,  6.0077e-07,  2.3285e-07,  ...,  4.8268e-08,\n",
       "              5.6209e-08, -2.6829e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0940e-11, 3.4575e-12, 4.2260e-12,  ..., 1.6913e-12, 2.5815e-14,\n",
       "             4.3174e-12],\n",
       "            [2.7068e-12, 4.7353e-12, 7.7122e-13,  ..., 8.9379e-13, 2.0380e-14,\n",
       "             1.7362e-12],\n",
       "            [2.8408e-11, 9.4788e-12, 1.2269e-11,  ..., 4.6516e-12, 2.1100e-14,\n",
       "             2.8422e-12],\n",
       "            ...,\n",
       "            [8.4635e-13, 8.3200e-13, 2.3558e-13,  ..., 1.0529e-12, 3.1066e-14,\n",
       "             5.8085e-12],\n",
       "            [5.5201e-11, 1.8979e-11, 2.2965e-11,  ..., 7.1470e-12, 5.8213e-14,\n",
       "             1.2002e-11],\n",
       "            [1.5299e-11, 8.5615e-12, 3.9836e-12,  ..., 3.3795e-12, 1.4066e-14,\n",
       "             2.4784e-12]], device='cuda:0')},\n",
       "   105: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.4825e-06, -2.0879e-07,  4.4569e-07,  ..., -6.0020e-08,\n",
       "              1.3125e-06,  1.3883e-06],\n",
       "            [ 3.2948e-07, -1.8381e-07, -7.6754e-08,  ..., -1.2019e-07,\n",
       "              1.4777e-07,  8.6003e-08],\n",
       "            [ 7.3071e-07, -1.5526e-07,  4.1444e-07,  ..., -2.6812e-07,\n",
       "              8.0707e-07,  9.6032e-07],\n",
       "            ...,\n",
       "            [-8.2661e-08, -1.6316e-07,  8.6261e-09,  ..., -8.3678e-08,\n",
       "              2.3429e-07,  3.2280e-08],\n",
       "            [-1.3311e-06, -6.6932e-08, -3.9484e-07,  ...,  3.1042e-07,\n",
       "             -1.4320e-06, -1.4735e-06],\n",
       "            [ 3.3981e-08,  3.4658e-07, -2.8933e-07,  ..., -4.1378e-08,\n",
       "             -2.4101e-07, -4.3979e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.4297e-12, 1.9682e-12, 4.1701e-12,  ..., 9.0711e-13, 6.5028e-12,\n",
       "             5.7954e-12],\n",
       "            [1.5329e-12, 3.2674e-12, 3.3723e-12,  ..., 4.6458e-13, 3.1243e-12,\n",
       "             3.4155e-12],\n",
       "            [1.1511e-11, 2.9575e-12, 1.1383e-11,  ..., 8.3699e-13, 1.4573e-11,\n",
       "             1.3280e-11],\n",
       "            ...,\n",
       "            [1.1474e-12, 7.6806e-13, 7.9683e-13,  ..., 1.1692e-12, 2.7303e-12,\n",
       "             1.2906e-12],\n",
       "            [2.8379e-11, 9.3465e-12, 2.3641e-11,  ..., 1.6212e-12, 3.5890e-11,\n",
       "             3.1421e-11],\n",
       "            [5.5043e-12, 6.0085e-12, 7.7182e-12,  ..., 2.1605e-12, 7.8317e-12,\n",
       "             1.1961e-11]], device='cuda:0')},\n",
       "   106: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.2802e-06, -7.1863e-06, -1.6984e-06,  ..., -8.5559e-07,\n",
       "             -3.1125e-07, -6.0549e-07],\n",
       "            [-1.5286e-06, -1.4164e-07, -5.5528e-07,  ..., -7.5580e-07,\n",
       "              1.3524e-08,  1.3802e-06],\n",
       "            [-7.1158e-07,  4.5048e-06, -7.7647e-07,  ...,  4.2282e-07,\n",
       "              7.3498e-07, -8.1104e-07],\n",
       "            ...,\n",
       "            [-5.5119e-06, -4.5227e-06, -3.7737e-06,  ..., -3.0828e-06,\n",
       "             -1.2920e-07, -3.1768e-06],\n",
       "            [ 1.7647e-07, -1.5909e-07,  2.8861e-07,  ...,  3.2988e-07,\n",
       "             -2.0208e-07,  5.2390e-06],\n",
       "            [-7.1590e-07, -1.6834e-07,  1.7078e-07,  ...,  5.4500e-07,\n",
       "             -8.6011e-08,  5.5718e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.8087e-12, 1.2753e-10, 2.3008e-12,  ..., 4.2821e-12, 8.3255e-14,\n",
       "             3.3979e-11],\n",
       "            [4.1567e-10, 4.9048e-11, 1.6955e-10,  ..., 5.5185e-11, 9.5279e-13,\n",
       "             5.9996e-11],\n",
       "            [5.3548e-11, 1.2166e-10, 1.3310e-11,  ..., 1.2784e-11, 2.0955e-13,\n",
       "             1.9914e-11],\n",
       "            ...,\n",
       "            [1.4184e-10, 7.1965e-11, 3.6581e-11,  ..., 3.7798e-11, 4.6859e-13,\n",
       "             1.8036e-10],\n",
       "            [2.2970e-11, 1.9228e-12, 5.1488e-12,  ..., 2.6738e-11, 1.5267e-13,\n",
       "             1.7586e-10],\n",
       "            [7.0470e-11, 9.0479e-12, 2.4003e-11,  ..., 1.7275e-11, 1.7555e-13,\n",
       "             8.1886e-11]], device='cuda:0')},\n",
       "   107: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.6410e-06, -4.7883e-06,  5.1447e-06,  ...,  8.6076e-07,\n",
       "              6.0805e-07,  4.3144e-06],\n",
       "            [ 1.2517e-06, -9.5659e-07,  2.5968e-07,  ..., -5.3499e-08,\n",
       "              1.1844e-06,  1.5293e-06],\n",
       "            [ 3.6761e-06,  3.3479e-06, -2.5424e-06,  ..., -1.2006e-06,\n",
       "              1.2210e-06, -1.6341e-06],\n",
       "            ...,\n",
       "            [ 2.6535e-06, -1.4062e-06,  2.9058e-06,  ..., -2.7247e-07,\n",
       "              2.5154e-06,  3.7090e-06],\n",
       "            [-7.3533e-07,  3.7379e-07, -6.8059e-07,  ...,  1.9412e-06,\n",
       "              1.4949e-06,  3.8656e-07],\n",
       "            [ 4.4739e-07,  9.5147e-07,  1.0694e-07,  ...,  1.1852e-06,\n",
       "              8.3725e-07,  1.0002e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0828e-11, 7.4578e-11, 6.9437e-11,  ..., 1.4316e-11, 3.8122e-11,\n",
       "             5.8545e-11],\n",
       "            [1.2026e-10, 2.6615e-11, 8.1848e-11,  ..., 1.6278e-11, 1.7249e-10,\n",
       "             1.3395e-10],\n",
       "            [3.3013e-11, 7.1397e-11, 9.3067e-11,  ..., 1.0334e-11, 8.7177e-11,\n",
       "             9.5230e-11],\n",
       "            ...,\n",
       "            [6.5111e-11, 4.7016e-11, 7.3513e-11,  ..., 2.6817e-11, 1.1090e-10,\n",
       "             1.0139e-10],\n",
       "            [1.2323e-11, 9.9140e-12, 9.1412e-12,  ..., 1.1851e-11, 5.5696e-11,\n",
       "             2.3168e-11],\n",
       "            [2.0302e-11, 9.4649e-12, 1.4731e-11,  ..., 8.1394e-12, 4.0648e-11,\n",
       "             2.2568e-11]], device='cuda:0')},\n",
       "   108: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.6556e-06, -1.3113e-06, -7.3411e-07,  ..., -1.6197e-06,\n",
       "             -1.0095e-06,  2.5699e-06],\n",
       "            [-2.2372e-06,  2.1452e-06, -1.0024e-06,  ...,  1.5694e-06,\n",
       "             -1.1365e-07,  3.0944e-05],\n",
       "            [-5.5914e-06,  9.5661e-06, -3.4892e-06,  ...,  8.8905e-07,\n",
       "              1.1885e-08,  7.7283e-06],\n",
       "            ...,\n",
       "            [-3.5488e-06, -3.7223e-07, -2.6857e-06,  ..., -7.3500e-07,\n",
       "             -5.2152e-08,  2.2769e-06],\n",
       "            [ 1.8470e-05,  2.0262e-06,  9.7900e-06,  ...,  1.6812e-06,\n",
       "              1.1268e-06, -1.2104e-05],\n",
       "            [ 8.6077e-06,  9.6710e-07,  4.9289e-06,  ...,  6.7842e-07,\n",
       "              9.4325e-07, -3.7334e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.8864e-11, 1.3835e-11, 1.5440e-11,  ..., 1.5742e-10, 3.6215e-12,\n",
       "             1.6336e-09],\n",
       "            [9.4567e-10, 1.1289e-10, 2.6344e-10,  ..., 4.6476e-10, 1.1043e-11,\n",
       "             3.0498e-09],\n",
       "            [5.1017e-10, 5.1791e-10, 1.4094e-10,  ..., 1.4754e-10, 3.7044e-12,\n",
       "             4.0208e-10],\n",
       "            ...,\n",
       "            [3.3862e-10, 7.0170e-11, 1.0703e-10,  ..., 3.7511e-11, 2.5359e-13,\n",
       "             1.7403e-10],\n",
       "            [8.7915e-10, 8.3410e-11, 2.3998e-10,  ..., 5.2153e-10, 7.4055e-12,\n",
       "             1.2259e-09],\n",
       "            [1.1495e-09, 5.9634e-11, 1.8410e-10,  ..., 6.4613e-10, 5.4518e-12,\n",
       "             5.3963e-09]], device='cuda:0')},\n",
       "   109: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.0418e-06, -1.0400e-06,  4.6075e-07,  ...,  5.9120e-07,\n",
       "              1.3140e-06,  1.5364e-06],\n",
       "            [ 3.1037e-06, -6.1399e-07, -1.8778e-07,  ...,  3.1220e-06,\n",
       "              1.0373e-05,  2.6084e-06],\n",
       "            [ 5.7507e-06,  1.6614e-06, -6.9211e-06,  ..., -2.1894e-06,\n",
       "              2.5405e-06, -1.0639e-06],\n",
       "            ...,\n",
       "            [ 1.8566e-06, -3.4647e-07,  1.2983e-06,  ...,  9.2487e-08,\n",
       "              2.8134e-06,  2.5111e-06],\n",
       "            [-2.3873e-06, -1.2944e-06, -2.6742e-06,  ...,  1.7876e-06,\n",
       "             -5.7484e-06, -6.2271e-06],\n",
       "            [-1.1109e-06, -2.6935e-06, -5.7878e-07,  ..., -9.5838e-07,\n",
       "             -2.2583e-06, -3.4867e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.3543e-11, 9.6926e-12, 1.4218e-11,  ..., 2.0553e-11, 1.6482e-10,\n",
       "             1.5362e-11],\n",
       "            [1.1446e-10, 4.7862e-11, 1.7431e-10,  ..., 7.0761e-11, 5.8944e-10,\n",
       "             1.7147e-10],\n",
       "            [6.7115e-11, 5.5165e-11, 2.8048e-10,  ..., 3.3395e-11, 2.3476e-10,\n",
       "             1.4680e-10],\n",
       "            ...,\n",
       "            [3.2875e-11, 1.3342e-11, 6.1686e-11,  ..., 7.3089e-12, 6.2010e-11,\n",
       "             5.3284e-11],\n",
       "            [7.9194e-11, 4.1252e-11, 1.4882e-10,  ..., 5.9103e-11, 3.1999e-10,\n",
       "             1.3796e-10],\n",
       "            [7.9030e-11, 3.5497e-11, 1.0402e-10,  ..., 8.0443e-11, 7.1603e-10,\n",
       "             1.1523e-10]], device='cuda:0')},\n",
       "   110: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 4.3282e-07,  7.1600e-07,  9.5315e-08, -2.9034e-07, -1.5240e-06,\n",
       "            -2.2112e-06, -3.3382e-07, -6.0372e-07, -9.7147e-07, -3.0536e-07,\n",
       "            -1.2516e-06, -1.4347e-07, -6.1860e-07, -3.1171e-07,  2.0017e-07,\n",
       "            -1.5175e-06, -3.1917e-07, -1.1699e-07, -6.4117e-07,  6.0255e-07,\n",
       "             2.0612e-07, -9.3653e-07, -9.2668e-07, -3.7364e-07, -1.0575e-06,\n",
       "            -2.3326e-07, -1.2845e-06, -1.8983e-06, -9.0411e-07, -1.3232e-06,\n",
       "            -1.6453e-08, -4.2836e-07, -6.8355e-07,  1.3073e-07, -4.5671e-07,\n",
       "            -1.4976e-06, -3.1654e-07,  1.5009e-07, -3.8418e-07, -6.2135e-07,\n",
       "            -2.5706e-07, -1.2266e-06,  6.8285e-07, -1.7936e-07, -1.7847e-06,\n",
       "             5.6658e-08, -6.2879e-07, -6.1539e-08, -1.1644e-06, -2.8447e-07,\n",
       "            -7.2734e-07, -4.6566e-08, -7.4302e-07, -1.7903e-06, -4.1170e-07,\n",
       "            -3.3047e-07, -1.2237e-06, -5.6130e-07,  2.8398e-07,  7.9235e-07,\n",
       "             2.5636e-07, -1.4701e-06, -3.3715e-07, -3.2105e-07, -7.4001e-07,\n",
       "             2.9858e-07, -2.4554e-06, -1.9674e-07, -4.9313e-09, -2.2182e-07,\n",
       "            -7.3333e-07, -1.6651e-06, -6.5722e-07, -5.6011e-07,  1.7079e-07,\n",
       "            -1.1880e-07, -1.0763e-06, -4.2918e-07, -1.0776e-06,  1.2610e-07,\n",
       "            -1.2159e-06, -4.8501e-08,  7.8006e-07, -8.0655e-07, -1.0692e-06,\n",
       "            -2.7835e-07, -1.9740e-07, -1.2318e-06,  7.0453e-07,  9.8355e-09,\n",
       "            -2.1195e-07, -4.1412e-07, -3.2817e-06, -2.5406e-07, -8.4802e-07,\n",
       "            -8.5815e-07,  4.2049e-08, -1.8973e-07, -1.6342e-06,  5.6479e-07,\n",
       "            -6.1968e-07, -1.6327e-07,  5.1768e-07, -4.8816e-07,  1.8862e-07,\n",
       "            -9.0142e-07, -1.8738e-06,  3.8124e-07, -1.7686e-06, -3.6917e-08,\n",
       "            -7.1386e-07,  1.0302e-06, -4.2378e-06, -1.0605e-07,  1.3793e-07,\n",
       "             1.6640e-07, -4.4325e-07, -7.9740e-08,  2.5833e-07, -2.2139e-06,\n",
       "            -8.1057e-08, -1.8183e-07, -5.4720e-07, -1.5863e-07, -4.1418e-07,\n",
       "             4.7108e-08, -4.7057e-07, -7.4851e-08], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([5.0422e-12, 1.7980e-12, 3.7564e-13, 1.9361e-12, 2.3596e-11, 2.8877e-11,\n",
       "            2.6128e-11, 3.1842e-12, 1.4874e-11, 1.8774e-12, 1.6653e-11, 5.8029e-12,\n",
       "            4.5840e-12, 8.8414e-13, 1.5302e-12, 1.6560e-11, 1.2114e-12, 2.0215e-12,\n",
       "            2.0234e-11, 3.5505e-12, 4.7436e-11, 1.3589e-11, 3.1533e-12, 7.9823e-12,\n",
       "            2.5782e-11, 4.8605e-12, 5.8141e-12, 6.9475e-11, 6.3748e-12, 3.3361e-11,\n",
       "            7.1233e-11, 4.4006e-11, 1.2294e-11, 5.0893e-12, 2.5814e-12, 4.7113e-11,\n",
       "            7.0384e-13, 2.2118e-12, 3.6720e-12, 2.6341e-12, 2.6113e-12, 9.5306e-12,\n",
       "            7.0958e-12, 1.5706e-11, 2.3149e-11, 3.9102e-12, 2.6829e-11, 6.9615e-12,\n",
       "            2.9561e-11, 4.3756e-12, 1.2436e-11, 5.3791e-12, 2.5465e-12, 6.9877e-11,\n",
       "            7.5345e-11, 6.4228e-12, 5.2909e-11, 1.5160e-11, 2.8401e-12, 1.5190e-12,\n",
       "            6.7111e-12, 2.5859e-11, 4.9675e-12, 3.1352e-12, 1.1669e-11, 4.4601e-12,\n",
       "            5.8352e-11, 2.5544e-12, 1.2397e-12, 6.9552e-13, 4.5181e-12, 2.0172e-11,\n",
       "            1.1943e-11, 1.2432e-11, 5.5310e-12, 1.5117e-12, 1.5550e-11, 1.7721e-12,\n",
       "            1.5391e-11, 1.5800e-11, 1.3253e-11, 3.3141e-12, 1.1535e-11, 7.0369e-12,\n",
       "            2.7880e-11, 1.3945e-11, 1.0110e-12, 2.0677e-11, 3.1632e-12, 2.7146e-12,\n",
       "            2.2781e-12, 2.2920e-12, 6.3044e-11, 5.0897e-12, 2.8962e-11, 3.0704e-11,\n",
       "            8.0726e-13, 1.8104e-12, 2.0285e-11, 5.9179e-12, 4.8186e-12, 4.0061e-12,\n",
       "            5.8955e-13, 6.6855e-12, 1.5434e-12, 1.8989e-11, 2.1275e-10, 7.3432e-13,\n",
       "            1.2346e-11, 4.3725e-12, 6.2138e-12, 8.8447e-12, 3.0430e-10, 7.8849e-13,\n",
       "            5.3628e-13, 1.5805e-11, 2.3903e-11, 1.5050e-11, 8.1230e-13, 5.5101e-11,\n",
       "            8.7625e-12, 8.5099e-12, 8.0162e-12, 1.9051e-12, 4.8085e-12, 1.0250e-12,\n",
       "            3.9825e-12, 2.4706e-12], device='cuda:0')},\n",
       "   111: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.2026e-06, -1.1156e-06, -4.5235e-07,  1.6035e-07, -4.4823e-07,\n",
       "            -6.4133e-07, -2.5804e-07, -2.2011e-07,  2.7597e-07, -3.9296e-07,\n",
       "             5.9925e-07, -6.4049e-08, -4.2295e-07, -2.6072e-07,  8.6276e-08,\n",
       "            -2.9659e-07, -2.8653e-07,  1.0760e-06,  1.5505e-07,  6.2720e-07,\n",
       "            -2.5277e-07, -7.7246e-07, -5.4690e-07,  6.7586e-07,  2.4794e-07,\n",
       "            -4.1650e-08,  5.6632e-07,  6.6550e-07,  2.0402e-07,  6.7491e-07,\n",
       "             1.0240e-08,  9.3441e-07,  1.1560e-07, -6.7590e-07, -9.8994e-08,\n",
       "             1.1632e-06,  3.0608e-07, -2.0394e-07, -6.2975e-07, -1.1847e-07,\n",
       "             7.2992e-08,  8.1930e-07, -3.8714e-07, -5.6649e-07, -1.6820e-06,\n",
       "             1.1061e-07,  1.1208e-07,  2.1157e-07, -1.2558e-06,  1.6196e-07,\n",
       "             1.0023e-06, -4.5349e-07, -6.4591e-07,  2.5024e-08,  4.2455e-07,\n",
       "             4.0069e-07, -4.8844e-08,  4.3005e-07, -1.2822e-07,  5.3075e-07,\n",
       "             7.6579e-07, -8.9361e-07,  4.0023e-07, -3.7532e-08, -3.6457e-07,\n",
       "             1.7193e-06,  9.4959e-07, -6.3104e-07, -4.7471e-07,  2.1637e-07,\n",
       "             4.0881e-07, -7.8768e-07, -5.9714e-07, -2.3458e-07, -2.2744e-07,\n",
       "            -2.3743e-07, -4.9231e-07,  3.8364e-07, -5.1254e-07,  3.1223e-07,\n",
       "             1.5361e-07,  9.9389e-07,  1.9539e-07,  6.1226e-07,  3.1375e-07,\n",
       "             5.2031e-07, -2.3834e-07, -2.5964e-07, -7.6174e-07, -2.6887e-07,\n",
       "            -2.6350e-07,  1.2021e-07, -1.5299e-06,  3.5960e-07, -4.8226e-07,\n",
       "            -9.9438e-07,  3.3603e-08,  1.1340e-07, -3.3213e-07,  1.2336e-07,\n",
       "            -1.3607e-07,  1.5993e-08,  8.2710e-07,  8.5804e-07, -1.2174e-07,\n",
       "             1.5394e-07,  3.6628e-07, -3.0337e-07, -1.1560e-06, -7.6116e-07,\n",
       "             7.9916e-07, -1.2534e-06, -2.2195e-07,  2.9984e-07, -2.5649e-07,\n",
       "             1.1461e-07,  2.5382e-07, -1.1524e-06, -1.0966e-07, -3.1819e-07,\n",
       "             5.7968e-07,  1.3914e-07, -1.5086e-07,  1.8288e-08,  2.8471e-07,\n",
       "             5.8801e-07, -4.6055e-07, -2.9486e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([8.7644e-12, 6.9007e-12, 1.4338e-12, 3.8544e-12, 9.9082e-12, 2.1304e-11,\n",
       "            1.8189e-11, 3.5916e-12, 2.1933e-11, 1.1545e-11, 1.6368e-11, 9.6298e-12,\n",
       "            6.7323e-12, 2.6139e-12, 2.7101e-12, 1.8462e-11, 5.1020e-12, 9.5044e-12,\n",
       "            1.6686e-11, 1.0317e-11, 4.0200e-11, 1.2346e-11, 7.8882e-12, 2.2032e-11,\n",
       "            1.3083e-11, 7.2003e-12, 6.1229e-12, 3.6735e-11, 7.0346e-12, 1.8291e-11,\n",
       "            3.3513e-11, 6.7148e-12, 1.7227e-11, 1.0425e-11, 8.3464e-12, 2.7533e-11,\n",
       "            2.7064e-12, 1.0125e-11, 6.8022e-12, 6.0885e-12, 5.4717e-12, 1.0612e-11,\n",
       "            1.3932e-11, 2.7647e-11, 1.8609e-11, 8.8978e-12, 1.2047e-11, 8.3928e-12,\n",
       "            3.8004e-11, 6.1818e-12, 1.1529e-11, 6.8982e-12, 4.6426e-12, 2.7352e-11,\n",
       "            4.5112e-11, 2.3042e-11, 2.3102e-11, 2.0745e-11, 6.5074e-12, 5.9627e-12,\n",
       "            1.7697e-11, 1.8711e-11, 6.1184e-12, 4.4357e-12, 8.7198e-12, 1.0316e-11,\n",
       "            2.1003e-11, 1.0329e-11, 2.3761e-12, 3.3707e-12, 7.3110e-12, 2.2218e-11,\n",
       "            1.4865e-11, 8.8798e-12, 1.5687e-11, 5.9508e-12, 1.1157e-11, 3.7350e-12,\n",
       "            2.9264e-11, 1.3607e-11, 8.4926e-12, 1.8577e-11, 1.6348e-11, 9.2715e-12,\n",
       "            2.0294e-11, 2.0676e-11, 3.7552e-12, 1.6175e-11, 8.3438e-12, 7.6532e-12,\n",
       "            9.2622e-12, 2.8953e-12, 3.6836e-11, 8.5236e-12, 1.5571e-11, 1.8265e-11,\n",
       "            5.1388e-12, 6.6651e-12, 1.1594e-11, 8.6458e-12, 1.3938e-11, 1.1091e-11,\n",
       "            2.9036e-12, 1.3943e-11, 3.0891e-12, 1.7027e-11, 9.6330e-12, 3.9992e-12,\n",
       "            1.0139e-11, 6.2781e-12, 1.3696e-11, 1.9327e-11, 9.2819e-11, 4.8916e-12,\n",
       "            2.5349e-12, 1.6125e-11, 1.0983e-11, 2.5282e-11, 5.7133e-12, 3.0145e-11,\n",
       "            1.1242e-11, 1.4885e-11, 2.3449e-11, 4.3420e-12, 1.6063e-11, 4.4005e-12,\n",
       "            4.8457e-12, 4.0325e-12], device='cuda:0')},\n",
       "   112: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 8.4301e-06, -3.7458e-06, -3.2304e-06,  1.9879e-06, -3.4469e-07,\n",
       "             3.9889e-06, -5.7286e-06, -7.4040e-06, -5.6842e-06, -1.8610e-06,\n",
       "            -3.0087e-06, -3.4875e-06,  1.0705e-05,  1.5410e-06, -2.0745e-06,\n",
       "             7.5684e-06,  5.6201e-07,  3.1613e-06,  4.4764e-07,  3.1953e-06,\n",
       "             7.5759e-06,  8.4044e-06, -3.2071e-06, -7.4346e-07,  9.9184e-06,\n",
       "             9.6020e-06, -8.5948e-07, -4.0664e-07, -7.0436e-07, -3.9707e-07,\n",
       "             3.2271e-06, -5.1280e-07, -1.3781e-06, -5.5384e-06,  3.0675e-07,\n",
       "            -8.8008e-06,  1.3542e-06, -5.7896e-08, -3.3471e-06, -6.9269e-06,\n",
       "             3.9284e-06, -6.4178e-06,  4.9013e-06,  3.2270e-06, -3.5312e-06,\n",
       "             1.3754e-06,  6.1401e-06, -5.3404e-06, -1.9269e-06,  3.8492e-06,\n",
       "             5.0285e-06,  6.4918e-06,  4.0973e-07, -2.0435e-06, -2.1851e-06,\n",
       "             8.3668e-07,  3.9123e-06,  1.8511e-06, -6.0597e-06,  3.6936e-06,\n",
       "            -2.2248e-06, -4.5418e-06,  4.0374e-06,  3.0993e-06,  2.3999e-05,\n",
       "             5.0184e-06,  4.8761e-07, -2.0329e-06,  1.3149e-06, -2.3915e-06,\n",
       "             1.2214e-06,  4.7881e-06, -4.7471e-06,  1.9564e-06, -2.8853e-06,\n",
       "            -8.2974e-07,  5.8594e-07,  1.4776e-06, -6.0288e-06, -1.0485e-06,\n",
       "             4.1535e-06, -4.4022e-06, -4.2499e-06, -2.9796e-06, -1.5763e-06,\n",
       "            -2.9779e-06, -3.7558e-06,  6.2140e-06,  1.1934e-06,  2.0750e-06,\n",
       "             4.0503e-06,  9.0925e-06,  1.3213e-06, -2.4163e-06,  4.8294e-06,\n",
       "             9.0676e-07, -1.2771e-06,  7.0134e-06,  3.0706e-07, -3.7085e-07,\n",
       "             2.1737e-07, -5.5411e-06,  2.0573e-06, -8.9079e-06, -3.3335e-06,\n",
       "            -1.0093e-06, -3.8779e-05, -2.1519e-07, -2.6641e-06,  9.0781e-06,\n",
       "             3.6642e-06,  2.2006e-06,  5.4208e-06, -1.2682e-06, -1.8520e-06,\n",
       "             3.4236e-06,  5.7251e-06, -4.0548e-06, -9.5802e-07, -7.7235e-06,\n",
       "            -5.6997e-06,  4.3453e-07, -4.2648e-06,  1.8918e-07, -3.6987e-06,\n",
       "            -9.1536e-08, -8.7292e-06, -3.4106e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.0437e-09, 2.9313e-10, 2.6093e-10, 2.2469e-10, 1.5332e-09, 8.0092e-10,\n",
       "            6.5497e-10, 7.8872e-10, 6.1668e-10, 2.4739e-10, 2.1733e-10, 5.6493e-10,\n",
       "            9.3267e-10, 8.8832e-10, 2.5588e-10, 5.1814e-10, 5.4217e-10, 4.6562e-10,\n",
       "            3.7651e-10, 1.1317e-10, 6.6459e-10, 8.4203e-10, 4.9543e-10, 5.1022e-10,\n",
       "            1.1660e-09, 3.2113e-10, 5.7468e-10, 2.0863e-09, 5.4735e-10, 1.3313e-09,\n",
       "            8.2495e-10, 2.4733e-09, 4.4035e-10, 2.2073e-10, 3.4401e-10, 2.0730e-09,\n",
       "            3.5092e-10, 4.9214e-10, 4.7276e-10, 3.9011e-10, 5.1394e-10, 1.1574e-09,\n",
       "            3.6264e-10, 2.2694e-10, 5.9581e-10, 4.8414e-10, 1.1097e-09, 5.9235e-10,\n",
       "            2.8722e-10, 5.5659e-10, 4.9804e-10, 1.3595e-09, 2.2680e-10, 1.3540e-09,\n",
       "            1.3935e-09, 1.9518e-10, 1.1267e-09, 3.5545e-10, 5.4530e-10, 1.7044e-10,\n",
       "            3.8037e-10, 6.3785e-10, 8.9130e-10, 5.7130e-10, 1.8535e-09, 4.3448e-10,\n",
       "            1.2735e-09, 8.6229e-11, 1.1424e-10, 1.5717e-10, 3.0942e-09, 9.5919e-10,\n",
       "            4.1625e-10, 7.9850e-10, 1.5825e-10, 3.3370e-10, 1.0623e-09, 5.4272e-10,\n",
       "            6.3452e-10, 5.2751e-10, 9.3213e-10, 1.9555e-09, 3.5159e-10, 1.0812e-09,\n",
       "            5.9440e-10, 8.5203e-10, 2.2181e-10, 4.2739e-10, 2.9485e-10, 8.9888e-10,\n",
       "            3.7983e-10, 9.2658e-10, 6.1047e-10, 2.2156e-10, 1.4906e-09, 2.5390e-10,\n",
       "            3.5742e-10, 5.6432e-10, 4.5245e-10, 3.8595e-10, 1.8201e-10, 8.5600e-10,\n",
       "            1.7349e-10, 1.9875e-09, 2.2741e-10, 1.8452e-09, 7.9814e-08, 2.2537e-10,\n",
       "            9.1554e-10, 4.3115e-10, 9.2118e-10, 6.3425e-10, 1.2951e-09, 8.0385e-10,\n",
       "            2.0126e-10, 8.8413e-10, 1.6592e-09, 2.9719e-10, 2.3428e-10, 5.5948e-10,\n",
       "            5.4521e-10, 2.7554e-10, 5.0574e-10, 5.7854e-10, 2.2138e-10, 1.9114e-10,\n",
       "            4.3186e-10, 2.4575e-10], device='cuda:0')},\n",
       "   113: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 8.2284e-07, -1.0240e-05,  7.9658e-06,  4.6465e-06,  7.4068e-06,\n",
       "             6.0269e-07, -1.0180e-06, -2.6160e-06, -2.7663e-07,  6.9259e-06,\n",
       "             4.6467e-07, -3.4736e-06, -7.5625e-08, -1.0600e-06, -6.1050e-06,\n",
       "            -6.9320e-06, -6.8802e-06,  7.9503e-06, -7.8357e-07,  2.4599e-06,\n",
       "            -6.1369e-06, -3.2881e-07, -1.1984e-06,  3.6650e-06,  2.8097e-06,\n",
       "            -9.0589e-06, -5.3692e-06,  1.9750e-06, -3.6180e-06, -3.7325e-06,\n",
       "            -1.6965e-06,  4.4640e-06,  7.7668e-06,  7.9704e-06, -7.1357e-06,\n",
       "            -1.5798e-06,  1.4061e-06, -2.4939e-06, -9.0288e-06,  1.7481e-07,\n",
       "             3.9392e-06, -9.1785e-06,  3.1350e-06, -4.3383e-06, -3.9700e-06,\n",
       "            -3.2840e-06,  1.0034e-05, -1.7564e-06, -3.5040e-06, -6.4257e-06,\n",
       "            -3.9069e-06, -6.8472e-06,  1.5498e-05,  2.1072e-06,  4.9075e-06,\n",
       "             4.8165e-06, -6.5987e-06, -7.2610e-06, -1.6769e-06,  2.0912e-06,\n",
       "             1.1724e-07,  8.6486e-06, -4.8944e-06, -9.1029e-06, -5.8971e-06,\n",
       "             6.9611e-06,  2.8783e-06, -5.5093e-07,  2.2754e-06,  1.7426e-06,\n",
       "            -1.7040e-06, -1.0401e-05,  3.7338e-07,  7.6112e-06,  1.1538e-06,\n",
       "             6.4878e-07,  4.5302e-06, -7.4828e-06, -2.4878e-06, -4.3761e-06,\n",
       "             1.1220e-05,  4.5479e-06,  3.8520e-06,  4.9934e-06,  1.1843e-05,\n",
       "             5.0508e-06, -1.1628e-06, -4.7925e-06,  6.4409e-06,  2.1813e-06,\n",
       "            -4.3221e-06,  4.4144e-06,  1.0804e-06,  1.3343e-06, -2.9159e-07,\n",
       "            -1.3237e-06,  8.3155e-08, -1.6223e-06,  5.2296e-06, -3.7577e-06,\n",
       "            -8.8426e-06, -7.1311e-06, -2.8517e-06, -6.3211e-06,  6.2718e-06,\n",
       "             1.9138e-06, -9.7432e-06, -4.6913e-06, -1.7036e-06, -2.9719e-06,\n",
       "             1.6870e-06,  4.2619e-06,  1.0965e-06, -1.2470e-06, -4.1890e-06,\n",
       "            -1.6798e-06, -3.9965e-06, -5.9414e-06,  4.3982e-06,  1.1013e-06,\n",
       "            -5.2770e-06, -6.4253e-06,  2.9396e-07,  2.0907e-06,  2.9225e-06,\n",
       "             9.8510e-07,  1.9270e-06,  4.5949e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.9798e-09, 1.3583e-09, 4.9006e-10, 7.1692e-10, 1.0829e-09, 6.7515e-10,\n",
       "            9.8172e-10, 9.0989e-10, 6.5405e-10, 6.6191e-10, 9.0814e-10, 6.8655e-10,\n",
       "            1.1186e-09, 1.1842e-09, 7.0372e-10, 1.3394e-09, 1.3372e-09, 1.0709e-09,\n",
       "            8.7065e-10, 5.3384e-10, 1.1285e-09, 1.1652e-09, 1.0042e-09, 8.1963e-10,\n",
       "            1.1791e-09, 7.5096e-10, 8.3760e-10, 1.7366e-09, 7.2166e-10, 1.1666e-09,\n",
       "            4.8368e-10, 9.6920e-10, 9.4213e-10, 7.8302e-10, 1.1586e-09, 1.7822e-09,\n",
       "            6.2697e-10, 7.9853e-10, 9.3360e-10, 1.2746e-09, 6.8928e-10, 1.2820e-09,\n",
       "            7.9234e-10, 6.7296e-10, 8.7956e-10, 6.4023e-10, 1.7123e-09, 6.6525e-10,\n",
       "            8.7408e-10, 1.1320e-09, 5.7264e-10, 1.8937e-09, 5.4072e-10, 1.0120e-09,\n",
       "            1.0788e-09, 6.5119e-10, 7.7044e-10, 7.6736e-10, 6.5461e-10, 4.0043e-10,\n",
       "            5.9001e-10, 1.3700e-09, 8.2940e-10, 9.9595e-10, 3.0423e-09, 5.7996e-10,\n",
       "            6.2103e-10, 4.4493e-10, 9.8230e-10, 6.5191e-10, 1.7200e-09, 1.3150e-09,\n",
       "            5.8396e-10, 7.1441e-10, 7.5191e-10, 5.6260e-10, 9.0643e-10, 6.3384e-10,\n",
       "            4.9722e-10, 8.5648e-10, 1.3221e-09, 1.3401e-09, 1.2877e-09, 7.8226e-10,\n",
       "            7.7189e-10, 7.3948e-10, 6.8362e-10, 5.3962e-10, 1.2116e-09, 6.4996e-10,\n",
       "            6.6306e-10, 1.0759e-09, 1.0245e-09, 6.3400e-10, 9.0220e-10, 6.7006e-10,\n",
       "            1.0856e-09, 1.3821e-09, 6.3414e-10, 5.1017e-10, 7.3742e-10, 9.0210e-10,\n",
       "            7.2029e-10, 1.8959e-09, 8.5050e-10, 1.3845e-09, 2.3068e-09, 5.3517e-10,\n",
       "            7.4024e-10, 7.5927e-10, 4.2960e-10, 1.2087e-09, 6.9257e-10, 8.3589e-10,\n",
       "            5.5917e-10, 7.7182e-10, 6.1124e-10, 6.3892e-10, 5.9354e-10, 9.9326e-10,\n",
       "            7.4314e-10, 6.8658e-10, 7.4566e-10, 7.4110e-10, 6.8720e-10, 5.4988e-10,\n",
       "            4.4297e-10, 2.8853e-10], device='cuda:0')},\n",
       "   114: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-2.8886e-06,  2.8749e-06,  2.8149e-06,  4.8475e-06, -2.8338e-06,\n",
       "             3.8706e-06,  3.6794e-06,  5.7115e-07,  2.4715e-06, -5.9344e-06,\n",
       "             4.8362e-06, -3.8601e-06, -6.3200e-06, -4.1984e-06, -5.8277e-06,\n",
       "            -1.6955e-05,  4.5686e-06,  2.4126e-07, -2.7487e-06, -5.0568e-07,\n",
       "            -4.4013e-05,  4.3408e-07, -1.3555e-05,  1.7273e-06,  2.1343e-06,\n",
       "             2.8593e-06,  4.7704e-06, -2.1214e-06,  1.2806e-06,  4.4308e-06,\n",
       "            -8.2098e-07, -6.6751e-06, -4.7572e-06, -7.6078e-06, -1.0450e-06,\n",
       "            -2.2215e-06, -5.9643e-06,  2.0396e-08, -6.9158e-06,  9.3663e-06,\n",
       "             1.5123e-06, -2.2903e-07,  4.0914e-08, -3.5660e-07, -2.4959e-06,\n",
       "            -8.3646e-07, -3.1046e-06, -1.3966e-06, -1.3041e-06, -3.4202e-06,\n",
       "            -2.1643e-06, -3.4986e-06, -6.0148e-06, -1.7014e-06,  5.1545e-07,\n",
       "             1.6139e-06,  9.3590e-07,  7.9304e-06, -3.8587e-06, -1.4882e-06,\n",
       "            -3.7956e-06, -5.5176e-07,  4.9775e-06,  5.9001e-06, -7.3196e-05,\n",
       "            -2.7218e-06, -9.8344e-06,  1.7560e-07, -3.1326e-06,  6.2009e-06,\n",
       "             2.9609e-06, -3.3074e-06, -2.9922e-06,  3.0311e-07, -2.8520e-06,\n",
       "             9.4723e-06, -8.7051e-07, -8.9613e-06, -4.8296e-06, -3.2733e-06,\n",
       "            -5.6048e-06, -3.3037e-06,  1.0660e-05, -5.4150e-06, -5.0258e-07,\n",
       "            -7.5121e-06, -3.5809e-06,  1.6370e-05,  1.2881e-05, -2.0482e-07,\n",
       "            -1.1777e-06,  3.4864e-06,  6.5683e-07,  1.0335e-05, -5.6181e-07,\n",
       "             1.0683e-06, -6.6394e-06, -2.5497e-06, -3.5453e-06,  1.0964e-05,\n",
       "            -2.6730e-06, -1.8354e-06,  4.7233e-06,  5.8211e-06, -1.4570e-06,\n",
       "             2.7537e-06, -3.9911e-05, -4.0294e-06, -6.1356e-06, -4.1806e-07,\n",
       "             1.1618e-05, -2.5911e-06, -1.9067e-05,  1.0960e-06, -6.9870e-07,\n",
       "             1.3398e-06, -1.0792e-06, -1.9264e-06, -1.0975e-06,  6.2238e-06,\n",
       "             1.0578e-06,  4.7051e-07,  5.0869e-06, -6.2088e-08, -4.4139e-06,\n",
       "            -1.0884e-06,  4.8662e-06, -1.2891e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.5016e-09, 3.5564e-10, 2.7550e-10, 5.7996e-10, 6.5359e-10, 2.8308e-09,\n",
       "            2.9364e-10, 3.0850e-10, 8.0666e-10, 1.5091e-10, 8.4481e-10, 1.5377e-10,\n",
       "            9.2914e-10, 1.4671e-09, 4.0372e-10, 2.2294e-09, 1.8118e-10, 2.4402e-09,\n",
       "            3.3437e-10, 1.7149e-09, 1.8444e-08, 1.1684e-09, 4.5960e-09, 3.1652e-10,\n",
       "            5.7172e-10, 4.1175e-10, 1.6276e-09, 2.1271e-10, 1.8287e-09, 8.9692e-10,\n",
       "            3.5907e-10, 1.4192e-08, 2.2857e-10, 5.3406e-10, 1.1464e-10, 7.1656e-10,\n",
       "            1.6840e-10, 1.3045e-10, 5.0054e-10, 3.5290e-10, 1.3296e-10, 1.3097e-10,\n",
       "            4.7905e-10, 2.2696e-10, 6.1395e-10, 2.1223e-10, 1.2814e-09, 2.7383e-10,\n",
       "            9.4677e-11, 3.2539e-10, 1.4593e-09, 4.2020e-10, 2.1964e-10, 3.6143e-10,\n",
       "            2.1029e-09, 2.7797e-10, 1.8486e-10, 3.2170e-10, 3.3454e-10, 2.3186e-10,\n",
       "            1.5680e-10, 6.2433e-10, 3.5553e-10, 2.8192e-10, 9.1476e-08, 2.8700e-10,\n",
       "            1.7109e-09, 5.1662e-10, 4.1243e-10, 3.2919e-10, 3.7060e-10, 1.3778e-09,\n",
       "            5.9738e-10, 2.1611e-10, 2.6005e-10, 4.8163e-10, 2.0421e-10, 2.3095e-10,\n",
       "            4.3843e-10, 1.5358e-09, 1.1491e-09, 4.2341e-10, 7.6811e-10, 9.9012e-10,\n",
       "            5.8675e-10, 2.4474e-09, 1.2031e-09, 3.6041e-09, 1.1509e-09, 7.7493e-10,\n",
       "            1.0380e-09, 4.2550e-10, 1.7086e-09, 9.1674e-10, 3.9826e-10, 3.2465e-10,\n",
       "            9.6476e-10, 4.5278e-10, 6.9439e-10, 1.1076e-09, 3.9982e-10, 4.0014e-10,\n",
       "            1.1765e-09, 1.4845e-09, 2.5688e-10, 9.1644e-10, 1.4804e-07, 6.5966e-10,\n",
       "            7.6738e-10, 2.1515e-10, 8.2316e-10, 3.6079e-10, 3.6958e-09, 2.7996e-10,\n",
       "            7.9232e-10, 2.6522e-10, 2.8391e-10, 3.4067e-10, 4.7873e-10, 9.9874e-10,\n",
       "            5.8954e-10, 2.0973e-10, 6.1865e-09, 9.8637e-10, 3.9466e-10, 1.0710e-09,\n",
       "            1.9288e-10, 4.4786e-10], device='cuda:0')},\n",
       "   115: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-5.2891e-06, -7.3975e-06,  2.4210e-06, -3.3908e-07,  1.1245e-05,\n",
       "             1.4006e-05, -4.0532e-06,  8.9221e-07, -1.4189e-06,  2.9335e-06,\n",
       "            -2.0296e-05,  9.1302e-08, -5.6100e-06,  3.1709e-06,  1.0203e-07,\n",
       "            -1.2624e-05,  2.0702e-07,  1.0501e-05, -3.9432e-06,  4.1760e-06,\n",
       "            -1.3910e-05, -2.2438e-06,  4.1401e-07, -5.2325e-06, -1.7319e-06,\n",
       "            -1.9711e-06,  2.8736e-06,  4.8056e-06,  4.6006e-06,  2.9372e-06,\n",
       "             1.1184e-05, -2.9186e-06,  7.0399e-08,  8.3322e-06,  5.9601e-08,\n",
       "            -6.1820e-06,  4.7582e-06, -5.6511e-06,  1.0381e-05, -1.1330e-05,\n",
       "            -5.0447e-06, -5.9914e-06, -7.5697e-06,  3.6904e-06,  9.3457e-06,\n",
       "            -1.1111e-05,  2.1134e-06, -7.2702e-08,  5.7346e-06,  1.8642e-07,\n",
       "            -3.5748e-06, -5.4365e-06,  5.6068e-06, -1.1921e-05, -8.6318e-07,\n",
       "            -2.2645e-06,  7.8504e-06, -8.7486e-07,  4.3221e-06,  4.4950e-06,\n",
       "            -7.6594e-06, -9.2323e-07,  2.2921e-06, -6.9773e-07, -1.0872e-05,\n",
       "             1.2679e-07, -2.0415e-06,  4.5428e-07, -5.6800e-07, -6.2283e-06,\n",
       "            -4.1514e-06, -3.1302e-06,  1.1316e-06, -4.4883e-06,  1.5582e-06,\n",
       "            -8.7864e-06,  5.0574e-06, -1.2489e-07, -8.6955e-06, -1.4702e-06,\n",
       "            -2.3949e-06, -3.8969e-06,  8.5711e-06, -4.6732e-06, -1.3027e-05,\n",
       "            -2.6496e-06,  1.2456e-06, -9.5331e-06, -7.2263e-06, -1.4472e-07,\n",
       "            -2.9200e-06,  3.1193e-06, -1.3900e-06,  4.5046e-07,  4.9259e-06,\n",
       "            -1.0798e-06,  1.0556e-05,  1.0686e-05,  5.5061e-07,  8.9830e-06,\n",
       "             4.9354e-06,  5.6614e-06,  8.9386e-06,  4.7005e-06, -5.6526e-06,\n",
       "             2.1192e-06, -9.2666e-06, -1.9825e-06,  7.7280e-06, -1.8890e-07,\n",
       "             4.5616e-06,  3.6946e-06,  6.4921e-06,  4.8537e-06,  1.7966e-06,\n",
       "             9.6279e-06,  9.1069e-06,  2.1396e-06,  1.2402e-06,  8.8032e-06,\n",
       "            -4.6130e-06,  3.4522e-06,  6.0776e-06, -2.3630e-06, -2.8042e-06,\n",
       "             4.4544e-06,  7.4554e-06,  4.9077e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.1341e-09, 2.5153e-09, 1.0191e-09, 1.1467e-09, 1.6942e-09, 1.0606e-08,\n",
       "            8.2122e-10, 7.5326e-10, 1.4480e-09, 2.6326e-09, 1.3867e-09, 1.3974e-09,\n",
       "            1.4452e-09, 3.4662e-09, 1.2721e-09, 6.7302e-09, 9.6039e-10, 7.8957e-09,\n",
       "            1.1075e-09, 9.7193e-10, 2.7118e-09, 6.3513e-10, 4.0632e-09, 2.1172e-09,\n",
       "            3.9478e-10, 2.4827e-09, 1.3600e-09, 8.5210e-10, 2.1064e-09, 1.3501e-09,\n",
       "            6.7996e-09, 2.2272e-09, 2.8219e-09, 6.6301e-09, 5.1565e-10, 1.6228e-09,\n",
       "            3.0272e-10, 8.1358e-10, 3.1616e-09, 9.1488e-10, 9.3490e-10, 1.6330e-09,\n",
       "            2.6303e-09, 1.2464e-09, 1.2724e-09, 6.8451e-10, 1.8485e-09, 7.3287e-10,\n",
       "            3.5721e-10, 3.7493e-10, 2.5998e-09, 8.4751e-10, 4.3596e-10, 1.5028e-09,\n",
       "            1.6947e-09, 1.1145e-09, 1.6468e-09, 2.8377e-09, 1.0349e-09, 7.3486e-10,\n",
       "            5.4385e-10, 1.2908e-09, 1.1454e-09, 7.9777e-10, 5.8452e-09, 1.3913e-09,\n",
       "            3.2368e-09, 6.2303e-10, 6.8611e-10, 1.8326e-09, 1.8309e-09, 1.6598e-09,\n",
       "            2.2788e-09, 1.0215e-09, 1.8360e-09, 2.0200e-09, 7.7023e-10, 1.3994e-09,\n",
       "            1.7586e-09, 3.3041e-09, 5.8985e-09, 1.5348e-09, 1.1124e-09, 2.3619e-09,\n",
       "            1.3983e-09, 1.6077e-09, 7.4334e-10, 1.2012e-09, 8.6712e-10, 2.2781e-09,\n",
       "            9.4611e-10, 1.0597e-09, 3.4782e-09, 2.0528e-09, 1.1609e-09, 7.6723e-10,\n",
       "            2.9720e-09, 8.0508e-10, 1.1171e-09, 8.4379e-10, 5.9108e-10, 8.9402e-10,\n",
       "            2.6967e-09, 1.0170e-09, 1.5613e-09, 8.3629e-10, 6.2906e-09, 6.9065e-10,\n",
       "            1.2925e-09, 3.0838e-10, 5.6063e-10, 1.4469e-09, 2.4225e-09, 1.0126e-09,\n",
       "            1.4365e-09, 1.8233e-09, 6.2283e-10, 6.7702e-10, 3.5845e-10, 1.2171e-09,\n",
       "            1.2181e-09, 8.4595e-10, 4.7822e-09, 7.9659e-10, 4.0405e-10, 6.4822e-10,\n",
       "            4.4787e-10, 1.7808e-09], device='cuda:0')},\n",
       "   116: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-2.5100e-06, -3.9348e-07,  1.0887e-06,  ..., -3.9068e-06,\n",
       "              8.1619e-07,  7.4831e-07],\n",
       "            [ 9.6446e-06, -4.0466e-06,  1.1070e-05,  ..., -2.9149e-06,\n",
       "              9.9424e-06,  1.4794e-05],\n",
       "            [-9.2638e-06,  2.1045e-06, -2.8552e-06,  ...,  5.9493e-07,\n",
       "             -8.0291e-06, -6.2719e-06],\n",
       "            ...,\n",
       "            [ 9.6724e-06, -3.5788e-07, -3.2950e-06,  ...,  6.1043e-07,\n",
       "              1.7117e-05,  1.0404e-05],\n",
       "            [ 3.2271e-06,  7.8052e-07,  2.8248e-07,  ...,  6.7114e-06,\n",
       "              3.6202e-06,  6.1168e-07],\n",
       "            [ 3.5792e-06, -3.5470e-07,  1.2176e-06,  ...,  2.3822e-06,\n",
       "              6.0593e-06,  5.8398e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.0178e-09, 1.6919e-09, 2.0676e-09,  ..., 2.2580e-09, 2.1772e-09,\n",
       "             6.2800e-09],\n",
       "            [2.3996e-09, 1.0169e-09, 2.1969e-09,  ..., 1.5367e-10, 2.4132e-09,\n",
       "             3.2974e-09],\n",
       "            [1.4992e-09, 7.5033e-10, 1.9236e-09,  ..., 3.5089e-10, 1.7474e-09,\n",
       "             2.5254e-09],\n",
       "            ...,\n",
       "            [1.3343e-09, 7.0238e-10, 1.0226e-09,  ..., 8.8253e-10, 2.7237e-09,\n",
       "             2.5900e-09],\n",
       "            [4.4828e-11, 1.3475e-10, 7.5772e-11,  ..., 1.9106e-10, 1.6478e-10,\n",
       "             1.6911e-10],\n",
       "            [1.0610e-09, 3.8008e-10, 3.1853e-10,  ..., 6.3024e-10, 3.1532e-09,\n",
       "             1.0271e-09]], device='cuda:0')},\n",
       "   117: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-3.5430e-07, -1.8937e-05,  2.3273e-06,  2.0644e-05,  9.2499e-06,\n",
       "             1.6022e-05,  6.6470e-06,  1.2962e-05, -4.1593e-06,  3.8246e-06,\n",
       "            -5.7100e-08, -9.5569e-06,  5.2563e-07, -1.0779e-05, -3.5895e-06,\n",
       "             6.4276e-06,  8.1066e-06, -6.7429e-06,  1.8655e-06,  7.8347e-08,\n",
       "             1.7532e-05,  1.5589e-06, -9.7340e-06, -8.6955e-06,  3.9113e-06,\n",
       "            -7.8905e-06, -1.4196e-05,  5.9050e-06,  8.7137e-06, -1.0761e-05,\n",
       "             1.8299e-08,  2.3725e-06,  3.8753e-09, -4.9672e-06, -1.2630e-05,\n",
       "            -1.1918e-05,  1.3492e-05,  7.8956e-06, -8.6907e-06,  7.5341e-06,\n",
       "            -5.7438e-07,  1.2644e-05, -3.6645e-06,  1.6357e-06,  6.1416e-07,\n",
       "             6.1451e-06,  1.3179e-05, -5.7349e-06, -1.0254e-05,  2.2367e-05,\n",
       "            -3.3435e-06,  5.2359e-06,  6.4785e-06,  8.8151e-06, -1.5206e-05,\n",
       "            -1.0123e-06, -6.0559e-06, -2.9522e-06, -7.6977e-06,  2.0193e-06,\n",
       "             9.9862e-07,  1.3213e-05,  1.8841e-05, -1.5741e-06,  1.8594e-05,\n",
       "            -5.5511e-06,  5.3795e-06,  5.8173e-06,  9.9856e-06,  1.7168e-06,\n",
       "             5.2458e-06, -4.0349e-06, -7.4510e-06, -4.8581e-06,  1.0527e-06,\n",
       "             1.5482e-06, -8.4851e-06, -8.3010e-07, -4.9563e-06, -8.4540e-06,\n",
       "            -2.8933e-06, -1.0417e-05,  2.6483e-06,  1.1294e-06,  1.3931e-05,\n",
       "            -7.9100e-06, -5.2572e-06, -5.0773e-06, -1.4963e-05, -1.4363e-05,\n",
       "             1.1389e-06, -4.3379e-08, -1.3902e-05, -8.1382e-07,  4.0881e-06,\n",
       "            -1.7132e-07, -1.4500e-05,  5.0248e-06,  2.6017e-07,  2.6885e-06,\n",
       "            -6.0439e-06,  1.0152e-05, -1.5060e-05, -9.1810e-06, -1.5288e-05,\n",
       "            -3.2800e-07,  4.3825e-07, -1.0452e-05, -1.5211e-06, -4.0653e-06,\n",
       "             6.0218e-06,  9.4366e-06,  1.2515e-06, -7.2119e-06,  8.7912e-06,\n",
       "             1.3513e-05,  2.0502e-05, -7.1538e-06, -5.1052e-06,  2.3496e-06,\n",
       "            -3.4283e-06, -4.1629e-06, -1.8424e-07, -2.4075e-07,  1.8951e-06,\n",
       "            -3.5250e-07,  3.6017e-06,  2.1066e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.7013e-09, 3.3331e-09, 2.4220e-09, 8.5995e-10, 6.9247e-09, 1.7816e-09,\n",
       "            2.7717e-09, 5.1205e-09, 3.7597e-10, 2.2663e-09, 3.0174e-09, 3.7150e-09,\n",
       "            3.7949e-09, 1.8857e-09, 1.5914e-09, 1.7219e-09, 2.1586e-09, 5.9220e-09,\n",
       "            1.5929e-09, 1.6360e-09, 2.8196e-09, 2.3443e-09, 2.6309e-09, 4.4250e-09,\n",
       "            2.3314e-09, 4.7661e-09, 2.2983e-09, 1.9511e-09, 1.7718e-09, 4.1083e-09,\n",
       "            2.2184e-12, 1.0632e-10, 1.8104e-09, 1.4349e-09, 1.3770e-10, 1.4773e-08,\n",
       "            1.8410e-09, 2.2136e-09, 3.6176e-09, 2.8516e-09, 2.2664e-09, 7.1841e-10,\n",
       "            2.1037e-09, 3.0279e-09, 1.3144e-09, 6.6759e-10, 3.5472e-09, 2.1407e-09,\n",
       "            4.7639e-09, 2.6164e-09, 4.2623e-09, 4.3061e-09, 1.9072e-09, 1.2453e-09,\n",
       "            1.7718e-09, 8.4372e-11, 1.3330e-09, 2.2201e-09, 5.4711e-09, 1.5647e-09,\n",
       "            3.7629e-09, 3.5130e-09, 2.0441e-09, 3.0938e-09, 2.0225e-09, 3.2117e-09,\n",
       "            2.0814e-09, 6.0437e-10, 2.6723e-09, 1.8642e-09, 7.4660e-10, 8.5559e-10,\n",
       "            4.6265e-09, 2.3419e-09, 9.9346e-10, 1.1693e-09, 3.0160e-09, 3.5087e-09,\n",
       "            1.5585e-09, 2.5074e-09, 2.2440e-09, 2.5828e-09, 1.1844e-09, 3.5254e-09,\n",
       "            1.2598e-09, 1.6524e-09, 1.3467e-09, 1.1978e-09, 3.2294e-09, 1.6094e-09,\n",
       "            1.4126e-10, 4.5807e-10, 8.3008e-09, 7.8356e-10, 1.2441e-09, 3.2963e-09,\n",
       "            2.3554e-10, 1.4905e-09, 4.4635e-09, 2.7318e-09, 2.5463e-09, 3.6982e-09,\n",
       "            1.7100e-09, 2.2046e-09, 2.1776e-09, 1.1930e-09, 2.4620e-10, 2.8588e-09,\n",
       "            2.6649e-09, 1.5161e-09, 2.7590e-10, 1.9151e-09, 2.6369e-09, 3.6864e-09,\n",
       "            2.0794e-09, 2.4048e-09, 3.8038e-09, 1.5025e-09, 1.3415e-09, 9.2688e-10,\n",
       "            7.3340e-10, 1.5587e-09, 2.8649e-09, 4.7207e-09, 3.3014e-09, 2.4006e-09,\n",
       "            1.5734e-10, 1.4647e-09], device='cuda:0')},\n",
       "   118: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-3.9760e-05, -1.3181e-05, -1.5941e-05,  ...,  4.7610e-05,\n",
       "              2.3879e-05,  1.4756e-05],\n",
       "            [-1.6608e-05,  1.7137e-05, -2.0058e-05,  ...,  2.1122e-05,\n",
       "             -5.9171e-06, -6.8228e-06],\n",
       "            [-3.5358e-05,  1.1982e-05, -2.3053e-05,  ...,  5.0282e-05,\n",
       "             -7.2691e-06, -1.2317e-05],\n",
       "            ...,\n",
       "            [ 8.2518e-05, -2.1631e-05,  2.7944e-05,  ..., -8.3455e-05,\n",
       "              8.6702e-07,  2.4397e-05],\n",
       "            [ 2.4437e-05, -9.2909e-06,  3.0407e-05,  ..., -3.2167e-05,\n",
       "             -7.9770e-06,  6.5106e-06],\n",
       "            [ 5.0872e-05, -3.1166e-05,  2.9002e-05,  ..., -7.3960e-05,\n",
       "             -7.0212e-06,  1.0466e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.5162e-08, 5.1135e-09, 8.8258e-09,  ..., 3.4207e-08, 8.5942e-09,\n",
       "             4.7850e-09],\n",
       "            [6.0843e-09, 7.4278e-10, 2.2683e-09,  ..., 6.3788e-09, 2.0906e-09,\n",
       "             1.3474e-09],\n",
       "            [2.6538e-08, 2.9169e-09, 1.0380e-08,  ..., 2.9228e-08, 9.7782e-09,\n",
       "             6.4927e-09],\n",
       "            ...,\n",
       "            [2.3578e-08, 3.3132e-09, 9.0270e-09,  ..., 2.5160e-08, 8.7542e-09,\n",
       "             5.3035e-09],\n",
       "            [1.1739e-08, 1.8862e-09, 7.8625e-09,  ..., 1.6873e-08, 1.0431e-08,\n",
       "             7.2447e-09],\n",
       "            [1.3121e-08, 1.5437e-09, 6.5695e-09,  ..., 1.7557e-08, 4.6854e-09,\n",
       "             2.0426e-09]], device='cuda:0')},\n",
       "   119: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-9.7332e-07,  5.2324e-08, -6.8777e-07,  ..., -1.4658e-07,\n",
       "             -2.1945e-08,  2.6314e-07],\n",
       "            [ 2.5507e-06,  6.5725e-06, -1.8263e-06,  ...,  7.2887e-07,\n",
       "             -8.2709e-06, -6.0266e-06],\n",
       "            [ 6.1565e-07, -2.9813e-06, -2.9548e-06,  ..., -3.7919e-07,\n",
       "              4.8146e-06,  2.7309e-06],\n",
       "            ...,\n",
       "            [-7.0416e-06,  1.2925e-05,  1.1735e-05,  ..., -4.6219e-07,\n",
       "             -1.4123e-05, -4.9157e-06],\n",
       "            [-8.4251e-06,  1.6458e-05,  1.5052e-05,  ..., -1.1856e-06,\n",
       "             -1.8057e-05, -5.5218e-06],\n",
       "            [-1.4379e-05,  4.8391e-05,  2.4106e-05,  ..., -1.1607e-06,\n",
       "             -4.8011e-05, -1.8740e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.1862e-12, 1.8487e-11, 5.4638e-12,  ..., 2.5147e-13, 1.0683e-11,\n",
       "             7.4808e-12],\n",
       "            [4.8008e-11, 1.6678e-10, 4.1530e-11,  ..., 3.8743e-12, 9.5369e-11,\n",
       "             5.9471e-11],\n",
       "            [1.6490e-11, 8.6932e-11, 2.0257e-11,  ..., 8.2558e-13, 4.7549e-11,\n",
       "             3.5210e-11],\n",
       "            ...,\n",
       "            [6.1254e-11, 1.0193e-10, 5.8739e-11,  ..., 2.6348e-12, 1.1603e-10,\n",
       "             5.7390e-11],\n",
       "            [3.2056e-10, 5.5014e-10, 3.3468e-10,  ..., 1.9177e-11, 6.3838e-10,\n",
       "             3.8354e-10],\n",
       "            [8.8645e-10, 1.7656e-09, 8.0746e-10,  ..., 5.7472e-11, 1.8788e-09,\n",
       "             9.2930e-10]], device='cuda:0')},\n",
       "   120: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.0326e-06, -1.0023e-06, -1.8895e-07,  ..., -7.7711e-07,\n",
       "              1.5414e-06,  1.7939e-06],\n",
       "            [-3.8145e-06,  2.6808e-06,  6.3681e-07,  ...,  2.3566e-06,\n",
       "             -5.4037e-06, -5.4464e-06],\n",
       "            [-1.2983e-08,  9.5839e-08, -7.1971e-08,  ...,  6.2337e-08,\n",
       "              5.8021e-08, -2.1276e-07],\n",
       "            ...,\n",
       "            [-1.9446e-06,  2.3896e-06, -1.5705e-06,  ...,  1.8180e-06,\n",
       "             -6.9013e-07, -2.0254e-06],\n",
       "            [ 1.4762e-05, -1.4492e-05, -9.6568e-08,  ..., -9.6356e-06,\n",
       "              1.6657e-05,  2.1801e-05],\n",
       "            [-1.4746e-06,  2.2835e-06, -2.1734e-06,  ...,  1.9110e-06,\n",
       "             -7.9240e-07, -8.4998e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.7181e-12, 9.2741e-12, 2.9408e-12,  ..., 1.3464e-12, 1.5080e-11,\n",
       "             1.0236e-11],\n",
       "            [1.4907e-11, 2.5724e-11, 3.8981e-12,  ..., 3.7256e-12, 2.4795e-11,\n",
       "             2.9677e-11],\n",
       "            [1.1052e-11, 2.0973e-11, 2.6301e-12,  ..., 1.8376e-12, 2.1643e-11,\n",
       "             2.7212e-11],\n",
       "            ...,\n",
       "            [3.6008e-11, 5.6956e-11, 5.7216e-12,  ..., 6.0812e-12, 4.5159e-11,\n",
       "             6.5436e-11],\n",
       "            [2.4766e-10, 3.9050e-10, 4.5704e-11,  ..., 6.2376e-11, 3.1495e-10,\n",
       "             4.8088e-10],\n",
       "            [3.3992e-11, 5.9475e-11, 8.6743e-12,  ..., 9.2332e-12, 5.2458e-11,\n",
       "             6.6327e-11]], device='cuda:0')},\n",
       "   121: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.6213e-05, -3.6214e-05,  3.7047e-05,  ..., -3.0140e-05,\n",
       "              3.6819e-05,  2.3367e-05],\n",
       "            [-7.1964e-07,  3.3463e-05, -2.2082e-05,  ...,  2.1972e-05,\n",
       "             -2.9176e-05, -2.2734e-05],\n",
       "            [ 4.0182e-06, -7.2260e-06, -7.1230e-06,  ..., -1.2056e-05,\n",
       "              6.9627e-06, -1.0797e-05],\n",
       "            ...,\n",
       "            [ 1.5277e-06,  2.2670e-05, -2.0140e-06,  ...,  8.3781e-06,\n",
       "             -1.1862e-05,  6.1544e-07],\n",
       "            [-5.1946e-06,  1.0317e-05, -5.5729e-06,  ...,  1.1611e-05,\n",
       "             -1.4785e-05, -1.9665e-05],\n",
       "            [ 3.4963e-06,  7.0045e-06, -1.1315e-05,  ...,  1.5537e-05,\n",
       "             -8.1127e-06, -1.1822e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.8034e-09, 8.3606e-09, 8.6708e-09,  ..., 5.0462e-09, 4.7256e-09,\n",
       "             1.4736e-08],\n",
       "            [3.5909e-09, 6.5607e-09, 5.7582e-09,  ..., 2.0118e-09, 2.6826e-09,\n",
       "             1.1416e-08],\n",
       "            [1.5582e-09, 4.5711e-09, 3.6498e-09,  ..., 2.8184e-09, 3.1799e-09,\n",
       "             6.2988e-09],\n",
       "            ...,\n",
       "            [4.4449e-09, 1.0053e-08, 8.2717e-09,  ..., 6.3097e-09, 8.6461e-09,\n",
       "             1.5886e-08],\n",
       "            [2.6511e-09, 6.4171e-09, 4.8168e-09,  ..., 2.8396e-09, 3.9897e-09,\n",
       "             1.0216e-08],\n",
       "            [1.3021e-09, 3.3353e-09, 2.6359e-09,  ..., 1.8697e-09, 1.8647e-09,\n",
       "             5.3225e-09]], device='cuda:0')},\n",
       "   122: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-7.9339e-05,  5.1060e-05,  3.9670e-06,  7.6968e-06,  1.6512e-05,\n",
       "             1.7778e-05, -1.5993e-05,  5.9236e-07, -9.6785e-05,  5.9547e-05,\n",
       "            -1.6379e-05,  3.3067e-05, -4.6411e-05, -1.7804e-05, -1.3521e-04,\n",
       "            -5.1448e-05, -8.7996e-05, -2.0402e-05,  1.7280e-06, -6.1012e-05,\n",
       "            -8.7922e-05,  5.7070e-05,  4.6343e-05,  7.4398e-05,  4.1943e-05,\n",
       "            -1.8465e-05, -2.3460e-06,  6.7797e-05, -2.8779e-05,  4.9645e-05,\n",
       "             7.3172e-05, -2.9307e-05, -1.2088e-05, -8.7017e-05, -5.4602e-05,\n",
       "             5.8768e-05,  7.0000e-05,  9.0638e-05,  5.1871e-06,  3.8794e-05,\n",
       "            -5.3957e-06,  7.0921e-06,  6.6913e-05, -4.7023e-06,  3.4053e-05,\n",
       "            -3.8517e-05,  4.7393e-05, -1.0413e-05,  5.7891e-05, -2.1861e-05,\n",
       "            -1.3908e-05,  5.6013e-06, -1.4627e-06,  8.4916e-05, -3.7205e-05,\n",
       "            -1.2997e-05,  1.4793e-05,  7.0332e-05,  3.9237e-05, -1.4603e-04,\n",
       "             1.3996e-04,  3.2874e-05, -3.8031e-05, -5.4908e-05, -2.9266e-05,\n",
       "            -2.2662e-05,  7.8864e-06,  1.0248e-05, -1.4441e-04,  9.5957e-06,\n",
       "            -4.1691e-05,  1.1119e-05, -3.5182e-05,  8.9396e-05,  4.3056e-05,\n",
       "             8.6023e-05,  4.4189e-05,  6.4518e-05, -2.9449e-05, -6.4219e-05,\n",
       "            -6.2821e-05, -1.3493e-04,  4.4384e-05,  2.0547e-05, -9.0643e-05,\n",
       "            -7.8335e-05, -7.6139e-05,  5.9327e-05, -1.3252e-05, -4.3248e-05,\n",
       "             4.1605e-05,  3.4556e-05, -1.3541e-05,  4.1275e-05, -8.9112e-05,\n",
       "             9.8176e-05, -4.9977e-05, -2.0807e-05, -1.8240e-05,  3.4813e-05,\n",
       "            -4.2485e-05,  4.4859e-05,  2.0174e-05,  1.0951e-05,  1.5090e-05,\n",
       "            -5.9685e-05, -1.7311e-05, -8.3954e-05, -4.9511e-05, -6.9557e-05,\n",
       "            -1.5097e-05, -5.1599e-05, -8.9630e-05, -3.0625e-05, -1.1867e-05,\n",
       "            -1.4439e-05,  6.6958e-06,  3.0433e-05,  1.6065e-05, -1.6006e-05,\n",
       "            -7.2990e-06, -1.0036e-05, -4.5248e-05,  7.1011e-05, -4.5034e-05,\n",
       "             2.2806e-05,  9.9286e-06,  1.4262e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.8569e-08, 2.6131e-08, 1.7154e-08, 2.8555e-08, 1.7634e-08, 2.4999e-08,\n",
       "            2.7125e-08, 4.1824e-08, 4.0251e-08, 3.6093e-08, 1.6582e-08, 2.1837e-08,\n",
       "            2.2318e-08, 4.2175e-08, 4.0630e-08, 2.1995e-08, 8.7920e-08, 4.0540e-08,\n",
       "            1.6513e-08, 9.8388e-08, 6.5269e-08, 2.0325e-08, 7.2237e-08, 2.8903e-08,\n",
       "            5.1992e-08, 2.8123e-08, 2.6927e-08, 3.0682e-08, 3.6277e-08, 2.2956e-08,\n",
       "            5.8658e-08, 5.2083e-08, 4.8941e-08, 2.6017e-08, 4.7914e-08, 7.0717e-08,\n",
       "            4.6546e-08, 3.2962e-08, 2.9756e-08, 2.3448e-08, 7.1361e-08, 1.2283e-07,\n",
       "            2.2979e-08, 3.9362e-08, 3.5746e-08, 1.4524e-08, 2.8850e-08, 2.2516e-08,\n",
       "            6.7050e-08, 2.4058e-08, 4.0748e-08, 2.1198e-08, 3.2277e-08, 2.3135e-08,\n",
       "            3.3718e-08, 2.1945e-08, 2.8298e-08, 1.3118e-08, 2.6859e-08, 6.3751e-08,\n",
       "            1.1067e-07, 2.2213e-08, 2.0720e-08, 6.5718e-08, 3.0039e-08, 2.5026e-08,\n",
       "            4.2025e-08, 2.9457e-08, 5.3901e-08, 2.7381e-08, 4.4918e-08, 1.2655e-08,\n",
       "            5.5748e-08, 4.0647e-08, 2.5792e-08, 2.3470e-08, 4.4953e-08, 4.5809e-08,\n",
       "            2.5125e-08, 6.8168e-08, 2.6547e-08, 1.1179e-07, 3.3355e-08, 4.3168e-08,\n",
       "            8.6318e-08, 9.1505e-08, 3.2444e-08, 1.5521e-08, 3.9219e-08, 2.2862e-08,\n",
       "            1.3679e-08, 5.2300e-08, 3.6841e-08, 1.6325e-08, 4.1825e-08, 1.3169e-07,\n",
       "            3.0854e-08, 2.8284e-08, 7.9802e-08, 4.2210e-08, 2.3993e-08, 5.2305e-08,\n",
       "            2.0628e-08, 2.5298e-08, 1.9042e-08, 2.3965e-08, 1.5938e-08, 6.1767e-08,\n",
       "            5.0735e-08, 6.5333e-08, 5.8498e-08, 2.2123e-08, 4.0480e-08, 5.1140e-08,\n",
       "            5.7463e-08, 2.7121e-08, 4.6127e-08, 3.0134e-08, 2.5566e-08, 5.9401e-08,\n",
       "            2.5965e-08, 3.6479e-08, 5.1677e-08, 4.6650e-08, 3.2465e-08, 4.1036e-08,\n",
       "            2.5462e-08, 1.2029e-08], device='cuda:0')},\n",
       "   123: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.1964e-07, -3.0635e-05,  2.1016e-06, -1.7631e-06, -1.2221e-05,\n",
       "            -1.2362e-05,  4.2826e-06,  1.9119e-07, -5.7989e-06, -2.3039e-05,\n",
       "             3.4422e-06, -1.1814e-05,  1.2458e-06, -2.2593e-05,  6.1631e-06,\n",
       "            -3.5154e-06,  4.1458e-06, -1.5979e-06,  1.0095e-05,  1.9579e-06,\n",
       "             2.6714e-07,  1.5352e-06, -4.7420e-06, -4.5373e-06,  2.6310e-06,\n",
       "             6.7490e-07, -4.1578e-05,  3.7389e-06,  8.7228e-07,  5.6000e-07,\n",
       "             6.0135e-06,  5.7401e-06, -9.7996e-07, -1.5244e-05, -3.4638e-06,\n",
       "            -1.1813e-05, -7.7159e-06,  2.0588e-06, -1.5808e-05, -4.2917e-06,\n",
       "            -5.6754e-07, -7.6196e-06, -3.1820e-06, -7.1356e-06,  6.6010e-06,\n",
       "             6.1097e-07,  8.3603e-06,  2.1642e-06,  4.4608e-06, -5.8985e-06,\n",
       "             9.1185e-06,  3.3315e-07,  2.0161e-06,  5.1393e-06, -1.8450e-06,\n",
       "             5.9526e-06,  7.1447e-06,  2.2561e-06,  4.7963e-06, -6.3081e-06,\n",
       "            -1.8091e-05,  8.0631e-06, -4.1425e-06, -7.9394e-06, -6.5190e-06,\n",
       "            -8.8276e-06, -1.1604e-05, -4.2067e-07, -1.0944e-05, -8.8400e-06,\n",
       "             6.1528e-06, -1.8101e-05, -1.0467e-05, -7.2129e-06,  1.5562e-06,\n",
       "             6.7842e-07, -8.1943e-06, -7.4826e-06, -1.3980e-05,  2.0455e-06,\n",
       "             7.5971e-07,  2.6863e-06,  1.4369e-05,  3.2723e-06, -1.0827e-06,\n",
       "             3.1921e-06, -5.0014e-06, -3.7397e-06, -1.8837e-06,  4.6771e-06,\n",
       "             1.9228e-06,  3.6810e-06, -4.3823e-06, -6.2323e-06, -6.2741e-06,\n",
       "            -4.3099e-06,  7.8404e-06,  1.8528e-05,  1.6519e-05,  7.4180e-06,\n",
       "            -3.9414e-06,  7.2730e-06, -1.2210e-06,  4.3383e-06, -3.3277e-06,\n",
       "             6.9094e-06, -1.7836e-04,  3.3504e-06,  3.6539e-05,  6.4230e-06,\n",
       "             3.5604e-06, -1.9223e-05,  9.8243e-06, -6.9877e-06,  7.9410e-07,\n",
       "            -3.3528e-05,  4.4707e-06, -1.2189e-05,  1.3423e-05, -1.0356e-05,\n",
       "             8.0449e-06, -3.3143e-06, -1.8957e-06,  1.0190e-05,  2.3892e-05,\n",
       "             4.1285e-07,  6.5788e-06, -4.7478e-08], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([4.1707e-10, 2.5420e-09, 2.0739e-10, 7.8085e-10, 4.2345e-09, 1.6657e-09,\n",
       "            1.0907e-09, 1.0949e-09, 1.0204e-09, 1.0656e-09, 4.2195e-10, 2.2173e-09,\n",
       "            2.4228e-09, 5.4178e-09, 7.2905e-10, 3.3740e-10, 1.8471e-09, 1.3255e-09,\n",
       "            7.1826e-10, 6.1887e-10, 6.0134e-10, 1.3894e-09, 1.4062e-09, 1.3973e-09,\n",
       "            8.2427e-10, 9.1112e-10, 4.3898e-08, 3.0224e-10, 1.2042e-10, 2.9350e-10,\n",
       "            4.8696e-10, 1.6001e-09, 8.4286e-10, 1.8242e-09, 3.5917e-10, 1.0488e-09,\n",
       "            9.9155e-10, 6.3122e-10, 3.4491e-09, 4.7785e-10, 8.4452e-10, 7.2276e-10,\n",
       "            3.2321e-09, 2.5384e-10, 4.9612e-10, 4.7639e-10, 1.8219e-09, 3.1756e-10,\n",
       "            2.7197e-09, 2.0564e-09, 1.2258e-09, 2.0933e-10, 7.6007e-10, 1.9399e-09,\n",
       "            8.2347e-10, 2.8725e-10, 2.4511e-10, 5.9482e-10, 1.4515e-09, 1.0748e-09,\n",
       "            1.5466e-09, 1.4475e-09, 8.3223e-10, 6.8857e-10, 9.9126e-10, 2.3808e-09,\n",
       "            2.4815e-09, 1.3958e-09, 1.8745e-09, 1.3534e-09, 2.6889e-09, 2.8815e-09,\n",
       "            1.0495e-09, 8.0705e-10, 2.7544e-10, 4.1539e-10, 7.1720e-10, 1.1587e-09,\n",
       "            9.4828e-10, 1.1498e-09, 6.2641e-10, 1.2359e-09, 5.0384e-10, 2.3455e-09,\n",
       "            1.1273e-09, 1.0174e-10, 7.1625e-10, 9.0895e-10, 8.0910e-10, 1.8759e-10,\n",
       "            2.3026e-10, 1.6050e-09, 6.4936e-10, 8.7098e-10, 6.2126e-10, 4.9523e-10,\n",
       "            1.9491e-09, 3.4131e-09, 7.0789e-09, 6.6608e-10, 1.6628e-09, 5.2916e-10,\n",
       "            1.7090e-09, 7.5081e-10, 6.0287e-10, 5.1540e-10, 4.7647e-07, 1.6920e-09,\n",
       "            6.1922e-09, 4.7224e-10, 5.1641e-10, 1.1709e-09, 1.3701e-09, 7.5809e-10,\n",
       "            5.5539e-10, 9.3444e-09, 2.2366e-09, 3.0488e-10, 1.4605e-09, 6.2999e-10,\n",
       "            1.3215e-09, 1.5505e-09, 2.0709e-09, 6.6208e-10, 1.5523e-09, 4.4479e-10,\n",
       "            2.7647e-10, 1.9061e-09], device='cuda:0')},\n",
       "   124: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.6926e-07, -2.1812e-07,  1.0755e-07,  ..., -1.4187e-07,\n",
       "             -1.2748e-07,  1.1990e-07],\n",
       "            [ 8.5511e-06, -4.9088e-06,  9.0405e-07,  ...,  6.9792e-07,\n",
       "              4.4357e-08, -3.1017e-06],\n",
       "            [ 2.2034e-07,  1.1652e-07,  3.4716e-08,  ..., -1.6780e-08,\n",
       "             -3.0366e-08,  3.5645e-08],\n",
       "            ...,\n",
       "            [ 1.8892e-06, -1.1708e-06,  3.1428e-07,  ..., -8.8002e-09,\n",
       "             -1.9664e-07, -6.7904e-07],\n",
       "            [-2.2699e-06,  1.2051e-06, -5.1738e-07,  ..., -7.5727e-07,\n",
       "             -7.2423e-08,  1.6396e-07],\n",
       "            [-1.3641e-05,  7.4082e-06, -2.2977e-06,  ..., -9.8858e-07,\n",
       "              3.6041e-07,  4.7915e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.8452e-11, 1.5108e-11, 4.2429e-12,  ..., 7.8533e-12, 8.7486e-12,\n",
       "             7.9986e-12],\n",
       "            [1.4918e-10, 6.2616e-11, 6.7807e-12,  ..., 1.7330e-11, 1.7478e-11,\n",
       "             2.5293e-11],\n",
       "            [6.9222e-12, 3.2720e-12, 5.3327e-13,  ..., 1.6698e-12, 2.7421e-12,\n",
       "             1.6615e-12],\n",
       "            ...,\n",
       "            [1.6902e-11, 7.3590e-12, 3.3050e-13,  ..., 1.1296e-12, 1.0061e-12,\n",
       "             3.9532e-12],\n",
       "            [2.6687e-11, 1.4304e-11, 3.0728e-12,  ..., 1.5325e-11, 1.4962e-11,\n",
       "             1.6458e-11],\n",
       "            [4.5169e-10, 1.8466e-10, 8.9814e-12,  ..., 3.0051e-11, 2.2068e-11,\n",
       "             7.9446e-11]], device='cuda:0')},\n",
       "   125: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 4.7461e-08, -6.0063e-08,  9.1038e-08,  ..., -3.2578e-07,\n",
       "              3.1621e-08, -1.4026e-07],\n",
       "            [-3.1013e-06,  7.3773e-06,  1.2147e-06,  ...,  2.9961e-06,\n",
       "             -4.3574e-06, -7.3280e-06],\n",
       "            [ 1.3976e-08, -2.3979e-07,  5.5695e-09,  ...,  2.3413e-07,\n",
       "             -2.2184e-07, -9.0806e-08],\n",
       "            ...,\n",
       "            [-8.9698e-07,  1.3676e-07, -6.2305e-08,  ...,  1.0259e-07,\n",
       "             -6.5004e-07, -7.9414e-07],\n",
       "            [ 1.7900e-06, -1.6728e-06, -8.2492e-07,  ..., -4.5904e-07,\n",
       "              1.3779e-06,  2.1639e-06],\n",
       "            [ 7.1641e-06, -1.0960e-05, -1.3271e-06,  ..., -5.4103e-06,\n",
       "              8.8177e-06,  1.2248e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.2444e-11, 4.6834e-11, 4.1940e-12,  ..., 5.4111e-12, 3.6018e-11,\n",
       "             4.7168e-11],\n",
       "            [8.1308e-11, 1.4805e-10, 1.4046e-11,  ..., 1.8516e-11, 1.0912e-10,\n",
       "             1.4331e-10],\n",
       "            [2.8738e-12, 7.2094e-12, 1.8159e-12,  ..., 9.0087e-13, 3.6757e-12,\n",
       "             6.0018e-12],\n",
       "            ...,\n",
       "            [7.8932e-12, 1.3410e-11, 1.3888e-12,  ..., 3.4992e-12, 1.1098e-11,\n",
       "             1.4717e-11],\n",
       "            [4.4511e-11, 1.1178e-10, 9.5134e-12,  ..., 9.6649e-12, 8.6085e-11,\n",
       "             1.1820e-10],\n",
       "            [2.8182e-10, 4.9957e-10, 3.3643e-11,  ..., 6.2472e-11, 3.9185e-10,\n",
       "             5.3859e-10]], device='cuda:0')},\n",
       "   126: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.6128e-07,  4.4104e-07, -1.1966e-06,  ..., -9.5709e-07,\n",
       "             -1.7579e-07,  2.5206e-07],\n",
       "            [ 2.2288e-05, -1.2795e-05,  2.1557e-06,  ...,  8.5958e-07,\n",
       "              2.9580e-06, -8.6203e-06],\n",
       "            [-1.3036e-06,  8.3227e-07,  3.7901e-07,  ..., -6.0460e-07,\n",
       "             -2.5541e-07,  5.2225e-07],\n",
       "            ...,\n",
       "            [ 1.3971e-06, -1.2355e-06,  1.5600e-06,  ..., -2.0349e-06,\n",
       "             -5.5730e-07, -1.9692e-06],\n",
       "            [ 3.1281e-07,  1.0019e-06, -1.1819e-06,  ...,  4.6748e-07,\n",
       "             -1.6428e-06,  1.2554e-06],\n",
       "            [ 1.9897e-06, -4.4940e-07,  8.0984e-07,  ...,  1.3966e-06,\n",
       "             -9.4493e-07, -3.0873e-08]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.9147e-11, 4.9188e-11, 1.5034e-11,  ..., 3.5012e-11, 3.3068e-11,\n",
       "             3.0459e-11],\n",
       "            [1.0333e-09, 4.5671e-10, 3.6854e-11,  ..., 6.9656e-11, 9.6028e-11,\n",
       "             1.7934e-10],\n",
       "            [2.4108e-11, 2.4206e-11, 5.7272e-12,  ..., 2.1566e-11, 1.5855e-11,\n",
       "             2.1936e-11],\n",
       "            ...,\n",
       "            [8.6589e-11, 6.1580e-11, 8.7998e-12,  ..., 4.9874e-11, 3.5643e-11,\n",
       "             5.7466e-11],\n",
       "            [4.6048e-11, 2.7795e-11, 8.5499e-12,  ..., 1.6279e-11, 2.8361e-11,\n",
       "             2.1077e-11],\n",
       "            [2.7122e-10, 2.2763e-10, 2.4874e-11,  ..., 1.4043e-10, 1.0001e-10,\n",
       "             1.2536e-10]], device='cuda:0')},\n",
       "   127: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.0991e-06, -3.1481e-06, -2.1594e-06,  ..., -1.4822e-06,\n",
       "              3.0776e-06,  3.5220e-06],\n",
       "            [-1.1455e-05,  1.7819e-05,  1.0167e-06,  ...,  1.1445e-05,\n",
       "             -1.1933e-05, -1.9817e-05],\n",
       "            [-1.2426e-07, -1.7855e-06, -1.1322e-06,  ..., -7.7260e-07,\n",
       "              6.9197e-07,  1.4228e-06],\n",
       "            ...,\n",
       "            [ 3.3318e-08,  1.8002e-06,  4.7963e-07,  ...,  1.1790e-07,\n",
       "             -1.4350e-07, -5.0838e-07],\n",
       "            [-1.2524e-06,  1.5350e-06,  5.1735e-07,  ..., -8.0550e-07,\n",
       "             -1.6955e-06, -2.2130e-07],\n",
       "            [-3.0914e-06,  3.1074e-06, -2.6874e-07,  ...,  2.0326e-06,\n",
       "             -4.5469e-06, -4.3448e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[3.5434e-11, 8.0040e-11, 2.0643e-11,  ..., 1.1233e-11, 5.4587e-11,\n",
       "             6.5228e-11],\n",
       "            [4.7764e-10, 1.0369e-09, 1.0941e-10,  ..., 1.2976e-10, 7.7662e-10,\n",
       "             1.1177e-09],\n",
       "            [1.4364e-11, 2.5241e-11, 1.1302e-11,  ..., 6.8086e-12, 3.2533e-11,\n",
       "             2.8874e-11],\n",
       "            ...,\n",
       "            [2.8386e-11, 1.0313e-10, 1.7517e-11,  ..., 1.2978e-11, 6.9499e-11,\n",
       "             8.2844e-11],\n",
       "            [2.2400e-11, 4.1610e-11, 2.3741e-11,  ..., 4.5209e-12, 3.0010e-11,\n",
       "             2.7651e-11],\n",
       "            [1.3103e-10, 1.8244e-10, 4.2838e-11,  ..., 1.0450e-10, 3.3477e-10,\n",
       "             2.9535e-10]], device='cuda:0')},\n",
       "   128: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-7.5535e-06, -1.5348e-06, -7.8906e-07,  ..., -6.5929e-06,\n",
       "              2.4655e-06, -2.7273e-06],\n",
       "            [-1.9520e-05,  5.1035e-06, -1.3163e-06,  ..., -2.8178e-07,\n",
       "              9.5754e-06, -4.3289e-07],\n",
       "            [-7.8763e-06,  3.7315e-06,  4.5886e-06,  ...,  1.4042e-06,\n",
       "              2.7413e-06,  3.4076e-06],\n",
       "            ...,\n",
       "            [-1.1872e-05,  1.5231e-06,  5.4915e-06,  ..., -2.7652e-06,\n",
       "              5.2032e-06,  4.1181e-07],\n",
       "            [ 7.5365e-07,  2.6760e-06, -1.2536e-05,  ...,  1.7301e-06,\n",
       "             -2.5674e-07,  3.4125e-06],\n",
       "            [ 2.1958e-05, -1.1962e-05, -1.1757e-06,  ...,  1.9843e-06,\n",
       "              2.7218e-06, -1.3091e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.6925e-09, 1.2711e-09, 3.2632e-10,  ..., 9.9189e-10, 9.2537e-10,\n",
       "             7.7234e-10],\n",
       "            [1.5695e-09, 9.1947e-10, 1.9786e-10,  ..., 3.2457e-10, 6.2354e-10,\n",
       "             4.1624e-10],\n",
       "            [1.1546e-09, 1.2126e-09, 2.1989e-10,  ..., 8.1982e-10, 6.3185e-10,\n",
       "             1.0340e-09],\n",
       "            ...,\n",
       "            [1.6988e-09, 1.6135e-09, 5.1048e-10,  ..., 1.0707e-09, 1.1160e-09,\n",
       "             9.1663e-10],\n",
       "            [1.2181e-09, 9.5527e-10, 4.1747e-10,  ..., 8.2349e-10, 6.5796e-10,\n",
       "             5.0148e-10],\n",
       "            [4.3390e-09, 3.0321e-09, 6.6957e-10,  ..., 1.2560e-09, 1.9863e-09,\n",
       "             1.4135e-09]], device='cuda:0')},\n",
       "   129: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-3.4992e-08, -4.7295e-06,  7.3933e-07,  ...,  8.7653e-07,\n",
       "              6.7624e-06,  2.8592e-06],\n",
       "            [ 2.1941e-06, -5.2240e-06, -1.1701e-06,  ..., -3.0934e-06,\n",
       "              1.3925e-05,  9.3741e-06],\n",
       "            [ 1.0402e-06, -8.8320e-07,  2.3863e-06,  ..., -1.9420e-06,\n",
       "              1.2322e-06,  1.4640e-06],\n",
       "            ...,\n",
       "            [ 2.2709e-06, -4.0064e-06, -3.2993e-08,  ..., -1.5654e-06,\n",
       "              9.8569e-06,  8.2086e-06],\n",
       "            [ 2.4760e-06, -2.8956e-06, -1.3038e-06,  ...,  1.2576e-06,\n",
       "              1.6701e-06, -4.3222e-07],\n",
       "            [ 1.1363e-06,  3.9376e-06, -7.5619e-07,  ...,  4.5609e-06,\n",
       "             -6.1582e-06, -9.6201e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0417e-10, 4.1118e-10, 1.2747e-10,  ..., 6.3360e-11, 4.6405e-10,\n",
       "             3.5916e-10],\n",
       "            [1.0558e-10, 3.0082e-10, 4.5435e-11,  ..., 3.9757e-11, 7.0510e-10,\n",
       "             3.6956e-10],\n",
       "            [9.2744e-11, 1.2738e-10, 7.5123e-11,  ..., 3.2871e-11, 5.0496e-10,\n",
       "             1.4258e-10],\n",
       "            ...,\n",
       "            [1.5824e-10, 2.8283e-10, 1.3391e-10,  ..., 4.1734e-11, 5.9005e-10,\n",
       "             2.2803e-10],\n",
       "            [8.9426e-11, 2.6997e-10, 8.0730e-11,  ..., 2.6554e-11, 3.2999e-10,\n",
       "             1.7148e-10],\n",
       "            [2.6851e-10, 8.3744e-10, 1.8960e-10,  ..., 1.1971e-10, 1.5527e-09,\n",
       "             8.8761e-10]], device='cuda:0')},\n",
       "   130: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 8.6906e-06, -1.4944e-05,  1.0384e-05,  8.2684e-06,  4.1354e-06,\n",
       "            -8.3129e-06,  1.0545e-05,  1.4175e-06,  6.2255e-06,  5.9747e-06,\n",
       "             3.5857e-06,  2.8357e-06, -5.4902e-06,  1.9634e-06,  1.1977e-05,\n",
       "             3.2219e-06,  1.0518e-06, -1.1916e-05, -4.6492e-06, -3.7678e-06,\n",
       "             1.4466e-06,  2.7456e-06, -1.2155e-06,  4.9140e-06,  1.9935e-05,\n",
       "             8.2964e-06, -1.4122e-05, -7.5272e-06, -5.4503e-06,  1.7650e-06,\n",
       "            -1.4647e-05, -6.3615e-06, -4.2422e-06,  5.5390e-06,  1.7851e-07,\n",
       "            -5.1118e-07, -1.6122e-05, -1.1858e-05,  1.7652e-05,  1.7430e-05,\n",
       "             9.1695e-06,  3.1081e-06,  4.3430e-07,  1.4004e-07, -4.2911e-06,\n",
       "            -6.6562e-06,  2.9650e-06,  1.6102e-06,  1.7899e-05,  4.8107e-06,\n",
       "             2.7221e-06,  7.3285e-07, -1.0567e-06, -3.4315e-06, -6.9059e-07,\n",
       "            -5.8363e-06,  6.3307e-06, -2.9098e-06, -5.7253e-06, -7.3093e-07,\n",
       "             5.8667e-06,  1.5835e-05,  3.4662e-06, -1.2858e-07, -5.9496e-06,\n",
       "             1.3890e-05, -1.5716e-06, -2.2574e-06,  1.9881e-06,  2.0641e-05,\n",
       "             2.5469e-05, -1.2095e-05,  3.9272e-06, -1.3841e-06,  2.9452e-05,\n",
       "             8.8396e-06,  6.1414e-06,  1.7418e-05,  6.6674e-06,  8.1010e-06,\n",
       "             3.8753e-06,  6.4089e-07, -5.5973e-06,  3.1939e-07, -1.9961e-06,\n",
       "             6.5270e-06, -5.3943e-06, -5.2038e-07,  7.9475e-06, -1.3877e-05,\n",
       "             5.4541e-06, -2.3602e-06,  1.1451e-05, -1.4938e-05, -6.1436e-06,\n",
       "             1.7983e-06,  4.2644e-06,  5.2522e-06, -8.8035e-06, -4.9870e-06,\n",
       "            -9.6915e-06, -6.0565e-07,  2.2451e-06,  2.0495e-05, -4.0800e-06,\n",
       "            -7.8111e-06, -1.5839e-05,  1.8703e-06, -2.5584e-05, -1.4741e-07,\n",
       "            -9.9064e-06,  3.4381e-06, -6.5969e-06, -3.3548e-06,  3.9511e-06,\n",
       "             4.6942e-06,  5.5026e-06, -1.4327e-06, -6.3575e-07,  2.9848e-06,\n",
       "             2.7193e-06, -4.2621e-06, -4.4504e-06,  3.5262e-06, -1.3227e-06,\n",
       "             1.3223e-05, -4.0661e-06, -2.7833e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([8.6375e-10, 4.5834e-10, 2.3048e-09, 1.9288e-09, 3.6805e-09, 1.7875e-09,\n",
       "            1.9805e-09, 1.3768e-09, 5.9684e-10, 9.6242e-10, 4.4997e-10, 2.7739e-09,\n",
       "            1.9582e-09, 6.0761e-09, 8.9781e-10, 1.4204e-09, 1.2108e-09, 1.8772e-09,\n",
       "            1.9701e-09, 6.9351e-10, 8.4891e-10, 4.3559e-09, 4.2419e-09, 2.6509e-09,\n",
       "            5.9296e-09, 9.6665e-10, 7.4784e-09, 1.9670e-09, 1.5716e-09, 1.3667e-09,\n",
       "            4.4986e-09, 2.6523e-09, 1.0540e-09, 9.3615e-10, 5.6711e-10, 1.7823e-10,\n",
       "            1.1785e-09, 3.8769e-09, 3.8682e-09, 3.4701e-09, 1.4986e-09, 5.1422e-10,\n",
       "            2.1739e-09, 2.1362e-09, 4.7178e-10, 1.3339e-09, 3.9380e-10, 6.5873e-10,\n",
       "            2.7130e-09, 6.8067e-10, 1.6817e-09, 6.1019e-09, 4.3094e-10, 1.2246e-09,\n",
       "            1.2597e-09, 4.0302e-10, 3.8282e-09, 7.4725e-10, 9.8258e-10, 1.4322e-09,\n",
       "            5.6524e-10, 6.7559e-09, 1.0034e-09, 2.6525e-09, 2.4760e-09, 1.4693e-09,\n",
       "            5.0651e-10, 1.6999e-09, 3.6649e-09, 3.6232e-09, 4.1390e-09, 1.6754e-09,\n",
       "            4.2046e-10, 2.3739e-09, 2.4114e-09, 1.2502e-09, 6.0787e-10, 2.4669e-09,\n",
       "            2.0841e-09, 6.1737e-10, 8.5283e-10, 3.5274e-09, 2.3811e-10, 2.3795e-09,\n",
       "            3.0601e-10, 1.8571e-09, 2.8910e-09, 1.5251e-09, 1.1448e-09, 1.5064e-09,\n",
       "            9.8656e-09, 5.9209e-10, 3.6697e-09, 1.2815e-09, 2.5947e-09, 3.1938e-10,\n",
       "            2.7108e-09, 1.0561e-09, 1.0874e-09, 1.6244e-09, 1.0460e-09, 1.2804e-10,\n",
       "            1.7272e-09, 3.4528e-10, 1.7776e-09, 5.2527e-10, 7.2879e-08, 1.3848e-09,\n",
       "            1.4040e-09, 3.7675e-10, 1.5915e-09, 2.8742e-09, 1.4274e-09, 3.6926e-10,\n",
       "            9.8671e-10, 5.8174e-10, 1.8106e-09, 2.8846e-09, 2.7476e-09, 4.6448e-10,\n",
       "            6.0700e-10, 7.2724e-10, 1.3701e-09, 1.5650e-09, 4.5671e-10, 1.0829e-09,\n",
       "            1.0448e-09, 3.4267e-09], device='cuda:0')},\n",
       "   131: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.0909e-06,  1.0760e-06,  1.4910e-08,  ...,  8.1998e-08,\n",
       "              1.4628e-06,  1.6258e-06],\n",
       "            [-7.4312e-07, -3.9504e-08,  1.9215e-07,  ...,  3.6602e-08,\n",
       "             -1.1874e-06, -9.7663e-07],\n",
       "            [ 8.4204e-07, -7.5310e-07, -1.0897e-07,  ...,  4.6553e-07,\n",
       "              4.7576e-07,  2.1788e-07],\n",
       "            ...,\n",
       "            [ 5.2535e-08, -4.2569e-07,  1.7052e-07,  ..., -7.3976e-10,\n",
       "              8.8912e-07,  1.7358e-07],\n",
       "            [-1.3651e-06, -3.7902e-07,  6.7256e-08,  ..., -1.4289e-07,\n",
       "             -1.8023e-06, -9.9180e-07],\n",
       "            [ 4.7501e-07,  7.3464e-07, -3.4114e-07,  ...,  5.5817e-07,\n",
       "              8.9380e-08,  1.2959e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[9.3433e-11, 3.0928e-11, 1.1637e-12,  ..., 2.3542e-12, 5.1235e-11,\n",
       "             2.6272e-11],\n",
       "            [4.7418e-11, 3.5701e-12, 5.7213e-13,  ..., 1.5511e-12, 2.2983e-11,\n",
       "             5.7961e-12],\n",
       "            [2.4630e-11, 1.1747e-11, 2.5217e-12,  ..., 3.6757e-12, 8.0877e-12,\n",
       "             2.1713e-12],\n",
       "            ...,\n",
       "            [2.3008e-11, 1.4261e-12, 7.1743e-13,  ..., 4.8344e-13, 1.2475e-11,\n",
       "             6.5151e-12],\n",
       "            [4.8488e-11, 1.7686e-11, 2.4530e-12,  ..., 1.2420e-12, 2.8302e-11,\n",
       "             1.6240e-11],\n",
       "            [7.9001e-11, 2.7122e-12, 2.6586e-13,  ..., 2.5125e-12, 6.7178e-11,\n",
       "             5.2210e-11]], device='cuda:0')},\n",
       "   132: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-6.4057e-07,  1.0894e-07,  2.4101e-07,  ...,  1.6900e-07,\n",
       "             -1.2366e-06, -1.0138e-06],\n",
       "            [ 4.6030e-07, -1.7945e-07,  1.1619e-08,  ..., -3.0690e-07,\n",
       "              9.6897e-07,  9.4638e-07],\n",
       "            [-4.5446e-07,  1.1967e-07,  8.6172e-07,  ...,  5.7450e-07,\n",
       "             -4.0348e-07, -1.2678e-06],\n",
       "            ...,\n",
       "            [-3.5398e-07,  4.7742e-07, -3.0354e-07,  ..., -1.1514e-07,\n",
       "             -5.8814e-08,  5.0217e-07],\n",
       "            [ 1.3935e-06,  1.3492e-07, -1.0375e-06,  ...,  3.1209e-07,\n",
       "              1.6290e-06,  9.5919e-07],\n",
       "            [ 2.4314e-07, -7.3642e-07,  1.1371e-06,  ...,  9.8678e-07,\n",
       "              1.7252e-08, -1.2623e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.0269e-11, 1.6205e-11, 1.6517e-11,  ..., 1.3777e-11, 4.3452e-11,\n",
       "             3.1511e-11],\n",
       "            [7.0441e-12, 1.0100e-11, 2.3451e-12,  ..., 2.5459e-12, 6.5341e-12,\n",
       "             7.6705e-12],\n",
       "            [1.2261e-11, 3.8377e-12, 9.3299e-12,  ..., 3.1851e-12, 6.5808e-12,\n",
       "             4.1151e-12],\n",
       "            ...,\n",
       "            [4.4106e-12, 3.7510e-12, 2.0348e-12,  ..., 4.6621e-12, 7.0350e-12,\n",
       "             8.6411e-12],\n",
       "            [2.2972e-11, 1.1302e-11, 1.0118e-11,  ..., 8.8986e-12, 2.8534e-11,\n",
       "             2.1226e-11],\n",
       "            [1.6836e-11, 1.8561e-11, 4.3154e-12,  ..., 1.8196e-11, 4.1492e-11,\n",
       "             4.8728e-11]], device='cuda:0')},\n",
       "   133: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 4.9172e-07, -3.8315e-06, -1.3388e-06,  ..., -3.5004e-07,\n",
       "             -3.3792e-06,  8.4960e-08],\n",
       "            [ 6.9378e-06,  2.6447e-06,  1.1276e-07,  ...,  1.3266e-06,\n",
       "              1.2624e-05,  1.1025e-05],\n",
       "            [-2.2919e-06, -4.0511e-06, -2.2006e-06,  ..., -2.7739e-08,\n",
       "             -5.9333e-06, -3.1095e-06],\n",
       "            ...,\n",
       "            [-7.4659e-06,  2.1119e-07,  6.0589e-07,  ..., -8.0509e-07,\n",
       "             -1.5621e-05, -1.3774e-05],\n",
       "            [-8.1765e-07,  5.5538e-06, -3.6334e-07,  ..., -2.1165e-07,\n",
       "             -3.6031e-06, -2.3419e-06],\n",
       "            [ 6.9157e-07, -2.0235e-06,  1.7443e-07,  ..., -6.4867e-07,\n",
       "              4.9883e-06,  5.0322e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.7674e-10, 3.4479e-11, 1.4036e-10,  ..., 7.1230e-12, 2.8439e-10,\n",
       "             1.1333e-10],\n",
       "            [2.7831e-10, 1.0609e-10, 2.2979e-12,  ..., 7.5967e-11, 2.8655e-10,\n",
       "             1.9631e-10],\n",
       "            [9.1408e-10, 3.4642e-10, 8.4678e-11,  ..., 6.3850e-11, 5.5141e-10,\n",
       "             3.8047e-10],\n",
       "            ...,\n",
       "            [9.7722e-10, 1.3538e-11, 2.5597e-12,  ..., 1.3529e-11, 7.4920e-10,\n",
       "             5.9419e-10],\n",
       "            [5.3062e-10, 2.8714e-10, 5.7183e-12,  ..., 6.4077e-11, 1.6351e-10,\n",
       "             4.8332e-11],\n",
       "            [2.0205e-09, 2.9452e-10, 1.6824e-11,  ..., 5.9886e-11, 1.2542e-09,\n",
       "             3.1755e-10]], device='cuda:0')},\n",
       "   134: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.0967e-06, -1.1695e-06,  3.7296e-06,  ..., -2.5466e-08,\n",
       "             -3.5885e-06, -3.1889e-07],\n",
       "            [-1.6638e-06,  1.9822e-06, -1.7941e-07,  ...,  3.9678e-06,\n",
       "             -5.5753e-06, -7.2461e-06],\n",
       "            [-2.6564e-06, -2.8580e-06,  3.8713e-06,  ..., -2.8886e-06,\n",
       "             -7.3417e-07,  2.7632e-07],\n",
       "            ...,\n",
       "            [ 6.2704e-06, -4.0041e-06,  1.5835e-06,  ..., -5.6976e-06,\n",
       "              8.8476e-06,  6.5291e-06],\n",
       "            [ 4.4408e-06, -1.8118e-06, -7.4669e-07,  ..., -6.2366e-08,\n",
       "              2.9412e-06, -3.4379e-07],\n",
       "            [-2.9138e-06,  1.7302e-06, -2.1159e-06,  ...,  1.7567e-06,\n",
       "             -2.5121e-06, -1.5410e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.1200e-10, 1.2677e-10, 9.1133e-11,  ..., 7.1837e-11, 1.2590e-10,\n",
       "             1.5817e-10],\n",
       "            [8.3254e-11, 6.1751e-11, 3.0895e-11,  ..., 4.3265e-11, 1.3432e-10,\n",
       "             1.3199e-10],\n",
       "            [3.9322e-10, 1.8912e-10, 2.6106e-10,  ..., 1.5493e-10, 4.5852e-10,\n",
       "             3.6772e-10],\n",
       "            ...,\n",
       "            [1.2746e-10, 1.7379e-10, 4.4270e-11,  ..., 7.3550e-11, 3.2879e-10,\n",
       "             3.8676e-10],\n",
       "            [2.1759e-10, 7.2869e-11, 9.9320e-11,  ..., 5.6334e-11, 1.2932e-10,\n",
       "             9.0127e-11],\n",
       "            [5.0694e-10, 2.6514e-10, 2.4384e-10,  ..., 1.1391e-10, 2.9679e-10,\n",
       "             3.0965e-10]], device='cuda:0')},\n",
       "   135: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.1523e-05, -1.7544e-05,  7.0880e-07,  ...,  5.2560e-07,\n",
       "             -5.6736e-06,  2.9348e-06],\n",
       "            [-3.9706e-06,  4.9385e-05,  8.6845e-07,  ..., -3.3089e-06,\n",
       "             -3.0026e-05, -1.9537e-05],\n",
       "            [-1.1636e-06,  1.4651e-05, -1.6682e-06,  ..., -4.3607e-07,\n",
       "             -5.3327e-06, -3.0629e-06],\n",
       "            ...,\n",
       "            [-4.1663e-06,  4.5602e-06,  1.5583e-07,  ..., -1.8936e-06,\n",
       "             -7.8972e-06, -3.3204e-06],\n",
       "            [ 4.9438e-06, -4.2667e-06,  1.6332e-06,  ...,  2.0834e-06,\n",
       "              1.1341e-05,  9.2823e-06],\n",
       "            [ 1.2397e-05, -3.8788e-06,  2.6030e-06,  ..., -1.1608e-06,\n",
       "              2.8049e-05,  2.4506e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.3985e-09, 2.0261e-09, 8.8361e-11,  ..., 5.7053e-10, 2.3492e-09,\n",
       "             6.7613e-10],\n",
       "            [1.4308e-08, 6.3320e-09, 3.7231e-10,  ..., 1.8723e-09, 8.0273e-09,\n",
       "             4.4915e-09],\n",
       "            [3.5327e-09, 2.2010e-09, 6.7417e-11,  ..., 5.3371e-10, 1.0957e-09,\n",
       "             2.6885e-10],\n",
       "            ...,\n",
       "            [1.8247e-09, 5.2519e-10, 4.7516e-11,  ..., 6.0440e-11, 2.9371e-10,\n",
       "             6.5795e-11],\n",
       "            [7.2226e-09, 4.0985e-09, 1.5387e-10,  ..., 1.9531e-09, 2.3066e-09,\n",
       "             3.4349e-10],\n",
       "            [4.5109e-09, 8.8127e-10, 1.2216e-10,  ..., 3.0223e-10, 3.4401e-09,\n",
       "             1.7236e-09]], device='cuda:0')},\n",
       "   136: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.6356e-07, -1.3597e-06,  8.4934e-07,  ..., -2.0269e-06,\n",
       "              2.7235e-06,  7.4801e-07],\n",
       "            [ 1.5903e-05, -8.1687e-06, -1.3843e-06,  ...,  3.2843e-06,\n",
       "              7.1526e-06, -1.8772e-06],\n",
       "            [ 2.9214e-06, -1.6675e-06,  4.9157e-07,  ..., -1.9610e-06,\n",
       "              1.1639e-06, -2.2118e-07],\n",
       "            ...,\n",
       "            [ 1.1065e-06, -4.7633e-07, -8.3684e-07,  ..., -7.6806e-07,\n",
       "              1.2990e-06,  1.0544e-06],\n",
       "            [-2.2874e-06,  2.4138e-06,  2.6892e-07,  ...,  4.1067e-07,\n",
       "             -1.0515e-06, -2.2413e-06],\n",
       "            [-5.7767e-06,  3.9007e-06, -2.7055e-07,  ...,  3.6430e-06,\n",
       "             -3.7355e-06, -1.2673e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.2645e-10, 4.8311e-10, 1.2382e-10,  ..., 1.8178e-10, 2.8401e-10,\n",
       "             2.3447e-10],\n",
       "            [1.0748e-09, 8.6308e-10, 2.6750e-10,  ..., 2.3987e-10, 6.4449e-10,\n",
       "             6.1746e-10],\n",
       "            [3.0071e-10, 1.7232e-10, 1.1879e-10,  ..., 1.0312e-10, 2.0337e-10,\n",
       "             1.0883e-10],\n",
       "            ...,\n",
       "            [9.7761e-11, 5.3641e-11, 3.5992e-11,  ..., 2.7758e-11, 6.4026e-11,\n",
       "             2.6677e-11],\n",
       "            [4.1976e-10, 2.6916e-10, 2.0468e-10,  ..., 1.6487e-10, 3.1240e-10,\n",
       "             1.1472e-10],\n",
       "            [2.1584e-10, 2.8054e-10, 6.9516e-11,  ..., 5.9427e-11, 8.3020e-11,\n",
       "             1.6092e-10]], device='cuda:0')},\n",
       "   137: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-2.8242e-06,  6.2561e-06, -1.8594e-06,  7.7606e-06, -3.4102e-06,\n",
       "             5.3381e-06, -9.8889e-07,  2.2798e-06, -9.8486e-06,  2.0089e-07,\n",
       "            -2.8749e-06, -4.9834e-06, -1.7878e-06, -2.3370e-06,  6.0601e-07,\n",
       "            -1.3792e-05,  4.5830e-06,  4.7285e-07,  7.0360e-07,  4.2129e-07,\n",
       "             2.9539e-06, -3.7319e-06,  3.5360e-06, -6.4304e-06, -1.4345e-05,\n",
       "             4.1301e-06, -2.3710e-05, -8.7275e-06,  6.6351e-06, -2.4257e-06,\n",
       "             1.1640e-06, -1.1095e-05, -4.7353e-06, -1.8160e-06, -3.9960e-06,\n",
       "             1.0848e-05, -6.0059e-06, -6.1896e-07, -8.4061e-06, -1.9961e-05,\n",
       "             3.0051e-06,  1.7625e-06,  3.0492e-06, -1.5830e-06,  6.7505e-06,\n",
       "             7.0696e-07, -1.2591e-05, -1.4124e-05,  2.6141e-06,  1.2262e-06,\n",
       "            -2.6974e-06, -3.6407e-06, -1.2069e-06, -1.0212e-05, -7.4362e-06,\n",
       "            -1.8955e-06, -5.4610e-06, -7.7539e-06, -1.2110e-05, -5.4808e-06,\n",
       "            -1.9426e-06, -8.3900e-06, -8.8683e-06, -1.2281e-05,  2.7816e-06,\n",
       "             2.5831e-06, -8.5116e-06, -3.3768e-06, -1.0241e-05, -3.3211e-06,\n",
       "             1.2541e-06, -8.1289e-06,  7.4615e-06,  5.3096e-07,  5.2638e-06,\n",
       "            -1.6454e-06, -1.0437e-06, -1.3554e-06, -1.1049e-05,  5.0300e-06,\n",
       "            -4.1544e-07,  4.6031e-06, -7.4444e-07, -5.2532e-07, -1.7632e-06,\n",
       "            -1.9383e-06, -7.3946e-06,  7.6436e-07,  3.0022e-07, -2.0571e-06,\n",
       "            -1.1921e-06,  3.3620e-06, -7.6934e-06,  3.7942e-06, -1.5508e-05,\n",
       "            -1.4473e-06, -2.3895e-07, -1.1983e-06, -1.0691e-05, -2.3955e-08,\n",
       "             2.8517e-06, -2.5519e-06, -1.5784e-06,  8.4810e-08,  9.0772e-06,\n",
       "            -1.0181e-05,  2.6733e-05,  3.2401e-06, -1.3258e-05, -3.0588e-07,\n",
       "             9.7787e-07,  3.1548e-06, -3.1291e-06,  2.2444e-06,  4.5620e-06,\n",
       "            -4.1362e-06, -1.0401e-05, -1.0182e-05, -4.3544e-06, -2.1041e-05,\n",
       "            -6.2841e-06,  8.3541e-06, -1.9040e-06,  3.6072e-06, -1.9156e-06,\n",
       "             5.1791e-06, -6.3802e-06,  1.4311e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.3147e-10, 1.5803e-10, 1.5662e-11, 4.7236e-10, 3.1722e-10, 1.4509e-10,\n",
       "            1.7599e-09, 8.5502e-11, 1.0303e-09, 1.4968e-10, 3.5954e-11, 7.5503e-11,\n",
       "            6.0859e-10, 1.8992e-10, 1.1987e-10, 1.8651e-10, 3.7325e-10, 3.1332e-11,\n",
       "            1.1278e-10, 1.9770e-11, 2.3508e-10, 1.4266e-10, 1.2189e-10, 7.9126e-10,\n",
       "            2.3615e-10, 5.4405e-11, 3.2783e-09, 3.1864e-10, 1.5315e-10, 2.1929e-10,\n",
       "            2.7341e-11, 2.2880e-10, 1.2589e-10, 2.1229e-10, 1.9178e-09, 2.0738e-10,\n",
       "            1.3870e-10, 3.6851e-11, 7.1128e-10, 1.5450e-09, 8.9265e-11, 5.0999e-10,\n",
       "            4.9630e-11, 9.6724e-12, 4.4554e-11, 7.6419e-11, 1.5046e-09, 2.5576e-10,\n",
       "            1.8558e-11, 4.6086e-10, 4.6421e-10, 2.8856e-10, 3.6130e-11, 2.8334e-10,\n",
       "            1.4494e-09, 3.7925e-10, 6.9277e-10, 4.1463e-10, 1.3344e-10, 4.5597e-10,\n",
       "            1.8767e-11, 6.9095e-11, 1.1090e-09, 1.9476e-09, 1.2257e-10, 2.8930e-11,\n",
       "            1.6816e-09, 2.4012e-10, 4.4523e-10, 7.9062e-11, 3.3018e-11, 1.2026e-10,\n",
       "            2.3157e-10, 8.0393e-11, 1.4444e-10, 1.6831e-11, 2.6328e-09, 9.1763e-11,\n",
       "            6.8803e-11, 3.5476e-11, 1.4393e-10, 6.2275e-11, 8.6575e-11, 6.1489e-11,\n",
       "            9.9424e-11, 4.0262e-10, 5.1283e-10, 1.5918e-11, 6.9116e-11, 2.3358e-10,\n",
       "            2.0396e-11, 6.7016e-11, 1.0094e-09, 3.6441e-11, 5.7858e-10, 1.5376e-10,\n",
       "            2.2220e-10, 3.4300e-11, 5.2814e-10, 5.8410e-11, 1.0047e-10, 1.5646e-11,\n",
       "            1.1702e-10, 2.3720e-11, 1.8154e-10, 1.1672e-09, 3.6654e-08, 6.7909e-11,\n",
       "            7.9198e-10, 1.8499e-11, 1.2836e-11, 8.2476e-11, 2.2097e-10, 3.3231e-11,\n",
       "            9.6773e-11, 1.6570e-10, 2.4222e-09, 2.2993e-10, 4.1384e-10, 1.0537e-09,\n",
       "            9.7090e-10, 1.4785e-10, 9.0674e-11, 2.9455e-10, 5.1074e-11, 2.0654e-11,\n",
       "            5.4049e-10, 1.5029e-10], device='cuda:0')},\n",
       "   138: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 3.9744e-06,  1.0886e-05, -1.3724e-06,  1.7911e-05,  7.1654e-06,\n",
       "            -5.0260e-06,  2.2696e-05, -4.3566e-06,  1.3319e-06,  3.3086e-06,\n",
       "            -6.9333e-06, -6.3877e-06, -6.4299e-06, -8.5013e-07, -1.1767e-06,\n",
       "             3.7481e-06,  1.3107e-05,  1.8216e-06,  2.3262e-05, -1.5644e-06,\n",
       "            -4.3183e-06,  1.2064e-06, -4.7124e-06, -9.6108e-06,  5.9801e-06,\n",
       "            -6.6920e-06, -4.6737e-06, -3.0956e-06,  1.3674e-05,  1.0810e-06,\n",
       "            -1.5601e-06, -2.9455e-06, -3.2495e-06, -1.8365e-06,  5.8659e-06,\n",
       "             1.6643e-05, -1.0506e-05, -2.7091e-06, -3.5514e-06,  7.4555e-06,\n",
       "            -2.3535e-06, -1.8943e-06,  2.0826e-07, -4.4056e-06, -1.1516e-05,\n",
       "            -3.3239e-06, -1.6725e-05,  2.0379e-05, -6.8360e-06, -1.1894e-05,\n",
       "            -9.2591e-06, -5.2614e-07,  1.5802e-06, -9.2298e-06,  2.9490e-06,\n",
       "            -1.6847e-05,  2.5065e-06, -3.7911e-07,  1.1273e-05, -8.3790e-06,\n",
       "            -4.9905e-06, -9.8734e-06,  9.7856e-06, -1.3788e-05, -4.5215e-06,\n",
       "             5.5821e-06, -5.3923e-07, -6.9667e-06,  8.9897e-06,  1.2429e-06,\n",
       "             9.0950e-07,  4.1822e-06,  1.2894e-05, -6.7933e-06, -7.5460e-06,\n",
       "            -3.7972e-06,  7.4889e-06,  6.5753e-06,  6.3643e-06,  1.2264e-05,\n",
       "             6.4374e-06, -1.1761e-05, -1.5271e-06,  3.8636e-07,  2.5496e-07,\n",
       "            -2.7139e-06,  5.6925e-06, -3.4642e-06, -1.6264e-06, -6.7368e-06,\n",
       "             1.4025e-05,  9.4556e-06,  7.6039e-06, -1.6552e-06,  1.5557e-05,\n",
       "             4.0102e-06,  4.7562e-06, -4.1548e-06,  5.2856e-06,  5.5628e-06,\n",
       "            -5.6325e-06,  5.3316e-06, -1.4894e-05,  2.3154e-06,  8.9688e-06,\n",
       "            -5.6697e-06,  4.2268e-06, -4.6957e-06,  6.7359e-06,  6.6205e-07,\n",
       "             1.1884e-06, -7.2838e-06,  2.5746e-05, -7.5185e-07, -4.9904e-06,\n",
       "             2.3371e-07, -3.6331e-06,  8.8596e-06, -8.2504e-06,  1.3731e-05,\n",
       "             9.2892e-06, -1.4359e-05, -8.2816e-06, -6.3832e-06, -3.2417e-07,\n",
       "             1.1601e-05,  6.1165e-06, -1.1695e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.5846e-10, 1.9301e-10, 1.6684e-10, 7.6466e-10, 2.1814e-10, 1.4098e-10,\n",
       "            2.1024e-09, 1.3087e-10, 5.9873e-10, 2.1560e-10, 1.1774e-10, 2.1548e-10,\n",
       "            7.7681e-10, 4.6235e-10, 1.3870e-10, 2.9491e-10, 8.8336e-10, 8.2681e-11,\n",
       "            9.3365e-10, 1.2907e-10, 3.9774e-10, 1.5278e-10, 1.8445e-10, 8.5502e-10,\n",
       "            1.8387e-10, 4.2877e-10, 9.1655e-10, 2.6275e-10, 3.9487e-10, 3.6345e-10,\n",
       "            3.9106e-11, 1.4264e-10, 1.8447e-10, 1.8174e-10, 8.9536e-10, 3.8499e-10,\n",
       "            4.5820e-10, 1.3721e-10, 3.4648e-10, 9.7253e-10, 7.8055e-11, 7.1664e-10,\n",
       "            2.0576e-10, 4.9684e-11, 1.6897e-10, 4.0328e-10, 1.1691e-09, 1.0414e-09,\n",
       "            2.0440e-10, 5.8010e-10, 6.0085e-10, 3.9019e-10, 1.9042e-10, 2.3885e-10,\n",
       "            8.5875e-10, 9.9744e-10, 4.5081e-10, 4.4703e-10, 3.1520e-10, 9.0008e-10,\n",
       "            7.0505e-11, 2.6226e-10, 1.0061e-09, 1.1823e-09, 1.7321e-10, 1.1205e-10,\n",
       "            7.2252e-10, 3.8478e-10, 4.6537e-10, 5.1195e-10, 6.3610e-11, 1.4575e-10,\n",
       "            4.1245e-10, 1.1515e-10, 5.1003e-10, 7.9907e-11, 1.2962e-09, 9.9809e-11,\n",
       "            1.2225e-10, 3.8309e-10, 1.4601e-10, 3.4671e-10, 1.5323e-10, 7.6626e-11,\n",
       "            1.2443e-10, 3.0035e-10, 4.2418e-10, 1.0434e-10, 1.5109e-10, 9.0277e-10,\n",
       "            1.6667e-10, 2.9738e-10, 6.4624e-10, 9.5916e-11, 5.3866e-10, 2.5321e-10,\n",
       "            2.1247e-10, 9.6335e-11, 4.9437e-10, 1.0565e-10, 1.1130e-10, 1.3842e-10,\n",
       "            6.0291e-10, 1.1177e-10, 2.4453e-10, 5.0839e-10, 5.5407e-10, 1.2537e-10,\n",
       "            5.4006e-10, 1.4056e-10, 1.0005e-10, 1.6827e-10, 1.0752e-09, 6.7129e-11,\n",
       "            2.2577e-10, 9.9268e-11, 1.1332e-09, 2.7321e-10, 5.7432e-10, 1.3234e-09,\n",
       "            7.4005e-10, 2.9373e-10, 2.6972e-10, 3.5081e-10, 2.1346e-10, 2.1121e-10,\n",
       "            4.7571e-10, 3.0707e-10], device='cuda:0')},\n",
       "   139: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.7990e-05, -9.2951e-06,  2.0346e-05, -9.6176e-06,  2.0402e-05,\n",
       "            -2.4047e-05,  1.2420e-06, -1.9412e-05, -3.4247e-06, -1.0126e-05,\n",
       "             1.0655e-05, -5.7073e-06,  1.2551e-05, -1.7571e-05, -7.5040e-06,\n",
       "            -1.1094e-05, -1.1187e-05, -1.2128e-06, -3.6134e-06, -7.4856e-06,\n",
       "             1.8479e-06, -1.2651e-05,  1.2576e-05, -1.6330e-05, -2.1144e-05,\n",
       "            -8.3075e-06,  1.1906e-05, -3.5560e-06, -6.2928e-06, -1.6830e-06,\n",
       "            -2.7543e-06,  2.1107e-05, -3.7995e-06,  8.7659e-07, -2.3097e-05,\n",
       "             4.5096e-06,  9.4063e-08, -5.1004e-06, -7.1138e-06, -8.6689e-06,\n",
       "            -6.7348e-07,  8.0950e-06,  2.9159e-06,  7.9658e-06,  1.5506e-05,\n",
       "             2.9738e-06,  6.2773e-06, -1.1741e-05, -1.9869e-07,  2.3566e-05,\n",
       "             1.0420e-05, -1.9364e-05, -9.3159e-06, -1.6409e-05, -2.4929e-05,\n",
       "            -2.9899e-06, -6.6281e-06, -5.3876e-06,  6.4967e-06,  2.3507e-07,\n",
       "             1.3134e-05, -2.4859e-05, -2.6033e-06, -1.4265e-05,  6.3928e-06,\n",
       "            -3.7943e-06, -2.1294e-05,  9.3778e-06,  8.8045e-06,  7.6257e-06,\n",
       "            -2.5791e-06, -8.9022e-06, -1.1458e-06, -2.1076e-05, -5.3712e-06,\n",
       "             1.1994e-05, -1.2348e-05, -1.2106e-05, -1.4747e-05, -1.4484e-05,\n",
       "            -9.9719e-06, -1.0402e-05,  4.3805e-06, -1.7638e-05, -4.6221e-06,\n",
       "             1.1483e-05,  6.9566e-06, -1.7593e-06, -1.6594e-05, -6.7147e-06,\n",
       "            -1.6496e-06, -9.6767e-06, -1.9933e-05, -1.6039e-05, -2.0437e-05,\n",
       "            -8.1760e-06,  5.7947e-06, -3.6805e-06,  1.3810e-05, -1.1692e-05,\n",
       "             9.2291e-06,  1.3031e-05,  6.4662e-06, -1.6446e-07, -9.4235e-06,\n",
       "            -5.3394e-06, -1.7727e-04,  1.0978e-07, -2.6791e-05, -7.1708e-06,\n",
       "             1.2836e-05,  1.2574e-05, -1.7930e-05, -4.5389e-06, -2.3397e-06,\n",
       "            -3.7727e-06,  1.9570e-06,  1.0381e-05,  6.6969e-06,  3.2389e-06,\n",
       "            -2.9266e-06, -1.5348e-06, -6.7143e-06,  2.9390e-05,  2.2683e-06,\n",
       "             6.1978e-06, -8.0935e-06, -3.7184e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([5.8255e-09, 2.7288e-09, 1.9136e-09, 1.3125e-09, 3.2981e-09, 9.6649e-09,\n",
       "            1.9763e-09, 6.3583e-09, 2.4646e-09, 4.3077e-09, 1.5488e-09, 2.5283e-09,\n",
       "            3.9313e-09, 2.8029e-09, 1.1290e-09, 1.3907e-09, 3.2650e-09, 1.8652e-09,\n",
       "            1.8389e-09, 2.0366e-09, 8.0780e-10, 5.6405e-09, 3.8051e-09, 1.0759e-08,\n",
       "            5.8896e-09, 1.2742e-09, 3.8059e-09, 4.9974e-09, 4.4026e-09, 1.9364e-09,\n",
       "            4.1244e-09, 2.2190e-09, 6.8545e-10, 2.0567e-09, 3.9128e-09, 1.1805e-09,\n",
       "            1.3837e-09, 1.2125e-09, 2.7233e-09, 4.3914e-09, 6.9281e-09, 1.5043e-09,\n",
       "            2.0958e-09, 9.7616e-10, 1.2058e-09, 1.7854e-09, 4.4063e-09, 1.0555e-09,\n",
       "            1.9919e-09, 1.8849e-09, 1.6154e-09, 4.7479e-09, 2.1799e-09, 2.1275e-09,\n",
       "            4.3150e-09, 1.1089e-09, 4.7032e-09, 2.8278e-09, 1.0079e-09, 1.1047e-09,\n",
       "            1.5252e-09, 6.6321e-09, 3.1575e-09, 5.7768e-09, 4.7350e-09, 3.0948e-09,\n",
       "            5.3830e-09, 1.0220e-09, 2.0269e-09, 2.7654e-09, 7.2922e-09, 2.3110e-09,\n",
       "            1.2532e-09, 1.2810e-09, 1.4875e-09, 2.2171e-09, 6.6908e-09, 3.1428e-09,\n",
       "            4.1835e-09, 2.9116e-09, 3.5406e-09, 2.6631e-09, 1.4315e-09, 3.3249e-09,\n",
       "            2.5580e-09, 2.8580e-09, 2.4006e-09, 1.5116e-09, 2.2383e-09, 1.5494e-09,\n",
       "            1.6995e-09, 5.9518e-09, 9.4299e-09, 5.9115e-09, 3.3157e-09, 8.6031e-10,\n",
       "            1.0383e-08, 3.0504e-09, 2.8867e-09, 3.8850e-09, 1.7883e-09, 1.8365e-09,\n",
       "            4.2446e-09, 3.9876e-09, 2.8955e-09, 8.1676e-09, 4.1064e-07, 2.1227e-09,\n",
       "            1.1264e-08, 1.6867e-09, 1.7486e-09, 5.2740e-10, 3.6086e-09, 1.1078e-09,\n",
       "            1.0393e-09, 1.3402e-09, 4.5347e-10, 2.8104e-09, 2.0080e-09, 1.1034e-09,\n",
       "            1.6236e-09, 8.7161e-09, 1.7114e-09, 6.4591e-09, 2.2445e-09, 1.7147e-09,\n",
       "            2.8179e-09, 1.9473e-08], device='cuda:0')},\n",
       "   140: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.1023e-05, -2.4341e-06,  1.1291e-05,  3.0341e-06,  1.8490e-05,\n",
       "             3.0561e-05,  1.1455e-05,  7.5826e-06,  7.0540e-06, -8.7091e-07,\n",
       "            -1.5460e-05,  1.1506e-05, -6.0644e-06,  9.3229e-06,  3.4020e-06,\n",
       "             3.6288e-06,  2.0669e-05, -2.4275e-06,  2.0908e-06, -4.8380e-06,\n",
       "            -1.0945e-05, -5.5921e-06, -9.5978e-06, -1.7975e-05,  1.3144e-05,\n",
       "             2.1037e-05, -1.7516e-05, -1.2904e-05, -1.7509e-05,  6.1515e-07,\n",
       "             1.7438e-05, -9.9536e-06,  1.1315e-07,  1.5575e-05, -1.6036e-05,\n",
       "            -4.6513e-06, -2.5818e-06, -8.5653e-07, -2.4955e-05,  1.5021e-05,\n",
       "             1.1762e-05, -1.1079e-05,  1.4834e-06, -1.8921e-06, -1.1696e-05,\n",
       "            -1.2523e-05, -1.1013e-05,  1.5844e-05, -2.5175e-06,  4.2026e-06,\n",
       "             9.5149e-06, -1.9634e-05,  5.8515e-06, -4.2153e-06,  1.3210e-05,\n",
       "             6.5467e-07,  6.5365e-06, -6.8628e-06,  2.4186e-06,  1.8508e-05,\n",
       "            -2.7694e-05,  2.0032e-05,  7.1399e-06,  4.0358e-06, -1.4091e-05,\n",
       "            -3.0705e-05, -1.1316e-05, -1.7181e-06, -1.6336e-05,  1.2032e-05,\n",
       "            -3.4275e-05,  1.1569e-05,  3.5418e-06, -3.6723e-06,  1.4658e-06,\n",
       "            -2.8533e-05, -1.0270e-05,  1.2001e-05, -7.4040e-06, -1.2215e-05,\n",
       "             6.1548e-06,  8.0941e-07, -2.0383e-06, -1.9193e-05, -2.0632e-05,\n",
       "            -2.2046e-05,  2.6842e-06, -7.6654e-06,  6.1182e-06,  2.3315e-05,\n",
       "             4.4936e-06,  3.9110e-06,  2.3067e-05,  4.2054e-05, -1.9357e-07,\n",
       "             1.8926e-05,  3.1397e-05,  1.0118e-05, -2.1235e-05, -3.4254e-05,\n",
       "             3.0864e-06, -7.8842e-06, -8.3186e-06,  1.0885e-06,  9.6293e-06,\n",
       "             5.7353e-06, -1.7447e-05, -1.2037e-05,  3.4882e-05, -2.5833e-07,\n",
       "            -1.4031e-06,  4.8216e-06, -8.3066e-06,  7.1572e-06, -1.1834e-05,\n",
       "             2.3123e-06, -9.6906e-07,  2.3226e-05, -1.7170e-05, -2.1325e-05,\n",
       "             2.4678e-06,  2.3505e-05,  1.8473e-06, -4.8414e-05, -3.1448e-06,\n",
       "             1.5557e-05,  6.1406e-06,  3.5407e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.8806e-09, 4.7972e-09, 3.0961e-09, 3.5670e-09, 5.3445e-09, 6.9758e-09,\n",
       "            3.3889e-09, 4.6973e-09, 3.4437e-09, 6.7718e-09, 2.2212e-09, 2.5238e-09,\n",
       "            5.6232e-09, 3.4679e-09, 2.7190e-09, 2.7946e-09, 3.0835e-09, 2.5472e-09,\n",
       "            4.3658e-09, 3.0990e-09, 2.2675e-09, 4.1308e-09, 6.7879e-09, 5.4889e-09,\n",
       "            5.4181e-09, 3.3797e-09, 3.1155e-09, 5.1708e-09, 4.2968e-09, 2.4374e-09,\n",
       "            5.9564e-09, 3.0999e-09, 2.9495e-09, 4.5269e-09, 3.0669e-09, 2.3027e-09,\n",
       "            5.3524e-09, 2.0164e-09, 3.8200e-09, 2.6489e-09, 7.2579e-09, 2.8078e-09,\n",
       "            7.9621e-09, 2.5240e-09, 3.2906e-09, 4.6763e-09, 4.3455e-09, 4.7405e-09,\n",
       "            2.5466e-09, 4.9078e-09, 2.9537e-09, 4.9702e-09, 4.1117e-09, 2.6485e-09,\n",
       "            3.4582e-09, 2.4562e-09, 4.4123e-09, 4.1558e-09, 2.5784e-09, 2.7523e-09,\n",
       "            4.1094e-09, 5.7241e-09, 3.7784e-09, 2.7972e-09, 4.8177e-09, 6.3149e-09,\n",
       "            2.8259e-09, 2.1861e-09, 4.2215e-09, 2.6401e-09, 6.1606e-09, 2.2918e-09,\n",
       "            1.9474e-09, 2.9703e-09, 2.7175e-09, 5.9072e-09, 2.5308e-09, 2.2030e-09,\n",
       "            3.3662e-09, 4.5600e-09, 3.3674e-09, 3.4484e-09, 1.9619e-09, 5.2452e-09,\n",
       "            5.5561e-09, 2.8067e-09, 3.9826e-09, 3.6632e-09, 2.5940e-09, 3.0630e-09,\n",
       "            3.0894e-09, 6.8200e-09, 1.2299e-08, 8.0246e-09, 4.3769e-09, 2.4776e-09,\n",
       "            8.1682e-09, 5.7132e-09, 5.3972e-09, 5.5742e-09, 3.4677e-09, 2.1969e-09,\n",
       "            6.2054e-09, 5.8546e-09, 3.3310e-09, 2.4832e-09, 1.3493e-08, 4.1586e-09,\n",
       "            1.1184e-08, 2.1153e-09, 3.8458e-09, 2.2909e-09, 3.2514e-09, 2.7692e-09,\n",
       "            2.8740e-09, 2.2996e-09, 2.0053e-09, 6.6003e-09, 3.6571e-09, 2.1377e-09,\n",
       "            1.6644e-09, 6.9439e-09, 4.2475e-09, 8.3314e-09, 3.5838e-09, 4.1973e-09,\n",
       "            2.8534e-09, 1.3434e-08], device='cuda:0')},\n",
       "   141: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-3.8946e-05, -4.7638e-05, -7.4331e-06,  2.3672e-05,  1.1267e-05,\n",
       "            -5.3159e-05,  2.2180e-05, -3.5873e-05,  2.7651e-05,  7.3253e-06,\n",
       "            -6.3846e-06,  7.1158e-05, -1.8151e-04,  7.1242e-05,  6.8809e-05,\n",
       "            -9.5698e-05,  4.3629e-05, -2.0901e-05, -1.7480e-06,  1.8744e-05,\n",
       "            -1.4745e-05,  1.9190e-06, -5.2441e-05,  1.1223e-05,  7.1805e-05,\n",
       "             3.2611e-05, -8.3199e-06, -3.0098e-05, -4.5090e-05,  1.1833e-05,\n",
       "            -1.0087e-05,  1.5753e-06, -8.0938e-05,  3.7603e-06, -2.3498e-05,\n",
       "            -2.0896e-05,  2.9045e-05,  8.7860e-06, -3.0401e-05,  2.4796e-05,\n",
       "            -9.3982e-06,  7.1275e-06,  2.6840e-06,  1.7881e-06, -2.0129e-05,\n",
       "            -3.7997e-06,  2.3734e-05,  1.7457e-05, -9.3686e-06,  1.7417e-07,\n",
       "             1.0560e-05, -4.2403e-05, -6.9316e-06,  1.6372e-05, -4.7859e-06,\n",
       "             2.2570e-05, -3.0853e-05,  9.8221e-06, -5.2668e-05,  1.0991e-05,\n",
       "             1.4062e-05, -3.7199e-06,  7.2867e-06, -1.9610e-05, -4.7649e-04,\n",
       "            -1.7327e-05,  1.2521e-05, -5.6287e-06, -6.2923e-05, -3.6372e-05,\n",
       "             5.9272e-05,  1.6419e-05, -1.9368e-05, -1.1160e-05, -1.0838e-04,\n",
       "            -6.0299e-05, -3.3346e-05,  1.8264e-05, -2.5288e-06,  5.7274e-05,\n",
       "             4.9750e-05, -8.6726e-06,  5.6639e-05, -1.4221e-05, -4.1314e-05,\n",
       "             2.2840e-05,  1.4136e-05,  7.6534e-05,  1.0852e-04,  5.9198e-05,\n",
       "            -1.6614e-05,  3.1368e-05,  1.3459e-05,  4.0097e-05,  2.3629e-05,\n",
       "             3.4039e-06,  1.9746e-05, -8.7322e-06, -3.4302e-05, -6.1670e-05,\n",
       "             4.6751e-05,  2.2189e-06, -3.6600e-05,  7.8454e-06,  8.9744e-05,\n",
       "            -5.7609e-05, -4.6806e-04,  4.5066e-06,  3.3311e-05,  1.3625e-04,\n",
       "            -2.7874e-05, -9.5887e-07, -3.0161e-05,  6.1727e-05, -7.7537e-06,\n",
       "            -3.9367e-05, -1.2135e-05,  5.0822e-05,  1.1020e-05, -2.3851e-05,\n",
       "             2.5834e-05,  5.7122e-06, -1.5818e-05,  1.8382e-05, -4.4801e-05,\n",
       "            -3.3212e-05, -1.6431e-05,  1.4326e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([6.4001e-09, 1.2635e-08, 2.1117e-08, 5.0517e-09, 2.7453e-08, 3.9930e-09,\n",
       "            2.3564e-08, 3.8517e-08, 5.0412e-09, 3.2586e-08, 3.7145e-09, 2.0023e-08,\n",
       "            8.3067e-08, 1.5399e-08, 1.9739e-08, 3.3580e-08, 3.0142e-08, 1.0934e-08,\n",
       "            3.3871e-09, 1.3352e-08, 1.7347e-07, 3.8567e-08, 1.9411e-08, 6.2052e-09,\n",
       "            5.3922e-08, 6.3985e-09, 3.4857e-09, 9.1363e-09, 1.8516e-08, 3.8229e-09,\n",
       "            7.8408e-09, 1.6681e-07, 3.7128e-08, 1.6565e-08, 1.3236e-08, 3.4419e-09,\n",
       "            6.4974e-09, 8.0832e-09, 1.0201e-08, 4.2033e-09, 3.1618e-08, 1.5728e-09,\n",
       "            1.4688e-09, 6.1559e-09, 2.9622e-08, 1.4764e-09, 1.5545e-08, 7.3634e-09,\n",
       "            1.9517e-09, 1.2225e-08, 5.5336e-09, 1.8896e-08, 1.7926e-09, 1.2767e-08,\n",
       "            1.6509e-08, 1.9851e-09, 1.1061e-08, 3.3147e-09, 1.9796e-08, 1.4546e-08,\n",
       "            6.2161e-09, 1.8948e-09, 4.3491e-09, 2.6255e-09, 3.9702e-06, 5.1015e-09,\n",
       "            6.1178e-08, 6.1404e-09, 3.0457e-08, 2.3117e-09, 1.8923e-08, 1.2923e-08,\n",
       "            3.5217e-09, 3.4263e-09, 5.5250e-08, 1.2995e-08, 2.6678e-09, 9.3378e-09,\n",
       "            9.0239e-09, 2.4615e-08, 2.8614e-08, 8.2226e-09, 1.1170e-08, 1.4304e-08,\n",
       "            1.1282e-08, 1.0603e-08, 8.1237e-09, 1.2340e-08, 2.0410e-08, 1.0584e-08,\n",
       "            1.5687e-08, 3.7897e-09, 1.1691e-08, 1.2385e-08, 1.7026e-08, 1.5739e-09,\n",
       "            1.9465e-08, 3.1725e-09, 9.6442e-09, 2.3027e-08, 1.2345e-08, 1.4644e-09,\n",
       "            1.6337e-08, 1.2271e-08, 5.0730e-08, 1.1636e-08, 2.7963e-06, 2.9883e-09,\n",
       "            7.0191e-09, 2.0622e-08, 1.9592e-08, 1.9950e-09, 3.2604e-09, 6.1671e-09,\n",
       "            2.2162e-09, 6.3884e-08, 4.2189e-09, 1.4314e-08, 1.6838e-08, 1.1721e-08,\n",
       "            1.0486e-08, 3.7168e-09, 1.3293e-08, 1.1381e-07, 2.8930e-09, 5.5862e-09,\n",
       "            9.1434e-09, 3.1594e-09], device='cuda:0')},\n",
       "   142: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.2223e-05, -5.8178e-05, -7.5172e-05,  1.0004e-04,  1.0637e-04,\n",
       "             4.9713e-05, -2.6570e-06,  1.1283e-05, -1.1000e-05, -3.9197e-06,\n",
       "            -6.6614e-05, -6.5702e-05,  9.7334e-05, -1.0772e-04, -7.2438e-05,\n",
       "             1.1022e-04,  7.2022e-05, -2.4771e-05, -8.5721e-06, -6.7008e-05,\n",
       "            -6.9557e-05, -8.9866e-06,  2.9265e-06, -2.1449e-05,  1.4816e-04,\n",
       "             1.5484e-05, -6.9829e-05, -3.1686e-05, -5.3746e-05,  1.9658e-05,\n",
       "             7.2707e-05, -5.7662e-05,  1.0800e-05, -9.4909e-05,  6.7005e-06,\n",
       "            -2.8454e-05, -7.0226e-05, -1.7547e-05,  1.4449e-05,  4.4292e-05,\n",
       "             6.0524e-05,  1.1158e-05, -1.4118e-05, -1.2240e-05, -3.2755e-05,\n",
       "            -9.4575e-06, -1.0833e-05,  4.9913e-05,  2.5339e-05,  9.3693e-06,\n",
       "            -1.1022e-05, -5.4779e-05, -1.0088e-05, -1.1524e-04, -1.4455e-06,\n",
       "            -3.5393e-05,  8.9482e-05,  1.3556e-05,  4.9570e-06, -2.3300e-05,\n",
       "            -7.7884e-05, -6.6640e-06, -1.7481e-05,  4.4824e-05,  1.5956e-04,\n",
       "            -9.8028e-05, -2.1995e-05, -3.7471e-05,  9.4328e-05,  3.2587e-05,\n",
       "            -5.9884e-05,  5.1413e-06, -6.8341e-05,  2.5101e-05,  3.9702e-05,\n",
       "            -6.1533e-05,  5.2153e-05,  3.3191e-05, -1.0928e-04, -8.1312e-05,\n",
       "            -1.8937e-05,  1.0616e-04,  9.7744e-05, -3.2599e-05, -5.0937e-05,\n",
       "            -2.7250e-05, -1.4890e-05, -5.5068e-05, -6.2562e-05, -3.8600e-06,\n",
       "             1.1196e-05, -4.7125e-05,  1.1291e-04,  1.0229e-05,  8.3348e-05,\n",
       "             5.9238e-05,  4.7527e-05, -1.4345e-05,  1.0949e-05, -5.7612e-05,\n",
       "            -2.6464e-05, -3.2180e-06, -3.0159e-05,  2.2128e-07, -7.1563e-06,\n",
       "            -8.8963e-05, -9.2554e-05,  1.9593e-05,  8.8271e-05,  9.4635e-05,\n",
       "            -2.1478e-05,  8.1963e-05,  1.5205e-05,  4.5551e-05, -1.6217e-05,\n",
       "            -8.6739e-05,  2.9316e-06,  7.8015e-05,  4.1720e-06, -3.2614e-05,\n",
       "             2.5742e-05, -1.7161e-05, -3.0863e-05, -4.8418e-05, -2.0897e-05,\n",
       "            -9.5066e-06,  2.4823e-05, -1.9280e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([7.4705e-09, 7.4333e-08, 8.5038e-08, 5.3941e-08, 3.2553e-08, 5.1786e-09,\n",
       "            2.0907e-08, 7.0704e-08, 3.2253e-08, 3.0205e-08, 1.4818e-08, 6.7050e-08,\n",
       "            1.1254e-07, 4.1055e-08, 2.0365e-08, 4.8375e-08, 8.3490e-08, 1.6378e-08,\n",
       "            2.0472e-08, 1.5119e-08, 2.6583e-08, 1.5979e-08, 6.4712e-09, 8.1607e-09,\n",
       "            4.9899e-08, 8.8011e-09, 8.2114e-09, 2.3082e-08, 1.4122e-08, 3.6702e-09,\n",
       "            5.7262e-08, 5.9409e-08, 9.1795e-09, 4.7249e-08, 4.9402e-09, 5.8474e-09,\n",
       "            1.2064e-08, 1.6608e-08, 8.4682e-09, 1.7551e-08, 1.5650e-08, 2.4998e-08,\n",
       "            2.6620e-08, 1.0940e-08, 1.9198e-08, 2.7539e-09, 2.0916e-08, 1.2201e-08,\n",
       "            1.1342e-08, 2.2421e-08, 1.0917e-08, 1.8962e-08, 5.6575e-09, 4.1717e-08,\n",
       "            1.2705e-08, 6.9440e-09, 3.9931e-08, 9.1199e-09, 1.5901e-08, 2.9913e-08,\n",
       "            3.0177e-08, 2.2156e-08, 6.9669e-09, 6.7320e-09, 1.6654e-07, 2.0273e-08,\n",
       "            2.1215e-08, 5.3542e-09, 4.7409e-08, 1.8678e-08, 3.9951e-08, 1.2611e-08,\n",
       "            7.8371e-09, 7.1648e-09, 4.8292e-08, 6.1768e-08, 1.6582e-08, 6.7615e-08,\n",
       "            4.5541e-08, 3.1006e-08, 2.2492e-08, 4.1962e-08, 2.4388e-08, 1.6226e-08,\n",
       "            1.1255e-08, 4.1826e-09, 5.1873e-09, 9.1700e-09, 7.5832e-09, 6.2022e-09,\n",
       "            2.1058e-08, 9.7467e-09, 2.7564e-08, 5.8356e-09, 1.8804e-08, 1.4321e-08,\n",
       "            2.5015e-08, 1.1102e-08, 5.4089e-09, 1.6827e-08, 9.2083e-09, 3.5790e-09,\n",
       "            4.6873e-08, 9.3492e-09, 1.4432e-08, 2.3335e-08, 7.0321e-08, 1.1310e-08,\n",
       "            1.9656e-08, 1.5787e-08, 3.9391e-08, 1.9464e-08, 3.2449e-09, 8.2474e-09,\n",
       "            1.5098e-08, 1.1602e-08, 1.7781e-08, 1.9110e-08, 2.0504e-08, 2.2438e-08,\n",
       "            6.3357e-09, 1.2454e-08, 1.8220e-08, 3.7384e-08, 9.7702e-09, 6.9235e-09,\n",
       "            1.6782e-08, 1.2552e-08], device='cuda:0')},\n",
       "   143: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 5.1524e-06, -2.9612e-05,  2.5213e-05,  ...,  8.8601e-06,\n",
       "              1.7600e-05, -1.6257e-05],\n",
       "            [-3.6735e-05,  1.5069e-05,  1.3538e-05,  ..., -2.0013e-05,\n",
       "             -4.0923e-05,  3.5789e-05],\n",
       "            [-3.4819e-06,  4.3108e-06,  1.2064e-06,  ...,  4.5485e-06,\n",
       "             -7.2095e-06, -6.0784e-06],\n",
       "            ...,\n",
       "            [-7.2086e-06,  1.0294e-06, -4.7201e-06,  ..., -1.4196e-06,\n",
       "             -6.6869e-06, -6.9084e-06],\n",
       "            [ 1.8986e-05,  1.0281e-05, -2.8898e-05,  ...,  1.0190e-05,\n",
       "              1.9240e-05,  6.5181e-07],\n",
       "            [ 7.2628e-07,  1.1557e-05, -1.4539e-05,  ...,  4.8312e-06,\n",
       "             -5.9770e-06, -8.8897e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.5875e-08, 1.7583e-08, 1.4506e-08,  ..., 7.4165e-09, 2.2177e-08,\n",
       "             2.0444e-08],\n",
       "            [1.9373e-08, 5.5073e-09, 1.3368e-08,  ..., 2.8988e-09, 8.9458e-09,\n",
       "             6.1893e-09],\n",
       "            [4.3870e-09, 3.7422e-09, 2.8219e-09,  ..., 9.8014e-10, 2.1918e-09,\n",
       "             1.9655e-09],\n",
       "            ...,\n",
       "            [1.8068e-09, 1.5197e-09, 1.7430e-09,  ..., 9.4624e-10, 2.3734e-09,\n",
       "             3.5406e-09],\n",
       "            [1.0930e-08, 6.8288e-09, 9.8680e-09,  ..., 2.2351e-09, 7.4606e-09,\n",
       "             8.9596e-09],\n",
       "            [2.3198e-09, 3.5432e-09, 4.9387e-09,  ..., 1.2542e-09, 3.5582e-09,\n",
       "             5.9339e-09]], device='cuda:0')},\n",
       "   144: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 3.6552e-06, -2.5962e-05, -2.8073e-06, -2.3614e-06,  7.9423e-06,\n",
       "             1.4412e-06,  1.3353e-06,  4.7223e-06, -3.7187e-05, -1.0752e-06,\n",
       "             4.4811e-06, -3.5598e-05, -4.4880e-08, -1.1727e-05,  2.6132e-05,\n",
       "             3.7590e-05, -6.5991e-06,  1.0664e-05,  3.2428e-05, -8.1397e-05,\n",
       "            -1.7614e-05,  6.2968e-06,  4.8348e-07,  1.3502e-06,  7.1620e-06,\n",
       "             1.4933e-05, -1.3187e-06,  5.1194e-06,  2.0201e-05, -1.2245e-08,\n",
       "             4.1448e-05,  1.1403e-05, -1.2068e-05,  3.5878e-06,  4.4279e-06,\n",
       "            -4.1072e-05,  2.7911e-06, -1.8964e-07, -4.4133e-05,  4.8130e-06,\n",
       "            -3.8202e-07,  1.6007e-05,  1.6057e-05, -4.2701e-05,  1.0309e-05,\n",
       "             1.7325e-06,  2.0931e-05,  4.2401e-06,  2.3387e-06, -1.3434e-07,\n",
       "            -8.0528e-06, -4.6803e-05,  2.3735e-05,  1.6270e-05,  1.0208e-05,\n",
       "            -1.3030e-05,  8.3828e-06,  2.8808e-05,  2.5801e-05, -9.5029e-06,\n",
       "            -1.0214e-06,  3.7010e-05, -5.6205e-05, -1.0168e-05, -4.3919e-06,\n",
       "             1.1005e-05, -3.8441e-05, -3.1350e-06, -3.6647e-06, -2.4252e-05,\n",
       "             1.2544e-05,  1.4394e-05, -3.7550e-05, -1.4947e-05,  5.6255e-06,\n",
       "            -1.3698e-05, -7.6872e-06, -2.4188e-05,  1.9355e-05, -4.3917e-05,\n",
       "             1.4292e-05, -1.0259e-05, -1.7430e-05, -9.5898e-06,  2.3592e-05,\n",
       "             4.6043e-06, -1.3880e-05,  1.0399e-05,  4.7520e-05, -2.0230e-06,\n",
       "            -2.5143e-05, -3.0247e-05, -3.2939e-06, -4.0288e-05,  1.1580e-05,\n",
       "             7.9116e-06, -2.2366e-05, -2.0741e-06, -2.6250e-05,  1.5739e-05,\n",
       "            -2.7874e-05,  1.2019e-05, -3.2891e-06,  8.4848e-06,  1.7842e-06,\n",
       "            -1.1294e-05,  6.1077e-08, -4.8477e-05, -3.0576e-05,  2.4233e-05,\n",
       "             7.6451e-06, -6.2223e-06, -3.1943e-07, -1.1668e-05,  2.3877e-06,\n",
       "             1.1855e-05, -1.8388e-06, -4.1750e-06,  1.0098e-05,  1.6331e-05,\n",
       "             9.6733e-06,  6.9848e-06, -2.4206e-05, -8.4224e-06,  1.4617e-05,\n",
       "             5.6699e-06,  1.6998e-05,  1.0124e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.9181e-08, 9.0690e-09, 2.4995e-09, 1.6387e-09, 2.3651e-09, 1.5201e-08,\n",
       "            1.7045e-10, 5.9136e-09, 3.0235e-08, 1.0430e-08, 2.6563e-09, 1.4070e-08,\n",
       "            5.9095e-11, 1.8458e-08, 3.0923e-08, 2.1008e-08, 6.4158e-09, 1.5160e-09,\n",
       "            2.4761e-08, 2.2352e-08, 1.3803e-08, 8.1162e-09, 3.3945e-11, 2.8011e-08,\n",
       "            7.9705e-09, 3.4710e-09, 4.1534e-09, 6.6229e-09, 1.3583e-09, 2.7400e-09,\n",
       "            8.0348e-09, 6.3498e-09, 7.0124e-09, 4.1035e-10, 2.1026e-09, 1.4699e-08,\n",
       "            6.1183e-09, 1.7548e-09, 2.4709e-08, 3.1949e-09, 3.7393e-10, 7.4045e-09,\n",
       "            1.4172e-08, 2.4828e-08, 3.0564e-09, 7.4686e-10, 2.5461e-09, 1.3318e-09,\n",
       "            5.3953e-09, 1.1792e-09, 1.4961e-08, 8.7789e-09, 9.9768e-09, 7.6922e-09,\n",
       "            2.1098e-09, 1.9360e-08, 3.2624e-10, 3.1989e-09, 2.5581e-09, 1.4913e-08,\n",
       "            3.4187e-09, 1.8277e-08, 2.1212e-08, 7.0906e-09, 6.6827e-09, 4.9391e-09,\n",
       "            9.9310e-09, 1.2013e-08, 5.9423e-09, 9.6114e-09, 3.1958e-08, 1.1414e-08,\n",
       "            2.5940e-08, 6.8434e-09, 8.8314e-09, 6.3927e-09, 4.3377e-09, 6.9833e-09,\n",
       "            6.3636e-09, 1.7354e-08, 4.5556e-10, 1.1844e-08, 1.2458e-08, 1.4565e-09,\n",
       "            6.0871e-09, 5.8502e-09, 2.2002e-08, 9.0875e-09, 1.1029e-08, 6.8908e-09,\n",
       "            3.6430e-09, 1.5296e-08, 9.9814e-09, 3.1643e-08, 6.8507e-09, 6.8876e-09,\n",
       "            1.1799e-08, 1.7981e-08, 2.0978e-08, 5.6163e-09, 8.9578e-09, 1.5719e-08,\n",
       "            1.6061e-08, 1.7066e-08, 4.8121e-10, 2.5218e-09, 6.7328e-10, 4.4068e-08,\n",
       "            1.0282e-08, 1.0879e-08, 2.4199e-08, 2.5109e-08, 9.7224e-10, 1.6194e-09,\n",
       "            7.8442e-11, 6.3195e-09, 1.5754e-09, 7.7694e-09, 7.7206e-09, 8.5904e-09,\n",
       "            1.2539e-08, 1.2973e-08, 1.3474e-08, 7.7507e-09, 8.2896e-09, 2.5016e-09,\n",
       "            8.2158e-09, 4.5483e-09], device='cuda:0')},\n",
       "   145: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-9.1527e-05, -4.0778e-05,  1.3227e-04,  ..., -1.0198e-04,\n",
       "             -3.8823e-07, -5.4506e-05],\n",
       "            [ 3.8573e-05,  1.8811e-05, -1.4532e-04,  ...,  8.1090e-06,\n",
       "             -1.6787e-05, -2.4681e-06],\n",
       "            [-3.8120e-05,  4.7568e-06,  7.1793e-05,  ..., -1.4926e-04,\n",
       "             -3.9741e-05, -8.5970e-05],\n",
       "            ...,\n",
       "            [-2.4990e-05, -4.6586e-07, -3.1317e-05,  ...,  1.8663e-05,\n",
       "             -1.3533e-05, -1.8726e-06],\n",
       "            [ 1.5147e-05, -7.9850e-06,  4.4063e-05,  ...,  4.3410e-06,\n",
       "              8.4902e-06, -7.4998e-06],\n",
       "            [ 2.2464e-06, -5.7748e-06,  5.5546e-05,  ..., -4.0943e-05,\n",
       "             -1.0667e-06, -1.4820e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.5914e-08, 1.1040e-08, 2.8034e-08,  ..., 7.1511e-08, 3.8008e-09,\n",
       "             3.0487e-08],\n",
       "            [3.5377e-08, 1.1314e-08, 2.6078e-08,  ..., 1.0004e-07, 7.3854e-09,\n",
       "             4.7259e-08],\n",
       "            [6.2833e-08, 3.8338e-08, 5.0302e-08,  ..., 2.1099e-07, 1.5573e-08,\n",
       "             1.2494e-07],\n",
       "            ...,\n",
       "            [1.8609e-08, 2.0938e-09, 3.9563e-09,  ..., 3.5979e-08, 1.8759e-09,\n",
       "             1.0668e-08],\n",
       "            [2.9791e-08, 3.4635e-09, 7.2412e-09,  ..., 6.9717e-08, 2.0341e-09,\n",
       "             1.9477e-08],\n",
       "            [2.4599e-08, 5.4652e-09, 6.1286e-09,  ..., 5.2378e-08, 4.1681e-09,\n",
       "             2.3022e-08]], device='cuda:0')},\n",
       "   146: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 4.7251e-05,  8.5825e-06, -2.2146e-05,  ...,  4.1130e-07,\n",
       "              1.1058e-05,  5.5760e-06],\n",
       "            [ 5.1784e-05,  9.2454e-06, -1.6092e-05,  ..., -8.3563e-07,\n",
       "              1.3239e-05,  4.9143e-06],\n",
       "            [-4.0627e-05, -1.1508e-05,  1.1964e-05,  ..., -1.5250e-06,\n",
       "             -9.1282e-06, -3.3424e-06],\n",
       "            ...,\n",
       "            [ 2.7687e-06,  6.4694e-07,  3.9527e-07,  ..., -7.6424e-07,\n",
       "             -2.5125e-07,  3.2868e-06],\n",
       "            [-8.2613e-06,  6.4394e-07, -2.1642e-06,  ..., -3.1336e-06,\n",
       "             -2.2394e-07,  1.0129e-05],\n",
       "            [-8.0354e-06, -3.1864e-08,  1.1830e-05,  ..., -3.8776e-06,\n",
       "             -1.2676e-06,  9.9091e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.4118e-09, 1.1530e-09, 2.2929e-09,  ..., 6.8704e-11, 1.5451e-10,\n",
       "             6.8936e-10],\n",
       "            [3.8315e-09, 1.8614e-09, 4.0048e-09,  ..., 1.2659e-10, 1.9807e-10,\n",
       "             1.0152e-09],\n",
       "            [1.6026e-09, 7.8312e-10, 1.5790e-09,  ..., 4.2950e-11, 1.0254e-10,\n",
       "             4.7457e-10],\n",
       "            ...,\n",
       "            [1.5607e-10, 4.1827e-11, 1.8107e-10,  ..., 2.4996e-12, 1.3050e-11,\n",
       "             6.1044e-11],\n",
       "            [2.5140e-09, 6.1539e-10, 2.8071e-09,  ..., 3.4781e-11, 1.9226e-10,\n",
       "             9.9293e-10],\n",
       "            [1.7841e-09, 5.9165e-10, 2.0422e-09,  ..., 2.7804e-11, 1.4397e-10,\n",
       "             6.6377e-10]], device='cuda:0')},\n",
       "   147: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.1037e-06, -2.9477e-06,  1.3655e-06,  ...,  1.4047e-05,\n",
       "             -4.6536e-06, -8.7316e-06],\n",
       "            [-5.0877e-06, -5.4152e-06,  2.6483e-06,  ...,  1.7385e-05,\n",
       "             -8.5736e-06, -1.2500e-05],\n",
       "            [ 3.9497e-06, -9.6854e-07, -1.4600e-06,  ..., -1.4943e-06,\n",
       "             -2.4763e-06, -9.3429e-07],\n",
       "            ...,\n",
       "            [-6.1987e-06,  2.1631e-06,  1.6300e-06,  ...,  1.0520e-06,\n",
       "             -2.4952e-06,  4.4373e-07],\n",
       "            [-1.4969e-05,  4.4948e-06,  3.8210e-06,  ...,  2.1007e-06,\n",
       "             -5.7391e-06,  1.3587e-06],\n",
       "            [-1.4718e-05,  3.9646e-06,  4.7440e-06,  ...,  3.2376e-06,\n",
       "             -5.5116e-06, -3.4772e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.9624e-09, 5.1497e-10, 5.7066e-10,  ..., 4.6727e-10, 1.7283e-09,\n",
       "             1.2390e-09],\n",
       "            [2.8242e-09, 5.0760e-10, 5.2843e-10,  ..., 4.5522e-10, 1.6074e-09,\n",
       "             1.2979e-09],\n",
       "            [5.2163e-10, 8.5695e-11, 1.1106e-10,  ..., 8.7083e-11, 3.1095e-10,\n",
       "             2.5600e-10],\n",
       "            ...,\n",
       "            [3.2498e-11, 1.0049e-11, 1.6604e-11,  ..., 1.1900e-11, 2.6811e-11,\n",
       "             1.0652e-11],\n",
       "            [5.8087e-10, 1.2706e-10, 2.8715e-10,  ..., 2.2559e-10, 4.8151e-10,\n",
       "             2.0052e-10],\n",
       "            [6.0142e-10, 1.0813e-10, 2.8579e-10,  ..., 2.3681e-10, 4.9554e-10,\n",
       "             2.0609e-10]], device='cuda:0')},\n",
       "   148: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.6842e-05,  9.3039e-06, -3.2175e-05,  ...,  1.2714e-05,\n",
       "             -2.5695e-06,  1.9865e-06],\n",
       "            [ 2.8542e-05, -1.0676e-05,  1.4843e-05,  ...,  3.8156e-06,\n",
       "             -3.4291e-05,  2.8189e-05],\n",
       "            [-1.8096e-06,  2.1204e-05, -1.7263e-05,  ..., -9.8913e-06,\n",
       "             -1.3340e-05, -9.4343e-06],\n",
       "            ...,\n",
       "            [ 1.1391e-05,  2.3910e-05,  2.5158e-05,  ..., -8.0633e-06,\n",
       "             -4.8731e-05,  3.8011e-05],\n",
       "            [ 6.7068e-07, -1.5951e-06,  2.5075e-05,  ...,  2.1952e-06,\n",
       "              1.5640e-05,  8.1858e-07],\n",
       "            [-1.3154e-05,  2.7884e-05, -4.2180e-05,  ..., -1.4557e-05,\n",
       "              3.2749e-06, -2.7487e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.4031e-08, 1.1159e-08, 2.7478e-08,  ..., 5.8863e-09, 6.7191e-09,\n",
       "             3.7765e-09],\n",
       "            [1.1492e-08, 1.7573e-08, 1.9177e-08,  ..., 5.3618e-09, 9.3704e-09,\n",
       "             7.1931e-09],\n",
       "            [1.4864e-08, 1.3424e-08, 2.9224e-08,  ..., 7.0040e-09, 8.2516e-09,\n",
       "             5.5792e-09],\n",
       "            ...,\n",
       "            [1.3113e-08, 1.0962e-08, 1.4450e-08,  ..., 3.9883e-09, 7.3394e-09,\n",
       "             6.8530e-09],\n",
       "            [5.5194e-09, 7.6991e-09, 1.2150e-08,  ..., 2.9111e-09, 6.8720e-09,\n",
       "             3.6824e-09],\n",
       "            [1.1534e-08, 1.0103e-08, 2.3039e-08,  ..., 5.6111e-09, 8.4611e-09,\n",
       "             6.4355e-09]], device='cuda:0')},\n",
       "   149: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.2117e-05, -7.3299e-05,  4.2019e-05, -5.1952e-05, -1.0141e-04,\n",
       "             3.2087e-05,  6.4101e-05, -3.4864e-05,  9.1794e-05, -1.3330e-04,\n",
       "             6.3183e-05, -2.3965e-06, -3.3146e-05,  6.8771e-05, -2.9029e-05,\n",
       "             1.9741e-05, -8.1058e-06, -4.6487e-06,  7.1724e-05,  7.1020e-05,\n",
       "            -1.7419e-06,  7.0640e-05,  3.6925e-05,  1.2453e-05,  5.6260e-05,\n",
       "             9.7170e-05,  8.0574e-05,  4.6534e-05, -4.6750e-05,  7.0821e-05,\n",
       "             8.1576e-05,  1.0742e-04,  9.1450e-05, -4.2085e-05,  6.6171e-05,\n",
       "             6.2453e-05,  1.5304e-04, -4.5511e-05,  3.0215e-05, -4.2911e-06,\n",
       "            -2.5289e-05, -6.7412e-05,  9.3198e-05,  3.5777e-06, -1.6162e-05,\n",
       "            -2.6218e-05, -2.1328e-05, -7.7670e-05, -2.6015e-05, -7.5029e-05,\n",
       "            -7.6317e-05, -7.4902e-05, -6.9094e-05,  2.7045e-05, -9.9409e-05,\n",
       "            -6.9776e-05,  1.4943e-04, -1.6773e-06, -6.0204e-06, -1.1992e-04,\n",
       "             1.2036e-04,  3.1075e-05, -1.9010e-06, -1.7831e-04,  1.0324e-04,\n",
       "            -5.6094e-06, -1.2261e-04,  1.4577e-04, -1.7551e-04, -1.5150e-04,\n",
       "            -7.6870e-05,  7.5555e-05, -5.5590e-05,  6.7867e-05, -2.7642e-05,\n",
       "            -5.4887e-05, -6.1978e-05, -7.4661e-05,  6.0296e-05, -1.6814e-05,\n",
       "             9.5417e-06,  5.6328e-05, -1.6853e-05,  1.6163e-04, -8.0978e-07,\n",
       "            -3.5232e-05, -8.3065e-05,  1.3153e-05,  7.0413e-05, -2.8256e-05,\n",
       "            -1.0088e-04, -2.8990e-05,  4.4386e-06,  5.4372e-05,  5.3982e-05,\n",
       "             1.0999e-05, -8.6703e-06, -5.4806e-05,  5.7180e-05,  6.0357e-05,\n",
       "             6.5979e-05,  8.3688e-05, -1.7136e-05, -4.6198e-05, -5.1779e-05,\n",
       "             6.7785e-05, -9.2946e-05,  6.8265e-05,  3.4363e-05,  7.3979e-05,\n",
       "            -9.4205e-05, -5.0314e-05,  4.6062e-05,  1.1080e-04,  2.5581e-05,\n",
       "             6.0154e-05, -4.5150e-05, -2.7095e-05, -1.0354e-04, -1.1629e-05,\n",
       "             5.5538e-05, -5.2834e-05, -2.1111e-05,  1.4209e-05, -8.4929e-05,\n",
       "            -3.9122e-06, -1.6612e-05,  1.4557e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([7.5096e-08, 1.2738e-07, 9.1116e-08, 8.4013e-08, 1.1526e-07, 5.1174e-08,\n",
       "            5.4441e-08, 5.4286e-08, 4.1408e-08, 8.7907e-08, 9.5908e-08, 1.3679e-07,\n",
       "            6.8559e-08, 7.6698e-08, 1.1404e-07, 9.8190e-08, 8.5240e-08, 1.2720e-07,\n",
       "            8.4054e-08, 8.5294e-08, 8.4443e-08, 7.2156e-08, 2.9316e-07, 5.4836e-08,\n",
       "            6.5423e-08, 1.3427e-07, 1.9513e-07, 4.8455e-08, 7.7489e-08, 9.6354e-08,\n",
       "            4.6399e-08, 7.0924e-08, 1.4060e-07, 1.1746e-07, 1.1666e-07, 7.0192e-08,\n",
       "            1.3188e-07, 9.3878e-08, 4.9498e-08, 5.8550e-08, 9.8387e-08, 4.3961e-08,\n",
       "            1.6545e-07, 3.9054e-08, 7.1280e-08, 7.2118e-08, 1.1631e-07, 1.1191e-07,\n",
       "            1.3245e-07, 1.2264e-07, 7.5796e-08, 5.8147e-08, 8.3391e-08, 1.1274e-07,\n",
       "            9.2569e-08, 1.0033e-07, 3.1686e-07, 7.6599e-08, 4.0111e-08, 6.5215e-08,\n",
       "            2.2429e-07, 6.2068e-08, 1.0997e-07, 3.4122e-07, 4.9707e-08, 6.6115e-08,\n",
       "            1.0431e-07, 1.5575e-07, 2.2951e-07, 1.8538e-07, 5.8196e-08, 1.4657e-07,\n",
       "            1.7082e-07, 8.0503e-08, 1.1721e-07, 3.4495e-08, 6.8241e-08, 1.1149e-07,\n",
       "            1.4853e-07, 3.6295e-08, 1.0585e-07, 7.6913e-08, 6.4245e-08, 2.4720e-07,\n",
       "            1.3361e-07, 1.2831e-07, 1.0785e-07, 7.2646e-08, 1.1064e-07, 1.0973e-07,\n",
       "            6.3190e-08, 8.1754e-08, 5.6155e-08, 8.7436e-08, 1.6410e-07, 4.7202e-08,\n",
       "            4.8229e-08, 1.1218e-07, 1.0277e-07, 1.3970e-07, 4.6201e-08, 9.5323e-08,\n",
       "            9.7941e-08, 5.2569e-08, 6.1050e-08, 4.4050e-08, 1.5499e-07, 8.0075e-08,\n",
       "            5.2420e-08, 4.5801e-08, 8.1891e-08, 1.4319e-07, 4.1991e-08, 1.6307e-07,\n",
       "            1.1426e-07, 1.7954e-07, 7.1657e-08, 9.5154e-08, 1.8731e-07, 5.7897e-08,\n",
       "            1.3278e-07, 1.2249e-07, 2.7861e-08, 1.0734e-07, 7.6577e-08, 1.0413e-07,\n",
       "            4.9666e-08, 9.5352e-08], device='cuda:0')},\n",
       "   150: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-2.4126e-05,  2.6531e-06, -9.6750e-06, -1.0748e-05, -1.4744e-06,\n",
       "            -7.0163e-06, -2.5495e-05, -6.0605e-07, -1.6568e-05, -2.8879e-05,\n",
       "            -9.3382e-06, -6.4969e-06,  8.2372e-06, -8.7384e-06, -8.2496e-06,\n",
       "            -5.8978e-06,  1.3905e-05, -1.6702e-05,  1.1092e-05,  3.0011e-06,\n",
       "            -1.2261e-05, -5.4111e-06, -2.1907e-05, -5.0728e-06, -6.0229e-06,\n",
       "            -1.5761e-05,  6.7898e-06, -2.6698e-05, -7.0483e-06, -9.9109e-06,\n",
       "            -1.0979e-05, -8.8135e-06, -8.6994e-06,  1.0981e-05, -1.3305e-07,\n",
       "             7.9996e-06,  1.1019e-05,  2.0638e-05, -7.8759e-06, -5.2932e-06,\n",
       "            -3.9093e-06, -3.0667e-06, -2.9928e-06, -6.5692e-08,  1.1350e-06,\n",
       "            -4.4537e-06,  6.5331e-06,  2.9468e-07, -1.1617e-05,  1.5435e-06,\n",
       "             2.9699e-05, -5.6293e-06, -1.6143e-06, -1.2381e-06, -6.6030e-06,\n",
       "             5.0022e-06, -1.5960e-06, -4.9304e-06,  8.9446e-06, -4.6561e-06,\n",
       "            -2.1698e-06, -5.8875e-06, -9.5607e-06,  3.6201e-06,  6.0785e-06,\n",
       "            -1.8075e-05, -1.2097e-05, -1.5265e-05, -3.8394e-06, -5.0924e-06,\n",
       "            -6.6273e-06, -4.5205e-06, -2.6517e-07, -1.6080e-05, -1.6576e-05,\n",
       "            -1.9289e-06, -9.2476e-06,  2.4758e-06, -9.1573e-06, -2.0339e-05,\n",
       "            -9.1702e-06, -1.1307e-05,  8.1590e-06,  7.5184e-06,  5.2732e-06,\n",
       "            -3.9341e-06, -1.8587e-05, -8.3270e-06, -2.8009e-06,  2.1323e-06,\n",
       "             1.0768e-05, -1.0236e-05,  6.7704e-07,  7.8240e-06,  1.2345e-05,\n",
       "             2.9036e-06, -3.1371e-06, -8.0709e-06, -2.9688e-07, -1.6309e-05,\n",
       "            -8.1342e-06,  2.4971e-06, -2.0954e-05,  2.5156e-07, -2.9841e-06,\n",
       "            -1.2233e-05, -1.1457e-04,  1.8132e-05,  8.2800e-06, -4.7340e-06,\n",
       "             1.9815e-05, -3.8337e-06, -1.7748e-05, -2.0854e-05,  2.2465e-06,\n",
       "             8.9818e-06, -9.3177e-06,  4.3732e-06,  2.2417e-05,  1.2752e-05,\n",
       "             6.0759e-07, -1.9358e-05,  4.3946e-06,  2.1254e-05, -1.4763e-05,\n",
       "            -7.6417e-06, -1.8529e-05, -6.5812e-07], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.0358e-08, 2.1960e-09, 6.1208e-09, 4.1253e-09, 2.9764e-09, 4.0364e-09,\n",
       "            2.7227e-09, 7.0702e-09, 3.6796e-09, 1.4393e-08, 5.4735e-10, 5.7882e-09,\n",
       "            1.0984e-08, 6.5600e-09, 1.2372e-09, 7.5692e-10, 1.0596e-08, 1.1866e-08,\n",
       "            4.9505e-09, 3.9260e-09, 1.9996e-09, 2.7485e-09, 2.6688e-09, 1.8764e-09,\n",
       "            6.9750e-10, 3.0739e-09, 2.0355e-08, 4.9040e-09, 2.8950e-09, 4.8240e-09,\n",
       "            2.1163e-08, 3.6705e-09, 3.1551e-09, 3.4479e-09, 1.0506e-09, 6.0814e-09,\n",
       "            3.1065e-09, 1.9836e-08, 1.4530e-09, 2.0179e-09, 1.5110e-09, 3.8990e-09,\n",
       "            2.8819e-09, 3.5234e-09, 1.4879e-09, 3.0450e-09, 6.6165e-10, 2.0692e-09,\n",
       "            4.7596e-09, 1.6222e-09, 8.4431e-09, 8.7478e-10, 4.4458e-09, 2.0495e-09,\n",
       "            3.5079e-09, 3.8968e-09, 2.9638e-09, 1.0031e-09, 4.3214e-09, 2.5655e-09,\n",
       "            1.0626e-09, 3.0838e-09, 2.3606e-09, 1.9639e-09, 2.8756e-09, 5.4119e-09,\n",
       "            5.3488e-09, 1.9386e-09, 1.9184e-09, 8.0778e-09, 1.0854e-08, 1.7886e-09,\n",
       "            1.0839e-09, 1.2726e-09, 1.9523e-08, 1.1151e-09, 2.5403e-08, 1.8554e-08,\n",
       "            2.2414e-09, 6.1107e-09, 3.0603e-09, 4.0186e-09, 6.7032e-10, 1.4011e-09,\n",
       "            2.3143e-09, 1.4569e-09, 1.7645e-08, 1.2444e-08, 8.7961e-10, 6.7925e-10,\n",
       "            7.6094e-09, 1.2265e-09, 3.0915e-09, 1.8724e-09, 6.8632e-09, 1.6166e-09,\n",
       "            1.9504e-09, 1.9430e-09, 1.1311e-08, 5.1607e-09, 1.1154e-09, 1.0863e-09,\n",
       "            2.8706e-09, 2.6777e-09, 1.6105e-09, 3.3827e-09, 1.7068e-07, 2.6320e-09,\n",
       "            5.4200e-09, 4.6239e-09, 2.0992e-08, 3.0982e-09, 5.7149e-09, 1.0724e-08,\n",
       "            9.2633e-10, 1.5506e-09, 1.2185e-09, 3.9718e-09, 5.1459e-09, 2.9621e-09,\n",
       "            8.5400e-09, 3.9340e-09, 6.9449e-10, 7.1166e-09, 1.6881e-09, 6.8237e-10,\n",
       "            8.4035e-09, 2.4974e-09], device='cuda:0')},\n",
       "   151: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.1942e-07, -4.0644e-06,  9.6798e-07,  ..., -5.9272e-08,\n",
       "              4.8748e-06,  1.4055e-06],\n",
       "            [-1.0203e-06,  4.7712e-07, -2.1081e-07,  ...,  7.4850e-08,\n",
       "              2.5404e-07, -1.1373e-06],\n",
       "            [-1.0299e-06, -1.4475e-06, -7.8991e-07,  ..., -8.4683e-07,\n",
       "              3.3744e-06, -1.8428e-07],\n",
       "            ...,\n",
       "            [ 6.6343e-07,  2.7182e-06,  1.8152e-08,  ..., -1.8564e-07,\n",
       "             -5.4064e-06, -9.8615e-07],\n",
       "            [-7.7918e-07,  4.6612e-07, -5.8609e-07,  ..., -5.1557e-07,\n",
       "              4.4290e-08, -4.2514e-07],\n",
       "            [ 5.4855e-07,  2.9234e-06,  3.2958e-07,  ...,  6.2981e-07,\n",
       "             -3.5052e-06,  2.4626e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[9.9130e-11, 2.4833e-11, 6.2773e-11,  ..., 3.3967e-11, 6.1060e-10,\n",
       "             1.7952e-10],\n",
       "            [2.3770e-11, 6.8806e-12, 3.8643e-11,  ..., 6.3422e-11, 2.6536e-10,\n",
       "             2.7925e-11],\n",
       "            [8.7718e-12, 2.4615e-12, 5.7313e-12,  ..., 2.8493e-12, 5.8163e-11,\n",
       "             1.4877e-11],\n",
       "            ...,\n",
       "            [1.3435e-11, 3.9346e-12, 9.7092e-12,  ..., 7.5100e-12, 1.0743e-10,\n",
       "             3.6559e-11],\n",
       "            [3.3595e-11, 1.1891e-11, 3.2957e-11,  ..., 3.3763e-11, 2.5653e-10,\n",
       "             9.3138e-11],\n",
       "            [2.5040e-11, 6.1937e-12, 1.9713e-11,  ..., 9.5522e-12, 1.8546e-10,\n",
       "             8.8688e-11]], device='cuda:0')},\n",
       "   152: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.1752e-05, -6.4570e-07,  8.1350e-06,  ...,  5.1646e-06,\n",
       "             -5.1841e-06, -3.5452e-06],\n",
       "            [-2.8414e-07, -1.4982e-06,  1.1335e-06,  ...,  6.3694e-07,\n",
       "              1.8223e-08, -2.5110e-07],\n",
       "            [-3.5784e-06,  2.0291e-06,  8.8788e-07,  ...,  3.0079e-06,\n",
       "             -3.7427e-06, -8.6776e-07],\n",
       "            ...,\n",
       "            [ 7.0950e-06,  2.4881e-07, -5.0571e-06,  ..., -4.5079e-06,\n",
       "              3.3173e-06,  1.8401e-06],\n",
       "            [ 5.8678e-07,  2.5000e-07, -1.4043e-07,  ...,  1.9090e-06,\n",
       "             -9.3693e-07, -1.7900e-06],\n",
       "            [ 7.2948e-06,  1.8211e-06, -4.4841e-06,  ..., -4.0315e-06,\n",
       "              2.8956e-06,  2.6918e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[9.9757e-10, 1.0527e-10, 4.3517e-10,  ..., 3.9585e-10, 5.7942e-10,\n",
       "             2.3095e-10],\n",
       "            [1.1877e-10, 9.8935e-11, 3.6112e-11,  ..., 5.8576e-11, 1.0144e-10,\n",
       "             3.4069e-11],\n",
       "            [7.4910e-11, 1.3537e-11, 3.9262e-11,  ..., 3.0234e-11, 4.6514e-11,\n",
       "             1.9405e-11],\n",
       "            ...,\n",
       "            [1.5172e-10, 2.3351e-11, 6.1971e-11,  ..., 7.7526e-11, 1.0147e-10,\n",
       "             4.3007e-11],\n",
       "            [2.8415e-10, 7.7455e-11, 1.2553e-10,  ..., 1.6959e-10, 2.4688e-10,\n",
       "             1.0911e-10],\n",
       "            [3.6001e-10, 3.5950e-11, 1.5685e-10,  ..., 1.7797e-10, 2.4253e-10,\n",
       "             1.2624e-10]], device='cuda:0')},\n",
       "   153: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.4273e-06, -7.4139e-06, -3.5002e-06,  ...,  1.1810e-06,\n",
       "              1.6499e-05, -4.3664e-06],\n",
       "            [-2.8680e-06,  1.3326e-06, -3.1264e-07,  ..., -2.1785e-06,\n",
       "             -7.8328e-06, -9.6145e-07],\n",
       "            [-2.0980e-06, -2.9972e-06, -5.0296e-06,  ..., -5.6034e-06,\n",
       "              1.2896e-06, -2.1721e-06],\n",
       "            ...,\n",
       "            [-1.4834e-06, -3.3098e-07, -4.9293e-07,  ..., -1.6552e-06,\n",
       "              2.9115e-06, -3.5693e-06],\n",
       "            [-9.3195e-08, -6.5954e-06, -1.3759e-06,  ..., -3.1019e-06,\n",
       "              1.1442e-05,  5.8930e-08],\n",
       "            [ 1.0222e-06, -1.6914e-06,  1.9458e-06,  ...,  2.2086e-07,\n",
       "              2.1152e-06,  4.3473e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.4497e-10, 2.6527e-10, 5.0648e-10,  ..., 1.5571e-10, 3.5283e-09,\n",
       "             2.1799e-09],\n",
       "            [1.3618e-10, 4.0428e-11, 1.1160e-10,  ..., 4.9021e-11, 9.5593e-10,\n",
       "             2.9629e-10],\n",
       "            [9.8613e-10, 3.1445e-10, 2.4932e-10,  ..., 1.3994e-10, 1.6820e-09,\n",
       "             9.8116e-10],\n",
       "            ...,\n",
       "            [5.9021e-11, 1.6629e-11, 2.3590e-11,  ..., 3.1866e-11, 3.4185e-10,\n",
       "             6.4211e-11],\n",
       "            [4.3446e-10, 1.1919e-10, 3.9609e-10,  ..., 1.8696e-10, 3.6796e-09,\n",
       "             1.3276e-09],\n",
       "            [2.7135e-10, 6.1796e-11, 8.5797e-11,  ..., 1.6021e-10, 1.3197e-09,\n",
       "             1.9013e-10]], device='cuda:0')},\n",
       "   154: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.3108e-05, -2.2183e-06,  6.5377e-06,  ...,  1.5662e-05,\n",
       "             -1.6020e-05, -1.4694e-05],\n",
       "            [-1.3840e-05, -1.7452e-06,  7.0494e-06,  ...,  3.1139e-06,\n",
       "             -2.5774e-07, -2.1098e-06],\n",
       "            [-4.9741e-06,  5.6651e-07, -1.4617e-06,  ...,  1.0107e-05,\n",
       "             -1.2095e-05, -6.6701e-06],\n",
       "            ...,\n",
       "            [-8.2361e-06, -2.7562e-06,  6.2306e-06,  ...,  6.0731e-06,\n",
       "             -4.2671e-06, -3.4705e-06],\n",
       "            [-2.7934e-05,  2.6272e-06,  1.5031e-05,  ...,  1.7636e-05,\n",
       "             -1.4499e-05, -8.7444e-06],\n",
       "            [-4.5979e-06, -1.0542e-06,  1.8839e-06,  ..., -1.0659e-06,\n",
       "             -1.0580e-06,  3.5386e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[7.5773e-09, 9.5074e-10, 3.0762e-09,  ..., 3.8227e-09, 5.1480e-09,\n",
       "             2.5504e-09],\n",
       "            [1.0989e-09, 3.8580e-10, 5.3719e-10,  ..., 6.4900e-10, 9.7255e-10,\n",
       "             4.2564e-10],\n",
       "            [2.1094e-09, 3.5743e-10, 8.2077e-10,  ..., 6.4431e-10, 8.9353e-10,\n",
       "             7.7278e-10],\n",
       "            ...,\n",
       "            [2.7084e-10, 7.7489e-11, 1.1244e-10,  ..., 1.0516e-10, 1.7757e-10,\n",
       "             8.3659e-11],\n",
       "            [5.4607e-09, 8.1770e-10, 2.3599e-09,  ..., 2.7705e-09, 4.1742e-09,\n",
       "             1.5348e-09],\n",
       "            [8.2466e-10, 2.7385e-10, 2.9685e-10,  ..., 1.9478e-10, 3.4677e-10,\n",
       "             3.8995e-10]], device='cuda:0')},\n",
       "   155: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 5.6093e-06, -2.3565e-06,  6.2522e-06,  ..., -1.1192e-05,\n",
       "             -7.4236e-06, -1.6941e-05],\n",
       "            [-4.4622e-08,  5.2439e-06, -3.0788e-06,  ...,  8.7016e-06,\n",
       "              3.0876e-05, -1.2959e-05],\n",
       "            [ 6.0067e-06,  3.2879e-06,  4.7672e-06,  ...,  6.4708e-06,\n",
       "             -8.2310e-06,  6.6865e-06],\n",
       "            ...,\n",
       "            [ 1.6809e-05,  1.3110e-05,  1.1467e-05,  ...,  1.5292e-05,\n",
       "             -4.5048e-05,  3.3744e-05],\n",
       "            [-2.9927e-06, -1.2907e-05, -2.5052e-06,  ..., -7.6381e-06,\n",
       "              2.6860e-05, -3.2506e-06],\n",
       "            [-2.8789e-06,  5.2211e-06, -7.5862e-06,  ...,  4.8883e-07,\n",
       "              2.4328e-05, -1.1387e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.2861e-09, 1.3024e-09, 2.2220e-09,  ..., 1.6131e-09, 1.7539e-08,\n",
       "             8.6282e-09],\n",
       "            [8.8336e-09, 2.0903e-09, 3.4437e-09,  ..., 2.7611e-09, 3.1756e-08,\n",
       "             1.2789e-08],\n",
       "            [1.1074e-08, 1.5847e-09, 6.2912e-09,  ..., 8.4446e-09, 7.7493e-08,\n",
       "             1.4526e-08],\n",
       "            ...,\n",
       "            [8.5036e-09, 1.5973e-09, 4.2275e-09,  ..., 4.9873e-09, 4.6582e-08,\n",
       "             1.1019e-08],\n",
       "            [8.1459e-09, 2.0103e-09, 8.6690e-09,  ..., 1.7637e-08, 4.9874e-08,\n",
       "             1.1560e-08],\n",
       "            [6.7226e-09, 1.8431e-09, 2.9805e-09,  ..., 5.0585e-09, 2.3640e-08,\n",
       "             7.6440e-09]], device='cuda:0')},\n",
       "   156: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.0421e-05, -6.4732e-06,  1.7565e-05,  ...,  2.3485e-05,\n",
       "             -2.3504e-06, -6.8936e-06],\n",
       "            [ 2.3710e-05,  5.4527e-06, -8.7070e-06,  ...,  2.2370e-06,\n",
       "             -4.8112e-06, -4.1369e-06],\n",
       "            [ 3.5060e-05,  1.0459e-05, -1.5805e-05,  ..., -2.3029e-05,\n",
       "              1.1016e-05,  1.0292e-05],\n",
       "            ...,\n",
       "            [ 6.9278e-05,  2.6567e-06, -2.4802e-05,  ..., -4.7565e-05,\n",
       "              2.6456e-05,  1.8938e-05],\n",
       "            [-4.7201e-05,  3.6940e-06,  1.7402e-05,  ...,  3.2823e-05,\n",
       "             -8.5891e-06, -7.8234e-06],\n",
       "            [ 2.1847e-06, -1.9343e-07, -1.4612e-06,  ...,  5.4450e-06,\n",
       "             -6.4556e-06, -4.0504e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.2613e-08, 1.5004e-09, 8.8542e-10,  ..., 5.4816e-09, 3.2019e-09,\n",
       "             1.2621e-09],\n",
       "            [1.9916e-08, 2.3350e-09, 9.1369e-10,  ..., 1.0476e-08, 7.7902e-09,\n",
       "             2.5074e-09],\n",
       "            [2.0100e-08, 3.8031e-09, 1.6253e-09,  ..., 7.6847e-09, 6.7873e-09,\n",
       "             3.0778e-09],\n",
       "            ...,\n",
       "            [2.1296e-08, 3.5222e-09, 1.6999e-09,  ..., 8.7317e-09, 6.0380e-09,\n",
       "             2.7866e-09],\n",
       "            [1.6001e-08, 3.2152e-09, 1.0951e-09,  ..., 8.0604e-09, 5.1125e-09,\n",
       "             2.0823e-09],\n",
       "            [6.4653e-09, 1.9599e-09, 5.3369e-10,  ..., 3.0645e-09, 2.3732e-09,\n",
       "             1.0509e-09]], device='cuda:0')},\n",
       "   157: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-7.9600e-06, -2.7005e-06,  4.1524e-06, -6.2259e-06,  3.5559e-05,\n",
       "            -8.5207e-06,  1.7014e-05,  5.1314e-06,  6.0369e-06,  4.7346e-05,\n",
       "            -8.0615e-07,  4.2566e-05,  4.3362e-05, -1.6271e-05,  3.2394e-05,\n",
       "             1.0262e-06,  3.9387e-05,  1.2703e-05,  5.5618e-06,  1.2918e-05,\n",
       "            -7.8053e-06,  6.4896e-06, -1.8631e-05,  3.9022e-06, -1.6715e-05,\n",
       "             4.1853e-05, -1.1382e-05, -5.8948e-06, -4.2565e-08,  1.2494e-05,\n",
       "             3.7726e-07,  6.1291e-06,  2.4996e-05,  2.0555e-07,  1.1950e-06,\n",
       "             8.7785e-06,  8.5083e-06,  1.7841e-05,  7.1679e-07,  2.1838e-05,\n",
       "            -4.9298e-06,  5.2343e-05, -6.2414e-08, -1.8870e-06, -1.2909e-05,\n",
       "            -1.6963e-05,  2.4709e-05,  5.5161e-06, -3.4644e-06,  2.1963e-06,\n",
       "             1.8162e-05, -1.0647e-05,  5.0776e-06, -7.4603e-08,  1.0664e-05,\n",
       "             2.8735e-06, -2.7392e-05,  1.2103e-05,  4.5854e-06,  1.8636e-06,\n",
       "             4.5559e-07, -7.7352e-06,  1.3572e-05,  2.9965e-05, -1.3298e-05,\n",
       "             1.1942e-05,  1.1231e-05,  2.3507e-05, -2.0819e-05,  1.0839e-05,\n",
       "            -1.7268e-06, -9.8579e-06,  8.5875e-06,  1.0193e-05, -2.2553e-05,\n",
       "             5.2070e-06,  1.7552e-05, -6.9909e-06,  1.3446e-05, -1.4965e-05,\n",
       "             5.6227e-06,  9.3496e-06, -1.3251e-05, -7.5818e-06,  1.9050e-05,\n",
       "             2.7727e-05,  2.4935e-05,  1.4717e-05,  1.5566e-05, -3.1730e-06,\n",
       "             1.2689e-05,  1.6855e-05,  4.7735e-05, -3.9287e-06, -1.6762e-05,\n",
       "            -3.0189e-07, -1.6421e-05,  5.1711e-06,  1.1946e-05,  2.5584e-06,\n",
       "            -2.8078e-05,  3.9412e-05,  1.0266e-06,  2.5715e-05,  1.9224e-06,\n",
       "            -2.5896e-06, -5.1507e-05,  2.6341e-05,  5.0846e-06,  2.4678e-05,\n",
       "             8.5307e-06, -1.0513e-05,  2.1135e-06,  1.4074e-05, -7.6679e-06,\n",
       "             1.2442e-05,  6.1781e-06,  1.1919e-05,  5.8669e-06, -7.2032e-07,\n",
       "             1.1286e-05, -5.0571e-05,  4.2323e-05, -7.5990e-06,  6.7358e-06,\n",
       "            -5.0225e-06,  5.7775e-06,  3.0557e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([5.7565e-09, 7.3413e-09, 4.1492e-09, 8.2273e-09, 1.7704e-08, 2.8550e-08,\n",
       "            3.2961e-08, 3.5955e-08, 5.3790e-09, 8.3607e-09, 1.0205e-09, 2.1404e-08,\n",
       "            1.2806e-08, 2.5430e-08, 5.5461e-09, 1.7200e-09, 2.1215e-08, 3.7528e-09,\n",
       "            4.0824e-09, 1.1043e-08, 1.9043e-09, 6.8027e-08, 3.2950e-09, 5.3071e-09,\n",
       "            2.7328e-09, 2.3471e-08, 2.5242e-09, 8.1521e-09, 1.2638e-09, 1.3788e-08,\n",
       "            5.8096e-09, 6.0280e-09, 1.7722e-08, 5.1402e-09, 1.9141e-09, 3.1598e-09,\n",
       "            9.3216e-10, 1.3783e-08, 2.3018e-09, 5.7716e-09, 2.5753e-09, 5.8923e-09,\n",
       "            5.9347e-09, 8.7149e-09, 4.8482e-09, 4.4112e-09, 2.8132e-08, 8.9804e-09,\n",
       "            1.3093e-08, 7.6802e-09, 7.5614e-09, 7.4341e-09, 3.8888e-09, 2.2551e-09,\n",
       "            1.3457e-08, 9.3412e-09, 1.3883e-08, 2.4960e-09, 8.3494e-10, 6.9581e-09,\n",
       "            2.6645e-09, 7.2211e-09, 1.8646e-08, 4.4464e-09, 3.8965e-09, 2.9733e-09,\n",
       "            2.8449e-08, 1.4784e-08, 7.5287e-09, 7.7071e-09, 4.2091e-09, 6.1999e-10,\n",
       "            1.2212e-08, 1.3539e-08, 4.1839e-09, 9.6355e-10, 4.6069e-09, 1.0291e-08,\n",
       "            1.7918e-08, 2.4080e-09, 2.4541e-09, 1.2928e-08, 4.3951e-08, 4.1090e-09,\n",
       "            1.7497e-09, 3.1015e-09, 1.9493e-09, 7.1093e-09, 4.1130e-09, 8.3933e-09,\n",
       "            2.9483e-08, 4.7507e-09, 1.0207e-08, 3.6455e-08, 7.6072e-09, 7.4646e-09,\n",
       "            5.9905e-09, 4.1737e-09, 7.6088e-09, 9.0477e-09, 4.7430e-09, 1.3342e-08,\n",
       "            3.0978e-09, 4.9939e-09, 4.7827e-09, 1.4100e-09, 8.7620e-08, 5.9090e-09,\n",
       "            4.7711e-09, 5.2885e-09, 1.9797e-09, 3.2749e-09, 5.5431e-09, 8.8364e-09,\n",
       "            1.2108e-08, 2.3603e-09, 9.8692e-10, 1.9904e-09, 6.5969e-09, 2.2340e-09,\n",
       "            5.7107e-09, 5.2102e-09, 9.7817e-08, 8.0810e-08, 3.0668e-09, 2.8976e-09,\n",
       "            1.0511e-09, 1.1921e-08], device='cuda:0')},\n",
       "   158: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-4.2504e-07, -9.9704e-07, -6.9324e-08,  ..., -3.1181e-07,\n",
       "              3.5101e-06,  1.8265e-06],\n",
       "            [ 9.3650e-08,  3.3645e-06, -5.4506e-08,  ...,  2.3951e-07,\n",
       "              2.5365e-06,  2.4965e-06],\n",
       "            [-2.8868e-07, -1.4454e-06,  3.2626e-07,  ..., -2.3753e-07,\n",
       "              6.6496e-07,  1.1822e-06],\n",
       "            ...,\n",
       "            [-2.6531e-07, -4.7046e-06, -3.0393e-07,  ..., -5.1435e-07,\n",
       "             -3.8692e-06, -4.9894e-06],\n",
       "            [-9.7817e-08,  3.2319e-06, -1.1069e-07,  ...,  1.2236e-07,\n",
       "              2.8891e-07,  1.6174e-07],\n",
       "            [-1.2575e-07,  6.3724e-06, -5.7002e-07,  ..., -3.2535e-08,\n",
       "              1.0145e-06,  2.3461e-07]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.9430e-12, 3.7889e-10, 3.8875e-12,  ..., 1.1983e-11, 6.9591e-12,\n",
       "             4.8591e-10],\n",
       "            [4.1078e-12, 4.9644e-10, 1.3740e-11,  ..., 4.5548e-13, 7.7343e-12,\n",
       "             1.0747e-09],\n",
       "            [2.1679e-12, 2.0782e-10, 2.1077e-12,  ..., 3.9789e-13, 2.8525e-12,\n",
       "             1.4524e-10],\n",
       "            ...,\n",
       "            [1.5299e-11, 1.1810e-09, 4.5684e-11,  ..., 1.2113e-11, 1.5862e-11,\n",
       "             2.2324e-09],\n",
       "            [1.0250e-12, 2.3391e-10, 4.4663e-12,  ..., 2.5827e-12, 2.3486e-12,\n",
       "             1.9471e-10],\n",
       "            [5.9941e-13, 2.3517e-10, 1.9142e-12,  ..., 1.6824e-12, 5.4182e-12,\n",
       "             3.4326e-10]], device='cuda:0')},\n",
       "   159: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-3.4694e-06, -1.2897e-06,  6.6389e-07,  ...,  1.3934e-06,\n",
       "              5.7050e-07, -1.1492e-08],\n",
       "            [-2.8903e-06, -1.2876e-06, -2.2975e-07,  ...,  2.7131e-06,\n",
       "             -3.8414e-07, -6.5035e-07],\n",
       "            [-9.4580e-07,  5.5055e-08,  9.8122e-07,  ...,  1.2022e-06,\n",
       "              1.5799e-07, -7.4642e-07],\n",
       "            ...,\n",
       "            [ 3.2041e-06,  1.0140e-06, -9.2971e-07,  ..., -3.1115e-06,\n",
       "              6.5818e-07, -3.5980e-08],\n",
       "            [ 3.9165e-07, -1.1724e-07, -9.9573e-07,  ...,  1.1220e-07,\n",
       "             -6.5024e-07, -2.1281e-06],\n",
       "            [-1.1462e-06, -2.0502e-06, -1.2573e-06,  ...,  1.2104e-06,\n",
       "              2.6935e-07, -2.9788e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.6757e-10, 1.9032e-11, 1.3282e-10,  ..., 1.5281e-10, 3.3437e-11,\n",
       "             3.4811e-11],\n",
       "            [1.3356e-10, 1.0079e-10, 9.5348e-11,  ..., 1.5272e-10, 3.0442e-11,\n",
       "             5.3131e-11],\n",
       "            [8.4908e-11, 8.0260e-12, 7.1379e-11,  ..., 6.5243e-11, 1.4475e-11,\n",
       "             1.2304e-11],\n",
       "            ...,\n",
       "            [5.1560e-10, 1.2852e-10, 2.9794e-10,  ..., 5.0551e-10, 8.0320e-11,\n",
       "             9.9157e-11],\n",
       "            [9.0971e-11, 9.6999e-12, 9.0367e-11,  ..., 5.5620e-11, 3.0938e-11,\n",
       "             1.5758e-11],\n",
       "            [8.9121e-11, 2.3192e-11, 3.5717e-11,  ..., 6.5184e-11, 9.7229e-12,\n",
       "             5.7122e-11]], device='cuda:0')},\n",
       "   160: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 1.2681e-06,  3.5078e-06,  4.1977e-07,  ...,  4.5101e-07,\n",
       "              1.9889e-06,  2.8367e-06],\n",
       "            [ 1.0057e-06,  4.2719e-06, -1.2882e-08,  ...,  1.0700e-06,\n",
       "             -4.1305e-06, -5.8207e-07],\n",
       "            [-4.4318e-07, -1.5295e-05,  3.7782e-06,  ...,  1.8023e-07,\n",
       "             -2.6321e-06,  5.9940e-06],\n",
       "            ...,\n",
       "            [ 6.9580e-07,  1.2243e-05, -5.4703e-07,  ...,  6.5911e-07,\n",
       "             -2.1619e-06, -1.5070e-06],\n",
       "            [-6.4288e-07, -6.8689e-06, -1.0562e-06,  ..., -1.3229e-06,\n",
       "              5.0965e-07, -1.2010e-06],\n",
       "            [ 2.0220e-06,  3.5104e-06,  2.6811e-06,  ...,  5.8317e-07,\n",
       "             -9.4934e-07, -1.4714e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.6687e-11, 5.3778e-09, 1.8172e-10,  ..., 5.9335e-12, 1.2153e-10,\n",
       "             3.8534e-09],\n",
       "            [1.6872e-11, 1.0165e-08, 1.6104e-10,  ..., 1.5543e-11, 1.2318e-10,\n",
       "             4.7528e-09],\n",
       "            [3.2153e-11, 3.7677e-09, 4.0699e-10,  ..., 3.4204e-11, 5.7313e-11,\n",
       "             4.3290e-09],\n",
       "            ...,\n",
       "            [1.4673e-10, 3.0546e-09, 2.1833e-10,  ..., 6.6327e-12, 3.7024e-11,\n",
       "             1.8964e-09],\n",
       "            [3.1573e-11, 9.0224e-10, 7.5616e-11,  ..., 1.0076e-11, 1.3539e-11,\n",
       "             1.0546e-09],\n",
       "            [8.9904e-11, 1.0705e-08, 1.7553e-10,  ..., 2.6902e-11, 2.1254e-10,\n",
       "             5.3762e-09]], device='cuda:0')},\n",
       "   161: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.8337e-06, -1.2277e-06,  7.0773e-07,  ...,  6.0228e-07,\n",
       "              9.5534e-07,  1.2947e-06],\n",
       "            [ 9.4019e-06,  3.5761e-06, -4.8482e-06,  ..., -1.3801e-06,\n",
       "              3.0011e-06, -2.4108e-06],\n",
       "            [-6.6227e-07,  8.4552e-06,  6.5875e-06,  ...,  4.4882e-06,\n",
       "             -1.6346e-06,  5.8184e-06],\n",
       "            ...,\n",
       "            [ 8.4856e-06, -1.4138e-06, -5.5338e-06,  ..., -1.9207e-06,\n",
       "              5.7922e-07, -3.3940e-06],\n",
       "            [-1.3038e-06,  4.7092e-09,  3.2996e-06,  ...,  2.5760e-06,\n",
       "              1.3288e-06,  2.4426e-06],\n",
       "            [ 1.0714e-05, -3.8814e-07, -9.0801e-06,  ..., -4.4246e-06,\n",
       "              1.7253e-06, -5.3547e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.8888e-09, 2.3620e-10, 1.4771e-09,  ..., 9.3827e-10, 2.9608e-10,\n",
       "             3.2261e-10],\n",
       "            [2.7670e-09, 4.0582e-10, 1.8954e-09,  ..., 1.9825e-09, 3.7236e-10,\n",
       "             7.7698e-10],\n",
       "            [6.8345e-10, 2.7392e-10, 4.1985e-10,  ..., 5.6543e-10, 1.1860e-10,\n",
       "             2.4006e-10],\n",
       "            ...,\n",
       "            [7.6804e-10, 1.5511e-10, 6.3272e-10,  ..., 3.5208e-10, 1.3662e-10,\n",
       "             2.0636e-10],\n",
       "            [1.3846e-10, 8.9865e-11, 1.3627e-10,  ..., 1.3568e-10, 3.9695e-11,\n",
       "             8.2688e-11],\n",
       "            [4.0324e-09, 3.7394e-10, 3.7460e-09,  ..., 2.2866e-09, 8.9975e-10,\n",
       "             5.8246e-10]], device='cuda:0')},\n",
       "   162: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.2671e-06,  2.8772e-05, -4.5941e-06,  ..., -6.5752e-06,\n",
       "              7.7304e-06,  1.0242e-05],\n",
       "            [ 3.3258e-06, -7.1183e-05, -3.5854e-06,  ..., -6.2584e-08,\n",
       "             -1.1250e-06, -2.1434e-05],\n",
       "            [ 4.3147e-07, -5.3730e-05, -1.4873e-05,  ..., -5.5554e-06,\n",
       "             -3.3994e-05, -6.6232e-05],\n",
       "            ...,\n",
       "            [-2.4059e-06, -1.5275e-06,  2.7173e-06,  ..., -3.0540e-06,\n",
       "             -7.9345e-06, -1.4236e-05],\n",
       "            [ 3.3869e-06,  2.7057e-05,  7.9790e-06,  ...,  1.3482e-06,\n",
       "              1.4198e-05,  2.2134e-05],\n",
       "            [ 3.8655e-06,  1.2326e-04,  7.8496e-06,  ...,  4.7475e-06,\n",
       "              2.6779e-05,  6.6005e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.3980e-10, 3.3405e-08, 1.0405e-09,  ..., 4.1256e-11, 5.6264e-10,\n",
       "             2.9607e-08],\n",
       "            [1.0280e-09, 1.2147e-07, 1.2884e-09,  ..., 1.8617e-10, 1.3750e-09,\n",
       "             6.7349e-08],\n",
       "            [2.3840e-09, 1.1698e-07, 4.9889e-09,  ..., 1.3894e-09, 1.2291e-09,\n",
       "             1.7768e-07],\n",
       "            ...,\n",
       "            [8.3367e-10, 1.0790e-07, 4.1808e-09,  ..., 3.3017e-10, 8.1354e-10,\n",
       "             1.3137e-07],\n",
       "            [2.4243e-09, 3.5656e-08, 4.4539e-09,  ..., 2.2287e-10, 5.7050e-10,\n",
       "             9.3856e-08],\n",
       "            [1.5416e-09, 2.1234e-07, 4.3375e-09,  ..., 1.1722e-09, 2.3448e-09,\n",
       "             1.8422e-07]], device='cuda:0')},\n",
       "   163: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-1.6726e-06, -3.7937e-06,  4.9347e-06,  ...,  1.1903e-06,\n",
       "             -5.8489e-07, -6.3137e-07],\n",
       "            [-2.9790e-06, -5.1233e-07, -3.6419e-06,  ..., -1.1293e-05,\n",
       "              8.5104e-06, -1.5348e-06],\n",
       "            [ 1.0051e-05,  9.0645e-06, -8.2063e-06,  ..., -2.0250e-05,\n",
       "              8.2527e-06,  7.8133e-06],\n",
       "            ...,\n",
       "            [ 4.4361e-06,  9.1726e-07, -4.3491e-06,  ..., -9.4357e-06,\n",
       "             -1.6351e-06, -1.6378e-06],\n",
       "            [-7.7125e-06, -2.9951e-06,  4.9278e-06,  ...,  8.6053e-06,\n",
       "             -1.7923e-06,  1.0125e-06],\n",
       "            [-1.3908e-06, -1.6694e-05,  4.2595e-06,  ...,  6.2309e-06,\n",
       "             -4.0941e-06, -1.3350e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0732e-09, 6.8264e-10, 1.7367e-09,  ..., 1.4653e-09, 2.7151e-10,\n",
       "             4.3134e-10],\n",
       "            [3.5282e-09, 1.3955e-09, 5.5507e-09,  ..., 6.8076e-09, 7.6836e-10,\n",
       "             1.0034e-09],\n",
       "            [4.6316e-09, 2.4992e-09, 4.3058e-09,  ..., 7.6005e-09, 8.9767e-10,\n",
       "             2.0088e-09],\n",
       "            ...,\n",
       "            [2.3031e-09, 2.3131e-09, 1.9101e-09,  ..., 2.6770e-09, 6.8162e-10,\n",
       "             8.8509e-10],\n",
       "            [1.0885e-09, 1.3810e-09, 1.3563e-09,  ..., 1.5310e-09, 5.7251e-10,\n",
       "             3.9784e-10],\n",
       "            [6.5761e-09, 3.2749e-09, 1.1024e-08,  ..., 9.9312e-09, 1.9913e-09,\n",
       "             2.2069e-09]], device='cuda:0')},\n",
       "   164: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 8.2772e-06, -4.6007e-06, -1.5688e-07, -4.8212e-06,  3.6170e-06,\n",
       "            -1.1186e-07, -5.8807e-06,  2.4548e-06,  2.4354e-06,  2.9712e-06,\n",
       "             3.5199e-06,  8.9509e-07,  1.3956e-06,  1.0517e-05, -2.0291e-06,\n",
       "             8.8016e-06,  4.5305e-06, -7.0984e-07, -4.5427e-06, -1.1281e-06,\n",
       "            -4.2950e-07, -3.8355e-07,  5.6076e-06, -2.3408e-07, -4.7170e-09,\n",
       "            -3.2213e-07, -6.4262e-06,  5.5302e-06,  4.9952e-06,  9.5598e-06,\n",
       "             3.5131e-07,  1.5671e-06,  1.8038e-06,  9.1614e-07,  2.1117e-06,\n",
       "            -1.7503e-06, -8.3018e-07,  9.6301e-07,  6.6974e-06, -4.2634e-06,\n",
       "            -2.7772e-08,  3.5707e-07,  3.4630e-06,  4.4940e-06, -6.7618e-06,\n",
       "            -3.0868e-06,  4.1016e-06,  8.8568e-06, -8.7813e-07, -2.5991e-06,\n",
       "             2.0382e-06, -6.2343e-06, -9.5226e-06,  2.0597e-06,  6.9665e-06,\n",
       "             3.0477e-07, -4.0301e-06,  1.8504e-06,  9.8585e-07, -7.3870e-06,\n",
       "            -3.2279e-06, -1.1229e-06,  8.3279e-06, -1.8849e-06, -4.4305e-07,\n",
       "            -1.9989e-06, -1.5206e-07,  1.2472e-05, -4.8434e-06, -1.9833e-06,\n",
       "            -5.0054e-06, -1.0143e-05,  7.1142e-07,  3.0241e-07, -4.8044e-06,\n",
       "            -1.4263e-06, -4.8302e-06,  2.9701e-06,  5.9236e-06,  1.0838e-06,\n",
       "             4.8214e-06,  5.0189e-06,  3.4123e-06, -1.9996e-07,  8.5700e-06,\n",
       "            -7.8797e-06,  5.1678e-06, -5.8951e-06,  6.9109e-06, -1.0332e-05,\n",
       "            -2.3193e-06, -3.6352e-06,  3.6314e-06, -4.9480e-06, -9.8203e-06,\n",
       "             3.1948e-06, -9.5230e-06,  6.3011e-06,  4.1956e-06,  3.3805e-06,\n",
       "            -5.7701e-07,  6.0715e-06, -6.4852e-07,  1.0666e-05,  4.1885e-08,\n",
       "             6.4771e-06,  3.5051e-05, -7.8287e-08,  2.1849e-06, -1.3790e-05,\n",
       "             1.0990e-07, -1.3332e-07, -5.4297e-06, -4.2673e-06, -4.7657e-06,\n",
       "            -2.0114e-06,  5.7990e-06,  3.2264e-06,  1.6898e-06,  3.7083e-06,\n",
       "             2.5577e-06, -9.6283e-06, -9.6209e-06, -6.5403e-06,  1.0616e-06,\n",
       "             2.7644e-06, -4.3530e-06,  4.8864e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([6.1303e-10, 8.8608e-10, 6.0180e-11, 4.7404e-09, 8.5880e-10, 2.7157e-10,\n",
       "            7.9766e-10, 3.7374e-10, 7.0859e-11, 1.0242e-10, 1.8972e-11, 1.7132e-10,\n",
       "            9.7676e-11, 1.3675e-09, 6.2336e-11, 3.7245e-10, 6.0918e-11, 3.9829e-10,\n",
       "            4.9537e-10, 9.2391e-11, 2.0841e-10, 4.4127e-11, 2.4441e-10, 1.9965e-11,\n",
       "            7.7363e-10, 3.2363e-09, 1.3834e-09, 2.8774e-10, 1.9153e-09, 1.3380e-09,\n",
       "            3.2526e-11, 2.1002e-11, 2.6427e-10, 2.3298e-09, 6.3391e-09, 4.7420e-10,\n",
       "            2.8290e-11, 2.9337e-10, 1.4087e-09, 2.1661e-10, 4.2264e-10, 1.4980e-11,\n",
       "            8.8811e-10, 7.2645e-09, 3.3635e-10, 5.1967e-11, 1.0551e-09, 7.0391e-10,\n",
       "            1.9729e-11, 3.4247e-11, 2.2294e-10, 2.8477e-09, 1.0053e-09, 5.7900e-10,\n",
       "            1.9076e-10, 2.1370e-10, 1.4832e-10, 3.8283e-11, 1.1436e-09, 8.4083e-10,\n",
       "            2.6936e-10, 7.0084e-11, 2.2291e-09, 6.4832e-11, 1.7313e-10, 3.7278e-11,\n",
       "            1.1823e-09, 1.8399e-09, 3.1050e-10, 1.5420e-09, 1.3030e-09, 6.2710e-10,\n",
       "            1.2964e-09, 2.2300e-10, 2.8699e-09, 3.1840e-11, 7.9386e-10, 3.9073e-10,\n",
       "            1.6316e-10, 2.4315e-09, 2.6100e-10, 1.1656e-09, 1.2805e-09, 2.8405e-09,\n",
       "            1.2248e-09, 1.2724e-09, 8.8528e-10, 7.3789e-10, 9.4673e-10, 8.6662e-10,\n",
       "            2.8729e-09, 8.4261e-11, 1.1989e-09, 1.9780e-09, 1.6920e-09, 7.7397e-10,\n",
       "            4.6768e-09, 1.0603e-09, 5.2182e-10, 2.1532e-10, 2.6893e-10, 1.0011e-09,\n",
       "            4.5306e-11, 1.7684e-09, 1.9884e-09, 1.6641e-09, 3.6571e-08, 9.8839e-11,\n",
       "            1.6590e-10, 2.6594e-09, 3.3381e-10, 1.4267e-10, 3.3576e-09, 6.7039e-11,\n",
       "            1.2623e-09, 2.6305e-10, 8.0724e-10, 2.0741e-09, 1.0013e-10, 6.6198e-11,\n",
       "            5.0784e-10, 4.5865e-09, 1.2310e-09, 1.0816e-10, 1.8129e-09, 1.6130e-09,\n",
       "            6.0571e-09, 7.2409e-10], device='cuda:0')},\n",
       "   165: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-5.1900e-06,  5.2407e-06, -5.7049e-07,  5.3538e-06, -4.5457e-06,\n",
       "            -1.2975e-06,  2.9090e-06, -6.4815e-06,  5.9107e-06, -2.6187e-06,\n",
       "            -1.1012e-06, -1.0097e-06, -4.0724e-06, -1.0042e-06, -1.2860e-06,\n",
       "            -8.0910e-06, -4.5205e-06, -1.0507e-06,  9.2965e-07,  3.0214e-06,\n",
       "            -1.1151e-06,  3.1203e-06,  5.8745e-06,  1.2070e-06,  1.2204e-06,\n",
       "            -7.8657e-07,  3.1783e-06,  5.4412e-06, -3.6293e-06, -6.6302e-06,\n",
       "            -2.1094e-06, -4.6031e-06,  7.8729e-06, -1.2610e-06,  7.6178e-07,\n",
       "             1.9701e-06,  6.5823e-07, -3.6870e-08, -2.0614e-06, -4.7290e-06,\n",
       "            -5.7902e-06,  3.4489e-06, -9.8547e-06, -1.4815e-06,  1.3980e-07,\n",
       "            -2.4991e-06, -5.2271e-06,  1.4346e-06, -1.9668e-06, -3.8303e-06,\n",
       "            -2.7399e-06, -2.9966e-06,  3.7503e-06,  2.8565e-06, -3.5021e-06,\n",
       "            -6.8708e-06, -2.1930e-07,  3.6487e-06, -3.0137e-06,  1.1785e-06,\n",
       "             3.8121e-06,  1.8324e-07, -2.2584e-06,  8.8681e-06,  8.3273e-06,\n",
       "            -1.4548e-06,  4.8576e-07,  1.7844e-06,  2.5554e-06,  8.7588e-07,\n",
       "             2.0407e-06,  6.0342e-07,  3.8776e-06,  3.3145e-06, -2.7149e-06,\n",
       "            -3.5533e-06, -1.8311e-06,  8.3134e-07, -3.6948e-06,  1.8027e-06,\n",
       "             5.8449e-06,  2.1299e-06, -3.5361e-06, -1.0317e-06,  5.2479e-06,\n",
       "            -3.9763e-07, -4.3079e-06, -4.1523e-06, -3.3106e-06,  2.7559e-06,\n",
       "             3.5379e-06, -9.5180e-07, -5.3631e-06, -2.5963e-06,  4.7342e-06,\n",
       "            -3.6036e-06,  9.4695e-07, -1.0476e-06,  1.1689e-05,  4.8556e-06,\n",
       "             3.0047e-06, -4.5216e-06,  5.4867e-06,  6.1949e-06,  4.4392e-06,\n",
       "             1.8128e-06,  4.4522e-06,  3.3510e-06, -2.8989e-06,  3.0832e-06,\n",
       "             2.8081e-07,  3.8800e-06, -2.8902e-07,  3.6274e-06,  2.0163e-06,\n",
       "             3.5741e-06, -4.8117e-06, -8.4294e-07, -6.7215e-06, -3.3292e-06,\n",
       "             7.6822e-07, -9.5292e-07,  5.2202e-07, -5.4984e-07,  7.3602e-07,\n",
       "            -1.9386e-06, -2.5082e-06, -1.9344e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([2.4146e-10, 2.3275e-09, 5.6044e-11, 3.9770e-09, 4.9795e-10, 2.2243e-10,\n",
       "            1.2841e-09, 2.7832e-10, 8.7939e-11, 7.3145e-11, 7.8239e-11, 1.0285e-10,\n",
       "            1.6693e-10, 1.4330e-09, 2.3057e-10, 5.0321e-10, 1.1667e-10, 6.5183e-10,\n",
       "            6.9978e-10, 1.8086e-10, 4.0367e-10, 1.8726e-10, 3.1524e-10, 6.6543e-11,\n",
       "            1.6853e-09, 3.5761e-09, 8.5693e-10, 3.5447e-10, 2.4592e-09, 1.1178e-09,\n",
       "            2.8606e-10, 1.0358e-10, 6.9669e-10, 3.5506e-09, 4.1294e-09, 1.0875e-09,\n",
       "            1.7335e-10, 1.7677e-10, 2.7231e-09, 7.6488e-10, 1.5522e-09, 9.6774e-11,\n",
       "            2.6563e-09, 2.7813e-09, 5.6782e-10, 1.6397e-10, 6.1681e-10, 8.6958e-10,\n",
       "            6.8826e-11, 9.6195e-11, 5.3379e-10, 2.4772e-09, 1.6348e-09, 1.1257e-09,\n",
       "            1.6831e-10, 5.6491e-10, 4.6406e-10, 1.0749e-10, 1.0419e-09, 9.5442e-10,\n",
       "            5.2789e-10, 9.1566e-11, 3.4095e-09, 1.3017e-10, 2.8471e-10, 4.9295e-11,\n",
       "            1.2328e-09, 1.5961e-09, 7.0781e-10, 2.2468e-09, 1.4766e-09, 8.5064e-10,\n",
       "            1.8325e-09, 6.5336e-10, 3.0653e-09, 1.7296e-10, 9.5292e-10, 4.0900e-10,\n",
       "            2.0713e-10, 3.4035e-09, 2.1933e-10, 1.7659e-09, 2.3359e-09, 3.2202e-09,\n",
       "            2.4057e-09, 1.2318e-09, 6.9740e-10, 1.9320e-09, 1.1649e-09, 2.1948e-09,\n",
       "            3.5739e-09, 2.2225e-10, 3.2444e-09, 2.1665e-09, 2.1691e-09, 5.0344e-10,\n",
       "            2.9780e-09, 3.7593e-09, 6.1884e-10, 3.0137e-10, 4.1042e-10, 1.1404e-09,\n",
       "            1.7929e-10, 2.8470e-09, 3.1424e-09, 8.8864e-10, 3.2368e-09, 1.9499e-10,\n",
       "            1.2802e-10, 3.3778e-09, 2.6293e-10, 2.5579e-10, 4.1062e-09, 1.0540e-10,\n",
       "            1.0674e-09, 1.4133e-09, 2.7569e-09, 2.2673e-09, 3.3561e-10, 3.5522e-10,\n",
       "            8.1061e-10, 3.5003e-09, 2.5097e-09, 2.4642e-10, 2.8081e-09, 2.6294e-09,\n",
       "            4.1212e-09, 9.0080e-10], device='cuda:0')},\n",
       "   166: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-7.3246e-05,  3.8795e-06, -1.5695e-05, -4.8091e-05, -2.2781e-05,\n",
       "            -4.7969e-05, -4.8696e-05, -1.7820e-05, -1.7166e-05, -3.7316e-05,\n",
       "            -1.5906e-05,  1.6790e-06, -2.3564e-05, -1.4761e-05, -1.7514e-05,\n",
       "            -2.5450e-05, -1.9601e-05, -2.6500e-06, -5.8982e-06, -4.4622e-06,\n",
       "             3.3453e-05, -1.0349e-05, -2.0403e-05,  4.2800e-06, -3.0867e-05,\n",
       "            -5.1780e-05, -2.9943e-05,  1.4347e-05, -2.3274e-05, -1.7763e-05,\n",
       "            -3.4676e-06, -5.0388e-07, -5.3025e-07, -5.4394e-06, -4.2512e-05,\n",
       "            -3.5331e-05, -4.6675e-06, -2.4935e-05, -3.6923e-05,  1.3637e-05,\n",
       "            -5.9015e-06,  4.3157e-05,  6.7590e-06, -4.8317e-05, -2.2781e-05,\n",
       "             5.3527e-07, -2.4747e-05,  8.9345e-06,  2.4389e-05,  3.4001e-05,\n",
       "             8.3471e-06, -5.3395e-06,  2.3767e-07, -4.8912e-06, -4.8410e-06,\n",
       "            -9.7460e-06,  2.3205e-05, -4.5779e-06,  2.3893e-05, -4.0770e-05,\n",
       "            -1.7309e-05, -1.4087e-05,  3.4351e-06, -2.0048e-05, -1.2462e-05,\n",
       "            -2.7342e-05, -4.3628e-05, -3.4976e-05, -2.6435e-05, -3.2327e-05,\n",
       "            -7.3009e-05,  2.3422e-05, -5.7374e-06, -1.3297e-06, -6.9538e-05,\n",
       "            -1.0436e-06,  2.4389e-06, -7.8031e-05, -3.6128e-05, -9.5249e-06,\n",
       "            -1.8989e-05, -9.5620e-06, -3.4519e-05, -3.6869e-05, -2.0402e-05,\n",
       "             9.8465e-06,  3.1479e-05,  5.5087e-06, -1.1301e-05,  4.8802e-06,\n",
       "            -3.8358e-05,  1.4729e-05,  1.4774e-05,  8.2664e-05, -1.2029e-05,\n",
       "             2.1754e-05,  1.4551e-05, -1.0856e-05, -2.1521e-05,  5.0958e-06,\n",
       "             7.7273e-06, -2.1214e-05, -5.3201e-07, -3.5255e-05, -7.1326e-06,\n",
       "             1.6428e-05, -1.4959e-04, -3.9081e-05,  1.3085e-05, -1.4524e-05,\n",
       "            -5.0961e-05, -8.5828e-05,  9.1874e-06, -1.0551e-05, -3.0797e-05,\n",
       "             1.3673e-06,  8.0430e-06, -5.6108e-07, -1.1161e-05, -2.3451e-05,\n",
       "             2.9030e-06,  5.4785e-05, -2.7538e-05, -2.9237e-05, -2.2684e-05,\n",
       "            -2.4800e-05,  1.0331e-05,  1.0058e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([3.2588e-08, 6.3967e-09, 1.3409e-08, 5.5315e-08, 1.2477e-08, 2.5842e-08,\n",
       "            4.7360e-08, 6.9581e-09, 5.1557e-09, 6.6098e-09, 3.8717e-09, 7.5341e-09,\n",
       "            5.4860e-09, 4.7863e-09, 4.1679e-09, 1.5403e-08, 1.5680e-08, 9.7569e-09,\n",
       "            7.0909e-08, 2.0915e-08, 3.6926e-09, 9.9588e-09, 1.8499e-08, 1.3486e-08,\n",
       "            5.2247e-09, 5.4509e-08, 3.4791e-08, 7.1000e-09, 3.6303e-08, 1.7333e-08,\n",
       "            1.3136e-08, 7.7357e-09, 9.0524e-09, 3.8407e-09, 1.9042e-08, 5.5252e-09,\n",
       "            2.6217e-09, 2.2579e-08, 2.9819e-08, 6.4766e-09, 5.7125e-09, 1.0234e-08,\n",
       "            5.3122e-09, 1.1377e-08, 9.1491e-09, 1.5525e-09, 1.6243e-08, 1.7491e-08,\n",
       "            3.1333e-08, 6.1187e-09, 1.9331e-08, 1.4298e-08, 5.4348e-09, 3.8478e-08,\n",
       "            2.4996e-08, 1.4249e-08, 8.9035e-09, 5.5934e-09, 8.6957e-09, 3.1231e-08,\n",
       "            5.9213e-09, 9.0602e-09, 1.9121e-08, 8.6602e-09, 1.3609e-08, 3.0126e-08,\n",
       "            5.0042e-08, 2.0478e-08, 8.0279e-09, 8.8612e-09, 1.1423e-07, 2.9184e-08,\n",
       "            5.8763e-09, 5.6694e-09, 1.1489e-08, 1.3766e-09, 5.6527e-09, 3.6364e-08,\n",
       "            9.6330e-09, 9.5776e-09, 1.1765e-08, 1.3064e-08, 6.7685e-09, 8.9586e-09,\n",
       "            5.5803e-09, 1.1496e-08, 1.2817e-08, 6.8903e-09, 1.2308e-08, 3.7478e-09,\n",
       "            6.7777e-08, 1.0881e-08, 7.3727e-09, 2.2409e-08, 2.6315e-08, 2.0017e-08,\n",
       "            2.7553e-08, 9.6903e-09, 5.4846e-09, 4.0743e-09, 2.1682e-09, 2.2321e-08,\n",
       "            7.1761e-09, 6.0304e-09, 1.9158e-08, 7.7243e-09, 6.4630e-07, 1.8008e-08,\n",
       "            7.7944e-09, 2.0530e-08, 9.4763e-09, 1.4308e-08, 2.2956e-09, 2.0157e-08,\n",
       "            3.6435e-08, 3.3598e-09, 3.1458e-09, 7.0174e-09, 1.1898e-08, 4.1727e-09,\n",
       "            6.8529e-09, 4.1079e-08, 3.5966e-08, 9.1809e-09, 9.8079e-09, 2.4328e-08,\n",
       "            6.8306e-09, 5.4117e-09], device='cuda:0')},\n",
       "   167: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 4.7179e-06,  4.6078e-05,  1.0863e-05, -5.5689e-05,  1.2899e-05,\n",
       "             2.1175e-05,  5.6436e-05,  2.3105e-05,  4.6921e-06,  5.1035e-05,\n",
       "             5.2853e-06, -8.2204e-06, -2.0593e-05, -3.6971e-05, -2.9894e-05,\n",
       "             4.3907e-05,  2.7434e-05, -1.8580e-05,  1.6892e-05, -1.8250e-05,\n",
       "            -2.0533e-07,  2.9818e-05, -4.4116e-05,  4.5290e-05, -2.8793e-05,\n",
       "             4.1748e-05, -9.3424e-05, -4.0348e-05,  2.2500e-05, -1.4169e-05,\n",
       "             1.1332e-05,  3.0103e-05,  3.9077e-05, -1.2133e-05, -3.4095e-05,\n",
       "            -3.9061e-06,  1.4546e-05, -8.1130e-05,  7.5173e-05,  3.2482e-05,\n",
       "            -1.5744e-05,  3.5152e-05,  3.0881e-05,  3.9590e-05,  3.5610e-05,\n",
       "             7.4565e-06, -3.3899e-05,  5.1077e-06, -4.2994e-05,  4.5021e-05,\n",
       "             5.1545e-05, -6.3096e-06, -7.6598e-06, -4.0838e-05, -1.2699e-05,\n",
       "             8.3369e-06, -3.1437e-05, -2.0315e-05,  8.5605e-05, -3.1098e-05,\n",
       "             6.3266e-07, -8.5485e-06, -2.0194e-05,  6.7449e-06,  3.2138e-05,\n",
       "            -6.1793e-05, -5.7800e-05,  3.0968e-05, -6.0118e-05, -2.4991e-05,\n",
       "            -8.8833e-05, -3.0434e-05, -1.5404e-05, -4.9889e-06,  2.6297e-05,\n",
       "             6.1250e-06,  9.8978e-06, -7.7304e-05,  4.6557e-05, -1.7975e-05,\n",
       "            -3.0570e-05, -3.0346e-05,  3.1699e-05, -4.3682e-05, -2.4852e-05,\n",
       "            -8.0463e-06,  4.4528e-05,  8.3182e-06,  5.8398e-06,  2.8380e-05,\n",
       "            -2.9925e-05,  3.7621e-05, -3.0999e-06, -9.3226e-05, -3.4037e-05,\n",
       "             2.4391e-05,  4.4757e-06,  4.4888e-05, -3.5769e-05, -3.0948e-05,\n",
       "            -1.4457e-05,  7.0056e-05,  2.9105e-05, -2.3409e-05, -1.4020e-05,\n",
       "            -2.6839e-05, -2.2074e-05, -8.1111e-06,  2.1230e-05,  1.1478e-06,\n",
       "             1.1640e-05,  1.1104e-05, -1.8995e-05,  2.0694e-05,  3.3266e-05,\n",
       "             1.8844e-05, -7.9184e-06, -3.6533e-05, -6.4374e-05, -3.0940e-05,\n",
       "             1.5625e-05, -4.3057e-05, -6.1195e-05,  4.5728e-07, -3.2090e-05,\n",
       "            -8.3882e-06,  2.8236e-05, -1.9622e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.9194e-08, 1.5320e-08, 1.6573e-08, 3.6748e-08, 7.3888e-09, 2.6553e-08,\n",
       "            3.6740e-08, 1.0957e-08, 9.9230e-09, 1.4182e-08, 8.3875e-09, 8.6136e-09,\n",
       "            1.0598e-08, 1.5928e-08, 1.0265e-08, 1.5907e-08, 2.6487e-08, 2.6688e-08,\n",
       "            3.7628e-08, 2.6164e-08, 7.0718e-09, 8.8606e-09, 2.9855e-08, 2.7378e-08,\n",
       "            1.6468e-08, 3.4442e-08, 7.4338e-08, 2.2319e-08, 2.4750e-08, 2.0953e-08,\n",
       "            1.1600e-08, 1.7775e-08, 1.6612e-08, 1.2247e-08, 1.7820e-08, 7.9797e-09,\n",
       "            5.8869e-09, 3.3050e-08, 4.8447e-08, 1.1159e-08, 1.4745e-08, 2.2895e-08,\n",
       "            1.2559e-08, 1.4342e-08, 1.0861e-08, 1.0542e-08, 1.1602e-08, 1.0569e-08,\n",
       "            3.2959e-08, 1.3601e-08, 2.6592e-08, 1.0615e-08, 1.3971e-08, 3.8336e-08,\n",
       "            2.5957e-08, 1.1148e-08, 6.3795e-09, 9.4192e-09, 2.2872e-08, 2.4540e-08,\n",
       "            1.5513e-08, 1.7122e-08, 1.0821e-08, 1.3759e-08, 3.5218e-08, 2.9749e-08,\n",
       "            3.2802e-08, 2.0305e-08, 1.9669e-08, 1.1701e-08, 6.4421e-08, 1.4891e-08,\n",
       "            1.4611e-08, 8.7607e-09, 1.7190e-08, 6.3001e-09, 9.4528e-09, 4.3117e-08,\n",
       "            2.0278e-08, 1.4876e-08, 1.4357e-08, 2.3006e-08, 5.3137e-09, 8.2340e-09,\n",
       "            1.0337e-08, 7.7662e-09, 3.6807e-08, 1.0568e-08, 9.6712e-09, 7.6832e-09,\n",
       "            3.8636e-08, 2.3054e-08, 1.3183e-08, 3.1050e-08, 2.5259e-08, 1.1178e-08,\n",
       "            1.5488e-08, 1.9691e-08, 1.2222e-08, 5.1689e-09, 6.1768e-09, 3.4537e-08,\n",
       "            1.7280e-08, 1.1983e-08, 1.9251e-08, 1.1345e-08, 3.3522e-08, 2.2686e-08,\n",
       "            1.1177e-08, 2.8382e-08, 1.8026e-08, 2.9430e-08, 9.7167e-09, 2.4667e-08,\n",
       "            2.8725e-08, 8.6500e-09, 7.9744e-09, 1.0390e-08, 1.9553e-08, 7.4409e-09,\n",
       "            1.0307e-08, 1.9949e-08, 4.0672e-08, 1.7879e-08, 1.4719e-08, 2.4848e-08,\n",
       "            2.1707e-08, 1.0366e-08], device='cuda:0')},\n",
       "   168: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.0815e-06, -9.4263e-07,  4.8990e-05,  8.2921e-05,  1.5443e-05,\n",
       "             2.4508e-05,  3.6373e-06,  5.3848e-05,  2.8767e-05,  1.3714e-05,\n",
       "             1.3146e-05,  9.8466e-05,  9.1648e-05,  6.8420e-05,  1.4457e-04,\n",
       "            -1.3122e-05,  1.9321e-05,  2.0769e-05,  1.8091e-05, -5.3490e-05,\n",
       "            -8.1902e-05, -1.7829e-05, -2.5392e-05, -1.2970e-05,  1.8548e-05,\n",
       "             4.8698e-06,  4.5954e-05, -2.7704e-05, -2.3047e-06, -2.7473e-05,\n",
       "             3.0455e-05,  1.7068e-05, -5.7817e-05,  1.1577e-06,  2.8570e-05,\n",
       "             3.1928e-06, -5.3933e-07,  2.2674e-05, -1.1806e-05, -1.4892e-05,\n",
       "             4.7588e-05, -9.5208e-06, -4.1641e-07,  5.6775e-05,  8.0208e-05,\n",
       "            -1.0209e-05, -3.1124e-05, -1.4604e-05, -5.1821e-06,  1.1973e-05,\n",
       "            -1.5975e-05,  2.9170e-05,  7.0201e-05,  2.3576e-05, -3.5863e-06,\n",
       "            -9.3773e-05,  3.2818e-05,  1.6862e-05, -9.7732e-06, -2.2546e-05,\n",
       "            -4.3567e-06, -4.7331e-06,  3.0136e-05,  5.7601e-06, -1.0548e-05,\n",
       "            -1.0358e-05, -2.7012e-05,  5.2056e-05, -6.3598e-05,  3.0502e-05,\n",
       "            -3.1546e-05,  6.3667e-05,  3.5048e-05,  9.0620e-06, -4.9564e-05,\n",
       "             1.5967e-04,  5.6300e-05,  7.2399e-05,  6.0155e-05,  8.1075e-05,\n",
       "            -2.9088e-05,  8.8467e-05, -4.0940e-05,  1.1211e-06, -9.3071e-07,\n",
       "            -6.6008e-05,  9.3551e-06,  3.2784e-05, -6.4002e-05,  1.5261e-05,\n",
       "            -4.6946e-05, -2.3781e-05, -5.2177e-06,  5.4141e-05,  6.9023e-05,\n",
       "             4.4462e-05,  2.9813e-05, -5.6174e-06, -4.0293e-05,  2.1940e-05,\n",
       "            -4.0977e-06, -2.8275e-05, -1.0287e-05, -5.2717e-05,  1.1187e-05,\n",
       "            -5.9466e-05,  9.5842e-05, -2.6607e-05, -1.6227e-05, -3.5297e-05,\n",
       "            -1.1365e-05,  1.0803e-05, -7.3395e-06, -2.5462e-05, -4.0608e-06,\n",
       "            -6.6952e-06, -1.1617e-05, -2.8311e-05, -3.8014e-05, -8.1045e-05,\n",
       "            -5.5729e-05,  1.9889e-06, -1.6663e-04,  2.8447e-05, -4.6689e-06,\n",
       "            -9.5134e-06,  2.2425e-05, -5.9789e-06], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([9.3363e-08, 5.1982e-09, 1.7579e-08, 3.3698e-08, 2.2520e-08, 2.5449e-08,\n",
       "            3.3474e-08, 2.1469e-08, 1.3785e-08, 2.7540e-08, 1.9292e-08, 4.7525e-08,\n",
       "            8.7705e-08, 2.3341e-08, 1.1987e-07, 1.0318e-07, 2.1465e-08, 2.6235e-08,\n",
       "            2.8893e-08, 5.0836e-08, 4.4981e-08, 4.3324e-08, 8.4429e-08, 1.6356e-08,\n",
       "            1.1871e-08, 8.6425e-09, 1.7373e-08, 5.0212e-08, 4.7514e-09, 2.9921e-08,\n",
       "            1.2360e-08, 3.6973e-09, 2.2212e-08, 4.4201e-09, 3.0318e-08, 8.3177e-09,\n",
       "            7.3391e-10, 2.2091e-08, 1.5776e-09, 1.6686e-09, 2.4296e-08, 7.0486e-10,\n",
       "            8.0303e-09, 1.8162e-08, 1.2882e-08, 6.7588e-10, 5.1432e-09, 9.0563e-09,\n",
       "            2.5550e-09, 2.1767e-09, 2.8941e-09, 9.5921e-09, 4.2100e-08, 2.5723e-09,\n",
       "            1.4858e-08, 4.8582e-08, 1.0355e-08, 4.1050e-09, 5.2740e-09, 1.3388e-09,\n",
       "            9.5391e-09, 4.4237e-09, 8.0376e-09, 9.6505e-09, 4.1970e-09, 9.5249e-09,\n",
       "            5.5074e-09, 2.4189e-08, 6.2182e-08, 1.3931e-08, 2.1527e-08, 1.9808e-08,\n",
       "            4.4303e-09, 6.6744e-09, 3.2198e-08, 3.0495e-08, 5.5045e-08, 1.7994e-08,\n",
       "            3.7055e-08, 1.2193e-07, 8.1335e-08, 2.2509e-08, 3.7884e-08, 2.9215e-08,\n",
       "            1.3583e-08, 2.9219e-08, 4.7947e-08, 8.1231e-08, 8.3372e-08, 1.2426e-07,\n",
       "            9.7340e-09, 1.8693e-08, 4.0022e-09, 5.6960e-08, 1.0714e-08, 1.0216e-08,\n",
       "            8.0764e-09, 3.0293e-09, 2.7407e-08, 9.4085e-09, 7.0624e-09, 1.3477e-08,\n",
       "            6.6286e-09, 2.6219e-08, 1.0565e-08, 5.2589e-08, 1.2779e-06, 4.2783e-08,\n",
       "            4.2483e-08, 5.2151e-08, 8.5675e-09, 1.5345e-08, 2.5108e-08, 8.4437e-09,\n",
       "            1.8566e-08, 1.2492e-08, 7.4819e-09, 4.0260e-08, 8.3843e-09, 4.6301e-08,\n",
       "            7.1409e-09, 1.5015e-08, 1.1610e-07, 3.1499e-08, 4.9638e-09, 3.7159e-08,\n",
       "            3.8044e-09, 1.5961e-08], device='cuda:0')},\n",
       "   169: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 1.4750e-04, -6.0159e-05, -1.2280e-04, -4.2349e-06,  5.9212e-05,\n",
       "             8.7814e-05,  8.8782e-06, -8.1645e-06,  5.6894e-05, -1.9916e-05,\n",
       "             4.3144e-05, -7.4385e-05,  1.9659e-04, -1.0530e-04, -1.1509e-04,\n",
       "             3.9413e-05,  1.1159e-04,  2.4648e-05, -2.9918e-06,  3.7111e-05,\n",
       "             6.9137e-05, -7.1113e-06, -2.4900e-05, -3.3800e-05, -3.0853e-05,\n",
       "             5.3511e-05,  5.1654e-05, -1.2127e-05, -5.3485e-05, -1.8706e-05,\n",
       "             4.0417e-05,  2.2138e-05, -5.9050e-05, -3.5904e-05,  5.6753e-05,\n",
       "             8.9855e-06,  2.6317e-05, -4.2976e-05, -7.5856e-06, -2.2519e-05,\n",
       "            -1.0755e-04, -2.2803e-05, -3.9249e-05, -6.3640e-05, -7.5113e-05,\n",
       "            -3.7806e-05,  7.8385e-05,  1.1704e-05,  3.3898e-05, -8.6157e-06,\n",
       "             1.1475e-05, -2.3133e-05, -6.9536e-05,  4.8843e-05,  3.8523e-05,\n",
       "             1.1601e-04, -3.5386e-05,  2.5807e-06,  1.1176e-04, -4.1276e-05,\n",
       "            -3.2574e-05,  4.4715e-05, -5.0895e-05, -7.6924e-05,  3.0408e-05,\n",
       "            -1.0999e-04,  3.5883e-05, -7.0834e-05,  1.1008e-04,  6.0917e-05,\n",
       "             3.0943e-05, -4.3715e-05,  4.5391e-05,  1.8075e-05,  3.6779e-05,\n",
       "            -1.2973e-04, -5.7673e-06,  5.4503e-05,  1.3574e-05, -1.3270e-04,\n",
       "             2.5052e-05,  1.4347e-04,  1.1963e-04,  6.1287e-05,  9.8876e-05,\n",
       "             3.8791e-05, -4.4776e-05, -1.0736e-05,  3.3644e-05, -3.1484e-05,\n",
       "            -5.2596e-05, -7.0693e-05,  2.7698e-05, -7.2951e-05,  4.9408e-05,\n",
       "             6.1884e-05, -5.8458e-05,  1.5669e-06, -3.8292e-05,  1.9882e-05,\n",
       "             7.6868e-07, -7.5249e-05, -3.2736e-05, -3.0271e-05,  1.2693e-05,\n",
       "            -9.9954e-05,  3.1794e-05, -4.6198e-05, -4.1997e-05, -1.8422e-05,\n",
       "            -1.4278e-05,  9.3026e-06,  2.0227e-06, -4.5898e-05, -3.7699e-05,\n",
       "            -4.3055e-06, -5.1995e-05, -1.6880e-05, -4.2519e-05, -9.8804e-05,\n",
       "            -7.1039e-05, -1.5170e-04, -1.2321e-04,  7.6029e-06, -6.8147e-06,\n",
       "            -1.3694e-05, -1.2546e-05, -1.4438e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.9545e-07, 2.3890e-08, 1.6808e-07, 5.2943e-08, 8.9707e-08, 7.3079e-08,\n",
       "            4.6668e-08, 3.9871e-08, 4.9771e-08, 4.5664e-08, 1.4686e-07, 4.5593e-08,\n",
       "            3.0357e-07, 8.7075e-08, 1.4864e-07, 6.5412e-08, 1.3355e-07, 2.5082e-08,\n",
       "            2.5232e-08, 3.4994e-08, 3.8612e-08, 2.0418e-08, 9.3868e-08, 1.0013e-07,\n",
       "            3.1767e-08, 7.2187e-08, 2.1211e-08, 4.2738e-08, 1.7857e-08, 2.3041e-08,\n",
       "            2.4238e-08, 7.5289e-09, 3.3776e-08, 4.2597e-08, 4.0806e-08, 2.6972e-08,\n",
       "            1.8619e-08, 5.1947e-08, 1.0618e-08, 1.6437e-08, 1.5883e-07, 8.8429e-09,\n",
       "            3.0559e-08, 1.7865e-08, 3.3000e-08, 2.1674e-08, 7.1029e-08, 2.4619e-08,\n",
       "            1.8249e-08, 1.4094e-08, 1.3184e-08, 1.0433e-08, 5.5985e-08, 2.9418e-08,\n",
       "            6.8279e-08, 1.6289e-07, 1.6379e-08, 3.3543e-08, 1.2565e-07, 2.3880e-08,\n",
       "            2.3896e-08, 1.0241e-08, 1.9742e-08, 3.6274e-08, 2.0319e-08, 4.4346e-08,\n",
       "            2.8121e-08, 2.6877e-08, 9.1142e-08, 6.7538e-08, 2.9700e-08, 1.6190e-08,\n",
       "            2.5188e-08, 4.1132e-08, 2.5826e-08, 1.6861e-07, 7.5926e-08, 5.7505e-08,\n",
       "            5.8560e-08, 2.4189e-07, 7.3578e-08, 8.7340e-08, 2.1815e-07, 3.6962e-08,\n",
       "            5.5005e-08, 1.0584e-08, 2.1575e-08, 2.3005e-08, 3.0387e-08, 1.4518e-07,\n",
       "            8.9210e-08, 6.4334e-08, 1.2174e-08, 1.2204e-07, 2.0571e-08, 2.6814e-08,\n",
       "            3.9485e-08, 1.6919e-08, 3.2711e-08, 1.0415e-08, 1.8309e-08, 5.4823e-08,\n",
       "            1.0531e-08, 1.6235e-08, 7.9412e-09, 2.0902e-07, 1.8499e-07, 6.7410e-08,\n",
       "            2.2249e-08, 2.6726e-08, 6.3096e-09, 1.2639e-08, 1.4958e-08, 1.7286e-08,\n",
       "            5.5333e-08, 1.0516e-08, 2.5013e-08, 3.2133e-08, 1.7044e-08, 6.9938e-08,\n",
       "            1.0505e-08, 1.8482e-07, 7.5083e-08, 2.2757e-08, 1.1316e-08, 2.1844e-08,\n",
       "            1.2634e-08, 1.6401e-08], device='cuda:0')},\n",
       "   170: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-8.4055e-06,  7.1546e-06,  9.0406e-06,  ...,  1.0457e-05,\n",
       "             -3.2975e-06,  2.9621e-06],\n",
       "            [-1.1082e-04,  2.7386e-05,  4.2173e-05,  ...,  4.7474e-05,\n",
       "             -3.5928e-05,  1.4494e-05],\n",
       "            [-5.7880e-07,  1.3557e-05,  1.3167e-05,  ...,  2.8296e-05,\n",
       "             -6.4459e-06, -1.5163e-05],\n",
       "            ...,\n",
       "            [-7.4389e-06,  1.4906e-05, -6.9322e-07,  ..., -1.4708e-06,\n",
       "             -3.7720e-06,  6.8529e-06],\n",
       "            [-5.7250e-05, -1.5498e-05,  3.4091e-05,  ...,  5.3497e-05,\n",
       "             -2.2265e-05, -4.6843e-06],\n",
       "            [ 1.5676e-05, -8.9912e-07,  2.1727e-05,  ...,  8.9581e-06,\n",
       "             -3.2274e-05, -4.6741e-05]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[5.7347e-09, 5.6977e-09, 7.3490e-09,  ..., 8.6251e-09, 1.9485e-09,\n",
       "             3.5010e-09],\n",
       "            [9.9857e-08, 2.8911e-08, 5.4347e-08,  ..., 6.5272e-08, 2.2282e-08,\n",
       "             3.6039e-08],\n",
       "            [3.2038e-08, 2.5802e-08, 2.0877e-08,  ..., 5.6649e-08, 1.2841e-08,\n",
       "             1.0064e-08],\n",
       "            ...,\n",
       "            [2.7830e-09, 2.3965e-10, 1.1685e-09,  ..., 2.5761e-09, 3.8887e-10,\n",
       "             8.2486e-10],\n",
       "            [9.3078e-09, 3.2149e-09, 3.7258e-09,  ..., 5.4824e-09, 1.5955e-09,\n",
       "             3.3611e-09],\n",
       "            [2.0178e-07, 1.3156e-07, 9.5014e-08,  ..., 2.2554e-07, 5.6557e-08,\n",
       "             9.6715e-08]], device='cuda:0')},\n",
       "   171: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-6.3479e-06,  2.3325e-05,  1.9994e-05,  1.7855e-05,  8.7540e-06,\n",
       "            -1.2515e-05, -4.4104e-06,  4.2808e-06, -5.5705e-05,  4.2305e-05,\n",
       "            -5.1133e-05, -4.3710e-05, -2.9986e-05,  2.6453e-05,  3.8876e-06,\n",
       "             2.7859e-05, -3.7433e-05,  2.1931e-05, -5.4341e-05, -1.1094e-04,\n",
       "             1.2420e-05,  6.6921e-05, -5.2797e-05,  2.0950e-06,  4.1850e-06,\n",
       "            -9.8874e-05,  4.0919e-05, -4.2715e-05, -5.2910e-05, -1.2073e-05,\n",
       "            -2.7207e-05, -6.1310e-05,  1.8141e-06, -1.1995e-04, -3.1492e-06,\n",
       "            -6.7629e-05, -6.2944e-06,  7.0966e-06,  1.2213e-05, -1.6320e-04,\n",
       "            -2.1080e-05, -9.5291e-06,  5.3590e-06,  1.8748e-06, -4.3295e-05,\n",
       "            -1.7150e-04, -1.0488e-04,  6.3599e-06,  1.3660e-08, -8.1156e-05,\n",
       "             2.9491e-05,  9.4035e-06,  1.6250e-06, -6.8964e-05, -4.3577e-06,\n",
       "            -2.7676e-05,  2.8807e-05, -1.4327e-06, -6.7376e-06, -3.6085e-05,\n",
       "            -3.9129e-05,  2.2722e-06, -3.2327e-05, -5.3696e-05, -1.3783e-05,\n",
       "            -4.6243e-06,  4.4903e-05, -6.5627e-05,  1.1621e-05, -3.9428e-05,\n",
       "            -3.0404e-06,  2.6981e-06,  2.2231e-05, -2.3367e-05, -1.9683e-05,\n",
       "             5.3795e-05, -4.9997e-05, -1.0555e-04,  1.5574e-05, -9.7305e-06,\n",
       "            -1.1474e-06,  4.1818e-05,  4.9787e-07,  1.0119e-05, -9.2286e-06,\n",
       "             6.1256e-06,  2.4297e-05,  1.5344e-04,  5.0649e-06,  2.1672e-05,\n",
       "             5.4390e-06,  9.4123e-06, -7.9625e-05, -3.5062e-05, -1.2854e-04,\n",
       "            -2.4965e-05, -6.9910e-05, -2.5645e-06,  7.8702e-05, -1.3205e-05,\n",
       "            -3.8516e-08, -5.7690e-06, -7.4631e-06, -1.5620e-05,  7.8031e-06,\n",
       "             2.1623e-05,  2.5108e-05,  2.3716e-05, -5.7668e-05, -1.8972e-05,\n",
       "             3.6741e-05, -3.2649e-06, -1.3070e-04, -7.4144e-05, -5.0389e-05,\n",
       "            -5.8597e-05,  7.4873e-06, -2.8287e-05,  3.9143e-06, -2.7839e-06,\n",
       "            -3.8718e-05,  4.8117e-06,  3.8636e-05, -1.3661e-06,  1.0102e-05,\n",
       "            -6.2798e-06,  4.1450e-05,  9.3688e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([7.1832e-09, 6.3351e-08, 3.1705e-08, 9.9047e-09, 1.1415e-08, 4.8019e-08,\n",
       "            1.6418e-09, 2.6739e-09, 7.1945e-08, 3.6699e-08, 1.0195e-07, 6.9182e-08,\n",
       "            5.3347e-09, 8.3173e-09, 2.0632e-08, 8.1937e-09, 3.0159e-08, 1.2471e-07,\n",
       "            3.8321e-08, 1.1374e-07, 3.1156e-08, 3.9241e-08, 1.3095e-07, 3.7135e-09,\n",
       "            1.8042e-09, 2.5920e-08, 2.2555e-08, 1.9174e-07, 1.8330e-09, 2.4834e-08,\n",
       "            5.9912e-10, 3.2218e-08, 3.0942e-08, 1.7133e-07, 2.4918e-09, 3.1086e-08,\n",
       "            4.9586e-08, 8.2729e-09, 1.9126e-09, 7.3561e-08, 1.5784e-09, 6.7619e-10,\n",
       "            3.2357e-08, 5.1254e-08, 1.9661e-07, 1.9236e-07, 1.6426e-07, 1.6885e-09,\n",
       "            2.1410e-10, 5.3476e-08, 4.1678e-08, 1.7530e-07, 3.6873e-08, 1.7279e-07,\n",
       "            1.0478e-08, 6.7272e-09, 1.8870e-08, 1.3040e-08, 9.4163e-08, 6.9126e-08,\n",
       "            1.5441e-08, 4.6295e-08, 2.2820e-08, 8.6390e-09, 2.7952e-09, 2.6837e-08,\n",
       "            8.0753e-09, 1.1300e-07, 4.0052e-08, 3.3117e-08, 3.4363e-10, 5.3012e-10,\n",
       "            1.1168e-07, 2.3042e-08, 4.1581e-08, 5.0448e-08, 8.4381e-08, 2.0636e-08,\n",
       "            4.7767e-09, 6.9553e-09, 1.3250e-08, 2.4531e-08, 6.6723e-10, 1.9559e-08,\n",
       "            6.1516e-09, 3.9667e-08, 1.1031e-08, 7.2946e-08, 1.5978e-08, 1.4430e-08,\n",
       "            1.0506e-09, 2.8207e-08, 2.4537e-07, 3.8454e-09, 4.1278e-07, 3.4482e-09,\n",
       "            1.0421e-07, 5.1684e-09, 2.9408e-08, 1.7481e-08, 1.4563e-09, 1.4093e-08,\n",
       "            9.9826e-09, 8.8187e-09, 1.1947e-08, 2.1258e-07, 4.3816e-08, 1.1911e-08,\n",
       "            2.2242e-08, 1.0295e-08, 2.2849e-08, 1.7019e-10, 1.8836e-07, 7.5406e-08,\n",
       "            4.6036e-08, 1.6171e-07, 3.3959e-08, 4.5545e-08, 3.1926e-08, 3.8925e-10,\n",
       "            5.8050e-08, 5.0877e-09, 2.9838e-08, 1.4621e-08, 2.2812e-08, 1.1120e-09,\n",
       "            5.5558e-09, 1.5485e-07], device='cuda:0')},\n",
       "   172: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 4.1258e-05, -7.3694e-06, -2.4603e-05,  ..., -4.0223e-05,\n",
       "              1.3647e-05,  3.3635e-06],\n",
       "            [ 2.1900e-06, -3.4647e-07, -1.4881e-06,  ..., -1.2787e-06,\n",
       "              6.0177e-07,  1.0915e-06],\n",
       "            [ 1.1091e-05,  1.4024e-05, -8.0486e-06,  ..., -2.7745e-05,\n",
       "             -1.0771e-05,  2.2229e-05],\n",
       "            ...,\n",
       "            [ 1.2447e-05,  4.7579e-06, -1.3774e-05,  ..., -2.1257e-05,\n",
       "             -2.2796e-06,  4.9855e-06],\n",
       "            [ 1.4259e-08,  7.6532e-09,  1.2114e-08,  ..., -7.6432e-09,\n",
       "             -3.3016e-09, -5.8950e-09],\n",
       "            [ 1.3803e-05, -4.6162e-06, -5.7568e-06,  ...,  1.4928e-07,\n",
       "              4.9741e-06, -9.6647e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.5898e-08, 2.9480e-09, 2.2863e-08,  ..., 3.9138e-08, 6.3201e-09,\n",
       "             2.0731e-08],\n",
       "            [1.5742e-11, 8.0308e-12, 4.3073e-11,  ..., 7.5507e-11, 1.1982e-11,\n",
       "             1.8218e-11],\n",
       "            [2.1762e-08, 2.5236e-09, 1.8000e-08,  ..., 2.8617e-08, 4.0386e-09,\n",
       "             1.5744e-08],\n",
       "            ...,\n",
       "            [1.0155e-08, 1.5474e-09, 9.9972e-09,  ..., 1.7811e-08, 2.8360e-09,\n",
       "             8.6235e-09],\n",
       "            [3.5558e-12, 2.1975e-12, 9.9955e-13,  ..., 2.2947e-13, 3.4460e-13,\n",
       "             6.3542e-13],\n",
       "            [1.6965e-09, 6.8117e-10, 9.1765e-10,  ..., 1.2360e-09, 4.6876e-10,\n",
       "             6.8519e-10]], device='cuda:0')},\n",
       "   173: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-1.2402e-05, -1.1408e-06, -7.8674e-05, -9.3634e-05, -1.0526e-04,\n",
       "             2.1354e-04, -9.1550e-05,  2.4691e-05, -7.5814e-05,  9.2985e-05,\n",
       "            -5.5638e-05,  3.4345e-06, -8.6676e-05, -7.4103e-05, -5.0568e-05,\n",
       "            -7.9398e-05, -6.5261e-05, -7.4223e-05,  1.9374e-05, -4.6488e-05,\n",
       "             3.6630e-05, -1.5572e-04, -6.4314e-05, -1.3096e-05, -3.9318e-05,\n",
       "            -1.0452e-04, -7.4267e-05, -8.9095e-06, -9.0134e-05, -8.0900e-05,\n",
       "             3.1429e-06,  8.5195e-06,  4.8700e-07, -5.3145e-05,  3.1003e-06,\n",
       "            -8.3234e-05, -1.2359e-04,  9.5872e-07,  2.9249e-06, -1.2526e-04,\n",
       "            -1.3679e-04,  1.2167e-05, -7.6610e-05, -1.2738e-04,  2.2973e-05,\n",
       "            -2.6955e-05,  8.0732e-06, -3.0419e-05, -1.7481e-05, -1.4663e-05,\n",
       "            -4.6377e-06, -8.0292e-05, -4.5179e-06,  1.4819e-05, -1.0383e-04,\n",
       "            -1.6414e-04, -6.5478e-05, -8.5021e-05, -8.7145e-05, -4.3166e-05,\n",
       "             3.1167e-05,  1.2165e-05, -4.1463e-06, -7.4049e-06, -7.4117e-05,\n",
       "            -4.5011e-05,  3.5580e-05,  9.7799e-06, -4.7729e-06,  8.4630e-06,\n",
       "             1.7792e-05, -4.0805e-07,  4.9412e-06, -5.3460e-05, -5.5654e-05,\n",
       "            -9.9546e-05,  1.0426e-06,  5.1232e-07,  4.8968e-06,  5.6558e-06,\n",
       "             5.5908e-06, -9.7620e-05, -1.0485e-04, -4.2115e-05, -4.2758e-06,\n",
       "            -8.5157e-05,  2.5592e-05,  4.4644e-07,  1.7442e-05,  8.7862e-05,\n",
       "             1.2832e-04, -1.1642e-04, -7.3365e-06, -1.7386e-04, -1.0884e-04,\n",
       "            -3.1210e-05, -1.0739e-04,  9.7819e-06,  1.2724e-04, -6.9166e-05,\n",
       "            -3.7358e-05, -6.3509e-05, -1.2161e-04, -1.1605e-04, -9.8348e-05,\n",
       "            -1.5123e-04, -3.8960e-05, -8.1717e-05, -7.4214e-05,  4.1997e-05,\n",
       "            -2.1640e-05, -7.1245e-06, -1.9829e-04, -7.5094e-06, -1.3319e-04,\n",
       "             1.5056e-05, -1.9338e-05,  2.0676e-05, -9.3641e-07,  4.3861e-05,\n",
       "            -6.5612e-05, -7.3354e-07, -1.3743e-04,  4.7152e-05,  1.4402e-04,\n",
       "            -3.7966e-05, -2.2571e-08,  3.9455e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([9.4002e-08, 2.6021e-10, 6.6934e-08, 5.1405e-08, 1.1773e-07, 1.6933e-07,\n",
       "            2.3398e-08, 2.6334e-08, 4.1572e-08, 2.0167e-07, 7.6787e-08, 3.7175e-09,\n",
       "            4.5840e-08, 2.4968e-08, 1.2470e-07, 1.1034e-07, 1.0532e-07, 7.2707e-08,\n",
       "            3.4125e-09, 9.2100e-08, 3.0526e-08, 4.0842e-07, 4.3678e-08, 5.7471e-10,\n",
       "            4.5222e-09, 5.7585e-08, 4.7132e-08, 9.7467e-09, 1.4726e-07, 2.1063e-07,\n",
       "            2.3861e-08, 3.7424e-08, 2.4026e-09, 4.6670e-08, 4.6480e-11, 7.3236e-08,\n",
       "            7.5117e-08, 2.3458e-10, 1.8111e-08, 1.4493e-07, 3.0905e-07, 1.3139e-08,\n",
       "            1.6779e-07, 2.2094e-07, 1.4114e-08, 4.6804e-09, 2.1983e-10, 8.1952e-08,\n",
       "            8.1276e-08, 3.4896e-09, 1.3656e-10, 1.5304e-07, 1.3149e-10, 4.2101e-09,\n",
       "            1.0895e-07, 2.3404e-07, 2.6527e-08, 3.3167e-08, 5.1746e-08, 5.7290e-08,\n",
       "            2.7046e-08, 1.2090e-08, 6.4143e-09, 3.2820e-08, 7.8936e-08, 1.3303e-08,\n",
       "            6.8409e-08, 9.8361e-10, 7.2630e-10, 4.2409e-08, 1.2407e-08, 1.2537e-10,\n",
       "            4.7183e-09, 4.2286e-08, 2.0868e-08, 1.0870e-07, 3.0489e-08, 2.6891e-11,\n",
       "            1.4863e-08, 1.7930e-09, 4.2695e-10, 2.8195e-08, 8.7356e-08, 5.8284e-08,\n",
       "            2.4801e-08, 1.9659e-08, 4.5511e-08, 2.3284e-09, 1.3152e-08, 8.9472e-08,\n",
       "            1.0872e-07, 1.1835e-07, 2.0790e-08, 2.3112e-07, 7.5915e-08, 9.8698e-09,\n",
       "            6.9267e-08, 7.1082e-10, 9.4330e-08, 1.5139e-07, 8.0646e-08, 2.1100e-08,\n",
       "            4.9258e-08, 1.1949e-07, 8.4900e-08, 2.9955e-07, 3.1769e-08, 4.3863e-08,\n",
       "            8.0412e-08, 3.1344e-08, 2.3700e-09, 2.2085e-09, 1.1243e-07, 2.5083e-08,\n",
       "            1.1994e-07, 5.1475e-08, 1.0201e-07, 5.0469e-08, 1.9250e-09, 4.0678e-08,\n",
       "            3.2883e-08, 7.3760e-08, 1.2756e-07, 2.4308e-09, 1.5455e-07, 5.4656e-08,\n",
       "            1.1102e-11, 1.0427e-08], device='cuda:0')},\n",
       "   174: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 2.1146e-05, -1.6122e-05, -1.1963e-05,  ..., -2.0842e-06,\n",
       "              1.9243e-05, -1.5676e-05],\n",
       "            [ 3.1807e-06, -2.2079e-06,  5.8061e-06,  ...,  5.5600e-06,\n",
       "             -2.9116e-06, -3.2260e-06],\n",
       "            [ 1.7131e-07,  1.0222e-07, -1.3395e-07,  ..., -4.5798e-07,\n",
       "              5.7992e-08,  5.1448e-07],\n",
       "            ...,\n",
       "            [-2.2514e-05,  1.5530e-05,  1.1640e-05,  ...,  1.1064e-05,\n",
       "             -2.1777e-05,  1.8111e-05],\n",
       "            [-1.1931e-06,  4.1763e-07,  1.3838e-06,  ...,  1.8607e-06,\n",
       "             -6.3257e-07, -1.0663e-06],\n",
       "            [ 1.0559e-05, -5.4052e-06, -4.8402e-06,  ..., -4.2748e-06,\n",
       "              7.8515e-06, -6.1491e-06]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[2.0457e-08, 1.5073e-09, 1.9368e-08,  ..., 2.4218e-08, 4.8475e-09,\n",
       "             1.7147e-08],\n",
       "            [4.7851e-09, 2.3731e-09, 5.6617e-09,  ..., 1.6780e-08, 9.3049e-09,\n",
       "             6.2458e-09],\n",
       "            [2.1069e-11, 1.6018e-11, 1.8233e-11,  ..., 4.6381e-11, 2.1260e-11,\n",
       "             1.7989e-11],\n",
       "            ...,\n",
       "            [4.3353e-09, 2.9667e-09, 8.2037e-10,  ..., 1.8034e-09, 1.6786e-09,\n",
       "             2.4561e-09],\n",
       "            [1.9644e-10, 3.7697e-11, 2.2386e-10,  ..., 3.3676e-10, 8.2486e-11,\n",
       "             1.6937e-10],\n",
       "            [6.9028e-10, 4.1721e-10, 6.2837e-10,  ..., 6.4640e-10, 6.3912e-10,\n",
       "             4.6772e-10]], device='cuda:0')},\n",
       "   175: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 4.5702e-05,  6.7110e-06, -7.9997e-07,  1.1324e-05, -5.3055e-07,\n",
       "             9.2890e-05,  1.8640e-05, -8.1942e-05, -2.2310e-04,  5.9750e-06,\n",
       "             1.8536e-05, -2.2564e-05, -2.2111e-04, -1.1094e-06,  1.4613e-05,\n",
       "            -2.9908e-05, -2.0121e-04,  8.2836e-06,  4.7592e-05, -9.7623e-06,\n",
       "            -6.0988e-05, -1.5154e-05,  4.1525e-05, -1.8002e-05,  4.0076e-05,\n",
       "             7.8755e-07,  4.8815e-05,  6.1463e-07, -1.7744e-05,  3.1400e-09,\n",
       "             3.0220e-05,  6.2961e-06, -4.9538e-06, -1.0739e-05,  1.8170e-05,\n",
       "            -1.8387e-06,  1.0142e-05, -7.1779e-06,  6.4264e-07, -1.9327e-05,\n",
       "            -1.3314e-05, -2.2745e-05, -1.7930e-06, -2.0799e-05,  1.6080e-05,\n",
       "            -1.6334e-05, -1.8727e-06, -1.1147e-05, -1.9522e-05, -5.6704e-06,\n",
       "            -2.4485e-06, -5.0495e-05, -3.2611e-05, -2.4423e-06, -1.5589e-05,\n",
       "             3.0548e-07, -1.6315e-06,  7.0801e-05, -3.5368e-05,  1.4481e-06,\n",
       "             3.2609e-05,  8.6190e-05, -5.2696e-05, -3.0672e-08,  4.9728e-05,\n",
       "             2.8624e-05,  4.9136e-06,  1.6936e-05, -4.7790e-05,  1.8116e-06,\n",
       "             7.1889e-07, -1.8753e-06,  1.2165e-05,  9.6597e-06, -1.3129e-04,\n",
       "            -3.0517e-06,  1.6354e-05,  1.7125e-05, -1.9794e-08, -4.1932e-05,\n",
       "             3.3877e-05,  1.2877e-05, -3.6331e-05, -1.2488e-06,  7.0972e-06,\n",
       "             1.9192e-06, -2.8936e-05, -1.4298e-04,  4.4840e-05, -8.2714e-05,\n",
       "             6.4631e-05, -2.4549e-04,  3.7608e-05,  4.6485e-06, -1.0331e-04,\n",
       "             1.3905e-05, -1.0133e-05,  8.1093e-06,  8.2795e-07,  1.7914e-05,\n",
       "             5.5712e-07,  1.3389e-05, -1.5123e-04,  8.3614e-06, -3.0700e-05,\n",
       "             1.6297e-05,  5.4363e-07, -2.1715e-05, -4.9526e-05,  5.7642e-05,\n",
       "             6.0020e-07,  6.6639e-05, -1.3931e-04,  5.1515e-06, -9.6503e-05,\n",
       "            -1.1949e-06, -1.0334e-05,  1.1780e-04, -4.3990e-06,  6.5747e-05,\n",
       "            -1.0558e-05,  1.3418e-04, -5.2262e-06, -1.2763e-05,  7.2431e-09,\n",
       "            -5.4706e-05,  2.1314e-06,  2.5058e-05], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([4.8018e-08, 2.3821e-07, 4.8674e-10, 2.7278e-07, 2.4117e-08, 2.1573e-06,\n",
       "            5.0848e-08, 9.5028e-07, 1.6147e-06, 9.4367e-07, 2.2551e-07, 2.8162e-08,\n",
       "            1.4862e-07, 8.0893e-08, 1.0028e-08, 2.1493e-07, 5.1393e-07, 9.2678e-08,\n",
       "            2.1720e-06, 2.6117e-09, 3.7849e-07, 2.8090e-08, 1.7413e-07, 5.8083e-08,\n",
       "            2.9228e-07, 3.4168e-08, 9.1976e-07, 7.8229e-08, 9.4895e-09, 3.6375e-08,\n",
       "            6.1019e-07, 6.9441e-09, 3.6189e-09, 2.0221e-06, 8.0037e-07, 6.0386e-09,\n",
       "            3.6273e-09, 5.4893e-10, 1.0323e-10, 3.1370e-10, 2.0660e-08, 1.7154e-07,\n",
       "            1.4511e-07, 6.4226e-07, 1.5190e-07, 2.6685e-08, 1.4431e-09, 1.4474e-08,\n",
       "            1.6252e-07, 4.0690e-08, 2.2751e-08, 3.7994e-07, 2.4904e-08, 9.5707e-10,\n",
       "            1.4974e-08, 2.1500e-08, 2.4704e-07, 8.8089e-07, 2.5209e-07, 1.3468e-08,\n",
       "            3.7188e-07, 5.1592e-07, 5.5705e-08, 1.7886e-12, 4.4225e-07, 6.8666e-08,\n",
       "            9.0110e-10, 6.3009e-08, 4.1853e-07, 8.0691e-10, 1.3064e-08, 1.3232e-07,\n",
       "            1.6693e-08, 2.1915e-07, 7.0466e-08, 5.8255e-09, 1.6098e-07, 1.1349e-07,\n",
       "            1.0486e-09, 3.2087e-07, 6.6011e-07, 6.1126e-07, 1.5764e-07, 5.3896e-08,\n",
       "            7.8435e-08, 6.1679e-09, 2.3497e-07, 2.4390e-06, 8.5064e-08, 3.3486e-06,\n",
       "            5.1013e-07, 1.0263e-05, 1.8711e-07, 5.5217e-09, 1.5794e-06, 4.1783e-08,\n",
       "            7.4284e-09, 1.6885e-08, 3.1953e-08, 3.5966e-07, 4.4527e-10, 5.5049e-07,\n",
       "            3.3372e-06, 2.9440e-08, 3.9631e-08, 1.1946e-08, 4.4909e-10, 1.3608e-08,\n",
       "            6.5354e-07, 2.9711e-07, 1.4474e-10, 5.2838e-07, 4.1864e-06, 9.2995e-09,\n",
       "            1.1273e-06, 3.9779e-07, 2.2897e-08, 8.9979e-07, 3.7204e-07, 4.4406e-09,\n",
       "            3.8403e-10, 4.4538e-07, 8.4506e-09, 9.0513e-08, 4.7661e-09, 4.1858e-08,\n",
       "            1.0293e-09, 1.1920e-08], device='cuda:0')},\n",
       "   176: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[ 6.3327e-04,  6.3441e-06,  4.4727e-04,  4.7380e-04,  1.3109e-04,\n",
       "              4.0118e-04, -2.5114e-04,  6.3628e-04,  9.7594e-04,  3.7355e-04,\n",
       "              6.0911e-04,  1.2640e-04,  2.5520e-04,  1.4786e-04,  2.4825e-04,\n",
       "              4.7761e-04,  3.2229e-04,  2.5778e-04,  6.6602e-05,  6.8752e-04,\n",
       "              3.1054e-04,  8.0188e-04, -3.1137e-05,  3.6985e-05,  3.2299e-05,\n",
       "              1.0076e-03,  2.4720e-04,  4.0920e-04,  5.9832e-04,  7.9793e-04,\n",
       "              2.1372e-04,  2.5796e-04,  2.6543e-04, -1.0432e-06,  1.4286e-06,\n",
       "              7.6140e-04,  5.7156e-04,  2.3114e-06,  1.4023e-04,  7.3761e-04,\n",
       "              1.4017e-04,  4.5035e-04,  8.4182e-04,  1.2313e-03,  5.9329e-05,\n",
       "              2.1060e-04,  1.3739e-04,  4.0683e-04, -1.5625e-04,  4.7744e-04,\n",
       "             -6.8119e-05,  5.8588e-04, -4.6200e-06,  1.8698e-04,  6.0110e-04,\n",
       "              8.2038e-04,  3.5809e-04,  6.8377e-04, -2.8945e-05, -1.3202e-04,\n",
       "              2.4595e-04,  3.6913e-04,  1.4711e-04,  6.1300e-05,  2.5669e-04,\n",
       "             -9.7782e-05,  3.4033e-04,  1.2165e-04,  6.6232e-05,  1.3392e-04,\n",
       "              3.0667e-04,  8.8996e-06,  1.1795e-04,  4.4912e-04,  5.7229e-05,\n",
       "              6.2976e-04,  2.2753e-04, -1.4165e-05,  4.2660e-04,  1.9576e-05,\n",
       "             -1.1505e-05, -1.2844e-05,  4.3963e-04,  4.9019e-04,  9.6909e-05,\n",
       "             -1.5557e-04, -7.3927e-05, -1.6955e-05,  3.8334e-04,  6.3670e-04,\n",
       "              6.2871e-04,  2.3586e-04,  6.3083e-04,  4.5758e-04,  5.7568e-04,\n",
       "             -2.0187e-04,  3.7724e-04,  2.9204e-05,  7.2225e-04,  8.0124e-04,\n",
       "              5.3250e-04,  2.4978e-05,  6.3384e-04,  3.8768e-04,  1.2655e-04,\n",
       "              4.9067e-04, -2.8819e-06,  9.1808e-05,  2.9184e-04,  2.4776e-04,\n",
       "              1.1440e-04,  3.6299e-05,  8.7744e-06,  3.6190e-04,  4.0334e-04,\n",
       "              2.3951e-04, -2.3824e-05,  6.2687e-05,  1.1163e-05,  3.6269e-04,\n",
       "              1.2865e-04, -7.2131e-06,  6.1937e-04,  2.0027e-04,  9.4364e-04,\n",
       "              4.6225e-04, -2.8674e-08,  4.2008e-04],\n",
       "            [ 9.0016e-04,  1.0572e-05,  6.1156e-04,  1.8439e-03,  1.4841e-03,\n",
       "              4.0211e-04,  1.7158e-03,  6.9870e-04,  7.9101e-04,  1.3134e-03,\n",
       "              1.6854e-03,  4.8403e-04,  2.2996e-03,  1.1077e-03,  5.5872e-04,\n",
       "              1.1205e-03,  5.1864e-04,  4.8150e-04,  2.6236e-05,  7.1268e-04,\n",
       "             -1.1212e-04,  1.0264e-03,  7.7749e-04,  1.2027e-04,  5.8852e-04,\n",
       "              2.0654e-03, -5.3351e-05,  5.8935e-06,  1.9246e-03,  8.5131e-04,\n",
       "              1.4607e-04,  1.4163e-04,  3.5691e-04, -1.3741e-05, -2.3246e-06,\n",
       "              1.6882e-03,  2.3785e-03,  5.6819e-06, -6.1911e-05,  1.9595e-03,\n",
       "              8.9815e-04,  3.6397e-04,  1.0993e-03,  1.3776e-03, -1.0798e-05,\n",
       "              3.6782e-04,  1.4140e-04,  6.9079e-04,  1.2010e-03,  3.4609e-04,\n",
       "              1.2203e-04,  1.0022e-03,  1.2534e-05,  2.8643e-04,  1.5953e-03,\n",
       "              3.4669e-03,  1.0257e-03,  1.3346e-03,  1.1685e-03,  1.6445e-03,\n",
       "              1.2427e-04,  5.3145e-04, -8.1143e-05, -2.5823e-04,  1.6987e-03,\n",
       "              8.1431e-04,  4.6576e-04, -1.1894e-04,  4.8643e-05,  2.1635e-04,\n",
       "              1.0295e-03,  8.6366e-06,  1.9317e-04,  5.8701e-04,  9.8423e-04,\n",
       "              1.5178e-03, -6.2109e-05,  1.4832e-05,  1.4804e-04, -2.9640e-05,\n",
       "             -4.1643e-06,  9.6241e-04,  1.2961e-03,  5.0654e-04,  2.7434e-04,\n",
       "              6.9782e-04,  7.9487e-04,  2.1271e-06,  3.8245e-04,  7.9704e-04,\n",
       "              6.0013e-04,  9.4126e-04,  4.0215e-04,  1.8417e-03,  1.7323e-03,\n",
       "              1.2119e-03,  1.4388e-03, -2.5538e-05,  3.9391e-04,  1.2279e-03,\n",
       "              5.5069e-04,  1.8687e-03,  1.6056e-03,  2.0226e-03,  9.8587e-04,\n",
       "              1.7081e-03,  2.8383e-04,  1.8162e-03,  9.6761e-04,  2.1972e-04,\n",
       "             -7.9793e-05,  1.5010e-04,  2.3780e-03,  2.9326e-04,  1.5479e-03,\n",
       "              2.4167e-04,  8.5879e-04,  4.3009e-04,  1.2270e-05,  4.0483e-04,\n",
       "              3.2056e-04,  3.6512e-04,  1.3171e-03,  3.0628e-05,  9.0837e-04,\n",
       "              7.0165e-04, -2.3565e-09,  4.9857e-04],\n",
       "            [-1.5334e-03, -1.6916e-05, -1.0588e-03, -2.3177e-03, -1.6151e-03,\n",
       "             -8.0329e-04, -1.4646e-03, -1.3350e-03, -1.7670e-03, -1.6870e-03,\n",
       "             -2.2945e-03, -6.1043e-04, -2.5548e-03, -1.2555e-03, -8.0697e-04,\n",
       "             -1.5981e-03, -8.4094e-04, -7.3928e-04, -9.2839e-05, -1.4002e-03,\n",
       "             -1.9842e-04, -1.8283e-03, -7.4635e-04, -1.5726e-04, -6.2082e-04,\n",
       "             -3.0730e-03, -1.9385e-04, -4.1509e-04, -2.5229e-03, -1.6492e-03,\n",
       "             -3.5979e-04, -3.9960e-04, -6.2234e-04,  1.4783e-05,  8.9604e-07,\n",
       "             -2.4496e-03, -2.9501e-03, -7.9934e-06, -7.8317e-05, -2.6971e-03,\n",
       "             -1.0383e-03, -8.1432e-04, -1.9411e-03, -2.6089e-03, -4.8531e-05,\n",
       "             -5.7842e-04, -2.7878e-04, -1.0976e-03, -1.0448e-03, -8.2353e-04,\n",
       "             -5.3912e-05, -1.5880e-03, -7.9144e-06, -4.7341e-04, -2.1964e-03,\n",
       "             -4.2873e-03, -1.3838e-03, -2.0184e-03, -1.1395e-03, -1.5125e-03,\n",
       "             -3.7023e-04, -9.0057e-04, -6.5966e-05,  1.9693e-04, -1.9553e-03,\n",
       "             -7.1653e-04, -8.0609e-04, -2.7090e-06, -1.1488e-04, -3.5028e-04,\n",
       "             -1.3362e-03, -1.7536e-05, -3.1112e-04, -1.0361e-03, -1.0415e-03,\n",
       "             -2.1476e-03, -1.6542e-04, -6.6695e-07, -5.7464e-04,  1.0064e-05,\n",
       "              1.5669e-05, -9.4956e-04, -1.7357e-03, -9.9674e-04, -3.7125e-04,\n",
       "             -5.4224e-04, -7.2094e-04,  1.4828e-05, -7.6579e-04, -1.4337e-03,\n",
       "             -1.2288e-03, -1.1771e-03, -1.0330e-03, -2.2992e-03, -2.3080e-03,\n",
       "             -1.0101e-03, -1.8160e-03, -3.6659e-06, -1.1162e-03, -2.0291e-03,\n",
       "             -1.0832e-03, -1.8937e-03, -2.2395e-03, -2.4102e-03, -1.1124e-03,\n",
       "             -2.1987e-03, -2.8094e-04, -1.9080e-03, -1.2594e-03, -4.6748e-04,\n",
       "             -3.4608e-05, -1.8640e-04, -2.3867e-03, -6.5515e-04, -1.9512e-03,\n",
       "             -4.8118e-04, -8.3497e-04, -4.9277e-04, -2.3432e-05, -7.6752e-04,\n",
       "             -4.4921e-04, -3.5791e-04, -1.9365e-03, -2.3090e-04, -1.8520e-03,\n",
       "             -1.1639e-03,  3.1030e-08, -9.1865e-04]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[4.8545e-05, 3.6551e-09, 2.4245e-05, 1.8969e-05, 2.3085e-05, 8.3899e-06,\n",
       "             4.0424e-06, 9.3000e-07, 3.0253e-05, 2.8440e-05, 2.0355e-05, 2.4287e-06,\n",
       "             4.3983e-05, 4.4278e-06, 5.1646e-05, 3.5192e-05, 2.0018e-05, 1.2361e-05,\n",
       "             1.8182e-07, 3.6868e-05, 1.6560e-06, 8.5429e-05, 3.0746e-05, 3.8904e-07,\n",
       "             1.5393e-06, 2.5625e-05, 1.5332e-06, 3.1552e-05, 4.0963e-05, 6.9877e-05,\n",
       "             2.3699e-06, 2.1402e-05, 1.0929e-06, 2.1238e-06, 2.5453e-09, 5.6883e-05,\n",
       "             4.6206e-05, 8.5197e-09, 1.6034e-06, 7.9025e-05, 9.6827e-05, 1.0195e-06,\n",
       "             9.9738e-05, 1.0059e-04, 5.1517e-06, 1.3365e-06, 2.4301e-08, 3.5189e-05,\n",
       "             7.9752e-05, 5.1418e-07, 2.4187e-08, 4.4354e-05, 1.0776e-08, 3.1545e-06,\n",
       "             1.8405e-05, 1.4345e-04, 1.9863e-05, 6.5882e-05, 3.1515e-05, 2.6401e-05,\n",
       "             6.3842e-06, 8.2332e-06, 6.0184e-07, 4.6564e-06, 3.0793e-05, 6.6687e-06,\n",
       "             2.2495e-05, 1.3148e-07, 2.8603e-08, 2.8368e-05, 8.9456e-06, 9.9268e-09,\n",
       "             1.9391e-06, 3.0011e-05, 2.1260e-06, 8.1389e-05, 3.7014e-06, 1.3975e-09,\n",
       "             3.4124e-05, 4.7162e-08, 2.2804e-08, 3.0544e-06, 3.0023e-05, 2.1820e-05,\n",
       "             1.7195e-05, 1.7877e-06, 1.1759e-05, 7.0679e-08, 6.7289e-06, 1.5475e-05,\n",
       "             1.0202e-05, 2.3161e-05, 1.1598e-06, 4.1692e-05, 1.2549e-04, 3.6354e-06,\n",
       "             3.1010e-05, 3.3464e-08, 9.6569e-06, 5.0081e-05, 6.4188e-05, 5.9325e-05,\n",
       "             3.5602e-05, 7.7483e-05, 2.6779e-05, 7.4825e-05, 1.5952e-06, 5.2763e-05,\n",
       "             5.3960e-05, 2.9509e-06, 1.2663e-07, 3.5436e-07, 1.2520e-05, 1.9293e-06,\n",
       "             4.3373e-05, 1.1525e-05, 2.0137e-05, 1.9747e-05, 1.1745e-07, 6.4936e-06,\n",
       "             4.8332e-06, 1.4116e-05, 3.7843e-05, 1.4582e-07, 1.3716e-05, 2.7449e-05,\n",
       "             1.8307e-10, 1.5830e-06],\n",
       "            [5.7798e-05, 3.7349e-09, 2.9000e-05, 1.9437e-05, 1.4456e-05, 6.7024e-06,\n",
       "             2.7497e-06, 1.1800e-06, 3.5616e-05, 1.7292e-05, 2.1341e-05, 2.9517e-06,\n",
       "             3.4628e-05, 2.9364e-06, 5.1216e-05, 3.8906e-05, 2.4279e-05, 1.5334e-05,\n",
       "             1.0955e-07, 4.5033e-05, 1.2370e-06, 9.8948e-05, 2.9866e-05, 3.6589e-07,\n",
       "             1.2918e-06, 2.9338e-05, 2.2231e-06, 3.0024e-05, 3.9847e-05, 7.7613e-05,\n",
       "             2.7318e-06, 1.2048e-05, 8.0879e-07, 2.8516e-06, 1.5665e-09, 3.8019e-05,\n",
       "             4.1616e-05, 1.0128e-08, 1.2282e-06, 8.8857e-05, 1.0413e-04, 6.5440e-07,\n",
       "             1.1109e-04, 1.0661e-04, 3.0899e-06, 1.7018e-06, 5.2955e-08, 2.1299e-05,\n",
       "             4.6998e-05, 9.9912e-07, 2.2681e-08, 5.1277e-05, 1.0088e-08, 3.6204e-06,\n",
       "             1.7954e-05, 1.4242e-04, 1.6645e-05, 4.9361e-05, 2.4323e-05, 1.5480e-05,\n",
       "             3.9874e-06, 7.4825e-06, 6.8910e-07, 3.7848e-06, 3.1852e-05, 7.7654e-06,\n",
       "             1.3243e-05, 1.7786e-07, 3.3459e-08, 1.7523e-05, 8.9702e-06, 1.0348e-08,\n",
       "             2.1247e-06, 2.9461e-05, 1.2045e-06, 8.7938e-05, 2.4367e-06, 6.5821e-10,\n",
       "             2.1201e-05, 4.4898e-08, 2.1685e-08, 2.1108e-06, 3.7068e-05, 2.7808e-05,\n",
       "             1.0247e-05, 9.2321e-07, 6.6860e-06, 6.3259e-08, 8.0314e-06, 9.2087e-06,\n",
       "             1.0055e-05, 2.4669e-05, 1.3935e-06, 4.6127e-05, 1.2810e-04, 2.3258e-06,\n",
       "             3.2618e-05, 1.7187e-08, 8.1127e-06, 5.6466e-05, 6.0549e-05, 4.4692e-05,\n",
       "             4.3621e-05, 7.5115e-05, 3.0555e-05, 7.9551e-05, 6.5378e-07, 4.6354e-05,\n",
       "             3.4141e-05, 3.6834e-06, 2.0473e-07, 4.0289e-07, 8.6069e-06, 2.4729e-06,\n",
       "             4.3866e-05, 7.2883e-06, 1.1828e-05, 1.2041e-05, 1.2632e-07, 7.3893e-06,\n",
       "             6.1666e-06, 7.5721e-06, 3.3348e-05, 2.0674e-07, 1.4476e-05, 3.2188e-05,\n",
       "             2.0856e-10, 2.3909e-06],\n",
       "            [1.3545e-04, 4.1158e-09, 6.8514e-05, 3.9897e-05, 2.9650e-05, 7.5654e-06,\n",
       "             5.4355e-06, 1.4062e-06, 8.4978e-05, 3.0152e-05, 4.7836e-05, 4.9616e-06,\n",
       "             8.1491e-05, 5.7153e-06, 1.2386e-04, 8.8379e-05, 5.7345e-05, 3.3792e-05,\n",
       "             1.0279e-07, 1.0578e-04, 2.0877e-06, 2.3199e-04, 6.8104e-05, 5.4657e-07,\n",
       "             2.2159e-06, 6.5295e-05, 4.5631e-06, 6.4252e-05, 8.3868e-05, 1.7991e-04,\n",
       "             5.5035e-06, 1.6349e-05, 1.3129e-06, 6.1791e-06, 1.2833e-09, 8.2126e-05,\n",
       "             9.7423e-05, 1.9035e-08, 2.5699e-06, 2.0551e-04, 2.4227e-04, 1.4461e-06,\n",
       "             2.5815e-04, 2.5521e-04, 4.4143e-06, 2.7295e-06, 3.5655e-08, 4.1071e-05,\n",
       "             7.6335e-05, 6.7407e-07, 2.5646e-08, 1.1572e-04, 1.8201e-08, 4.7575e-06,\n",
       "             3.9055e-05, 3.2876e-04, 3.9775e-05, 1.0377e-04, 5.5127e-05, 2.6675e-05,\n",
       "             4.9016e-06, 1.2803e-05, 1.3230e-06, 2.7867e-06, 7.1144e-05, 1.5462e-05,\n",
       "             1.7709e-05, 2.2529e-07, 4.9320e-08, 2.5577e-05, 1.3078e-05, 1.2572e-08,\n",
       "             2.9919e-06, 6.4612e-05, 2.2454e-06, 2.0465e-04, 4.1226e-06, 1.3466e-09,\n",
       "             3.2278e-05, 7.3858e-08, 4.1515e-08, 4.0195e-06, 8.6292e-05, 6.2812e-05,\n",
       "             1.4358e-05, 1.7647e-06, 1.0545e-05, 8.5642e-08, 9.8882e-06, 1.2602e-05,\n",
       "             9.6837e-06, 5.4025e-05, 1.6723e-06, 1.0486e-04, 2.9899e-04, 4.4464e-06,\n",
       "             7.3759e-05, 2.4268e-08, 7.7859e-06, 1.2662e-04, 1.3985e-04, 9.6449e-05,\n",
       "             1.0072e-04, 1.7406e-04, 6.7395e-05, 1.8889e-04, 1.3205e-06, 1.0246e-04,\n",
       "             6.7803e-05, 4.1361e-06, 3.0997e-07, 4.4070e-07, 1.5954e-05, 3.0244e-06,\n",
       "             1.0326e-04, 1.3971e-05, 1.6665e-05, 1.5308e-05, 1.4380e-07, 8.4590e-06,\n",
       "             1.3999e-05, 1.0689e-05, 8.0119e-05, 1.7879e-07, 1.8816e-05, 6.9887e-05,\n",
       "             1.2685e-10, 2.9246e-06]], device='cuda:0')},\n",
       "   177: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([ 0.0008,  0.0027, -0.0036], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([1.0804e-04, 8.4614e-05, 1.6190e-04], device='cuda:0')},\n",
       "   178: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([[-5.9254e-04, -8.5921e-06, -2.4564e-05, -5.8382e-04,  4.6026e-05,\n",
       "             -3.4887e-04, -7.3189e-04, -1.5365e-03, -9.5068e-05, -3.4917e-05,\n",
       "             -3.3265e-05, -5.9986e-05, -5.3330e-04, -7.5162e-05, -7.8884e-06,\n",
       "             -1.2712e-04, -9.3416e-06, -4.4288e-04, -2.2255e-04, -4.6515e-05,\n",
       "             -1.6193e-04, -1.4448e-05, -6.0138e-04,  7.8483e-06, -7.8616e-04,\n",
       "             -2.9161e-04, -9.2140e-04, -4.8690e-08, -2.5351e-04, -4.1448e-05,\n",
       "             -1.7043e-04, -1.3519e-05,  2.3981e-05, -8.0924e-05, -5.9589e-04,\n",
       "              6.4842e-06, -1.4118e-04,  3.6022e-05, -3.9685e-05, -1.7809e-05,\n",
       "              2.2137e-05, -6.6725e-05,  3.3151e-05, -2.9403e-05,  4.2443e-05,\n",
       "             -5.6179e-04,  3.7244e-05,  5.8581e-06, -1.3642e-04, -6.7656e-05,\n",
       "             -1.8710e-05, -1.9044e-03, -5.2683e-04, -7.3857e-04,  5.4644e-05,\n",
       "             -1.6578e-05, -1.2988e-04, -1.1337e-03, -2.9160e-04, -3.1576e-05,\n",
       "              1.3483e-04, -2.6804e-04, -4.4739e-04,  1.7961e-06, -6.9105e-05,\n",
       "              6.3608e-05,  8.9217e-06, -1.9380e-04, -7.6406e-05, -7.4653e-05,\n",
       "              7.9841e-05,  2.0813e-06,  3.0586e-07, -1.2248e-05, -1.2742e-04,\n",
       "              4.9189e-05,  1.7501e-04, -1.1609e-04,  7.3696e-09, -2.9538e-04,\n",
       "             -8.3921e-05, -1.7610e-04, -6.9429e-05,  3.5451e-05, -6.0166e-04,\n",
       "              7.5746e-06, -3.4203e-04, -7.5817e-04, -2.0929e-05, -1.6047e-04,\n",
       "             -8.1316e-04, -2.1609e-04, -8.9348e-04, -1.0624e-04, -5.0980e-04,\n",
       "              6.0536e-05, -1.0514e-03, -1.6135e-06,  1.3889e-04, -2.6467e-06,\n",
       "             -1.5741e-05, -4.2279e-05, -2.7604e-04, -2.5668e-05, -3.2276e-04,\n",
       "              1.7703e-05, -1.2129e-05,  8.0991e-05, -3.2002e-05, -8.0815e-04,\n",
       "              1.0607e-05, -2.0196e-03, -6.7208e-04, -1.3115e-04, -6.2888e-04,\n",
       "             -5.8798e-04, -3.6705e-04, -5.2279e-04,  2.6132e-05, -3.5044e-04,\n",
       "             -2.6309e-04, -4.5965e-04,  2.5711e-06,  5.6845e-06, -8.0412e-09,\n",
       "              3.0571e-05, -3.0861e-04, -2.3026e-04]], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([[1.0443e-06, 1.9027e-07, 3.2593e-06, 8.4597e-04, 4.5420e-07, 1.1632e-05,\n",
       "             3.3224e-04, 1.3075e-04, 2.9592e-06, 1.1224e-04, 1.4079e-07, 1.1220e-07,\n",
       "             1.5312e-06, 8.0284e-07, 3.1782e-08, 1.4613e-05, 6.6482e-07, 4.7256e-05,\n",
       "             3.8435e-05, 1.0157e-08, 9.9741e-06, 4.2937e-09, 1.9552e-05, 6.9707e-08,\n",
       "             1.1636e-04, 1.4783e-05, 2.6877e-05, 1.0269e-07, 3.0711e-06, 9.3942e-07,\n",
       "             6.8511e-06, 1.0769e-07, 6.8703e-09, 7.1508e-06, 2.1870e-04, 1.4468e-08,\n",
       "             2.2074e-07, 9.4440e-09, 3.8861e-09, 1.3902e-09, 3.2782e-07, 1.2134e-05,\n",
       "             7.6481e-07, 2.5739e-05, 1.2830e-06, 5.8146e-06, 2.3878e-07, 2.5964e-08,\n",
       "             4.6507e-07, 1.3620e-06, 6.4404e-08, 4.3138e-05, 1.4112e-05, 1.2477e-05,\n",
       "             5.6148e-06, 3.8306e-08, 4.3859e-07, 5.0306e-05, 1.5148e-04, 4.6640e-07,\n",
       "             3.3381e-05, 1.8268e-05, 5.4341e-06, 7.6583e-08, 9.6448e-06, 1.3678e-05,\n",
       "             5.1404e-08, 1.6785e-06, 9.8618e-06, 4.8532e-06, 9.1957e-07, 2.3430e-07,\n",
       "             4.1729e-09, 9.5634e-07, 5.1929e-08, 8.9869e-08, 1.9677e-06, 8.4079e-05,\n",
       "             4.5239e-09, 2.1989e-04, 4.3616e-06, 8.0801e-05, 1.2095e-06, 2.0454e-08,\n",
       "             9.0358e-05, 1.7870e-08, 1.1687e-04, 2.7430e-04, 1.8697e-07, 1.3819e-04,\n",
       "             1.4051e-04, 4.8526e-05, 7.2144e-04, 1.9601e-07, 1.6218e-04, 3.9290e-08,\n",
       "             3.2058e-06, 9.5141e-08, 3.2023e-05, 2.2332e-04, 1.5952e-08, 1.3857e-06,\n",
       "             2.5320e-04, 2.2827e-07, 2.0456e-06, 3.4364e-08, 5.3118e-07, 2.1726e-07,\n",
       "             1.5765e-05, 3.1301e-05, 5.2048e-07, 1.9459e-04, 1.7632e-04, 3.2482e-06,\n",
       "             2.3768e-05, 3.6817e-05, 1.2521e-04, 1.0809e-05, 7.1679e-07, 6.2862e-08,\n",
       "             1.8580e-07, 2.0215e-05, 3.1813e-08, 8.3884e-08, 1.0293e-08, 7.6279e-08,\n",
       "             4.9655e-05, 3.3187e-07]], device='cuda:0')},\n",
       "   179: {'step': tensor(1080., device='cuda:0'),\n",
       "    'exp_avg': tensor([-0.0015], device='cuda:0'),\n",
       "    'exp_avg_sq': tensor([0.0003], device='cuda:0')}},\n",
       "  'param_groups': [{'lr': 0.000324,\n",
       "    'betas': (0.9, 0.999),\n",
       "    'eps': 1e-08,\n",
       "    'weight_decay': 0.01,\n",
       "    'amsgrad': False,\n",
       "    'foreach': None,\n",
       "    'maximize': False,\n",
       "    'capturable': False,\n",
       "    'differentiable': False,\n",
       "    'fused': None,\n",
       "    'params': [0,\n",
       "     1,\n",
       "     2,\n",
       "     3,\n",
       "     4,\n",
       "     5,\n",
       "     6,\n",
       "     7,\n",
       "     8,\n",
       "     9,\n",
       "     10,\n",
       "     11,\n",
       "     12,\n",
       "     13,\n",
       "     14,\n",
       "     15,\n",
       "     16,\n",
       "     17,\n",
       "     18,\n",
       "     19,\n",
       "     20,\n",
       "     21,\n",
       "     22,\n",
       "     23,\n",
       "     24,\n",
       "     25,\n",
       "     26,\n",
       "     27,\n",
       "     28,\n",
       "     29,\n",
       "     30,\n",
       "     31,\n",
       "     32,\n",
       "     33,\n",
       "     34,\n",
       "     35,\n",
       "     36,\n",
       "     37,\n",
       "     38,\n",
       "     39,\n",
       "     40,\n",
       "     41,\n",
       "     42,\n",
       "     43,\n",
       "     44,\n",
       "     45,\n",
       "     46,\n",
       "     47,\n",
       "     48,\n",
       "     49,\n",
       "     50,\n",
       "     51,\n",
       "     52,\n",
       "     53,\n",
       "     54,\n",
       "     55,\n",
       "     56,\n",
       "     57,\n",
       "     58,\n",
       "     59,\n",
       "     60,\n",
       "     61,\n",
       "     62,\n",
       "     63,\n",
       "     64,\n",
       "     65,\n",
       "     66,\n",
       "     67,\n",
       "     68,\n",
       "     69,\n",
       "     70,\n",
       "     71,\n",
       "     72,\n",
       "     73,\n",
       "     74,\n",
       "     75,\n",
       "     76,\n",
       "     77,\n",
       "     78,\n",
       "     79,\n",
       "     80,\n",
       "     81,\n",
       "     82,\n",
       "     83,\n",
       "     84,\n",
       "     85,\n",
       "     86,\n",
       "     87,\n",
       "     88,\n",
       "     89,\n",
       "     90,\n",
       "     91,\n",
       "     92,\n",
       "     93,\n",
       "     94,\n",
       "     95,\n",
       "     96,\n",
       "     97,\n",
       "     98,\n",
       "     99,\n",
       "     100,\n",
       "     101,\n",
       "     102,\n",
       "     103,\n",
       "     104,\n",
       "     105,\n",
       "     106,\n",
       "     107,\n",
       "     108,\n",
       "     109,\n",
       "     110,\n",
       "     111,\n",
       "     112,\n",
       "     113,\n",
       "     114,\n",
       "     115,\n",
       "     116,\n",
       "     117,\n",
       "     118,\n",
       "     119,\n",
       "     120,\n",
       "     121,\n",
       "     122,\n",
       "     123,\n",
       "     124,\n",
       "     125,\n",
       "     126,\n",
       "     127,\n",
       "     128,\n",
       "     129,\n",
       "     130,\n",
       "     131,\n",
       "     132,\n",
       "     133,\n",
       "     134,\n",
       "     135,\n",
       "     136,\n",
       "     137,\n",
       "     138,\n",
       "     139,\n",
       "     140,\n",
       "     141,\n",
       "     142,\n",
       "     143,\n",
       "     144,\n",
       "     145,\n",
       "     146,\n",
       "     147,\n",
       "     148,\n",
       "     149,\n",
       "     150,\n",
       "     151,\n",
       "     152,\n",
       "     153,\n",
       "     154,\n",
       "     155,\n",
       "     156,\n",
       "     157,\n",
       "     158,\n",
       "     159,\n",
       "     160,\n",
       "     161,\n",
       "     162,\n",
       "     163,\n",
       "     164,\n",
       "     165,\n",
       "     166,\n",
       "     167,\n",
       "     168,\n",
       "     169,\n",
       "     170,\n",
       "     171,\n",
       "     172,\n",
       "     173,\n",
       "     174,\n",
       "     175,\n",
       "     176,\n",
       "     177,\n",
       "     178,\n",
       "     179]}]},\n",
       " 'numpy_rng_state': ('MT19937',\n",
       "  array([2617177648, 2017220167, 1464876909, 3615162526, 1164401473,\n",
       "         2184564609, 2432713522, 3058447366, 1803679528, 1562658776,\n",
       "         2492933965, 3062955410, 1689853080,  252813941, 1894234661,\n",
       "          113582504,  462153458, 1009741748, 1887366836,   33711303,\n",
       "         4286407419,   49015167, 1389461379, 1916064777, 3202609405,\n",
       "          477623851, 4192862919, 1100156849, 1852789881, 3774079607,\n",
       "          283021180, 2690401734, 4156949535, 2955574470,  648902279,\n",
       "         2469424789, 2691568991, 2263183505,  350721173, 3375872317,\n",
       "          764623098, 4133155942, 3917816095,  536387565,  187709574,\n",
       "         2595586577,  947777192,  317993870, 2577919689, 2202798912,\n",
       "         2619721391, 3125765789, 3555222009, 2451762166, 1037666332,\n",
       "         2801996329, 2880743530, 1914301906, 1302135408, 4268093959,\n",
       "         2954492239, 3439304812, 4214605184, 4008444385, 3444632606,\n",
       "         1542252480, 4236830773, 2214151243, 3511286255, 1644874558,\n",
       "           72513335,  108933044, 3701872527,  985364240, 2156932893,\n",
       "         3863618248, 3164510440, 1136636251, 1213105164,  197435689,\n",
       "         1286160007,  510637731, 2371402437, 1806252603, 3365290055,\n",
       "         2933621967, 4126408742, 2724199221, 4284810028, 3767124821,\n",
       "         1008647268,  599814069, 2490780640, 2390644346, 3822371574,\n",
       "          553729228,  533206854, 2635464569, 3433721856, 2190969445,\n",
       "          707971156, 1935689620, 2840598901, 2071845879,  622609442,\n",
       "         2567433659, 1293712134, 3857868391, 1051568204,  781789802,\n",
       "          471979427, 1813305070,  577414227, 4016024324, 3546009681,\n",
       "         1898716329, 3685414380, 3502818716,  158905578, 4000501208,\n",
       "          119750826, 3768249350, 3804005894,  828321213, 1231725931,\n",
       "         3987621994, 4072758051, 3465547037, 2009160920, 3423840959,\n",
       "         3976496697,  325862281, 3555487517, 1330359807,  908626125,\n",
       "         4131490012, 1104016484, 3073707806,    7959924,  439907898,\n",
       "         2478673052,  313455389, 2273708435,  619490433, 3222119540,\n",
       "         4250181583, 2905075437, 1606067327,  780864474, 1994978940,\n",
       "          281626444, 1535406958,   19810352, 2433616773,  356008213,\n",
       "         1270365517, 3328743806, 1686888152,  886797742, 2206680778,\n",
       "         1244960570, 3862644183, 1091117091, 2504659951,  863059264,\n",
       "         2290884655, 2105460699, 3697359211, 3194655700,  790531573,\n",
       "         3020796823, 2759089846, 1193374272, 3540528812, 1792846558,\n",
       "          615997133, 3300361945, 2093445148, 1513813095, 1436807590,\n",
       "         3107668010, 2770086431,  218336198, 1993077985, 3751263902,\n",
       "          138162464, 3914008998,  907582125, 3331841814, 2614104980,\n",
       "          407386154,  936614366, 1457354564, 2014294260, 1041615487,\n",
       "          216417364,  164082102, 1700882773, 1659096913, 3517208942,\n",
       "         4139870440,  238290591, 1535704786, 1118499112, 3560849055,\n",
       "         1541910996,  611892026, 2662680408, 3230339186,  906110414,\n",
       "         2223501338, 3441800119,  336250843, 3395798899, 2034797645,\n",
       "         4199489061, 1485184975, 1470135467, 3356465525, 4054849733,\n",
       "         2500224912, 1841461009, 1564140751, 1503081818,  656410286,\n",
       "           93144801, 2527774976, 2937168609, 4022302316, 1916031957,\n",
       "          147274726, 1513833259, 4245068901, 2991459000, 1822061996,\n",
       "         2723936459, 1272538922,  635001528, 2484437460,  563291220,\n",
       "         1403552683, 1136952850,  390345778, 1537857934,  811096156,\n",
       "         1021638034,  293014145,  327234438, 3061051335, 1446915784,\n",
       "          426482589, 2199890447, 1234341089,  260127671, 3816769129,\n",
       "         1450474340, 3495205463, 3227099352, 1666266407, 2159104278,\n",
       "          279520557,  552498648,  392640070, 2529900172, 3773191557,\n",
       "         3384711455, 2062663111, 3798340854,  750446366, 3979767040,\n",
       "         1362611745, 3613529381, 4208503259, 1306049881, 2565368606,\n",
       "          557432350, 3858051110, 2931336654, 1193033834, 2050896750,\n",
       "         3293831088, 1870958245,  869876015, 1679489885, 1397851637,\n",
       "         2963360919, 2411898243, 3961243659, 2057877803, 1811579308,\n",
       "         3499040505,  153991434, 1167694666, 3909407461,  565323076,\n",
       "          358650791,  247979684, 3098503769, 2937329442, 1754927626,\n",
       "          299841275, 3978844233, 2153539229, 3022913085, 2899627520,\n",
       "         2748504787, 3610172303, 1413309901, 2144581376,  621799033,\n",
       "          882515899, 1471686248, 2081683367, 2578259803,  398110028,\n",
       "          165831666, 3229720818,  342796376,  452002023, 1420432749,\n",
       "         3569101247, 3466046401, 2542949939,  645989617, 2463556997,\n",
       "         2589281379, 4250249076, 2097056622, 2071003154, 2772894694,\n",
       "          933819209,   33706386, 2118170596, 4141627197, 1809654556,\n",
       "         3427768287, 3792701203, 2168124091, 2329983254, 1628871985,\n",
       "         2007816319,  912968651, 2732004214, 1200812821, 2201737432,\n",
       "          815333230, 1728488688, 2177389144,  914542118, 2187315561,\n",
       "          854666672, 3798588025, 4101337846, 2575702736, 2265256563,\n",
       "          820270787, 2213462589, 3437687559, 2228249585, 1066951304,\n",
       "         1798736981, 3129469041, 4098537852,  564000986, 1176914133,\n",
       "          537863534, 3694024248, 3025057161, 3859585039,  173287298,\n",
       "         3690045264,  507629489, 2222498428, 1146801403, 1921419120,\n",
       "         4084416163, 3817061943, 2247772940,  444880805, 3214153555,\n",
       "          615518147, 4019471266,  157879423, 3476330532, 2898902730,\n",
       "         2029398135,  856302763, 2085790242, 1166038532, 1997997601,\n",
       "         1697516559, 3411299012, 3805025912, 2994124032, 3521587974,\n",
       "         3531588847,   66316375, 2231931198, 3842527177, 3539946351,\n",
       "         3418436290, 2809065455, 1880118833, 2115811131,    1976858,\n",
       "         3443643959, 3810782594,  673266545, 3998489184, 2730680884,\n",
       "         3751058274, 1045202140, 1773560517, 4006480687, 3649567686,\n",
       "         2830237558, 4264364462,  159273256,  449296049, 2156535162,\n",
       "          914675072, 2829946700, 2338546069, 4287150744, 3400826112,\n",
       "          594652551,  675557490, 2571727374, 2089822132, 1043791488,\n",
       "         3090870572,  678106235, 1776005391, 3968568714, 3978101867,\n",
       "          169043939, 3563063140, 2818292570, 1017290836,  500150216,\n",
       "          690351249, 3099978291, 2262133236, 2049838569, 4050132763,\n",
       "         3557138995, 2758691448,  596134991, 2724613989, 2680008939,\n",
       "           30833568,  713044208, 3878753205, 2743634366, 4022567392,\n",
       "         1305615136,   77663703,  488406969, 3478526714,  606380862,\n",
       "          208035976, 1588596394,  611222698, 2835896714, 1133657556,\n",
       "         3219768572, 2838955260, 1257147891, 3441338944, 3581885858,\n",
       "          343986260, 2076398545, 3276571738, 2651087169,  801064243,\n",
       "         4147885121,  342108797, 3516907570,   39253554,  574863862,\n",
       "          664276969,  873930082,  807557833, 1999108813, 2900605981,\n",
       "         1072264771, 3473551806, 1962087489, 4099036772,   37225437,\n",
       "         4003560013, 1088504251,  827031602,  593559354, 2475107796,\n",
       "         2439821561, 2545826031,   18012944, 4028839039, 3071793482,\n",
       "         3864113816, 2102135505, 2120990335,  287649655, 2504905676,\n",
       "         1404526437, 2322877496, 2203237823, 1571159467, 2713200213,\n",
       "         1797361797, 4139302093, 3323461801,    8056600, 3126231677,\n",
       "          965270133,  770090255, 3491186045, 3548503330, 3673730698,\n",
       "         4145003001,   28453440, 4135696762, 3012096530,  841841475,\n",
       "          313884152,  742283181, 4279900111,  895046728, 3334880339,\n",
       "          168076440, 3130365919,  675190583, 1294651296, 1927632742,\n",
       "         1550365741,  952283011, 1758619927, 2516270583, 2498860508,\n",
       "         1238400414, 3786924698,  590745861, 1610476831, 4152990383,\n",
       "          623135750, 2109415175, 3930534394, 3551035905, 3682238343,\n",
       "          272379615,  492623470, 3279502872, 3713587517, 2664217248,\n",
       "         3862893285, 3212469588, 1978982905, 1267227739,  224180382,\n",
       "         1661214933, 1341789615, 1945423912, 1404645043, 3934170080,\n",
       "         2676424524,  431606061, 4041922616, 1895429935, 3014667061,\n",
       "          188724211, 1679948699,  823831363, 1600696018, 3403620652,\n",
       "          910330720, 1420157251, 3871222364, 3720909411, 3253157786,\n",
       "          985626369, 1335571366, 2869448747, 3948910970,  890872504,\n",
       "         3950469330, 4035998268, 2537675099, 1946544619, 1960961068,\n",
       "         3925974546, 2781757516,  965363404, 3479530663,  144097885,\n",
       "         3013785023, 1149320456, 2727512911, 1804643731, 3671649738,\n",
       "         3237929279,  637007973, 2311540636,  743412706, 2851490031,\n",
       "         3870975936,  678317277, 3792989046,  771636568, 2718527100,\n",
       "          643297387, 2210241152, 2255226437,  186914393, 2433017520,\n",
       "         1495362707, 1390220348, 3962705664, 2469734158, 3786158070,\n",
       "         1107074642, 2818905224, 3787699856,  235705167], dtype=uint32),\n",
       "  223,\n",
       "  0,\n",
       "  0.0),\n",
       " 'torch_rng_state': tensor([ 69, 202,  22,  ...,   0,   0,   0], dtype=torch.uint8),\n",
       " 'torch_cuda_rng_state': tensor([ 93, 226, 243, 166,  67, 141,  10,   0,  72,  74,  18,   0,   0,   0,\n",
       "           0,   0], dtype=torch.uint8)}"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "checkpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
