{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2955d0f927ac4732",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:28:02.808607Z",
     "start_time": "2025-09-21T11:28:02.805124Z"
    }
   },
   "outputs": [],
   "source": [
    "device = \"cuda:0\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "532e6d5a5bfb58d7",
   "metadata": {},
   "source": [
    "### Preliminaries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "dcda88145fb79c1c",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:28:05.116607Z",
     "start_time": "2025-09-21T11:28:02.899531Z"
    }
   },
   "outputs": [],
   "source": [
    "import itertools\n",
    "import random\n",
    "import collections\n",
    "\n",
    "\n",
    "import transformers\n",
    "import torch\n",
    "import tqdm.auto\n",
    "from torch import Tensor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "614f8a6b0c65d853",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:28:05.407538Z",
     "start_time": "2025-09-21T11:28:05.400176Z"
    }
   },
   "outputs": [],
   "source": [
    "def sinusoidal_encode(\n",
    "    x: Tensor,\n",
    "    embedding_dim: int,\n",
    "    min_value: int,\n",
    "    max_value: int,\n",
    "    use_l2_norm: bool = False,\n",
    "    norm_const: float | None = None,\n",
    ") -> Tensor:\n",
    "    \"\"\"\n",
    "    Encodes a tensor of numbers into a sinusoidal representation, inspired by how absolute positional\n",
    "    encoding works in transformers.\n",
    "\n",
    "    The encoding is an evaluation of a sine and cosine function at different frequencies, where the\n",
    "    frequency is determined by the embedding dimension and the allowed range of the input values.\n",
    "\n",
    "    >>> sinusoidal_encode(\n",
    "    ...     torch.tensor([-5, 2, 1, 0]),\n",
    "    ...     embedding_dim=6,\n",
    "    ...     min_value=-5,\n",
    "    ...     max_value=5,\n",
    "    ... )\n",
    "    tensor([[ 0.0000,  1.0000,  0.0000,  1.0000,  0.0000,  1.0000],\n",
    "            [ 0.6570,  0.7539, -0.1073, -0.9942,  0.9980,  0.0627],\n",
    "            [-0.2794,  0.9602,  0.3491, -0.9371,  0.9616,  0.2746],\n",
    "            [-0.9589,  0.2837,  0.7317, -0.6816,  0.8806,  0.4738]])\n",
    "    \"\"\"\n",
    "\n",
    "    if embedding_dim % 2 != 0 and not use_l2_norm:\n",
    "        raise ValueError(\"Embedding dimension must be even\")\n",
    "\n",
    "    if use_l2_norm:\n",
    "        if embedding_dim % 2 == 0:\n",
    "            reserved_dim = 2\n",
    "        else:\n",
    "            reserved_dim = 1\n",
    "        embedding_dim -= reserved_dim\n",
    "    else:\n",
    "        reserved_dim = 0  # will not be used\n",
    "\n",
    "    domain = max_value - min_value\n",
    "    y_shape = x.shape + (embedding_dim,)\n",
    "    y = torch.zeros(y_shape, device=x.device)\n",
    "    even_indices = torch.arange(0, embedding_dim, 2)\n",
    "    log_term = torch.log(torch.tensor(domain)) / embedding_dim\n",
    "    div_term = torch.exp(even_indices * -log_term)\n",
    "    x = x - min_value\n",
    "    values = x.unsqueeze(-1).float() * div_term\n",
    "    y[..., 0::2] = torch.sin(values)\n",
    "    y[..., 1::2] = torch.cos(values)\n",
    "\n",
    "    if use_l2_norm:\n",
    "        y = torch.cat([y, torch.ones_like(y[..., :reserved_dim])], dim=-1)\n",
    "        y /= y.norm(dim=-1, keepdim=True, p=2)\n",
    "\n",
    "    if norm_const is not None:\n",
    "        y *= norm_const\n",
    "\n",
    "    return y\n",
    "\n",
    "\n",
    "def binary_encode(\n",
    "    x: Tensor,\n",
    "    embedding_dim: int,\n",
    "    min_value: int | float,\n",
    "    max_value: int | float,\n",
    "    use_l2_norm: bool = False,\n",
    "    norm_const: float | None = None,\n",
    ") -> Tensor:\n",
    "    y = torch.zeros(x.shape + (embedding_dim,), device=x.device)\n",
    "    reserve_dim = 0 if not use_l2_norm else 1\n",
    "    x = x - min_value\n",
    "    maximum = x.max()\n",
    "    for i in range(embedding_dim - reserve_dim):\n",
    "        coeff = 2**i\n",
    "        if maximum < coeff:\n",
    "            break\n",
    "        y[..., -i - 1] = torch.floor(x / coeff) % 2\n",
    "        x = x - coeff * y[..., -i - 1]\n",
    "    if use_l2_norm:\n",
    "        y = torch.cat([y, torch.ones_like(y[..., :reserve_dim])], dim=-1)\n",
    "        y /= y.norm(dim=-1, keepdim=True, p=2)\n",
    "    if norm_const is not None:\n",
    "        y *= norm_const\n",
    "    return y"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d0d14df112e1e26c",
   "metadata": {},
   "source": [
    "### Prepare model and data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "25e209717977de2b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:29:10.072056Z",
     "start_time": "2025-09-21T11:28:05.498235Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a0e1bc69dbe34ba4bc433f061451f634",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model_ckpt = \"meta-llama/Llama-3.2-3B\"\n",
    "model = transformers.AutoModel.from_pretrained(model_ckpt).eval()\n",
    "tokenizer = transformers.AutoTokenizer.from_pretrained(model_ckpt)\n",
    "model = model.half().to(device).eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "98e6df87a1182d14",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:29:10.951145Z",
     "start_time": "2025-09-21T11:29:10.945681Z"
    }
   },
   "outputs": [],
   "source": [
    "all_values = torch.arange(0, 1000)\n",
    "mask = torch.rand(len(all_values), generator=torch.Generator().manual_seed(0))\n",
    "train_mask = mask < 0.9\n",
    "valid_mask = ~train_mask & (mask < 0.95)\n",
    "test_mask = ~train_mask & ~valid_mask\n",
    "\n",
    "train_values = all_values[train_mask]\n",
    "valid_values = all_values[valid_mask]\n",
    "test_values = all_values[test_mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f00e5e4134d983a7",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:29:11.670092Z",
     "start_time": "2025-09-21T11:29:11.073016Z"
    }
   },
   "outputs": [],
   "source": [
    "all_inputs = [(x1, x2) for x1, x2 in itertools.product(all_values.tolist(), repeat=2) if x1 * x2 < 1000]\n",
    "train_values_set = set(train_values.tolist())\n",
    "valid_values_set = set(valid_values.tolist())\n",
    "test_values_set = set(test_values.tolist())\n",
    "\n",
    "all_inputs_add = [(x1, x2) for x1, x2 in itertools.product(all_values.tolist(), repeat=2) if x1 + x2 < 1000]\n",
    "train_values_set = set(train_values.tolist())\n",
    "valid_values_set = set(valid_values.tolist())\n",
    "test_values_set = set(test_values.tolist())\n",
    "\n",
    "train_inputs = [(x1, x2) for x1, x2 in all_inputs if x1 * x2 in train_values_set]\n",
    "train_inputs_add = [(x1, x2) for x1, x2 in all_inputs_add if x1 + x2 in train_values_set]\n",
    "valid_inputs = [(x1, x2) for x1, x2 in all_inputs if x1 * x2 in valid_values_set]\n",
    "valid_inputs_add = [(x1, x2) for x1, x2 in all_inputs_add if x1 + x2 in valid_values_set]\n",
    "test_inputs = [(x1, x2) for x1, x2 in all_inputs if x1 * x2 in test_values_set]\n",
    "test_inputs_add = [(x1, x2) for x1, x2 in all_inputs_add if x1 + x2 in test_values_set]\n",
    "\n",
    "# sanity check\n",
    "assert set(train_inputs) & set(valid_inputs) == set()\n",
    "assert set(train_inputs) & set(test_inputs) == set()\n",
    "assert set(valid_inputs) & set(test_inputs) == set()\n",
    "\n",
    "assert set(train_inputs_add) & set(valid_inputs_add) == set()\n",
    "assert set(train_inputs_add) & set(test_inputs_add) == set()\n",
    "assert set(valid_inputs_add) & set(test_inputs_add) == set()\n",
    "\n",
    "random.seed(0)\n",
    "random.shuffle(train_inputs)\n",
    "random.shuffle(valid_inputs)\n",
    "random.shuffle(test_inputs)\n",
    "\n",
    "random.shuffle(train_inputs_add)\n",
    "random.shuffle(valid_inputs_add)\n",
    "random.shuffle(test_inputs_add)\n",
    "\n",
    "valid_size = 4096\n",
    "train_size = 50_000  # TODO: change back to 100_000\n",
    "train_inputs = train_inputs[:train_size]\n",
    "train_inputs_add = train_inputs_add[:train_size]\n",
    "valid_inputs = valid_inputs[:valid_size]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "id": "41e33bc2ed4f3ecd",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:29:11.804367Z",
     "start_time": "2025-09-21T11:29:11.797035Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('3 multiplied by 500 is ', '3 multiplied by 0 is ')"
      ]
     },
     "execution_count": 154,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_templates = 6\n",
    "\n",
    "def make_str_input(operands: tuple[int, int] | list[int], template_idx: int = 1) -> str:\n",
    "    x1, x2 = operands\n",
    "    options = [\n",
    "        f\"{x1} times {x2} is \",\n",
    "        f\"{x1} multiplied by {x2} is \",\n",
    "        f\"{x1} multiplied by {x2} equals to \",\n",
    "        f\"{x1} * {x2} = \",\n",
    "        f\"A multiplication of {x1} and {x2} equals to \",\n",
    "        f\"A result of multiplying {x1} and {x2} is \",\n",
    "    ]\n",
    "    assert num_templates == len(options)\n",
    "    # return f\"{x1} times {x2} is \"  # 0.78\n",
    "    # return f\"{x1} multiplied by {x2} is \"  # 90.38\n",
    "    return options[template_idx]\n",
    "\n",
    "make_str_input((3, 500)), make_str_input((3, 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "id": "4d58550541c86d6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:29:12.005705Z",
     "start_time": "2025-09-21T11:29:11.997652Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('3 plus 500 is equal to ', '3 plus 0 is equal to ')"
      ]
     },
     "execution_count": 155,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def make_str_input_add(operands: tuple[int, int] | list[int]) -> str:\n",
    "    x1, x2 = operands\n",
    "    return f\"{x1} plus {x2} is equal to \"  # TODO: maybe switch back\n",
    "\n",
    "make_str_input_add((3, 500)), make_str_input_add((3, 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "id": "7f27bc52-c899-464d-96ac-6a55d1424674",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[128000, 18, 5636, 220, 2636, 374, 6273, 311, 220]"
      ]
     },
     "execution_count": 156,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tokenizer('3 plus 500 is equal to ').input_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "id": "97d1f38e76bac3aa",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:29:12.181502Z",
     "start_time": "2025-09-21T11:29:12.157668Z"
    }
   },
   "outputs": [],
   "source": [
    "def get_hidden_states_and_preds(model, str_inputs: list[str], batch_size: int) -> tuple[dict[int, Tensor], list[str]]:\n",
    "    model.eval()\n",
    "    hidden_states = collections.defaultdict(list)\n",
    "    model_preds = []\n",
    "    with torch.no_grad():\n",
    "        num_batches = (len(str_inputs) + batch_size - 1) // batch_size\n",
    "        for batch_str in tqdm.auto.tqdm(itertools.batched(str_inputs, n=batch_size), total=num_batches, desc=\"Inferring model hidden states\"):\n",
    "            batch_inputs = tokenizer(batch_str, return_tensors=\"pt\")\n",
    "            model_outputs = model(**batch_inputs.to(model.device), output_hidden_states=True)\n",
    "            hidden_reprs = model_outputs.hidden_states\n",
    "            logits = model_outputs.last_hidden_state @ model.embed_tokens.weight.T\n",
    "            next_token_ids = logits[:, -1, :].argmax(dim=-1)\n",
    "            model_preds.extend(tokenizer.batch_decode(next_token_ids))\n",
    "\n",
    "            for layer_idx, hidden_state in enumerate(hidden_reprs):\n",
    "                hidden_states[layer_idx].extend(hidden_state[:, -1, :].detach().cpu())\n",
    "    return {k: torch.stack(v) for k, v in hidden_states.items()}, model_preds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "id": "8f26e3c9f4c6e5bd",
   "metadata": {
    "ExecuteTime": {
     "start_time": "2025-09-21T11:29:12.361522Z"
    },
    "jupyter": {
     "is_executing": true
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7f716436654b4b53a405f9d1e5806a7c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/9 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "98504061d8d94e278df34bcc74205c99",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/9 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2321a3f394e446f1817ffabd192bb389",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/9 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "09d8e1340b09435cb980afe077deafcc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/9 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "6b3bab791fa043279819db0134da8efa",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/9 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "fc0f487289684badaaead58ca14a7c57",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/9 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "batch_size = 1024\n",
    "# train_hidden_states, train_preds = get_hidden_states_and_preds(\n",
    "#         model,\n",
    "#         [make_str_input(val, 0) for val in train_inputs] + [make_str_input(val, 1) for val in train_inputs] + [make_str_input(val, 2) for val in train_inputs],\n",
    "#         batch_size\n",
    "# )\n",
    "states_preds = [get_hidden_states_and_preds(model, [make_str_input(val, i) for val in train_inputs], batch_size) for i in range(num_templates)]\n",
    "\n",
    "hidden_states_all = [x[0] for x in states_preds]\n",
    "preds_all = [x[1] for x in states_preds]\n",
    "\n",
    "train_hidden_states = {k: torch.concat([hidden_states_all[i][k] for i in range(num_templates)]) for k in hidden_states_all[0].keys()}\n",
    "train_preds = list(itertools.chain(*preds_all))\n",
    "\n",
    "# train_hidden_states, train_preds = get_hidden_states_and_preds(\n",
    "#         model,\n",
    "#         [make_str_input(val) for val in train_inputs],\n",
    "#         batch_size\n",
    "# )\n",
    "# train_hidden_states, train_preds = get_hidden_states_and_preds(\n",
    "#         model,\n",
    "#         [make_str_input_add(val) for val in train_inputs_add],\n",
    "#         batch_size\n",
    "# )\n",
    "# valid_hidden_states, valid_preds = get_hidden_states_and_preds(\n",
    "#         model,\n",
    "#         [make_str_input(val) for val in valid_inputs],\n",
    "#         batch_size\n",
    "# )\n",
    "# test_hidden_states, test_preds = get_hidden_states_and_preds(\n",
    "#         model,\n",
    "#         [make_str_input(val) for val in test_inputs],\n",
    "#         batch_size\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "5264b095f386b894",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78bc1a78c19149548e3668ce6e1c413b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9b90a6c91a914abeaca8144580183a9d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Inferring model hidden states:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "valid_hidden_states, valid_preds = get_hidden_states_and_preds(\n",
    "        model,\n",
    "        [make_str_input(val) for val in valid_inputs],\n",
    "        batch_size\n",
    ")\n",
    "test_hidden_states, test_preds = get_hidden_states_and_preds(\n",
    "        model,\n",
    "        [make_str_input(val) for val in test_inputs],\n",
    "        batch_size\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "a987d1e1dd401fe2",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:27:59.189652Z",
     "start_time": "2025-09-20T20:50:09.537840Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([760, 829, 560,  ...,   0, 368, 211])"
      ]
     },
     "execution_count": 161,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_inputs_t = torch.tensor(train_inputs)\n",
    "\n",
    "train_inputs_t[:, 0] * train_inputs_t[:, 1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "598f9a35335acb13",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T20:50:09.666918Z",
     "start_time": "2025-09-20T20:50:09.639124Z"
    }
   },
   "outputs": [],
   "source": [
    "def sanitize_pred(pred: str) -> int:\n",
    "    try:\n",
    "        return int(pred)\n",
    "    except ValueError:\n",
    "        return -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "fbb0c06e4e6a5d91",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T20:50:09.864195Z",
     "start_time": "2025-09-20T20:50:09.769493Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.)"
      ]
     },
     "execution_count": 163,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_inputs_t = torch.tensor(test_inputs)\n",
    "\n",
    "train_preds_t = torch.tensor([sanitize_pred(pred) for pred in train_preds])\n",
    "valid_preds_t = torch.tensor([sanitize_pred(pred) for pred in valid_preds])\n",
    "test_preds_t = torch.tensor([sanitize_pred(pred) for pred in test_preds])\n",
    "\n",
    "# ratio of properly extracted train predictions\n",
    "sum(train_preds_t != -1) / len(train_preds_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "40311667-9942-46fc-8e54-8b886b4a2bdf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(0.9038)"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# absolute model accuracy on test set\n",
    "test_labels_ref = torch.tensor([x1 * x2 for x1, x2 in test_inputs])\n",
    "sum(test_preds_t == test_labels_ref) / len(test_inputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e246be877b3eee1c",
   "metadata": {},
   "source": [
    "### Probing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "95b127e4fa0b35a1",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T20:50:10.150920Z",
     "start_time": "2025-09-20T20:50:10.091541Z"
    }
   },
   "outputs": [],
   "source": [
    "basis_embs_sin = sinusoidal_encode(\n",
    "    torch.arange(1000),\n",
    "    min_value=0,\n",
    "    max_value=1000,\n",
    "    embedding_dim=train_hidden_states[0].shape[-1],\n",
    ")\n",
    "\n",
    "\n",
    "basis_embs_bin = binary_encode(\n",
    "    torch.arange(1000),\n",
    "    min_value=0,\n",
    "    max_value=1000,\n",
    "    embedding_dim=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "364edd5b36dfd13b",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T20:50:10.258990Z",
     "start_time": "2025-09-20T20:50:10.241575Z"
    }
   },
   "outputs": [],
   "source": [
    "class ClassifierProbe(torch.nn.Module):\n",
    "    def __init__(self, emb_dim: int, hidden_dim: int, basis: torch.Tensor, heldout_mask: torch.Tensor):\n",
    "        super().__init__()\n",
    "        self.emb_to_latent = torch.nn.Linear(emb_dim, hidden_dim, bias=True)\n",
    "        self.basis_to_latent = torch.nn.Linear(basis.shape[-1], hidden_dim, bias=True)\n",
    "        self.basis: torch.nn.Buffer\n",
    "        self.heldout_mask: torch.nn.Buffer\n",
    "        self.register_buffer(\"basis\", basis)\n",
    "        self.register_buffer(\"heldout_mask\", heldout_mask)\n",
    "    def forward(self, x: Tensor, holdout_eval_tokens: bool) -> Tensor:\n",
    "        latent_x = self.emb_to_latent(x)\n",
    "        # during training, model learns to choose among only training tokens\n",
    "        # but during eval, model must choose among all tokens\n",
    "        # this means that the model is never exposed to the eval tokens during training\n",
    "        latent_choices = self.basis_to_latent(self.basis)\n",
    "        logits = latent_x @ latent_choices.T\n",
    "        if holdout_eval_tokens:\n",
    "            logits[:, self.heldout_mask] = float(\"-inf\")\n",
    "        return logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "708ffb5bef3dbd49",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T20:50:10.438923Z",
     "start_time": "2025-09-20T20:50:10.433709Z"
    }
   },
   "outputs": [],
   "source": [
    "# train_labels_ref = torch.tensor([x1 * x2 for x1, x2 in train_inputs])\n",
    "# valid_labels_ref = torch.tensor([x1 * x2 for x1, x2 in valid_inputs]).to(device)\n",
    "# test_labels_ref = torch.tensor([x1 * x2 for x1, x2 in test_inputs]).to(device)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "4451d0a344301b40",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T20:50:10.610651Z",
     "start_time": "2025-09-20T20:50:10.603253Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([49158, 3072])"
      ]
     },
     "execution_count": 168,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_hidden_states[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "c810d88ec56cede3",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:27:59.203923Z",
     "start_time": "2025-09-20T20:50:10.760038Z"
    }
   },
   "outputs": [],
   "source": [
    "# train_labels_ref = torch.tensor([x1 * x2 for x1, x2 in train_inputs] + [x1 + x2 for x1, x2 in train_inputs_add])\n",
    "train_labels_ref = torch.tensor([x1 + x2 for x1, x2 in train_inputs])\n",
    "\n",
    "valid_labels_ref = torch.tensor([x1 * x2 for x1, x2 in valid_inputs]).to(device)\n",
    "test_labels_ref = torch.tensor([x1 * x2 for x1, x2 in test_inputs]).to(device)\n",
    "\n",
    "train_labels = train_preds_t.detach().clone()\n",
    "train_hidden_states = {k: v[train_labels != -1] for k, v in train_hidden_states.items()}\n",
    "train_labels = train_labels[train_labels != -1]\n",
    "\n",
    "valid_labels = valid_preds_t.detach().clone()\n",
    "valid_hidden_states = {k: v[valid_labels != -1] for k, v in valid_hidden_states.items()}\n",
    "valid_labels = valid_labels[valid_labels != -1].to(device)\n",
    "\n",
    "test_labels = test_preds_t.detach().clone()\n",
    "test_hidden_states = {k: v[test_labels != -1] for k, v in test_hidden_states.items()}\n",
    "test_labels = test_labels[test_labels != -1].to(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "fff731bebf247070",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:27:59.205689Z",
     "start_time": "2025-09-20T20:50:11.267801Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(1.)"
      ]
     },
     "execution_count": 170,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# how many of the model outputs are valid numbers (=usable as labels in training)\n",
    "sum(train_preds_t != -1) / len(train_preds_t)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "485bac1a-a98f-4ec4-9c96-f62fac9b0718",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([49158, 3072])"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_hidden_states[0].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "6e0936f779eacc40",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T11:27:59.217101Z",
     "start_time": "2025-09-21T10:45:01.531389Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sin i=    0 train loss: 67.89  train acc: 0.00  val loss: 36.10  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.23  train acc: 0.57  val loss:  3.34  valid acc: 0.12\n",
      "sin i= 1000 train loss:  4.06  train acc: 0.52  val loss:  2.70  valid acc: 0.30\n",
      "sin i= 1500 train loss:  3.37  train acc: 0.72  val loss:  2.71  valid acc: 0.27\n",
      "sin i= 2000 train loss:  3.76  train acc: 0.62  val loss:  2.91  valid acc: 0.23\n",
      "sin i= 2500 train loss:  3.65  train acc: 0.62  val loss:  2.61  valid acc: 0.32\n",
      "sin i= 3000 train loss:  4.23  train acc: 0.55  val loss:  2.45  valid acc: 0.33\n",
      "sin i= 3500 train loss: 40.06  train acc: 0.66  val loss: 15.25  valid acc: 0.29\n",
      "sin i= 4000 train loss: 13.45  train acc: 0.70  val loss:  4.53  valid acc: 0.35\n",
      "sin i= 4500 train loss:  7.01  train acc: 0.66  val loss:  2.57  valid acc: 0.47\n",
      "sin i= 5000 train loss:  4.58  train acc: 0.68  val loss:  2.44  valid acc: 0.44\n",
      "sin i= 5500 train loss:  3.55  train acc: 0.75  val loss:  2.45  valid acc: 0.39\n",
      "sin i= 6000 train loss:  3.13  train acc: 0.73  val loss:  2.06  valid acc: 0.47\n",
      "sin i= 6500 train loss:  2.91  train acc: 0.77  val loss:  2.10  valid acc: 0.48\n",
      "sin i= 7000 train loss:  3.30  train acc: 0.68  val loss:  1.99  valid acc: 0.51\n",
      "sin i= 7500 train loss:  3.12  train acc: 0.73  val loss:  2.09  valid acc: 0.50\n",
      "sin i= 8000 train loss:  3.05  train acc: 0.73  val loss:  2.37  valid acc: 0.42\n",
      "sin i= 8500 train loss:  3.13  train acc: 0.69  val loss:  2.02  valid acc: 0.48\n",
      "sin i= 9000 train loss:  3.40  train acc: 0.68  val loss:  2.10  valid acc: 0.44\n",
      "sin i= 9500 train loss:  3.34  train acc: 0.70  val loss:  1.99  valid acc: 0.52\n",
      "sin i=10000 train loss:  3.50  train acc: 0.59  val loss:  2.42  valid acc: 0.42\n",
      "sin i=10500 train loss:  3.44  train acc: 0.67  val loss:  2.19  valid acc: 0.48\n",
      "sin i=11000 train loss:  3.87  train acc: 0.64  val loss:  2.29  valid acc: 0.45\n",
      "sin i=11500 train loss: 31.39  train acc: 0.66  val loss:  8.62  valid acc: 0.33\n",
      "sin i=12000 train loss:  6.85  train acc: 0.73  val loss:  2.07  valid acc: 0.49\n",
      "sin i=12500 train loss:  3.62  train acc: 0.73  val loss:  2.03  valid acc: 0.51\n",
      "sin i=13000 train loss:  3.07  train acc: 0.70  val loss:  2.02  valid acc: 0.51\n",
      "sin i=13500 train loss:  2.76  train acc: 0.72  val loss:  2.26  valid acc: 0.42\n",
      "sin i=14000 train loss:  3.05  train acc: 0.71  val loss:  2.01  valid acc: 0.51\n",
      "sin i=14500 train loss:  2.75  train acc: 0.69  val loss:  2.15  valid acc: 0.46\n",
      "sin i=15000 train loss:  2.88  train acc: 0.70  val loss:  2.05  valid acc: 0.48\n",
      "sin i=15500 train loss:  2.76  train acc: 0.77  val loss:  2.06  valid acc: 0.52\n",
      "sin i=16000 train loss:  3.09  train acc: 0.68  val loss:  1.95  valid acc: 0.52\n",
      "sin i=16500 train loss:  2.86  train acc: 0.76  val loss:  2.21  valid acc: 0.48\n",
      "sin i=17000 train loss:  3.10  train acc: 0.69  val loss:  2.11  valid acc: 0.48\n",
      "sin i=17500 train loss:  2.97  train acc: 0.66  val loss:  2.20  valid acc: 0.46\n",
      "sin i=18000 train loss:  3.05  train acc: 0.69  val loss:  2.15  valid acc: 0.49\n",
      "sin i=18500 train loss:  3.04  train acc: 0.70  val loss:  1.92  valid acc: 0.57\n",
      "sin i=19000 train loss:  2.99  train acc: 0.74  val loss:  2.05  valid acc: 0.52\n",
      "sin i=19500 train loss:  2.74  train acc: 0.76  val loss:  2.16  valid acc: 0.47\n",
      "sin i=20000 train loss:  3.01  train acc: 0.70  val loss:  2.03  valid acc: 0.49\n",
      "sin i=20500 train loss:  2.87  train acc: 0.80  val loss:  1.94  valid acc: 0.55\n",
      "sin i=21000 train loss:  2.64  train acc: 0.79  val loss:  1.90  valid acc: 0.56\n",
      "sin i=21500 train loss:  2.91  train acc: 0.76  val loss:  1.81  valid acc: 0.58\n",
      "sin i=22000 train loss:  2.81  train acc: 0.72  val loss:  1.95  valid acc: 0.54\n",
      "sin i=22500 train loss:  2.89  train acc: 0.70  val loss:  1.84  valid acc: 0.58\n",
      "sin i=23000 train loss:  2.79  train acc: 0.71  val loss:  2.02  valid acc: 0.52\n",
      "sin i=23500 train loss:  2.50  train acc: 0.77  val loss:  1.95  valid acc: 0.54\n",
      "sin i=24000 train loss:  2.66  train acc: 0.79  val loss:  1.89  valid acc: 0.54\n",
      "sin i=24500 train loss:  2.43  train acc: 0.81  val loss:  1.81  valid acc: 0.57\n",
      "sin i=25000 train loss:  2.37  train acc: 0.79  val loss:  1.84  valid acc: 0.60\n",
      "sin i=25500 train loss:  2.40  train acc: 0.77  val loss:  1.79  valid acc: 0.57\n",
      "sin i=26000 train loss:  2.62  train acc: 0.73  val loss:  1.86  valid acc: 0.56\n",
      "sin i=26500 train loss:  2.46  train acc: 0.76  val loss:  1.82  valid acc: 0.56\n",
      "sin i=27000 train loss:  2.80  train acc: 0.77  val loss:  1.79  valid acc: 0.61\n",
      "sin i=27500 train loss:  2.48  train acc: 0.75  val loss:  1.80  valid acc: 0.58\n",
      "sin i=28000 train loss:  2.45  train acc: 0.75  val loss:  1.80  valid acc: 0.58\n",
      "sin i=28500 train loss:  2.53  train acc: 0.74  val loss:  1.76  valid acc: 0.59\n",
      "sin i=29000 train loss:  2.23  train acc: 0.78  val loss:  1.74  valid acc: 0.61\n",
      "sin i=29500 train loss:  2.38  train acc: 0.77  val loss:  1.75  valid acc: 0.59\n",
      "sin i=30000 train loss:  2.23  train acc: 0.81  val loss:  1.76  valid acc: 0.61\n",
      "sin i=30500 train loss:  2.24  train acc: 0.82  val loss:  1.74  valid acc: 0.60\n",
      "sin i=31000 train loss:  2.23  train acc: 0.77  val loss:  1.75  valid acc: 0.61\n",
      "sin i=31500 train loss:  2.18  train acc: 0.80  val loss:  1.74  valid acc: 0.61\n",
      "sin i=32000 train loss:  2.14  train acc: 0.82  val loss:  1.74  valid acc: 0.61\n",
      "sin i=32500 train loss:  2.22  train acc: 0.85  val loss:  1.76  valid acc: 0.60\n",
      "sin i=33000 train loss:  2.15  train acc: 0.79  val loss:  1.75  valid acc: 0.61\n",
      "sin i=33500 train loss:  2.24  train acc: 0.80  val loss:  1.74  valid acc: 0.61\n",
      "sin i=34000 train loss:  2.37  train acc: 0.71  val loss:  1.76  valid acc: 0.60\n",
      "sin i=34500 train loss:  2.22  train acc: 0.80  val loss:  1.76  valid acc: 0.62\n",
      "sin i=35000 train loss:  2.24  train acc: 0.73  val loss:  1.77  valid acc: 0.61\n",
      "sin i=35500 train loss:  2.36  train acc: 0.80  val loss:  1.74  valid acc: 0.61\n",
      "sin i=36000 train loss:  2.26  train acc: 0.79  val loss:  1.72  valid acc: 0.63\n",
      "sin i=36500 train loss:  2.08  train acc: 0.78  val loss:  1.73  valid acc: 0.63\n",
      "sin i=37000 train loss:  2.10  train acc: 0.80  val loss:  1.75  valid acc: 0.61\n",
      "sin i=37500 train loss:  1.93  train acc: 0.86  val loss:  1.76  valid acc: 0.61\n",
      "sin i=38000 train loss:  2.17  train acc: 0.78  val loss:  1.74  valid acc: 0.61\n",
      "sin i=38500 train loss:  2.12  train acc: 0.82  val loss:  1.76  valid acc: 0.61\n",
      "sin i=39000 train loss:  2.42  train acc: 0.74  val loss:  1.75  valid acc: 0.60\n",
      "sin i=39500 train loss:  2.10  train acc: 0.82  val loss:  1.76  valid acc: 0.61\n",
      "sin i=40000 train loss:  2.24  train acc: 0.81  val loss:  1.73  valid acc: 0.60\n",
      "sin i=40500 train loss:  2.25  train acc: 0.71  val loss:  1.75  valid acc: 0.61\n",
      "sin i=41000 train loss:  2.37  train acc: 0.72  val loss:  1.74  valid acc: 0.61\n",
      "sin i=41500 train loss:  2.26  train acc: 0.78  val loss:  1.73  valid acc: 0.60\n",
      "sin i=42000 train loss:  2.12  train acc: 0.84  val loss:  1.74  valid acc: 0.62\n",
      "sin i=42500 train loss:  2.25  train acc: 0.80  val loss:  1.74  valid acc: 0.61\n",
      "sin i=43000 train loss:  2.35  train acc: 0.73  val loss:  1.73  valid acc: 0.62\n",
      "sin i=43500 train loss:  2.33  train acc: 0.77  val loss:  1.74  valid acc: 0.61\n",
      "sin i=44000 train loss:  2.30  train acc: 0.73  val loss:  1.74  valid acc: 0.61\n",
      "sin i=44500 train loss:  2.26  train acc: 0.80  val loss:  1.76  valid acc: 0.61\n",
      "sin i=45000 train loss:  2.22  train acc: 0.80  val loss:  1.73  valid acc: 0.63\n",
      "sin i=45500 train loss:  2.41  train acc: 0.76  val loss:  1.74  valid acc: 0.61\n",
      "sin i=46000 train loss:  2.30  train acc: 0.79  val loss:  1.74  valid acc: 0.61\n",
      "sin i=46500 train loss:  2.51  train acc: 0.73  val loss:  1.73  valid acc: 0.62\n",
      "sin i=47000 train loss:  2.13  train acc: 0.78  val loss:  1.74  valid acc: 0.61\n",
      "sin i=47500 train loss:  2.35  train acc: 0.76  val loss:  1.72  valid acc: 0.61\n",
      "sin i=48000 train loss:  2.28  train acc: 0.78  val loss:  1.73  valid acc: 0.61\n",
      "sin i=48500 train loss:  2.17  train acc: 0.87  val loss:  1.72  valid acc: 0.60\n",
      "sin i=49000 train loss:  2.17  train acc: 0.73  val loss:  1.73  valid acc: 0.62\n",
      "sin i=49500 train loss:  2.17  train acc: 0.79  val loss:  1.74  valid acc: 0.60\n",
      "sin i=50000 train loss:  2.22  train acc: 0.81  val loss:  1.73  valid acc: 0.61\n",
      "-> sin layer idx: 28 , best valid accuracy: 0.63, test accuracy: 0.45\n",
      "sin i=    0 train loss: 63.41  train acc: 0.00  val loss: 27.39  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.05  train acc: 0.45  val loss:  3.58  valid acc: 0.06\n",
      "sin i= 1000 train loss:  4.05  train acc: 0.48  val loss:  3.00  valid acc: 0.14\n",
      "sin i= 1500 train loss:  3.68  train acc: 0.66  val loss:  2.97  valid acc: 0.17\n",
      "sin i= 2000 train loss:  3.78  train acc: 0.61  val loss:  2.94  valid acc: 0.17\n",
      "sin i= 2500 train loss:  3.87  train acc: 0.57  val loss:  2.88  valid acc: 0.23\n",
      "sin i= 3000 train loss:  3.85  train acc: 0.58  val loss:  2.79  valid acc: 0.24\n",
      "sin i= 3500 train loss:  3.73  train acc: 0.67  val loss:  2.71  valid acc: 0.27\n",
      "sin i= 4000 train loss:  3.75  train acc: 0.64  val loss:  2.51  valid acc: 0.37\n",
      "sin i= 4500 train loss:  3.71  train acc: 0.62  val loss:  2.64  valid acc: 0.30\n",
      "sin i= 5000 train loss:  3.71  train acc: 0.64  val loss:  2.29  valid acc: 0.42\n",
      "sin i= 5500 train loss:  3.55  train acc: 0.71  val loss:  2.45  valid acc: 0.37\n",
      "sin i= 6000 train loss:  3.49  train acc: 0.69  val loss:  2.22  valid acc: 0.42\n",
      "sin i= 6500 train loss:  3.40  train acc: 0.77  val loss:  2.19  valid acc: 0.46\n",
      "sin i= 7000 train loss:  3.70  train acc: 0.60  val loss:  2.02  valid acc: 0.52\n",
      "sin i= 7500 train loss:  3.42  train acc: 0.67  val loss:  2.11  valid acc: 0.48\n",
      "sin i= 8000 train loss:  3.36  train acc: 0.70  val loss:  2.16  valid acc: 0.51\n",
      "sin i= 8500 train loss:  3.45  train acc: 0.64  val loss:  2.08  valid acc: 0.45\n",
      "sin i= 9000 train loss:  3.64  train acc: 0.66  val loss:  2.19  valid acc: 0.43\n",
      "sin i= 9500 train loss:  3.49  train acc: 0.68  val loss:  2.16  valid acc: 0.48\n",
      "sin i=10000 train loss:  3.60  train acc: 0.64  val loss:  2.31  valid acc: 0.41\n",
      "sin i=10500 train loss:  3.55  train acc: 0.64  val loss:  2.29  valid acc: 0.44\n",
      "sin i=11000 train loss:  3.95  train acc: 0.61  val loss:  2.13  valid acc: 0.47\n",
      "sin i=11500 train loss:  3.09  train acc: 0.76  val loss:  2.09  valid acc: 0.52\n",
      "sin i=12000 train loss:  3.28  train acc: 0.63  val loss:  2.06  valid acc: 0.52\n",
      "sin i=12500 train loss:  3.38  train acc: 0.65  val loss:  2.13  valid acc: 0.50\n",
      "sin i=13000 train loss:  3.17  train acc: 0.70  val loss:  2.17  valid acc: 0.50\n",
      "sin i=13500 train loss:  3.06  train acc: 0.75  val loss:  2.05  valid acc: 0.51\n",
      "sin i=14000 train loss:  3.53  train acc: 0.65  val loss:  2.05  valid acc: 0.50\n",
      "sin i=14500 train loss:  3.00  train acc: 0.73  val loss:  2.17  valid acc: 0.46\n",
      "sin i=15000 train loss:  3.10  train acc: 0.71  val loss:  2.04  valid acc: 0.54\n",
      "sin i=15500 train loss:  3.04  train acc: 0.70  val loss:  2.01  valid acc: 0.55\n",
      "sin i=16000 train loss:  3.15  train acc: 0.68  val loss:  2.10  valid acc: 0.51\n",
      "sin i=16500 train loss:  2.98  train acc: 0.75  val loss:  2.00  valid acc: 0.56\n",
      "sin i=17000 train loss:  3.06  train acc: 0.68  val loss:  2.06  valid acc: 0.49\n",
      "sin i=17500 train loss:  3.11  train acc: 0.63  val loss:  2.12  valid acc: 0.50\n",
      "sin i=18000 train loss:  3.15  train acc: 0.63  val loss:  2.14  valid acc: 0.47\n",
      "sin i=18500 train loss:  3.07  train acc: 0.70  val loss:  1.98  valid acc: 0.53\n",
      "sin i=19000 train loss:  3.07  train acc: 0.73  val loss:  2.04  valid acc: 0.49\n",
      "sin i=19500 train loss:  2.71  train acc: 0.80  val loss:  2.02  valid acc: 0.55\n",
      "sin i=20000 train loss:  2.88  train acc: 0.77  val loss:  2.00  valid acc: 0.53\n",
      "sin i=20500 train loss:  2.82  train acc: 0.78  val loss:  2.01  valid acc: 0.55\n",
      "sin i=21000 train loss:  2.69  train acc: 0.73  val loss:  1.94  valid acc: 0.51\n",
      "sin i=21500 train loss:  2.93  train acc: 0.70  val loss:  2.00  valid acc: 0.53\n",
      "sin i=22000 train loss:  2.83  train acc: 0.69  val loss:  1.97  valid acc: 0.57\n",
      "sin i=22500 train loss:  3.07  train acc: 0.64  val loss:  1.91  valid acc: 0.56\n",
      "sin i=23000 train loss:  2.82  train acc: 0.74  val loss:  2.07  valid acc: 0.47\n",
      "sin i=23500 train loss:  2.46  train acc: 0.80  val loss:  1.97  valid acc: 0.51\n",
      "sin i=24000 train loss:  2.78  train acc: 0.67  val loss:  1.99  valid acc: 0.55\n",
      "sin i=24500 train loss:  2.58  train acc: 0.74  val loss:  1.94  valid acc: 0.54\n",
      "sin i=25000 train loss:  2.42  train acc: 0.77  val loss:  1.94  valid acc: 0.56\n",
      "sin i=25500 train loss:  2.47  train acc: 0.80  val loss:  1.95  valid acc: 0.53\n",
      "sin i=26000 train loss:  2.70  train acc: 0.77  val loss:  2.04  valid acc: 0.50\n",
      "sin i=26500 train loss:  2.64  train acc: 0.72  val loss:  1.99  valid acc: 0.52\n",
      "sin i=27000 train loss:  2.89  train acc: 0.73  val loss:  1.90  valid acc: 0.58\n",
      "sin i=27500 train loss:  2.69  train acc: 0.74  val loss:  1.91  valid acc: 0.57\n",
      "sin i=28000 train loss:  2.61  train acc: 0.71  val loss:  1.93  valid acc: 0.54\n",
      "sin i=28500 train loss:  2.70  train acc: 0.77  val loss:  1.89  valid acc: 0.58\n",
      "sin i=29000 train loss:  2.38  train acc: 0.75  val loss:  1.88  valid acc: 0.58\n",
      "sin i=29500 train loss:  2.44  train acc: 0.77  val loss:  1.88  valid acc: 0.58\n",
      "sin i=30000 train loss:  2.43  train acc: 0.73  val loss:  1.90  valid acc: 0.56\n",
      "sin i=30500 train loss:  2.39  train acc: 0.80  val loss:  1.87  valid acc: 0.59\n",
      "sin i=31000 train loss:  2.48  train acc: 0.76  val loss:  1.89  valid acc: 0.58\n",
      "sin i=31500 train loss:  2.34  train acc: 0.78  val loss:  1.88  valid acc: 0.58\n",
      "sin i=32000 train loss:  2.34  train acc: 0.78  val loss:  1.87  valid acc: 0.59\n",
      "sin i=32500 train loss:  2.42  train acc: 0.79  val loss:  1.89  valid acc: 0.58\n",
      "sin i=33000 train loss:  2.32  train acc: 0.78  val loss:  1.88  valid acc: 0.58\n",
      "sin i=33500 train loss:  2.37  train acc: 0.79  val loss:  1.88  valid acc: 0.58\n",
      "sin i=34000 train loss:  2.53  train acc: 0.76  val loss:  1.89  valid acc: 0.57\n",
      "sin i=34500 train loss:  2.36  train acc: 0.79  val loss:  1.89  valid acc: 0.58\n",
      "sin i=35000 train loss:  2.33  train acc: 0.80  val loss:  1.90  valid acc: 0.57\n",
      "sin i=35500 train loss:  2.58  train acc: 0.71  val loss:  1.89  valid acc: 0.57\n",
      "sin i=36000 train loss:  2.39  train acc: 0.77  val loss:  1.88  valid acc: 0.58\n",
      "sin i=36500 train loss:  2.29  train acc: 0.78  val loss:  1.88  valid acc: 0.58\n",
      "sin i=37000 train loss:  2.29  train acc: 0.77  val loss:  1.88  valid acc: 0.59\n",
      "sin i=37500 train loss:  2.09  train acc: 0.84  val loss:  1.88  valid acc: 0.57\n",
      "sin i=38000 train loss:  2.33  train acc: 0.76  val loss:  1.90  valid acc: 0.57\n",
      "sin i=38500 train loss:  2.36  train acc: 0.80  val loss:  1.90  valid acc: 0.58\n",
      "sin i=39000 train loss:  2.57  train acc: 0.73  val loss:  1.88  valid acc: 0.59\n",
      "sin i=39500 train loss:  2.20  train acc: 0.79  val loss:  1.88  valid acc: 0.57\n",
      "sin i=40000 train loss:  2.40  train acc: 0.80  val loss:  1.88  valid acc: 0.58\n",
      "sin i=40500 train loss:  2.44  train acc: 0.69  val loss:  1.89  valid acc: 0.57\n",
      "sin i=41000 train loss:  2.49  train acc: 0.77  val loss:  1.87  valid acc: 0.58\n",
      "sin i=41500 train loss:  2.49  train acc: 0.71  val loss:  1.88  valid acc: 0.58\n",
      "sin i=42000 train loss:  2.29  train acc: 0.80  val loss:  1.88  valid acc: 0.59\n",
      "sin i=42500 train loss:  2.38  train acc: 0.72  val loss:  1.88  valid acc: 0.57\n",
      "sin i=43000 train loss:  2.53  train acc: 0.71  val loss:  1.89  valid acc: 0.58\n",
      "sin i=43500 train loss:  2.56  train acc: 0.70  val loss:  1.89  valid acc: 0.58\n",
      "sin i=44000 train loss:  2.44  train acc: 0.77  val loss:  1.88  valid acc: 0.58\n",
      "sin i=44500 train loss:  2.40  train acc: 0.80  val loss:  1.88  valid acc: 0.59\n",
      "sin i=45000 train loss:  2.37  train acc: 0.75  val loss:  1.89  valid acc: 0.59\n",
      "sin i=45500 train loss:  2.51  train acc: 0.76  val loss:  1.88  valid acc: 0.58\n",
      "sin i=46000 train loss:  2.51  train acc: 0.74  val loss:  1.87  valid acc: 0.58\n",
      "sin i=46500 train loss:  2.70  train acc: 0.69  val loss:  1.88  valid acc: 0.57\n",
      "sin i=47000 train loss:  2.24  train acc: 0.75  val loss:  1.89  valid acc: 0.58\n",
      "sin i=47500 train loss:  2.49  train acc: 0.78  val loss:  1.87  valid acc: 0.58\n",
      "sin i=48000 train loss:  2.52  train acc: 0.73  val loss:  1.88  valid acc: 0.59\n",
      "sin i=48500 train loss:  2.34  train acc: 0.83  val loss:  1.87  valid acc: 0.59\n",
      "sin i=49000 train loss:  2.46  train acc: 0.73  val loss:  1.88  valid acc: 0.58\n",
      "sin i=49500 train loss:  2.36  train acc: 0.77  val loss:  1.87  valid acc: 0.58\n",
      "sin i=50000 train loss:  2.41  train acc: 0.77  val loss:  1.87  valid acc: 0.60\n",
      "-> sin layer idx: 27 , best valid accuracy: 0.60, test accuracy: 0.44\n",
      "sin i=    0 train loss: 63.12  train acc: 0.01  val loss: 20.64  valid acc: 0.00\n",
      "sin i=  500 train loss:  3.94  train acc: 0.60  val loss:  3.05  valid acc: 0.16\n",
      "sin i= 1000 train loss:  3.91  train acc: 0.58  val loss:  2.69  valid acc: 0.26\n",
      "sin i= 1500 train loss:  3.68  train acc: 0.71  val loss:  2.55  valid acc: 0.33\n",
      "sin i= 2000 train loss:  3.83  train acc: 0.59  val loss:  2.55  valid acc: 0.36\n",
      "sin i= 2500 train loss:  3.93  train acc: 0.60  val loss:  2.42  valid acc: 0.40\n",
      "sin i= 3000 train loss:  3.65  train acc: 0.61  val loss:  2.35  valid acc: 0.45\n",
      "sin i= 3500 train loss:  3.61  train acc: 0.67  val loss:  2.37  valid acc: 0.42\n",
      "sin i= 4000 train loss:  3.68  train acc: 0.67  val loss:  2.14  valid acc: 0.48\n",
      "sin i= 4500 train loss:  3.62  train acc: 0.69  val loss:  2.29  valid acc: 0.45\n",
      "sin i= 5000 train loss:  3.68  train acc: 0.64  val loss:  2.15  valid acc: 0.50\n",
      "sin i= 5500 train loss:  3.43  train acc: 0.70  val loss:  2.41  valid acc: 0.38\n",
      "sin i= 6000 train loss:  3.40  train acc: 0.70  val loss:  2.18  valid acc: 0.45\n",
      "sin i= 6500 train loss:  3.36  train acc: 0.73  val loss:  2.17  valid acc: 0.52\n",
      "sin i= 7000 train loss:  3.70  train acc: 0.62  val loss:  2.05  valid acc: 0.50\n",
      "sin i= 7500 train loss:  3.46  train acc: 0.66  val loss:  2.12  valid acc: 0.50\n",
      "sin i= 8000 train loss:  3.40  train acc: 0.69  val loss:  2.11  valid acc: 0.51\n",
      "sin i= 8500 train loss:  3.32  train acc: 0.67  val loss:  2.12  valid acc: 0.45\n",
      "sin i= 9000 train loss:  3.54  train acc: 0.68  val loss:  2.16  valid acc: 0.48\n",
      "sin i= 9500 train loss:  3.44  train acc: 0.65  val loss:  2.10  valid acc: 0.51\n",
      "sin i=10000 train loss:  3.45  train acc: 0.64  val loss:  2.24  valid acc: 0.47\n",
      "sin i=10500 train loss:  3.66  train acc: 0.59  val loss:  2.32  valid acc: 0.46\n",
      "sin i=11000 train loss:  3.88  train acc: 0.61  val loss:  2.17  valid acc: 0.47\n",
      "sin i=11500 train loss:  3.11  train acc: 0.76  val loss:  2.10  valid acc: 0.51\n",
      "sin i=12000 train loss:  3.42  train acc: 0.64  val loss:  2.12  valid acc: 0.50\n",
      "sin i=12500 train loss:  3.20  train acc: 0.70  val loss:  2.15  valid acc: 0.50\n",
      "sin i=13000 train loss:  3.15  train acc: 0.68  val loss:  2.20  valid acc: 0.50\n",
      "sin i=13500 train loss:  3.08  train acc: 0.69  val loss:  2.10  valid acc: 0.51\n",
      "sin i=14000 train loss:  3.64  train acc: 0.64  val loss:  2.11  valid acc: 0.49\n",
      "sin i=14500 train loss:  3.11  train acc: 0.64  val loss:  2.17  valid acc: 0.52\n",
      "sin i=15000 train loss:  3.13  train acc: 0.66  val loss:  2.09  valid acc: 0.54\n",
      "sin i=15500 train loss:  3.05  train acc: 0.70  val loss:  2.10  valid acc: 0.52\n",
      "sin i=16000 train loss:  3.16  train acc: 0.66  val loss:  2.18  valid acc: 0.47\n",
      "sin i=16500 train loss:  3.03  train acc: 0.70  val loss:  2.13  valid acc: 0.51\n",
      "sin i=17000 train loss:  3.02  train acc: 0.70  val loss:  2.03  valid acc: 0.50\n",
      "sin i=17500 train loss:  3.05  train acc: 0.68  val loss:  2.17  valid acc: 0.49\n",
      "sin i=18000 train loss:  3.23  train acc: 0.60  val loss:  2.20  valid acc: 0.47\n",
      "sin i=18500 train loss:  3.15  train acc: 0.69  val loss:  2.03  valid acc: 0.52\n",
      "sin i=19000 train loss:  3.12  train acc: 0.70  val loss:  2.09  valid acc: 0.52\n",
      "sin i=19500 train loss:  2.78  train acc: 0.74  val loss:  2.05  valid acc: 0.52\n",
      "sin i=20000 train loss:  2.90  train acc: 0.73  val loss:  2.04  valid acc: 0.54\n",
      "sin i=20500 train loss:  2.91  train acc: 0.70  val loss:  2.02  valid acc: 0.52\n",
      "sin i=21000 train loss:  2.74  train acc: 0.77  val loss:  2.02  valid acc: 0.52\n",
      "sin i=21500 train loss:  2.97  train acc: 0.68  val loss:  2.06  valid acc: 0.55\n",
      "sin i=22000 train loss:  2.86  train acc: 0.65  val loss:  2.14  valid acc: 0.52\n",
      "sin i=22500 train loss:  3.07  train acc: 0.66  val loss:  1.98  valid acc: 0.54\n",
      "sin i=23000 train loss:  2.90  train acc: 0.70  val loss:  2.10  valid acc: 0.51\n",
      "sin i=23500 train loss:  2.53  train acc: 0.77  val loss:  2.03  valid acc: 0.53\n",
      "sin i=24000 train loss:  2.81  train acc: 0.66  val loss:  2.08  valid acc: 0.51\n",
      "sin i=24500 train loss:  2.62  train acc: 0.70  val loss:  2.00  valid acc: 0.55\n",
      "sin i=25000 train loss:  2.44  train acc: 0.77  val loss:  1.98  valid acc: 0.53\n",
      "sin i=25500 train loss:  2.46  train acc: 0.73  val loss:  1.99  valid acc: 0.54\n",
      "sin i=26000 train loss:  2.81  train acc: 0.68  val loss:  2.06  valid acc: 0.54\n",
      "sin i=26500 train loss:  2.61  train acc: 0.71  val loss:  2.01  valid acc: 0.54\n",
      "sin i=27000 train loss:  2.84  train acc: 0.73  val loss:  1.95  valid acc: 0.55\n",
      "sin i=27500 train loss:  2.75  train acc: 0.75  val loss:  1.96  valid acc: 0.54\n",
      "sin i=28000 train loss:  2.64  train acc: 0.70  val loss:  1.96  valid acc: 0.56\n",
      "sin i=28500 train loss:  2.70  train acc: 0.69  val loss:  1.93  valid acc: 0.56\n",
      "sin i=29000 train loss:  2.41  train acc: 0.74  val loss:  1.96  valid acc: 0.55\n",
      "sin i=29500 train loss:  2.53  train acc: 0.70  val loss:  1.94  valid acc: 0.57\n",
      "sin i=30000 train loss:  2.41  train acc: 0.73  val loss:  1.97  valid acc: 0.57\n",
      "sin i=30500 train loss:  2.35  train acc: 0.81  val loss:  1.94  valid acc: 0.57\n",
      "sin i=31000 train loss:  2.54  train acc: 0.73  val loss:  1.95  valid acc: 0.57\n",
      "sin i=31500 train loss:  2.42  train acc: 0.80  val loss:  1.94  valid acc: 0.57\n",
      "sin i=32000 train loss:  2.39  train acc: 0.75  val loss:  1.94  valid acc: 0.57\n",
      "sin i=32500 train loss:  2.49  train acc: 0.73  val loss:  1.94  valid acc: 0.55\n",
      "sin i=33000 train loss:  2.41  train acc: 0.73  val loss:  1.94  valid acc: 0.57\n",
      "sin i=33500 train loss:  2.40  train acc: 0.70  val loss:  1.95  valid acc: 0.57\n",
      "sin i=34000 train loss:  2.59  train acc: 0.68  val loss:  1.95  valid acc: 0.57\n",
      "sin i=34500 train loss:  2.46  train acc: 0.73  val loss:  1.95  valid acc: 0.56\n",
      "sin i=35000 train loss:  2.42  train acc: 0.70  val loss:  1.96  valid acc: 0.57\n",
      "sin i=35500 train loss:  2.64  train acc: 0.69  val loss:  1.94  valid acc: 0.59\n",
      "sin i=36000 train loss:  2.43  train acc: 0.73  val loss:  1.93  valid acc: 0.57\n",
      "sin i=36500 train loss:  2.36  train acc: 0.77  val loss:  1.94  valid acc: 0.57\n",
      "sin i=37000 train loss:  2.42  train acc: 0.69  val loss:  1.94  valid acc: 0.56\n",
      "sin i=37500 train loss:  2.10  train acc: 0.83  val loss:  1.95  valid acc: 0.57\n",
      "sin i=38000 train loss:  2.39  train acc: 0.74  val loss:  1.96  valid acc: 0.57\n",
      "sin i=38500 train loss:  2.33  train acc: 0.74  val loss:  1.96  valid acc: 0.57\n",
      "sin i=39000 train loss:  2.63  train acc: 0.73  val loss:  1.93  valid acc: 0.57\n",
      "sin i=39500 train loss:  2.31  train acc: 0.73  val loss:  1.94  valid acc: 0.56\n",
      "sin i=40000 train loss:  2.47  train acc: 0.73  val loss:  1.94  valid acc: 0.56\n",
      "sin i=40500 train loss:  2.47  train acc: 0.74  val loss:  1.95  valid acc: 0.56\n",
      "sin i=41000 train loss:  2.63  train acc: 0.71  val loss:  1.93  valid acc: 0.57\n",
      "sin i=41500 train loss:  2.54  train acc: 0.69  val loss:  1.95  valid acc: 0.57\n",
      "sin i=42000 train loss:  2.32  train acc: 0.73  val loss:  1.95  valid acc: 0.56\n",
      "sin i=42500 train loss:  2.52  train acc: 0.77  val loss:  1.94  valid acc: 0.58\n",
      "sin i=43000 train loss:  2.62  train acc: 0.68  val loss:  1.94  valid acc: 0.58\n",
      "sin i=43500 train loss:  2.57  train acc: 0.66  val loss:  1.95  valid acc: 0.57\n",
      "sin i=44000 train loss:  2.60  train acc: 0.73  val loss:  1.94  valid acc: 0.58\n",
      "sin i=44500 train loss:  2.50  train acc: 0.75  val loss:  1.93  valid acc: 0.57\n",
      "sin i=45000 train loss:  2.40  train acc: 0.70  val loss:  1.95  valid acc: 0.57\n",
      "sin i=45500 train loss:  2.65  train acc: 0.70  val loss:  1.94  valid acc: 0.56\n",
      "sin i=46000 train loss:  2.58  train acc: 0.72  val loss:  1.94  valid acc: 0.57\n",
      "sin i=46500 train loss:  2.76  train acc: 0.67  val loss:  1.94  valid acc: 0.57\n",
      "sin i=47000 train loss:  2.30  train acc: 0.73  val loss:  1.95  valid acc: 0.57\n",
      "sin i=47500 train loss:  2.47  train acc: 0.76  val loss:  1.92  valid acc: 0.57\n",
      "sin i=48000 train loss:  2.59  train acc: 0.70  val loss:  1.93  valid acc: 0.57\n",
      "sin i=48500 train loss:  2.46  train acc: 0.77  val loss:  1.93  valid acc: 0.58\n",
      "sin i=49000 train loss:  2.53  train acc: 0.66  val loss:  1.93  valid acc: 0.57\n",
      "sin i=49500 train loss:  2.41  train acc: 0.76  val loss:  1.93  valid acc: 0.57\n",
      "sin i=50000 train loss:  2.39  train acc: 0.73  val loss:  1.93  valid acc: 0.57\n",
      "-> sin layer idx: 26 , best valid accuracy: 0.59, test accuracy: 0.41\n",
      "sin i=    0 train loss: 62.92  train acc: 0.01  val loss: 20.23  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.11  train acc: 0.48  val loss:  3.38  valid acc: 0.13\n",
      "sin i= 1000 train loss:  4.28  train acc: 0.39  val loss:  2.91  valid acc: 0.22\n",
      "sin i= 1500 train loss:  3.76  train acc: 0.57  val loss:  2.93  valid acc: 0.22\n",
      "sin i= 2000 train loss:  4.01  train acc: 0.50  val loss:  2.88  valid acc: 0.21\n",
      "sin i= 2500 train loss:  3.92  train acc: 0.58  val loss:  2.73  valid acc: 0.29\n",
      "sin i= 3000 train loss:  3.91  train acc: 0.59  val loss:  2.70  valid acc: 0.31\n",
      "sin i= 3500 train loss:  3.84  train acc: 0.66  val loss:  2.78  valid acc: 0.31\n",
      "sin i= 4000 train loss:  3.83  train acc: 0.66  val loss:  2.63  valid acc: 0.36\n",
      "sin i= 4500 train loss:  3.83  train acc: 0.66  val loss:  2.81  valid acc: 0.30\n",
      "sin i= 5000 train loss:  3.87  train acc: 0.59  val loss:  2.66  valid acc: 0.33\n",
      "sin i= 5500 train loss:  3.61  train acc: 0.68  val loss:  2.67  valid acc: 0.38\n",
      "sin i= 6000 train loss:  3.63  train acc: 0.62  val loss:  2.50  valid acc: 0.34\n",
      "sin i= 6500 train loss:  3.56  train acc: 0.72  val loss:  2.50  valid acc: 0.42\n",
      "sin i= 7000 train loss:  3.89  train acc: 0.57  val loss:  2.40  valid acc: 0.39\n",
      "sin i= 7500 train loss:  3.62  train acc: 0.61  val loss:  2.51  valid acc: 0.41\n",
      "sin i= 8000 train loss:  3.63  train acc: 0.66  val loss:  2.53  valid acc: 0.41\n",
      "sin i= 8500 train loss:  3.52  train acc: 0.62  val loss:  2.42  valid acc: 0.42\n",
      "sin i= 9000 train loss:  3.70  train acc: 0.66  val loss:  2.53  valid acc: 0.38\n",
      "sin i= 9500 train loss:  3.68  train acc: 0.63  val loss:  2.53  valid acc: 0.37\n",
      "sin i=10000 train loss:  3.63  train acc: 0.62  val loss:  2.71  valid acc: 0.31\n",
      "sin i=10500 train loss:  3.64  train acc: 0.63  val loss:  2.56  valid acc: 0.40\n",
      "sin i=11000 train loss:  3.91  train acc: 0.57  val loss:  2.50  valid acc: 0.39\n",
      "sin i=11500 train loss:  3.23  train acc: 0.75  val loss:  2.44  valid acc: 0.42\n",
      "sin i=12000 train loss:  3.46  train acc: 0.58  val loss:  2.42  valid acc: 0.40\n",
      "sin i=12500 train loss:  3.40  train acc: 0.69  val loss:  2.38  valid acc: 0.42\n",
      "sin i=13000 train loss:  3.28  train acc: 0.73  val loss:  2.51  valid acc: 0.42\n",
      "sin i=13500 train loss:  3.21  train acc: 0.72  val loss:  2.35  valid acc: 0.47\n",
      "sin i=14000 train loss:  3.64  train acc: 0.59  val loss:  2.40  valid acc: 0.41\n",
      "sin i=14500 train loss:  3.15  train acc: 0.71  val loss:  2.34  valid acc: 0.47\n",
      "sin i=15000 train loss:  3.25  train acc: 0.65  val loss:  2.34  valid acc: 0.45\n",
      "sin i=15500 train loss:  3.19  train acc: 0.70  val loss:  2.28  valid acc: 0.47\n",
      "sin i=16000 train loss:  3.27  train acc: 0.66  val loss:  2.46  valid acc: 0.40\n",
      "sin i=16500 train loss:  3.14  train acc: 0.71  val loss:  2.43  valid acc: 0.42\n",
      "sin i=17000 train loss:  3.19  train acc: 0.65  val loss:  2.29  valid acc: 0.42\n",
      "sin i=17500 train loss:  3.22  train acc: 0.73  val loss:  2.45  valid acc: 0.42\n",
      "sin i=18000 train loss:  3.31  train acc: 0.59  val loss:  2.42  valid acc: 0.40\n",
      "sin i=18500 train loss:  3.29  train acc: 0.67  val loss:  2.29  valid acc: 0.45\n",
      "sin i=19000 train loss:  3.21  train acc: 0.70  val loss:  2.26  valid acc: 0.48\n",
      "sin i=19500 train loss:  2.91  train acc: 0.76  val loss:  2.21  valid acc: 0.49\n",
      "sin i=20000 train loss:  3.01  train acc: 0.71  val loss:  2.35  valid acc: 0.46\n",
      "sin i=20500 train loss:  3.03  train acc: 0.70  val loss:  2.27  valid acc: 0.47\n",
      "sin i=21000 train loss:  2.86  train acc: 0.73  val loss:  2.26  valid acc: 0.49\n",
      "sin i=21500 train loss:  3.14  train acc: 0.66  val loss:  2.27  valid acc: 0.49\n",
      "sin i=22000 train loss:  3.01  train acc: 0.67  val loss:  2.39  valid acc: 0.45\n",
      "sin i=22500 train loss:  3.22  train acc: 0.66  val loss:  2.32  valid acc: 0.47\n",
      "sin i=23000 train loss:  3.14  train acc: 0.67  val loss:  2.35  valid acc: 0.45\n",
      "sin i=23500 train loss:  2.70  train acc: 0.76  val loss:  2.28  valid acc: 0.48\n",
      "sin i=24000 train loss:  3.01  train acc: 0.66  val loss:  2.37  valid acc: 0.44\n",
      "sin i=24500 train loss:  2.73  train acc: 0.73  val loss:  2.26  valid acc: 0.48\n",
      "sin i=25000 train loss:  2.61  train acc: 0.77  val loss:  2.22  valid acc: 0.50\n",
      "sin i=25500 train loss:  2.61  train acc: 0.74  val loss:  2.27  valid acc: 0.48\n",
      "sin i=26000 train loss:  2.94  train acc: 0.70  val loss:  2.36  valid acc: 0.46\n",
      "sin i=26500 train loss:  2.86  train acc: 0.69  val loss:  2.30  valid acc: 0.45\n",
      "sin i=27000 train loss:  3.08  train acc: 0.67  val loss:  2.23  valid acc: 0.51\n",
      "sin i=27500 train loss:  2.87  train acc: 0.73  val loss:  2.24  valid acc: 0.49\n",
      "sin i=28000 train loss:  2.77  train acc: 0.70  val loss:  2.28  valid acc: 0.47\n",
      "sin i=28500 train loss:  2.83  train acc: 0.70  val loss:  2.21  valid acc: 0.51\n",
      "sin i=29000 train loss:  2.59  train acc: 0.70  val loss:  2.22  valid acc: 0.49\n",
      "sin i=29500 train loss:  2.64  train acc: 0.73  val loss:  2.22  valid acc: 0.51\n",
      "sin i=30000 train loss:  2.57  train acc: 0.72  val loss:  2.27  valid acc: 0.49\n",
      "sin i=30500 train loss:  2.49  train acc: 0.77  val loss:  2.22  valid acc: 0.51\n",
      "sin i=31000 train loss:  2.75  train acc: 0.70  val loss:  2.24  valid acc: 0.50\n",
      "sin i=31500 train loss:  2.59  train acc: 0.70  val loss:  2.23  valid acc: 0.50\n",
      "sin i=32000 train loss:  2.63  train acc: 0.73  val loss:  2.22  valid acc: 0.52\n",
      "sin i=32500 train loss:  2.60  train acc: 0.76  val loss:  2.23  valid acc: 0.51\n",
      "sin i=33000 train loss:  2.54  train acc: 0.75  val loss:  2.23  valid acc: 0.51\n",
      "sin i=33500 train loss:  2.53  train acc: 0.73  val loss:  2.23  valid acc: 0.50\n",
      "sin i=34000 train loss:  2.82  train acc: 0.69  val loss:  2.24  valid acc: 0.52\n",
      "sin i=34500 train loss:  2.60  train acc: 0.71  val loss:  2.24  valid acc: 0.50\n",
      "sin i=35000 train loss:  2.61  train acc: 0.70  val loss:  2.25  valid acc: 0.49\n",
      "sin i=35500 train loss:  2.89  train acc: 0.66  val loss:  2.23  valid acc: 0.52\n",
      "sin i=36000 train loss:  2.65  train acc: 0.72  val loss:  2.24  valid acc: 0.50\n",
      "sin i=36500 train loss:  2.56  train acc: 0.73  val loss:  2.23  valid acc: 0.51\n",
      "sin i=37000 train loss:  2.56  train acc: 0.69  val loss:  2.22  valid acc: 0.52\n",
      "sin i=37500 train loss:  2.30  train acc: 0.80  val loss:  2.23  valid acc: 0.52\n",
      "sin i=38000 train loss:  2.55  train acc: 0.71  val loss:  2.24  valid acc: 0.50\n",
      "sin i=38500 train loss:  2.48  train acc: 0.75  val loss:  2.25  valid acc: 0.49\n",
      "sin i=39000 train loss:  2.85  train acc: 0.66  val loss:  2.23  valid acc: 0.51\n",
      "sin i=39500 train loss:  2.41  train acc: 0.76  val loss:  2.23  valid acc: 0.51\n",
      "sin i=40000 train loss:  2.66  train acc: 0.71  val loss:  2.22  valid acc: 0.52\n",
      "sin i=40500 train loss:  2.64  train acc: 0.72  val loss:  2.25  valid acc: 0.51\n",
      "sin i=41000 train loss:  2.70  train acc: 0.77  val loss:  2.22  valid acc: 0.52\n",
      "sin i=41500 train loss:  2.72  train acc: 0.70  val loss:  2.23  valid acc: 0.51\n",
      "sin i=42000 train loss:  2.46  train acc: 0.75  val loss:  2.23  valid acc: 0.50\n",
      "sin i=42500 train loss:  2.62  train acc: 0.77  val loss:  2.23  valid acc: 0.52\n",
      "sin i=43000 train loss:  2.88  train acc: 0.64  val loss:  2.24  valid acc: 0.50\n",
      "sin i=43500 train loss:  2.69  train acc: 0.67  val loss:  2.24  valid acc: 0.51\n",
      "sin i=44000 train loss:  2.69  train acc: 0.66  val loss:  2.22  valid acc: 0.52\n",
      "sin i=44500 train loss:  2.70  train acc: 0.73  val loss:  2.23  valid acc: 0.52\n",
      "sin i=45000 train loss:  2.63  train acc: 0.72  val loss:  2.24  valid acc: 0.51\n",
      "sin i=45500 train loss:  2.82  train acc: 0.70  val loss:  2.23  valid acc: 0.51\n",
      "sin i=46000 train loss:  2.75  train acc: 0.70  val loss:  2.22  valid acc: 0.52\n",
      "sin i=46500 train loss:  2.81  train acc: 0.65  val loss:  2.23  valid acc: 0.52\n",
      "sin i=47000 train loss:  2.48  train acc: 0.73  val loss:  2.23  valid acc: 0.50\n",
      "sin i=47500 train loss:  2.64  train acc: 0.70  val loss:  2.21  valid acc: 0.52\n",
      "sin i=48000 train loss:  2.76  train acc: 0.72  val loss:  2.23  valid acc: 0.51\n",
      "sin i=48500 train loss:  2.57  train acc: 0.77  val loss:  2.21  valid acc: 0.52\n",
      "sin i=49000 train loss:  2.66  train acc: 0.68  val loss:  2.21  valid acc: 0.52\n",
      "sin i=49500 train loss:  2.58  train acc: 0.72  val loss:  2.22  valid acc: 0.52\n",
      "sin i=50000 train loss:  2.59  train acc: 0.70  val loss:  2.22  valid acc: 0.52\n",
      "-> sin layer idx: 25 , best valid accuracy: 0.52, test accuracy: 0.42\n",
      "sin i=    0 train loss: 63.00  train acc: 0.00  val loss: 17.26  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.28  train acc: 0.38  val loss:  3.77  valid acc: 0.02\n",
      "sin i= 1000 train loss:  4.33  train acc: 0.40  val loss:  3.48  valid acc: 0.07\n",
      "sin i= 1500 train loss:  3.99  train acc: 0.54  val loss:  3.10  valid acc: 0.13\n",
      "sin i= 2000 train loss:  4.30  train acc: 0.43  val loss:  3.09  valid acc: 0.12\n",
      "sin i= 2500 train loss:  4.02  train acc: 0.53  val loss:  2.98  valid acc: 0.19\n",
      "sin i= 3000 train loss:  3.94  train acc: 0.52  val loss:  2.96  valid acc: 0.22\n",
      "sin i= 3500 train loss:  3.86  train acc: 0.62  val loss:  2.91  valid acc: 0.24\n",
      "sin i= 4000 train loss:  3.90  train acc: 0.60  val loss:  2.83  valid acc: 0.27\n",
      "sin i= 4500 train loss:  3.92  train acc: 0.58  val loss:  2.95  valid acc: 0.26\n",
      "sin i= 5000 train loss:  3.95  train acc: 0.52  val loss:  3.04  valid acc: 0.27\n",
      "sin i= 5500 train loss:  3.67  train acc: 0.61  val loss:  2.94  valid acc: 0.28\n",
      "sin i= 6000 train loss:  3.88  train acc: 0.52  val loss:  2.84  valid acc: 0.28\n",
      "sin i= 6500 train loss:  3.73  train acc: 0.68  val loss:  2.68  valid acc: 0.32\n",
      "sin i= 7000 train loss:  3.92  train acc: 0.58  val loss:  2.77  valid acc: 0.27\n",
      "sin i= 7500 train loss:  3.72  train acc: 0.62  val loss:  2.85  valid acc: 0.28\n",
      "sin i= 8000 train loss:  3.78  train acc: 0.66  val loss:  2.85  valid acc: 0.28\n",
      "sin i= 8500 train loss:  3.69  train acc: 0.59  val loss:  2.71  valid acc: 0.31\n",
      "sin i= 9000 train loss:  3.83  train acc: 0.59  val loss:  2.90  valid acc: 0.27\n",
      "sin i= 9500 train loss:  3.81  train acc: 0.58  val loss:  2.91  valid acc: 0.24\n",
      "sin i=10000 train loss:  3.63  train acc: 0.58  val loss:  3.00  valid acc: 0.23\n",
      "sin i=10500 train loss:  3.76  train acc: 0.57  val loss:  2.77  valid acc: 0.31\n",
      "sin i=11000 train loss:  4.01  train acc: 0.52  val loss:  2.76  valid acc: 0.29\n",
      "sin i=11500 train loss:  3.32  train acc: 0.75  val loss:  2.75  valid acc: 0.30\n",
      "sin i=12000 train loss:  3.65  train acc: 0.57  val loss:  2.77  valid acc: 0.27\n",
      "sin i=12500 train loss:  3.46  train acc: 0.66  val loss:  2.67  valid acc: 0.31\n",
      "sin i=13000 train loss:  3.46  train acc: 0.66  val loss:  2.80  valid acc: 0.28\n",
      "sin i=13500 train loss:  3.43  train acc: 0.66  val loss:  2.71  valid acc: 0.32\n",
      "sin i=14000 train loss:  3.80  train acc: 0.62  val loss:  2.75  valid acc: 0.30\n",
      "sin i=14500 train loss:  3.20  train acc: 0.72  val loss:  2.71  valid acc: 0.35\n",
      "sin i=15000 train loss:  3.45  train acc: 0.65  val loss:  2.74  valid acc: 0.30\n",
      "sin i=15500 train loss:  3.37  train acc: 0.66  val loss:  2.67  valid acc: 0.34\n",
      "sin i=16000 train loss:  3.32  train acc: 0.66  val loss:  2.80  valid acc: 0.28\n",
      "sin i=16500 train loss:  3.33  train acc: 0.66  val loss:  2.71  valid acc: 0.33\n",
      "sin i=17000 train loss:  3.34  train acc: 0.65  val loss:  2.64  valid acc: 0.34\n",
      "sin i=17500 train loss:  3.32  train acc: 0.67  val loss:  2.77  valid acc: 0.32\n",
      "sin i=18000 train loss:  3.50  train acc: 0.59  val loss:  2.80  valid acc: 0.29\n",
      "sin i=18500 train loss:  3.38  train acc: 0.68  val loss:  2.68  valid acc: 0.32\n",
      "sin i=19000 train loss:  3.36  train acc: 0.66  val loss:  2.66  valid acc: 0.33\n",
      "sin i=19500 train loss:  3.03  train acc: 0.72  val loss:  2.70  valid acc: 0.33\n",
      "sin i=20000 train loss:  3.16  train acc: 0.73  val loss:  2.75  valid acc: 0.30\n",
      "sin i=20500 train loss:  3.18  train acc: 0.69  val loss:  2.65  valid acc: 0.34\n",
      "sin i=21000 train loss:  3.05  train acc: 0.70  val loss:  2.63  valid acc: 0.33\n",
      "sin i=21500 train loss:  3.37  train acc: 0.54  val loss:  2.61  valid acc: 0.38\n",
      "sin i=22000 train loss:  3.13  train acc: 0.66  val loss:  2.74  valid acc: 0.31\n",
      "sin i=22500 train loss:  3.42  train acc: 0.62  val loss:  2.68  valid acc: 0.34\n",
      "sin i=23000 train loss:  3.32  train acc: 0.66  val loss:  2.67  valid acc: 0.34\n",
      "sin i=23500 train loss:  2.82  train acc: 0.72  val loss:  2.66  valid acc: 0.36\n",
      "sin i=24000 train loss:  3.22  train acc: 0.66  val loss:  2.63  valid acc: 0.36\n",
      "sin i=24500 train loss:  2.96  train acc: 0.67  val loss:  2.63  valid acc: 0.36\n",
      "sin i=25000 train loss:  2.76  train acc: 0.73  val loss:  2.57  valid acc: 0.35\n",
      "sin i=25500 train loss:  2.80  train acc: 0.73  val loss:  2.67  valid acc: 0.31\n",
      "sin i=26000 train loss:  3.05  train acc: 0.69  val loss:  2.68  valid acc: 0.34\n",
      "sin i=26500 train loss:  2.97  train acc: 0.66  val loss:  2.65  valid acc: 0.34\n",
      "sin i=27000 train loss:  3.26  train acc: 0.67  val loss:  2.62  valid acc: 0.35\n",
      "sin i=27500 train loss:  3.02  train acc: 0.71  val loss:  2.60  valid acc: 0.35\n",
      "sin i=28000 train loss:  2.93  train acc: 0.66  val loss:  2.62  valid acc: 0.35\n",
      "sin i=28500 train loss:  3.01  train acc: 0.70  val loss:  2.56  valid acc: 0.35\n",
      "sin i=29000 train loss:  2.74  train acc: 0.64  val loss:  2.58  valid acc: 0.37\n",
      "sin i=29500 train loss:  2.79  train acc: 0.72  val loss:  2.59  valid acc: 0.34\n",
      "sin i=30000 train loss:  2.75  train acc: 0.67  val loss:  2.62  valid acc: 0.35\n",
      "sin i=30500 train loss:  2.69  train acc: 0.78  val loss:  2.58  valid acc: 0.36\n",
      "sin i=31000 train loss:  2.91  train acc: 0.64  val loss:  2.60  valid acc: 0.34\n",
      "sin i=31500 train loss:  2.83  train acc: 0.66  val loss:  2.59  valid acc: 0.35\n",
      "sin i=32000 train loss:  2.83  train acc: 0.70  val loss:  2.58  valid acc: 0.34\n",
      "sin i=32500 train loss:  2.89  train acc: 0.73  val loss:  2.59  valid acc: 0.35\n",
      "sin i=33000 train loss:  2.67  train acc: 0.72  val loss:  2.59  valid acc: 0.35\n",
      "sin i=33500 train loss:  2.76  train acc: 0.69  val loss:  2.59  valid acc: 0.35\n",
      "sin i=34000 train loss:  2.97  train acc: 0.65  val loss:  2.61  valid acc: 0.34\n",
      "sin i=34500 train loss:  2.75  train acc: 0.66  val loss:  2.61  valid acc: 0.34\n",
      "sin i=35000 train loss:  2.83  train acc: 0.66  val loss:  2.61  valid acc: 0.34\n",
      "sin i=35500 train loss:  3.01  train acc: 0.70  val loss:  2.59  valid acc: 0.35\n",
      "sin i=36000 train loss:  2.78  train acc: 0.70  val loss:  2.60  valid acc: 0.34\n",
      "sin i=36500 train loss:  2.73  train acc: 0.70  val loss:  2.60  valid acc: 0.35\n",
      "sin i=37000 train loss:  2.68  train acc: 0.67  val loss:  2.59  valid acc: 0.36\n",
      "sin i=37500 train loss:  2.47  train acc: 0.76  val loss:  2.60  valid acc: 0.34\n",
      "sin i=38000 train loss:  2.71  train acc: 0.70  val loss:  2.60  valid acc: 0.35\n",
      "sin i=38500 train loss:  2.63  train acc: 0.73  val loss:  2.60  valid acc: 0.35\n",
      "sin i=39000 train loss:  3.05  train acc: 0.62  val loss:  2.58  valid acc: 0.36\n",
      "sin i=39500 train loss:  2.60  train acc: 0.74  val loss:  2.60  valid acc: 0.35\n",
      "sin i=40000 train loss:  2.76  train acc: 0.74  val loss:  2.59  valid acc: 0.34\n",
      "sin i=40500 train loss:  2.75  train acc: 0.68  val loss:  2.62  valid acc: 0.34\n",
      "sin i=41000 train loss:  2.94  train acc: 0.70  val loss:  2.58  valid acc: 0.34\n",
      "sin i=41500 train loss:  2.92  train acc: 0.70  val loss:  2.59  valid acc: 0.34\n",
      "sin i=42000 train loss:  2.59  train acc: 0.73  val loss:  2.60  valid acc: 0.35\n",
      "sin i=42500 train loss:  2.80  train acc: 0.69  val loss:  2.60  valid acc: 0.35\n",
      "sin i=43000 train loss:  3.01  train acc: 0.68  val loss:  2.60  valid acc: 0.34\n",
      "sin i=43500 train loss:  2.88  train acc: 0.66  val loss:  2.60  valid acc: 0.34\n",
      "sin i=44000 train loss:  2.91  train acc: 0.61  val loss:  2.59  valid acc: 0.35\n",
      "sin i=44500 train loss:  2.88  train acc: 0.70  val loss:  2.60  valid acc: 0.35\n",
      "sin i=45000 train loss:  2.78  train acc: 0.67  val loss:  2.59  valid acc: 0.35\n",
      "sin i=45500 train loss:  2.98  train acc: 0.70  val loss:  2.59  valid acc: 0.35\n",
      "sin i=46000 train loss:  2.92  train acc: 0.71  val loss:  2.58  valid acc: 0.35\n",
      "sin i=46500 train loss:  2.98  train acc: 0.60  val loss:  2.59  valid acc: 0.35\n",
      "sin i=47000 train loss:  2.70  train acc: 0.68  val loss:  2.59  valid acc: 0.35\n",
      "sin i=47500 train loss:  2.79  train acc: 0.70  val loss:  2.58  valid acc: 0.35\n",
      "sin i=48000 train loss:  2.81  train acc: 0.68  val loss:  2.59  valid acc: 0.37\n",
      "sin i=48500 train loss:  2.72  train acc: 0.72  val loss:  2.58  valid acc: 0.35\n",
      "sin i=49000 train loss:  2.83  train acc: 0.66  val loss:  2.58  valid acc: 0.36\n",
      "sin i=49500 train loss:  2.83  train acc: 0.68  val loss:  2.59  valid acc: 0.35\n",
      "sin i=50000 train loss:  2.78  train acc: 0.68  val loss:  2.59  valid acc: 0.34\n",
      "-> sin layer idx: 24 , best valid accuracy: 0.38, test accuracy: 0.38\n",
      "sin i=    0 train loss: 62.80  train acc: 0.00  val loss: 15.04  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.34  train acc: 0.37  val loss:  3.87  valid acc: 0.04\n",
      "sin i= 1000 train loss:  4.48  train acc: 0.34  val loss:  3.39  valid acc: 0.08\n",
      "sin i= 1500 train loss:  3.96  train acc: 0.55  val loss:  3.32  valid acc: 0.11\n",
      "sin i= 2000 train loss:  4.32  train acc: 0.38  val loss:  3.23  valid acc: 0.12\n",
      "sin i= 2500 train loss:  4.18  train acc: 0.43  val loss:  3.18  valid acc: 0.13\n",
      "sin i= 3000 train loss:  4.12  train acc: 0.42  val loss:  3.21  valid acc: 0.15\n",
      "sin i= 3500 train loss:  4.05  train acc: 0.47  val loss:  3.18  valid acc: 0.16\n",
      "sin i= 4000 train loss:  4.04  train acc: 0.48  val loss:  3.06  valid acc: 0.19\n",
      "sin i= 4500 train loss:  4.10  train acc: 0.48  val loss:  3.08  valid acc: 0.17\n",
      "sin i= 5000 train loss:  4.19  train acc: 0.46  val loss:  3.20  valid acc: 0.18\n",
      "sin i= 5500 train loss:  3.91  train acc: 0.56  val loss:  3.11  valid acc: 0.17\n",
      "sin i= 6000 train loss:  4.09  train acc: 0.48  val loss:  2.95  valid acc: 0.22\n",
      "sin i= 6500 train loss:  3.88  train acc: 0.59  val loss:  3.01  valid acc: 0.20\n",
      "sin i= 7000 train loss:  3.98  train acc: 0.60  val loss:  2.96  valid acc: 0.22\n",
      "sin i= 7500 train loss:  3.85  train acc: 0.57  val loss:  2.90  valid acc: 0.21\n",
      "sin i= 8000 train loss:  3.84  train acc: 0.56  val loss:  2.86  valid acc: 0.26\n",
      "sin i= 8500 train loss:  3.73  train acc: 0.55  val loss:  2.80  valid acc: 0.26\n",
      "sin i= 9000 train loss:  3.99  train acc: 0.48  val loss:  2.98  valid acc: 0.22\n",
      "sin i= 9500 train loss:  3.93  train acc: 0.52  val loss:  2.93  valid acc: 0.24\n",
      "sin i=10000 train loss:  3.74  train acc: 0.61  val loss:  3.18  valid acc: 0.18\n",
      "sin i=10500 train loss:  4.03  train acc: 0.46  val loss:  2.92  valid acc: 0.24\n",
      "sin i=11000 train loss:  4.27  train acc: 0.42  val loss:  2.84  valid acc: 0.25\n",
      "sin i=11500 train loss:  3.57  train acc: 0.67  val loss:  2.75  valid acc: 0.29\n",
      "sin i=12000 train loss:  3.82  train acc: 0.51  val loss:  2.77  valid acc: 0.25\n",
      "sin i=12500 train loss:  3.66  train acc: 0.56  val loss:  2.71  valid acc: 0.28\n",
      "sin i=13000 train loss:  3.55  train acc: 0.68  val loss:  2.89  valid acc: 0.26\n",
      "sin i=13500 train loss:  3.53  train acc: 0.69  val loss:  2.80  valid acc: 0.28\n",
      "sin i=14000 train loss:  4.00  train acc: 0.59  val loss:  2.74  valid acc: 0.30\n",
      "sin i=14500 train loss:  3.41  train acc: 0.60  val loss:  2.79  valid acc: 0.30\n",
      "sin i=15000 train loss:  3.64  train acc: 0.59  val loss:  2.80  valid acc: 0.26\n",
      "sin i=15500 train loss:  3.53  train acc: 0.61  val loss:  2.75  valid acc: 0.27\n",
      "sin i=16000 train loss:  3.51  train acc: 0.63  val loss:  2.86  valid acc: 0.28\n",
      "sin i=16500 train loss:  3.46  train acc: 0.60  val loss:  2.76  valid acc: 0.30\n",
      "sin i=17000 train loss:  3.52  train acc: 0.54  val loss:  2.67  valid acc: 0.31\n",
      "sin i=17500 train loss:  3.52  train acc: 0.62  val loss:  2.82  valid acc: 0.28\n",
      "sin i=18000 train loss:  3.61  train acc: 0.61  val loss:  2.78  valid acc: 0.30\n",
      "sin i=18500 train loss:  3.57  train acc: 0.60  val loss:  2.57  valid acc: 0.35\n",
      "sin i=19000 train loss:  3.47  train acc: 0.62  val loss:  2.60  valid acc: 0.36\n",
      "sin i=19500 train loss:  3.14  train acc: 0.68  val loss:  2.64  valid acc: 0.35\n",
      "sin i=20000 train loss:  3.30  train acc: 0.64  val loss:  2.65  valid acc: 0.34\n",
      "sin i=20500 train loss:  3.26  train acc: 0.70  val loss:  2.59  valid acc: 0.36\n",
      "sin i=21000 train loss:  3.09  train acc: 0.66  val loss:  2.58  valid acc: 0.38\n",
      "sin i=21500 train loss:  3.42  train acc: 0.58  val loss:  2.59  valid acc: 0.37\n",
      "sin i=22000 train loss:  3.25  train acc: 0.63  val loss:  2.70  valid acc: 0.34\n",
      "sin i=22500 train loss:  3.43  train acc: 0.59  val loss:  2.60  valid acc: 0.38\n",
      "sin i=23000 train loss:  3.57  train acc: 0.59  val loss:  2.63  valid acc: 0.37\n",
      "sin i=23500 train loss:  2.91  train acc: 0.72  val loss:  2.51  valid acc: 0.39\n",
      "sin i=24000 train loss:  3.18  train acc: 0.66  val loss:  2.47  valid acc: 0.41\n",
      "sin i=24500 train loss:  3.06  train acc: 0.68  val loss:  2.53  valid acc: 0.40\n",
      "sin i=25000 train loss:  2.87  train acc: 0.67  val loss:  2.45  valid acc: 0.42\n",
      "sin i=25500 train loss:  2.99  train acc: 0.61  val loss:  2.51  valid acc: 0.41\n",
      "sin i=26000 train loss:  3.20  train acc: 0.66  val loss:  2.59  valid acc: 0.38\n",
      "sin i=26500 train loss:  3.06  train acc: 0.65  val loss:  2.53  valid acc: 0.38\n",
      "sin i=27000 train loss:  3.32  train acc: 0.60  val loss:  2.47  valid acc: 0.42\n",
      "sin i=27500 train loss:  3.12  train acc: 0.66  val loss:  2.47  valid acc: 0.40\n",
      "sin i=28000 train loss:  3.10  train acc: 0.66  val loss:  2.48  valid acc: 0.41\n",
      "sin i=28500 train loss:  3.08  train acc: 0.69  val loss:  2.45  valid acc: 0.43\n",
      "sin i=29000 train loss:  2.81  train acc: 0.63  val loss:  2.46  valid acc: 0.41\n",
      "sin i=29500 train loss:  2.94  train acc: 0.64  val loss:  2.45  valid acc: 0.42\n",
      "sin i=30000 train loss:  2.84  train acc: 0.70  val loss:  2.48  valid acc: 0.40\n",
      "sin i=30500 train loss:  2.72  train acc: 0.72  val loss:  2.45  valid acc: 0.41\n",
      "sin i=31000 train loss:  3.06  train acc: 0.62  val loss:  2.46  valid acc: 0.42\n",
      "sin i=31500 train loss:  2.93  train acc: 0.69  val loss:  2.46  valid acc: 0.41\n",
      "sin i=32000 train loss:  2.89  train acc: 0.70  val loss:  2.45  valid acc: 0.42\n",
      "sin i=32500 train loss:  3.03  train acc: 0.68  val loss:  2.46  valid acc: 0.42\n",
      "sin i=33000 train loss:  2.82  train acc: 0.70  val loss:  2.46  valid acc: 0.42\n",
      "sin i=33500 train loss:  2.84  train acc: 0.66  val loss:  2.47  valid acc: 0.42\n",
      "sin i=34000 train loss:  3.06  train acc: 0.61  val loss:  2.47  valid acc: 0.43\n",
      "sin i=34500 train loss:  2.89  train acc: 0.65  val loss:  2.48  valid acc: 0.41\n",
      "sin i=35000 train loss:  2.93  train acc: 0.62  val loss:  2.48  valid acc: 0.40\n",
      "sin i=35500 train loss:  3.19  train acc: 0.64  val loss:  2.46  valid acc: 0.41\n",
      "sin i=36000 train loss:  2.82  train acc: 0.67  val loss:  2.48  valid acc: 0.41\n",
      "sin i=36500 train loss:  2.79  train acc: 0.71  val loss:  2.46  valid acc: 0.42\n",
      "sin i=37000 train loss:  2.91  train acc: 0.62  val loss:  2.46  valid acc: 0.41\n",
      "sin i=37500 train loss:  2.56  train acc: 0.73  val loss:  2.46  valid acc: 0.43\n",
      "sin i=38000 train loss:  2.93  train acc: 0.65  val loss:  2.47  valid acc: 0.41\n",
      "sin i=38500 train loss:  2.80  train acc: 0.71  val loss:  2.48  valid acc: 0.41\n",
      "sin i=39000 train loss:  3.15  train acc: 0.58  val loss:  2.46  valid acc: 0.42\n",
      "sin i=39500 train loss:  2.75  train acc: 0.71  val loss:  2.46  valid acc: 0.42\n",
      "sin i=40000 train loss:  2.90  train acc: 0.73  val loss:  2.46  valid acc: 0.42\n",
      "sin i=40500 train loss:  2.88  train acc: 0.70  val loss:  2.48  valid acc: 0.41\n",
      "sin i=41000 train loss:  3.01  train acc: 0.65  val loss:  2.45  valid acc: 0.42\n",
      "sin i=41500 train loss:  3.09  train acc: 0.67  val loss:  2.46  valid acc: 0.43\n",
      "sin i=42000 train loss:  2.78  train acc: 0.66  val loss:  2.47  valid acc: 0.40\n",
      "sin i=42500 train loss:  2.97  train acc: 0.64  val loss:  2.46  valid acc: 0.42\n",
      "sin i=43000 train loss:  3.25  train acc: 0.59  val loss:  2.47  valid acc: 0.42\n",
      "sin i=43500 train loss:  2.92  train acc: 0.66  val loss:  2.47  valid acc: 0.42\n",
      "sin i=44000 train loss:  3.03  train acc: 0.69  val loss:  2.46  valid acc: 0.42\n",
      "sin i=44500 train loss:  3.03  train acc: 0.63  val loss:  2.46  valid acc: 0.42\n",
      "sin i=45000 train loss:  2.88  train acc: 0.66  val loss:  2.46  valid acc: 0.42\n",
      "sin i=45500 train loss:  3.05  train acc: 0.66  val loss:  2.46  valid acc: 0.42\n",
      "sin i=46000 train loss:  3.00  train acc: 0.62  val loss:  2.46  valid acc: 0.42\n",
      "sin i=46500 train loss:  3.12  train acc: 0.56  val loss:  2.47  valid acc: 0.41\n",
      "sin i=47000 train loss:  2.79  train acc: 0.73  val loss:  2.46  valid acc: 0.41\n",
      "sin i=47500 train loss:  2.92  train acc: 0.70  val loss:  2.44  valid acc: 0.43\n",
      "sin i=48000 train loss:  3.03  train acc: 0.62  val loss:  2.46  valid acc: 0.42\n",
      "sin i=48500 train loss:  2.84  train acc: 0.73  val loss:  2.45  valid acc: 0.42\n",
      "sin i=49000 train loss:  2.99  train acc: 0.58  val loss:  2.44  valid acc: 0.42\n",
      "sin i=49500 train loss:  2.82  train acc: 0.70  val loss:  2.45  valid acc: 0.41\n",
      "sin i=50000 train loss:  2.87  train acc: 0.63  val loss:  2.45  valid acc: 0.41\n",
      "-> sin layer idx: 23 , best valid accuracy: 0.43, test accuracy: 0.35\n",
      "sin i=    0 train loss: 62.69  train acc: 0.00  val loss: 17.35  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.42  train acc: 0.37  val loss:  3.90  valid acc: 0.03\n",
      "sin i= 1000 train loss:  4.50  train acc: 0.30  val loss:  3.76  valid acc: 0.03\n",
      "sin i= 1500 train loss:  4.16  train acc: 0.50  val loss:  3.47  valid acc: 0.09\n",
      "sin i= 2000 train loss:  4.35  train acc: 0.45  val loss:  3.37  valid acc: 0.09\n",
      "sin i= 2500 train loss:  4.19  train acc: 0.45  val loss:  3.30  valid acc: 0.13\n",
      "sin i= 3000 train loss:  4.30  train acc: 0.39  val loss:  3.20  valid acc: 0.16\n",
      "sin i= 3500 train loss:  4.03  train acc: 0.52  val loss:  3.13  valid acc: 0.18\n",
      "sin i= 4000 train loss:  4.10  train acc: 0.48  val loss:  3.08  valid acc: 0.17\n",
      "sin i= 4500 train loss:  4.13  train acc: 0.48  val loss:  3.22  valid acc: 0.14\n",
      "sin i= 5000 train loss:  4.24  train acc: 0.43  val loss:  3.31  valid acc: 0.15\n",
      "sin i= 5500 train loss:  3.98  train acc: 0.45  val loss:  3.12  valid acc: 0.17\n",
      "sin i= 6000 train loss:  4.11  train acc: 0.45  val loss:  3.00  valid acc: 0.22\n",
      "sin i= 6500 train loss:  3.95  train acc: 0.44  val loss:  3.08  valid acc: 0.21\n",
      "sin i= 7000 train loss:  3.98  train acc: 0.45  val loss:  2.98  valid acc: 0.22\n",
      "sin i= 7500 train loss:  3.92  train acc: 0.45  val loss:  3.13  valid acc: 0.17\n",
      "sin i= 8000 train loss:  3.86  train acc: 0.50  val loss:  3.16  valid acc: 0.17\n",
      "sin i= 8500 train loss:  3.85  train acc: 0.52  val loss:  2.99  valid acc: 0.20\n",
      "sin i= 9000 train loss:  4.03  train acc: 0.42  val loss:  3.05  valid acc: 0.15\n",
      "sin i= 9500 train loss:  4.04  train acc: 0.44  val loss:  2.97  valid acc: 0.18\n",
      "sin i=10000 train loss:  3.85  train acc: 0.51  val loss:  3.22  valid acc: 0.19\n",
      "sin i=10500 train loss:  4.10  train acc: 0.41  val loss:  2.94  valid acc: 0.20\n",
      "sin i=11000 train loss:  4.42  train acc: 0.37  val loss:  2.99  valid acc: 0.22\n",
      "sin i=11500 train loss:  3.66  train acc: 0.53  val loss:  2.94  valid acc: 0.21\n",
      "sin i=12000 train loss:  3.75  train acc: 0.48  val loss:  2.97  valid acc: 0.23\n",
      "sin i=12500 train loss:  3.80  train acc: 0.48  val loss:  2.91  valid acc: 0.22\n",
      "sin i=13000 train loss:  3.72  train acc: 0.48  val loss:  3.00  valid acc: 0.21\n",
      "sin i=13500 train loss:  3.74  train acc: 0.53  val loss:  2.93  valid acc: 0.22\n",
      "sin i=14000 train loss:  4.21  train acc: 0.41  val loss:  2.94  valid acc: 0.21\n",
      "sin i=14500 train loss:  3.58  train acc: 0.52  val loss:  2.90  valid acc: 0.26\n",
      "sin i=15000 train loss:  3.82  train acc: 0.44  val loss:  2.91  valid acc: 0.22\n",
      "sin i=15500 train loss:  3.62  train acc: 0.57  val loss:  2.88  valid acc: 0.24\n",
      "sin i=16000 train loss:  3.68  train acc: 0.56  val loss:  2.85  valid acc: 0.24\n",
      "sin i=16500 train loss:  3.64  train acc: 0.58  val loss:  2.83  valid acc: 0.25\n",
      "sin i=17000 train loss:  3.64  train acc: 0.49  val loss:  2.84  valid acc: 0.24\n",
      "sin i=17500 train loss:  3.63  train acc: 0.54  val loss:  2.92  valid acc: 0.23\n",
      "sin i=18000 train loss:  3.69  train acc: 0.52  val loss:  2.83  valid acc: 0.24\n",
      "sin i=18500 train loss:  3.73  train acc: 0.55  val loss:  2.83  valid acc: 0.25\n",
      "sin i=19000 train loss:  3.57  train acc: 0.56  val loss:  2.74  valid acc: 0.27\n",
      "sin i=19500 train loss:  3.30  train acc: 0.62  val loss:  2.82  valid acc: 0.25\n",
      "sin i=20000 train loss:  3.43  train acc: 0.56  val loss:  2.80  valid acc: 0.27\n",
      "sin i=20500 train loss:  3.49  train acc: 0.59  val loss:  2.78  valid acc: 0.28\n",
      "sin i=21000 train loss:  3.23  train acc: 0.65  val loss:  2.72  valid acc: 0.31\n",
      "sin i=21500 train loss:  3.53  train acc: 0.57  val loss:  2.71  valid acc: 0.31\n",
      "sin i=22000 train loss:  3.38  train acc: 0.59  val loss:  2.76  valid acc: 0.29\n",
      "sin i=22500 train loss:  3.57  train acc: 0.60  val loss:  2.78  valid acc: 0.31\n",
      "sin i=23000 train loss:  3.57  train acc: 0.56  val loss:  2.75  valid acc: 0.31\n",
      "sin i=23500 train loss:  3.09  train acc: 0.65  val loss:  2.67  valid acc: 0.34\n",
      "sin i=24000 train loss:  3.46  train acc: 0.55  val loss:  2.62  valid acc: 0.35\n",
      "sin i=24500 train loss:  3.15  train acc: 0.65  val loss:  2.71  valid acc: 0.33\n",
      "sin i=25000 train loss:  3.01  train acc: 0.69  val loss:  2.66  valid acc: 0.34\n",
      "sin i=25500 train loss:  3.11  train acc: 0.66  val loss:  2.71  valid acc: 0.33\n",
      "sin i=26000 train loss:  3.32  train acc: 0.63  val loss:  2.71  valid acc: 0.35\n",
      "sin i=26500 train loss:  3.23  train acc: 0.60  val loss:  2.64  valid acc: 0.36\n",
      "sin i=27000 train loss:  3.42  train acc: 0.60  val loss:  2.66  valid acc: 0.33\n",
      "sin i=27500 train loss:  3.23  train acc: 0.64  val loss:  2.65  valid acc: 0.34\n",
      "sin i=28000 train loss:  3.17  train acc: 0.62  val loss:  2.62  valid acc: 0.36\n",
      "sin i=28500 train loss:  3.26  train acc: 0.63  val loss:  2.62  valid acc: 0.35\n",
      "sin i=29000 train loss:  2.93  train acc: 0.67  val loss:  2.63  valid acc: 0.36\n",
      "sin i=29500 train loss:  3.01  train acc: 0.64  val loss:  2.62  valid acc: 0.36\n",
      "sin i=30000 train loss:  2.91  train acc: 0.67  val loss:  2.64  valid acc: 0.36\n",
      "sin i=30500 train loss:  2.87  train acc: 0.68  val loss:  2.62  valid acc: 0.36\n",
      "sin i=31000 train loss:  3.14  train acc: 0.62  val loss:  2.63  valid acc: 0.36\n",
      "sin i=31500 train loss:  3.07  train acc: 0.61  val loss:  2.62  valid acc: 0.37\n",
      "sin i=32000 train loss:  3.03  train acc: 0.67  val loss:  2.62  valid acc: 0.36\n",
      "sin i=32500 train loss:  3.07  train acc: 0.64  val loss:  2.62  valid acc: 0.36\n",
      "sin i=33000 train loss:  2.95  train acc: 0.62  val loss:  2.62  valid acc: 0.36\n",
      "sin i=33500 train loss:  2.95  train acc: 0.65  val loss:  2.62  valid acc: 0.36\n",
      "sin i=34000 train loss:  3.30  train acc: 0.54  val loss:  2.63  valid acc: 0.36\n",
      "sin i=34500 train loss:  2.99  train acc: 0.64  val loss:  2.63  valid acc: 0.36\n",
      "sin i=35000 train loss:  2.98  train acc: 0.61  val loss:  2.63  valid acc: 0.36\n",
      "sin i=35500 train loss:  3.23  train acc: 0.59  val loss:  2.62  valid acc: 0.35\n",
      "sin i=36000 train loss:  2.96  train acc: 0.64  val loss:  2.63  valid acc: 0.37\n",
      "sin i=36500 train loss:  2.96  train acc: 0.62  val loss:  2.63  valid acc: 0.35\n",
      "sin i=37000 train loss:  3.08  train acc: 0.59  val loss:  2.62  valid acc: 0.36\n",
      "sin i=37500 train loss:  2.68  train acc: 0.75  val loss:  2.63  valid acc: 0.36\n",
      "sin i=38000 train loss:  2.96  train acc: 0.66  val loss:  2.63  valid acc: 0.35\n",
      "sin i=38500 train loss:  2.85  train acc: 0.69  val loss:  2.63  valid acc: 0.36\n",
      "sin i=39000 train loss:  3.22  train acc: 0.62  val loss:  2.62  valid acc: 0.35\n",
      "sin i=39500 train loss:  2.73  train acc: 0.73  val loss:  2.62  valid acc: 0.36\n",
      "sin i=40000 train loss:  2.99  train acc: 0.69  val loss:  2.62  valid acc: 0.36\n",
      "sin i=40500 train loss:  3.07  train acc: 0.68  val loss:  2.64  valid acc: 0.36\n",
      "sin i=41000 train loss:  3.06  train acc: 0.69  val loss:  2.61  valid acc: 0.36\n",
      "sin i=41500 train loss:  3.19  train acc: 0.60  val loss:  2.62  valid acc: 0.35\n",
      "sin i=42000 train loss:  2.81  train acc: 0.66  val loss:  2.62  valid acc: 0.35\n",
      "sin i=42500 train loss:  2.99  train acc: 0.63  val loss:  2.62  valid acc: 0.36\n",
      "sin i=43000 train loss:  3.28  train acc: 0.62  val loss:  2.63  valid acc: 0.36\n",
      "sin i=43500 train loss:  3.15  train acc: 0.65  val loss:  2.63  valid acc: 0.35\n",
      "sin i=44000 train loss:  3.11  train acc: 0.64  val loss:  2.62  valid acc: 0.36\n",
      "sin i=44500 train loss:  3.18  train acc: 0.60  val loss:  2.62  valid acc: 0.36\n",
      "sin i=45000 train loss:  3.04  train acc: 0.66  val loss:  2.62  valid acc: 0.36\n",
      "sin i=45500 train loss:  3.10  train acc: 0.66  val loss:  2.62  valid acc: 0.36\n",
      "sin i=46000 train loss:  3.06  train acc: 0.65  val loss:  2.62  valid acc: 0.36\n",
      "sin i=46500 train loss:  3.24  train acc: 0.62  val loss:  2.62  valid acc: 0.35\n",
      "sin i=47000 train loss:  2.92  train acc: 0.66  val loss:  2.62  valid acc: 0.36\n",
      "sin i=47500 train loss:  2.96  train acc: 0.69  val loss:  2.61  valid acc: 0.36\n",
      "sin i=48000 train loss:  2.99  train acc: 0.66  val loss:  2.62  valid acc: 0.36\n",
      "sin i=48500 train loss:  2.93  train acc: 0.67  val loss:  2.61  valid acc: 0.36\n",
      "sin i=49000 train loss:  3.15  train acc: 0.62  val loss:  2.61  valid acc: 0.36\n",
      "sin i=49500 train loss:  3.00  train acc: 0.66  val loss:  2.61  valid acc: 0.36\n",
      "sin i=50000 train loss:  3.04  train acc: 0.61  val loss:  2.61  valid acc: 0.36\n",
      "-> sin layer idx: 22 , best valid accuracy: 0.37, test accuracy: 0.33\n",
      "sin i=    0 train loss: 62.69  train acc: 0.00  val loss: 17.58  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.45  train acc: 0.39  val loss:  4.04  valid acc: 0.03\n",
      "sin i= 1000 train loss:  4.64  train acc: 0.26  val loss:  4.10  valid acc: 0.01\n",
      "sin i= 1500 train loss:  4.25  train acc: 0.40  val loss:  3.96  valid acc: 0.02\n",
      "sin i= 2000 train loss:  4.42  train acc: 0.34  val loss:  4.07  valid acc: 0.02\n",
      "sin i= 2500 train loss:  4.50  train acc: 0.30  val loss:  3.97  valid acc: 0.03\n",
      "sin i= 3000 train loss:  4.48  train acc: 0.27  val loss:  3.88  valid acc: 0.04\n",
      "sin i= 3500 train loss:  4.37  train acc: 0.38  val loss:  3.92  valid acc: 0.01\n",
      "sin i= 4000 train loss:  4.34  train acc: 0.38  val loss:  3.74  valid acc: 0.05\n",
      "sin i= 4500 train loss:  4.41  train acc: 0.41  val loss:  3.83  valid acc: 0.03\n",
      "sin i= 5000 train loss:  4.41  train acc: 0.34  val loss:  3.87  valid acc: 0.04\n",
      "sin i= 5500 train loss:  4.24  train acc: 0.42  val loss:  3.67  valid acc: 0.03\n",
      "sin i= 6000 train loss:  4.22  train acc: 0.45  val loss:  3.59  valid acc: 0.06\n",
      "sin i= 6500 train loss:  4.10  train acc: 0.39  val loss:  3.52  valid acc: 0.06\n",
      "sin i= 7000 train loss:  4.34  train acc: 0.42  val loss:  3.52  valid acc: 0.05\n",
      "sin i= 7500 train loss:  4.15  train acc: 0.41  val loss:  3.55  valid acc: 0.05\n",
      "sin i= 8000 train loss:  4.12  train acc: 0.40  val loss:  3.61  valid acc: 0.07\n",
      "sin i= 8500 train loss:  4.19  train acc: 0.38  val loss:  3.48  valid acc: 0.05\n",
      "sin i= 9000 train loss:  4.38  train acc: 0.33  val loss:  3.53  valid acc: 0.04\n",
      "sin i= 9500 train loss:  4.29  train acc: 0.33  val loss:  3.46  valid acc: 0.06\n",
      "sin i=10000 train loss:  4.06  train acc: 0.39  val loss:  3.57  valid acc: 0.06\n",
      "sin i=10500 train loss:  4.28  train acc: 0.37  val loss:  3.42  valid acc: 0.07\n",
      "sin i=11000 train loss:  4.52  train acc: 0.31  val loss:  3.52  valid acc: 0.09\n",
      "sin i=11500 train loss:  3.92  train acc: 0.44  val loss:  3.42  valid acc: 0.09\n",
      "sin i=12000 train loss:  3.86  train acc: 0.45  val loss:  3.51  valid acc: 0.07\n",
      "sin i=12500 train loss:  3.96  train acc: 0.36  val loss:  3.40  valid acc: 0.11\n",
      "sin i=13000 train loss:  4.04  train acc: 0.40  val loss:  3.40  valid acc: 0.08\n",
      "sin i=13500 train loss:  3.94  train acc: 0.45  val loss:  3.40  valid acc: 0.07\n",
      "sin i=14000 train loss:  4.46  train acc: 0.33  val loss:  3.33  valid acc: 0.09\n",
      "sin i=14500 train loss:  3.91  train acc: 0.42  val loss:  3.41  valid acc: 0.10\n",
      "sin i=15000 train loss:  4.05  train acc: 0.40  val loss:  3.41  valid acc: 0.08\n",
      "sin i=15500 train loss:  3.85  train acc: 0.44  val loss:  3.40  valid acc: 0.08\n",
      "sin i=16000 train loss:  3.88  train acc: 0.45  val loss:  3.36  valid acc: 0.09\n",
      "sin i=16500 train loss:  3.72  train acc: 0.45  val loss:  3.33  valid acc: 0.11\n",
      "sin i=17000 train loss:  3.94  train acc: 0.37  val loss:  3.37  valid acc: 0.10\n",
      "sin i=17500 train loss:  3.82  train acc: 0.43  val loss:  3.41  valid acc: 0.09\n",
      "sin i=18000 train loss:  3.78  train acc: 0.45  val loss:  3.40  valid acc: 0.07\n",
      "sin i=18500 train loss:  4.09  train acc: 0.39  val loss:  3.38  valid acc: 0.07\n",
      "sin i=19000 train loss:  3.77  train acc: 0.49  val loss:  3.33  valid acc: 0.09\n",
      "sin i=19500 train loss:  3.54  train acc: 0.52  val loss:  3.34  valid acc: 0.08\n",
      "sin i=20000 train loss:  3.71  train acc: 0.43  val loss:  3.42  valid acc: 0.08\n",
      "sin i=20500 train loss:  3.76  train acc: 0.45  val loss:  3.35  valid acc: 0.07\n",
      "sin i=21000 train loss:  3.55  train acc: 0.48  val loss:  3.32  valid acc: 0.10\n",
      "sin i=21500 train loss:  3.85  train acc: 0.38  val loss:  3.30  valid acc: 0.08\n",
      "sin i=22000 train loss:  3.67  train acc: 0.41  val loss:  3.31  valid acc: 0.10\n",
      "sin i=22500 train loss:  3.93  train acc: 0.37  val loss:  3.38  valid acc: 0.09\n",
      "sin i=23000 train loss:  3.93  train acc: 0.34  val loss:  3.30  valid acc: 0.09\n",
      "sin i=23500 train loss:  3.35  train acc: 0.48  val loss:  3.28  valid acc: 0.10\n",
      "sin i=24000 train loss:  3.50  train acc: 0.46  val loss:  3.25  valid acc: 0.09\n",
      "sin i=24500 train loss:  3.56  train acc: 0.42  val loss:  3.29  valid acc: 0.10\n",
      "sin i=25000 train loss:  3.34  train acc: 0.47  val loss:  3.24  valid acc: 0.11\n",
      "sin i=25500 train loss:  3.55  train acc: 0.43  val loss:  3.30  valid acc: 0.09\n",
      "sin i=26000 train loss:  3.55  train acc: 0.42  val loss:  3.27  valid acc: 0.10\n",
      "sin i=26500 train loss:  3.67  train acc: 0.40  val loss:  3.25  valid acc: 0.12\n",
      "sin i=27000 train loss:  3.86  train acc: 0.36  val loss:  3.28  valid acc: 0.08\n",
      "sin i=27500 train loss:  3.56  train acc: 0.46  val loss:  3.26  valid acc: 0.10\n",
      "sin i=28000 train loss:  3.40  train acc: 0.49  val loss:  3.25  valid acc: 0.10\n",
      "sin i=28500 train loss:  3.49  train acc: 0.51  val loss:  3.25  valid acc: 0.10\n",
      "sin i=29000 train loss:  3.41  train acc: 0.41  val loss:  3.27  valid acc: 0.10\n",
      "sin i=29500 train loss:  3.47  train acc: 0.38  val loss:  3.26  valid acc: 0.10\n",
      "sin i=30000 train loss:  3.25  train acc: 0.47  val loss:  3.26  valid acc: 0.10\n",
      "sin i=30500 train loss:  3.28  train acc: 0.49  val loss:  3.25  valid acc: 0.10\n",
      "sin i=31000 train loss:  3.54  train acc: 0.40  val loss:  3.26  valid acc: 0.10\n",
      "sin i=31500 train loss:  3.38  train acc: 0.45  val loss:  3.26  valid acc: 0.10\n",
      "sin i=32000 train loss:  3.34  train acc: 0.45  val loss:  3.26  valid acc: 0.11\n",
      "sin i=32500 train loss:  3.42  train acc: 0.48  val loss:  3.26  valid acc: 0.11\n",
      "sin i=33000 train loss:  3.18  train acc: 0.47  val loss:  3.25  valid acc: 0.11\n",
      "sin i=33500 train loss:  3.36  train acc: 0.44  val loss:  3.26  valid acc: 0.11\n",
      "sin i=34000 train loss:  3.48  train acc: 0.43  val loss:  3.26  valid acc: 0.11\n",
      "sin i=34500 train loss:  3.28  train acc: 0.41  val loss:  3.25  valid acc: 0.11\n",
      "sin i=35000 train loss:  3.34  train acc: 0.42  val loss:  3.26  valid acc: 0.10\n",
      "sin i=35500 train loss:  3.57  train acc: 0.45  val loss:  3.26  valid acc: 0.09\n",
      "sin i=36000 train loss:  3.44  train acc: 0.43  val loss:  3.25  valid acc: 0.10\n",
      "sin i=36500 train loss:  3.33  train acc: 0.48  val loss:  3.25  valid acc: 0.10\n",
      "sin i=37000 train loss:  3.52  train acc: 0.34  val loss:  3.25  valid acc: 0.10\n",
      "sin i=37500 train loss:  3.00  train acc: 0.52  val loss:  3.25  valid acc: 0.10\n",
      "sin i=38000 train loss:  3.35  train acc: 0.42  val loss:  3.25  valid acc: 0.10\n",
      "sin i=38500 train loss:  3.30  train acc: 0.44  val loss:  3.25  valid acc: 0.09\n",
      "sin i=39000 train loss:  3.55  train acc: 0.41  val loss:  3.25  valid acc: 0.10\n",
      "sin i=39500 train loss:  3.14  train acc: 0.49  val loss:  3.26  valid acc: 0.10\n",
      "sin i=40000 train loss:  3.27  train acc: 0.52  val loss:  3.25  valid acc: 0.10\n",
      "sin i=40500 train loss:  3.44  train acc: 0.40  val loss:  3.26  valid acc: 0.10\n",
      "sin i=41000 train loss:  3.46  train acc: 0.42  val loss:  3.25  valid acc: 0.10\n",
      "sin i=41500 train loss:  3.48  train acc: 0.45  val loss:  3.25  valid acc: 0.10\n",
      "sin i=42000 train loss:  3.14  train acc: 0.55  val loss:  3.26  valid acc: 0.10\n",
      "sin i=42500 train loss:  3.40  train acc: 0.45  val loss:  3.25  valid acc: 0.09\n",
      "sin i=43000 train loss:  3.54  train acc: 0.45  val loss:  3.25  valid acc: 0.10\n",
      "sin i=43500 train loss:  3.54  train acc: 0.38  val loss:  3.25  valid acc: 0.10\n",
      "sin i=44000 train loss:  3.49  train acc: 0.44  val loss:  3.26  valid acc: 0.10\n",
      "sin i=44500 train loss:  3.61  train acc: 0.35  val loss:  3.25  valid acc: 0.10\n",
      "sin i=45000 train loss:  3.38  train acc: 0.45  val loss:  3.25  valid acc: 0.10\n",
      "sin i=45500 train loss:  3.47  train acc: 0.38  val loss:  3.25  valid acc: 0.09\n",
      "sin i=46000 train loss:  3.26  train acc: 0.47  val loss:  3.26  valid acc: 0.10\n",
      "sin i=46500 train loss:  3.67  train acc: 0.38  val loss:  3.25  valid acc: 0.11\n",
      "sin i=47000 train loss:  3.34  train acc: 0.38  val loss:  3.26  valid acc: 0.10\n",
      "sin i=47500 train loss:  3.46  train acc: 0.42  val loss:  3.24  valid acc: 0.11\n",
      "sin i=48000 train loss:  3.34  train acc: 0.45  val loss:  3.24  valid acc: 0.10\n",
      "sin i=48500 train loss:  3.30  train acc: 0.43  val loss:  3.25  valid acc: 0.11\n",
      "sin i=49000 train loss:  3.51  train acc: 0.37  val loss:  3.25  valid acc: 0.11\n",
      "sin i=49500 train loss:  3.22  train acc: 0.54  val loss:  3.26  valid acc: 0.11\n",
      "sin i=50000 train loss:  3.32  train acc: 0.41  val loss:  3.25  valid acc: 0.11\n",
      "-> sin layer idx: 21 , best valid accuracy: 0.12, test accuracy: 0.09\n",
      "sin i=    0 train loss: 62.66  train acc: 0.00  val loss: 14.20  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.55  train acc: 0.38  val loss:  4.09  valid acc: 0.02\n",
      "sin i= 1000 train loss:  4.81  train acc: 0.25  val loss:  4.12  valid acc: 0.02\n",
      "sin i= 1500 train loss:  4.35  train acc: 0.38  val loss:  4.05  valid acc: 0.03\n",
      "sin i= 2000 train loss:  4.49  train acc: 0.38  val loss:  4.13  valid acc: 0.00\n",
      "sin i= 2500 train loss:  4.55  train acc: 0.29  val loss:  4.01  valid acc: 0.04\n",
      "sin i= 3000 train loss:  4.61  train acc: 0.29  val loss:  4.05  valid acc: 0.04\n",
      "sin i= 3500 train loss:  4.40  train acc: 0.36  val loss:  4.02  valid acc: 0.01\n",
      "sin i= 4000 train loss:  4.38  train acc: 0.38  val loss:  3.91  valid acc: 0.02\n",
      "sin i= 4500 train loss:  4.48  train acc: 0.37  val loss:  4.03  valid acc: 0.02\n",
      "sin i= 5000 train loss:  4.53  train acc: 0.32  val loss:  4.08  valid acc: 0.03\n",
      "sin i= 5500 train loss:  4.31  train acc: 0.36  val loss:  3.98  valid acc: 0.02\n",
      "sin i= 6000 train loss:  4.43  train acc: 0.38  val loss:  3.96  valid acc: 0.02\n",
      "sin i= 6500 train loss:  4.23  train acc: 0.35  val loss:  3.87  valid acc: 0.03\n",
      "sin i= 7000 train loss:  4.44  train acc: 0.38  val loss:  3.89  valid acc: 0.05\n",
      "sin i= 7500 train loss:  4.34  train acc: 0.32  val loss:  3.92  valid acc: 0.04\n",
      "sin i= 8000 train loss:  4.33  train acc: 0.39  val loss:  3.94  valid acc: 0.04\n",
      "sin i= 8500 train loss:  4.35  train acc: 0.29  val loss:  3.80  valid acc: 0.03\n",
      "sin i= 9000 train loss:  4.52  train acc: 0.28  val loss:  3.80  valid acc: 0.04\n",
      "sin i= 9500 train loss:  4.37  train acc: 0.34  val loss:  3.69  valid acc: 0.04\n",
      "sin i=10000 train loss:  4.10  train acc: 0.37  val loss:  3.71  valid acc: 0.03\n",
      "sin i=10500 train loss:  4.37  train acc: 0.35  val loss:  3.55  valid acc: 0.06\n",
      "sin i=11000 train loss:  4.63  train acc: 0.30  val loss:  3.58  valid acc: 0.05\n",
      "sin i=11500 train loss:  4.05  train acc: 0.41  val loss:  3.51  valid acc: 0.10\n",
      "sin i=12000 train loss:  4.05  train acc: 0.41  val loss:  3.57  valid acc: 0.06\n",
      "sin i=12500 train loss:  4.02  train acc: 0.37  val loss:  3.49  valid acc: 0.10\n",
      "sin i=13000 train loss:  4.05  train acc: 0.39  val loss:  3.48  valid acc: 0.08\n",
      "sin i=13500 train loss:  3.98  train acc: 0.45  val loss:  3.46  valid acc: 0.09\n",
      "sin i=14000 train loss:  4.46  train acc: 0.30  val loss:  3.43  valid acc: 0.08\n",
      "sin i=14500 train loss:  3.95  train acc: 0.43  val loss:  3.46  valid acc: 0.08\n",
      "sin i=15000 train loss:  4.14  train acc: 0.41  val loss:  3.47  valid acc: 0.08\n",
      "sin i=15500 train loss:  3.91  train acc: 0.44  val loss:  3.45  valid acc: 0.07\n",
      "sin i=16000 train loss:  3.92  train acc: 0.47  val loss:  3.41  valid acc: 0.08\n",
      "sin i=16500 train loss:  3.80  train acc: 0.42  val loss:  3.43  valid acc: 0.08\n",
      "sin i=17000 train loss:  4.00  train acc: 0.40  val loss:  3.41  valid acc: 0.09\n",
      "sin i=17500 train loss:  3.85  train acc: 0.40  val loss:  3.51  valid acc: 0.10\n",
      "sin i=18000 train loss:  3.81  train acc: 0.41  val loss:  3.43  valid acc: 0.06\n",
      "sin i=18500 train loss:  4.16  train acc: 0.38  val loss:  3.49  valid acc: 0.08\n",
      "sin i=19000 train loss:  3.78  train acc: 0.46  val loss:  3.39  valid acc: 0.08\n",
      "sin i=19500 train loss:  3.60  train acc: 0.50  val loss:  3.43  valid acc: 0.08\n",
      "sin i=20000 train loss:  3.78  train acc: 0.43  val loss:  3.54  valid acc: 0.08\n",
      "sin i=20500 train loss:  3.82  train acc: 0.46  val loss:  3.43  valid acc: 0.08\n",
      "sin i=21000 train loss:  3.63  train acc: 0.45  val loss:  3.44  valid acc: 0.09\n",
      "sin i=21500 train loss:  3.89  train acc: 0.37  val loss:  3.36  valid acc: 0.08\n",
      "sin i=22000 train loss:  3.73  train acc: 0.42  val loss:  3.39  valid acc: 0.09\n",
      "sin i=22500 train loss:  3.99  train acc: 0.40  val loss:  3.46  valid acc: 0.08\n",
      "sin i=23000 train loss:  3.97  train acc: 0.31  val loss:  3.37  valid acc: 0.08\n",
      "sin i=23500 train loss:  3.43  train acc: 0.47  val loss:  3.36  valid acc: 0.09\n",
      "sin i=24000 train loss:  3.62  train acc: 0.45  val loss:  3.34  valid acc: 0.09\n",
      "sin i=24500 train loss:  3.63  train acc: 0.44  val loss:  3.40  valid acc: 0.09\n",
      "sin i=25000 train loss:  3.42  train acc: 0.48  val loss:  3.35  valid acc: 0.10\n",
      "sin i=25500 train loss:  3.62  train acc: 0.41  val loss:  3.38  valid acc: 0.10\n",
      "sin i=26000 train loss:  3.59  train acc: 0.39  val loss:  3.39  valid acc: 0.09\n",
      "sin i=26500 train loss:  3.70  train acc: 0.41  val loss:  3.36  valid acc: 0.09\n",
      "sin i=27000 train loss:  3.93  train acc: 0.37  val loss:  3.39  valid acc: 0.09\n",
      "sin i=27500 train loss:  3.58  train acc: 0.49  val loss:  3.37  valid acc: 0.08\n",
      "sin i=28000 train loss:  3.54  train acc: 0.45  val loss:  3.36  valid acc: 0.09\n",
      "sin i=28500 train loss:  3.59  train acc: 0.45  val loss:  3.38  valid acc: 0.10\n",
      "sin i=29000 train loss:  3.48  train acc: 0.41  val loss:  3.38  valid acc: 0.09\n",
      "sin i=29500 train loss:  3.50  train acc: 0.42  val loss:  3.37  valid acc: 0.09\n",
      "sin i=30000 train loss:  3.34  train acc: 0.49  val loss:  3.37  valid acc: 0.09\n",
      "sin i=30500 train loss:  3.33  train acc: 0.47  val loss:  3.37  valid acc: 0.09\n",
      "sin i=31000 train loss:  3.62  train acc: 0.39  val loss:  3.37  valid acc: 0.09\n",
      "sin i=31500 train loss:  3.46  train acc: 0.41  val loss:  3.37  valid acc: 0.09\n",
      "sin i=32000 train loss:  3.42  train acc: 0.41  val loss:  3.37  valid acc: 0.09\n",
      "sin i=32500 train loss:  3.46  train acc: 0.48  val loss:  3.37  valid acc: 0.09\n",
      "sin i=33000 train loss:  3.21  train acc: 0.48  val loss:  3.37  valid acc: 0.09\n",
      "sin i=33500 train loss:  3.46  train acc: 0.42  val loss:  3.37  valid acc: 0.10\n",
      "sin i=34000 train loss:  3.63  train acc: 0.40  val loss:  3.37  valid acc: 0.10\n",
      "sin i=34500 train loss:  3.37  train acc: 0.41  val loss:  3.37  valid acc: 0.10\n",
      "sin i=35000 train loss:  3.44  train acc: 0.40  val loss:  3.37  valid acc: 0.09\n",
      "sin i=35500 train loss:  3.64  train acc: 0.40  val loss:  3.37  valid acc: 0.09\n",
      "sin i=36000 train loss:  3.51  train acc: 0.39  val loss:  3.37  valid acc: 0.09\n",
      "sin i=36500 train loss:  3.38  train acc: 0.47  val loss:  3.36  valid acc: 0.10\n",
      "sin i=37000 train loss:  3.57  train acc: 0.40  val loss:  3.36  valid acc: 0.10\n",
      "sin i=37500 train loss:  3.03  train acc: 0.56  val loss:  3.37  valid acc: 0.10\n",
      "sin i=38000 train loss:  3.38  train acc: 0.46  val loss:  3.37  valid acc: 0.10\n",
      "sin i=38500 train loss:  3.37  train acc: 0.41  val loss:  3.37  valid acc: 0.10\n",
      "sin i=39000 train loss:  3.57  train acc: 0.41  val loss:  3.37  valid acc: 0.09\n",
      "sin i=39500 train loss:  3.20  train acc: 0.50  val loss:  3.37  valid acc: 0.10\n",
      "sin i=40000 train loss:  3.39  train acc: 0.48  val loss:  3.37  valid acc: 0.10\n",
      "sin i=40500 train loss:  3.52  train acc: 0.41  val loss:  3.38  valid acc: 0.09\n",
      "sin i=41000 train loss:  3.53  train acc: 0.41  val loss:  3.37  valid acc: 0.09\n",
      "sin i=41500 train loss:  3.61  train acc: 0.41  val loss:  3.37  valid acc: 0.10\n",
      "sin i=42000 train loss:  3.23  train acc: 0.48  val loss:  3.37  valid acc: 0.10\n",
      "sin i=42500 train loss:  3.46  train acc: 0.40  val loss:  3.37  valid acc: 0.09\n",
      "sin i=43000 train loss:  3.64  train acc: 0.44  val loss:  3.37  valid acc: 0.10\n",
      "sin i=43500 train loss:  3.66  train acc: 0.35  val loss:  3.36  valid acc: 0.09\n",
      "sin i=44000 train loss:  3.55  train acc: 0.43  val loss:  3.38  valid acc: 0.09\n",
      "sin i=44500 train loss:  3.74  train acc: 0.35  val loss:  3.37  valid acc: 0.10\n",
      "sin i=45000 train loss:  3.52  train acc: 0.47  val loss:  3.37  valid acc: 0.09\n",
      "sin i=45500 train loss:  3.55  train acc: 0.42  val loss:  3.37  valid acc: 0.09\n",
      "sin i=46000 train loss:  3.36  train acc: 0.47  val loss:  3.37  valid acc: 0.10\n",
      "sin i=46500 train loss:  3.78  train acc: 0.34  val loss:  3.37  valid acc: 0.10\n",
      "sin i=47000 train loss:  3.40  train acc: 0.45  val loss:  3.37  valid acc: 0.10\n",
      "sin i=47500 train loss:  3.53  train acc: 0.42  val loss:  3.36  valid acc: 0.10\n",
      "sin i=48000 train loss:  3.41  train acc: 0.43  val loss:  3.36  valid acc: 0.10\n",
      "sin i=48500 train loss:  3.40  train acc: 0.41  val loss:  3.36  valid acc: 0.10\n",
      "sin i=49000 train loss:  3.54  train acc: 0.39  val loss:  3.36  valid acc: 0.09\n",
      "sin i=49500 train loss:  3.32  train acc: 0.57  val loss:  3.37  valid acc: 0.09\n",
      "sin i=50000 train loss:  3.46  train acc: 0.43  val loss:  3.37  valid acc: 0.10\n",
      "-> sin layer idx: 20 , best valid accuracy: 0.10, test accuracy: 0.07\n",
      "sin i=    0 train loss: 62.51  train acc: 0.00  val loss: 13.01  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.72  train acc: 0.36  val loss:  4.24  valid acc: 0.00\n",
      "sin i= 1000 train loss:  4.93  train acc: 0.26  val loss:  4.13  valid acc: 0.02\n",
      "sin i= 1500 train loss:  4.44  train acc: 0.45  val loss:  3.99  valid acc: 0.04\n",
      "sin i= 2000 train loss:  4.69  train acc: 0.35  val loss:  3.94  valid acc: 0.02\n",
      "sin i= 2500 train loss:  4.75  train acc: 0.31  val loss:  3.91  valid acc: 0.05\n",
      "sin i= 3000 train loss:  4.81  train acc: 0.31  val loss:  3.73  valid acc: 0.05\n",
      "sin i= 3500 train loss:  4.49  train acc: 0.38  val loss:  3.77  valid acc: 0.06\n",
      "sin i= 4000 train loss:  4.55  train acc: 0.37  val loss:  3.65  valid acc: 0.05\n",
      "sin i= 4500 train loss:  4.61  train acc: 0.40  val loss:  3.75  valid acc: 0.04\n",
      "sin i= 5000 train loss:  4.66  train acc: 0.36  val loss:  3.80  valid acc: 0.05\n",
      "sin i= 5500 train loss:  4.49  train acc: 0.39  val loss:  3.71  valid acc: 0.05\n",
      "sin i= 6000 train loss:  4.61  train acc: 0.39  val loss:  3.65  valid acc: 0.08\n",
      "sin i= 6500 train loss:  4.42  train acc: 0.34  val loss:  3.62  valid acc: 0.06\n",
      "sin i= 7000 train loss:  4.48  train acc: 0.42  val loss:  3.60  valid acc: 0.06\n",
      "sin i= 7500 train loss:  4.40  train acc: 0.37  val loss:  3.69  valid acc: 0.05\n",
      "sin i= 8000 train loss:  4.39  train acc: 0.38  val loss:  3.64  valid acc: 0.08\n",
      "sin i= 8500 train loss:  4.40  train acc: 0.34  val loss:  3.71  valid acc: 0.06\n",
      "sin i= 9000 train loss:  4.68  train acc: 0.32  val loss:  3.68  valid acc: 0.05\n",
      "sin i= 9500 train loss:  4.59  train acc: 0.34  val loss:  3.69  valid acc: 0.06\n",
      "sin i=10000 train loss:  4.33  train acc: 0.39  val loss:  3.74  valid acc: 0.06\n",
      "sin i=10500 train loss:  4.57  train acc: 0.35  val loss:  3.64  valid acc: 0.08\n",
      "sin i=11000 train loss:  4.85  train acc: 0.30  val loss:  3.64  valid acc: 0.10\n",
      "sin i=11500 train loss:  4.29  train acc: 0.41  val loss:  3.62  valid acc: 0.09\n",
      "sin i=12000 train loss:  4.29  train acc: 0.43  val loss:  3.66  valid acc: 0.05\n",
      "sin i=12500 train loss:  4.27  train acc: 0.41  val loss:  3.62  valid acc: 0.07\n",
      "sin i=13000 train loss:  4.28  train acc: 0.37  val loss:  3.64  valid acc: 0.06\n",
      "sin i=13500 train loss:  4.26  train acc: 0.46  val loss:  3.64  valid acc: 0.07\n",
      "sin i=14000 train loss:  4.71  train acc: 0.27  val loss:  3.61  valid acc: 0.09\n",
      "sin i=14500 train loss:  4.22  train acc: 0.37  val loss:  3.62  valid acc: 0.08\n",
      "sin i=15000 train loss:  4.36  train acc: 0.42  val loss:  3.63  valid acc: 0.07\n",
      "sin i=15500 train loss:  4.13  train acc: 0.46  val loss:  3.60  valid acc: 0.08\n",
      "sin i=16000 train loss:  4.25  train acc: 0.46  val loss:  3.59  valid acc: 0.07\n",
      "sin i=16500 train loss:  3.94  train acc: 0.41  val loss:  3.62  valid acc: 0.07\n",
      "sin i=17000 train loss:  4.33  train acc: 0.34  val loss:  3.62  valid acc: 0.06\n",
      "sin i=17500 train loss:  4.12  train acc: 0.38  val loss:  3.68  valid acc: 0.06\n",
      "sin i=18000 train loss:  4.08  train acc: 0.41  val loss:  3.63  valid acc: 0.04\n",
      "sin i=18500 train loss:  4.38  train acc: 0.36  val loss:  3.67  valid acc: 0.07\n",
      "sin i=19000 train loss:  4.08  train acc: 0.42  val loss:  3.57  valid acc: 0.08\n",
      "sin i=19500 train loss:  3.89  train acc: 0.47  val loss:  3.63  valid acc: 0.07\n",
      "sin i=20000 train loss:  4.02  train acc: 0.41  val loss:  3.68  valid acc: 0.06\n",
      "sin i=20500 train loss:  4.10  train acc: 0.40  val loss:  3.63  valid acc: 0.07\n",
      "sin i=21000 train loss:  3.95  train acc: 0.41  val loss:  3.64  valid acc: 0.07\n",
      "sin i=21500 train loss:  4.20  train acc: 0.34  val loss:  3.58  valid acc: 0.07\n",
      "sin i=22000 train loss:  4.04  train acc: 0.39  val loss:  3.55  valid acc: 0.10\n",
      "sin i=22500 train loss:  4.28  train acc: 0.33  val loss:  3.63  valid acc: 0.07\n",
      "sin i=23000 train loss:  4.25  train acc: 0.32  val loss:  3.58  valid acc: 0.08\n",
      "sin i=23500 train loss:  3.67  train acc: 0.47  val loss:  3.55  valid acc: 0.08\n",
      "sin i=24000 train loss:  3.86  train acc: 0.44  val loss:  3.55  valid acc: 0.09\n",
      "sin i=24500 train loss:  3.85  train acc: 0.41  val loss:  3.57  valid acc: 0.07\n",
      "sin i=25000 train loss:  3.67  train acc: 0.44  val loss:  3.56  valid acc: 0.08\n",
      "sin i=25500 train loss:  3.93  train acc: 0.37  val loss:  3.58  valid acc: 0.07\n",
      "sin i=26000 train loss:  3.86  train acc: 0.41  val loss:  3.58  valid acc: 0.07\n",
      "sin i=26500 train loss:  3.95  train acc: 0.42  val loss:  3.56  valid acc: 0.09\n",
      "sin i=27000 train loss:  4.22  train acc: 0.37  val loss:  3.59  valid acc: 0.07\n",
      "sin i=27500 train loss:  3.79  train acc: 0.45  val loss:  3.56  valid acc: 0.08\n",
      "sin i=28000 train loss:  3.75  train acc: 0.41  val loss:  3.57  valid acc: 0.07\n",
      "sin i=28500 train loss:  3.86  train acc: 0.45  val loss:  3.56  valid acc: 0.10\n",
      "sin i=29000 train loss:  3.74  train acc: 0.34  val loss:  3.57  valid acc: 0.08\n",
      "sin i=29500 train loss:  3.88  train acc: 0.35  val loss:  3.57  valid acc: 0.09\n",
      "sin i=30000 train loss:  3.62  train acc: 0.45  val loss:  3.57  valid acc: 0.08\n",
      "sin i=30500 train loss:  3.60  train acc: 0.41  val loss:  3.56  valid acc: 0.09\n",
      "sin i=31000 train loss:  3.91  train acc: 0.40  val loss:  3.57  valid acc: 0.07\n",
      "sin i=31500 train loss:  3.76  train acc: 0.40  val loss:  3.57  valid acc: 0.07\n",
      "sin i=32000 train loss:  3.77  train acc: 0.37  val loss:  3.57  valid acc: 0.08\n",
      "sin i=32500 train loss:  3.73  train acc: 0.46  val loss:  3.57  valid acc: 0.08\n",
      "sin i=33000 train loss:  3.47  train acc: 0.48  val loss:  3.57  valid acc: 0.08\n",
      "sin i=33500 train loss:  3.75  train acc: 0.41  val loss:  3.57  valid acc: 0.07\n",
      "sin i=34000 train loss:  3.88  train acc: 0.41  val loss:  3.57  valid acc: 0.08\n",
      "sin i=34500 train loss:  3.62  train acc: 0.43  val loss:  3.57  valid acc: 0.08\n",
      "sin i=35000 train loss:  3.69  train acc: 0.41  val loss:  3.57  valid acc: 0.08\n",
      "sin i=35500 train loss:  3.90  train acc: 0.45  val loss:  3.57  valid acc: 0.08\n",
      "sin i=36000 train loss:  3.84  train acc: 0.34  val loss:  3.57  valid acc: 0.07\n",
      "sin i=36500 train loss:  3.66  train acc: 0.44  val loss:  3.57  valid acc: 0.08\n",
      "sin i=37000 train loss:  3.86  train acc: 0.35  val loss:  3.57  valid acc: 0.08\n",
      "sin i=37500 train loss:  3.30  train acc: 0.53  val loss:  3.57  valid acc: 0.08\n",
      "sin i=38000 train loss:  3.68  train acc: 0.41  val loss:  3.57  valid acc: 0.08\n",
      "sin i=38500 train loss:  3.68  train acc: 0.43  val loss:  3.57  valid acc: 0.08\n",
      "sin i=39000 train loss:  3.98  train acc: 0.34  val loss:  3.57  valid acc: 0.07\n",
      "sin i=39500 train loss:  3.46  train acc: 0.50  val loss:  3.57  valid acc: 0.07\n",
      "sin i=40000 train loss:  3.64  train acc: 0.46  val loss:  3.57  valid acc: 0.08\n",
      "sin i=40500 train loss:  3.74  train acc: 0.40  val loss:  3.58  valid acc: 0.08\n",
      "sin i=41000 train loss:  3.82  train acc: 0.38  val loss:  3.57  valid acc: 0.08\n",
      "sin i=41500 train loss:  3.92  train acc: 0.42  val loss:  3.57  valid acc: 0.08\n",
      "sin i=42000 train loss:  3.47  train acc: 0.43  val loss:  3.57  valid acc: 0.07\n",
      "sin i=42500 train loss:  3.71  train acc: 0.41  val loss:  3.57  valid acc: 0.07\n",
      "sin i=43000 train loss:  3.81  train acc: 0.37  val loss:  3.57  valid acc: 0.08\n",
      "sin i=43500 train loss:  3.99  train acc: 0.34  val loss:  3.57  valid acc: 0.08\n",
      "sin i=44000 train loss:  3.91  train acc: 0.43  val loss:  3.57  valid acc: 0.08\n",
      "sin i=44500 train loss:  3.96  train acc: 0.34  val loss:  3.57  valid acc: 0.08\n",
      "sin i=45000 train loss:  3.84  train acc: 0.39  val loss:  3.57  valid acc: 0.08\n",
      "sin i=45500 train loss:  3.85  train acc: 0.37  val loss:  3.57  valid acc: 0.08\n",
      "sin i=46000 train loss:  3.63  train acc: 0.46  val loss:  3.57  valid acc: 0.08\n",
      "sin i=46500 train loss:  4.03  train acc: 0.34  val loss:  3.57  valid acc: 0.08\n",
      "sin i=47000 train loss:  3.68  train acc: 0.40  val loss:  3.57  valid acc: 0.08\n",
      "sin i=47500 train loss:  3.75  train acc: 0.48  val loss:  3.56  valid acc: 0.08\n",
      "sin i=48000 train loss:  3.71  train acc: 0.42  val loss:  3.57  valid acc: 0.08\n",
      "sin i=48500 train loss:  3.61  train acc: 0.42  val loss:  3.57  valid acc: 0.08\n",
      "sin i=49000 train loss:  3.81  train acc: 0.38  val loss:  3.57  valid acc: 0.08\n",
      "sin i=49500 train loss:  3.53  train acc: 0.52  val loss:  3.57  valid acc: 0.07\n",
      "sin i=50000 train loss:  3.67  train acc: 0.38  val loss:  3.57  valid acc: 0.08\n",
      "-> sin layer idx: 19 , best valid accuracy: 0.10, test accuracy: 0.06\n",
      "sin i=    0 train loss: 62.48  train acc: 0.00  val loss: 14.11  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.87  train acc: 0.33  val loss:  4.81  valid acc: 0.01\n",
      "sin i= 1000 train loss:  5.22  train acc: 0.23  val loss:  4.47  valid acc: 0.02\n",
      "sin i= 1500 train loss:  4.75  train acc: 0.37  val loss:  4.38  valid acc: 0.03\n",
      "sin i= 2000 train loss:  4.80  train acc: 0.33  val loss:  4.40  valid acc: 0.00\n",
      "sin i= 2500 train loss:  4.92  train acc: 0.27  val loss:  4.43  valid acc: 0.02\n",
      "sin i= 3000 train loss:  4.95  train acc: 0.33  val loss:  4.38  valid acc: 0.04\n",
      "sin i= 3500 train loss:  4.75  train acc: 0.32  val loss:  4.34  valid acc: 0.02\n",
      "sin i= 4000 train loss:  4.73  train acc: 0.29  val loss:  4.30  valid acc: 0.01\n",
      "sin i= 4500 train loss:  4.84  train acc: 0.32  val loss:  4.40  valid acc: 0.02\n",
      "sin i= 5000 train loss:  4.74  train acc: 0.30  val loss:  4.45  valid acc: 0.02\n",
      "sin i= 5500 train loss:  4.72  train acc: 0.32  val loss:  4.39  valid acc: 0.01\n",
      "sin i= 6000 train loss:  4.79  train acc: 0.34  val loss:  4.25  valid acc: 0.01\n",
      "sin i= 6500 train loss:  4.63  train acc: 0.35  val loss:  4.34  valid acc: 0.01\n",
      "sin i= 7000 train loss:  4.71  train acc: 0.35  val loss:  4.38  valid acc: 0.01\n",
      "sin i= 7500 train loss:  4.72  train acc: 0.26  val loss:  4.37  valid acc: 0.02\n",
      "sin i= 8000 train loss:  4.68  train acc: 0.30  val loss:  4.33  valid acc: 0.01\n",
      "sin i= 8500 train loss:  4.72  train acc: 0.31  val loss:  4.32  valid acc: 0.02\n",
      "sin i= 9000 train loss:  4.96  train acc: 0.26  val loss:  4.35  valid acc: 0.02\n",
      "sin i= 9500 train loss:  4.81  train acc: 0.29  val loss:  4.40  valid acc: 0.02\n",
      "sin i=10000 train loss:  4.37  train acc: 0.38  val loss:  4.29  valid acc: 0.01\n",
      "sin i=10500 train loss:  4.82  train acc: 0.28  val loss:  4.31  valid acc: 0.01\n",
      "sin i=11000 train loss:  4.98  train acc: 0.27  val loss:  4.33  valid acc: 0.02\n",
      "sin i=11500 train loss:  4.66  train acc: 0.32  val loss:  4.30  valid acc: 0.02\n",
      "sin i=12000 train loss:  4.42  train acc: 0.38  val loss:  4.37  valid acc: 0.01\n",
      "sin i=12500 train loss:  4.59  train acc: 0.24  val loss:  4.28  valid acc: 0.01\n",
      "sin i=13000 train loss:  4.47  train acc: 0.29  val loss:  4.34  valid acc: 0.01\n",
      "sin i=13500 train loss:  4.47  train acc: 0.38  val loss:  4.32  valid acc: 0.01\n",
      "sin i=14000 train loss:  4.94  train acc: 0.25  val loss:  4.27  valid acc: 0.02\n",
      "sin i=14500 train loss:  4.56  train acc: 0.30  val loss:  4.32  valid acc: 0.02\n",
      "sin i=15000 train loss:  4.71  train acc: 0.32  val loss:  4.30  valid acc: 0.01\n",
      "sin i=15500 train loss:  4.48  train acc: 0.35  val loss:  4.32  valid acc: 0.02\n",
      "sin i=16000 train loss:  4.54  train acc: 0.31  val loss:  4.25  valid acc: 0.02\n",
      "sin i=16500 train loss:  4.19  train acc: 0.35  val loss:  4.29  valid acc: 0.02\n",
      "sin i=17000 train loss:  4.61  train acc: 0.26  val loss:  4.33  valid acc: 0.01\n",
      "sin i=17500 train loss:  4.37  train acc: 0.34  val loss:  4.34  valid acc: 0.01\n",
      "sin i=18000 train loss:  4.36  train acc: 0.38  val loss:  4.32  valid acc: 0.02\n",
      "sin i=18500 train loss:  4.62  train acc: 0.33  val loss:  4.32  valid acc: 0.02\n",
      "sin i=19000 train loss:  4.37  train acc: 0.31  val loss:  4.29  valid acc: 0.02\n",
      "sin i=19500 train loss:  4.23  train acc: 0.37  val loss:  4.30  valid acc: 0.02\n",
      "sin i=20000 train loss:  4.29  train acc: 0.34  val loss:  4.37  valid acc: 0.01\n",
      "sin i=20500 train loss:  4.39  train acc: 0.29  val loss:  4.35  valid acc: 0.01\n",
      "sin i=21000 train loss:  4.15  train acc: 0.34  val loss:  4.29  valid acc: 0.02\n",
      "sin i=21500 train loss:  4.66  train acc: 0.27  val loss:  4.26  valid acc: 0.01\n",
      "sin i=22000 train loss:  4.33  train acc: 0.30  val loss:  4.28  valid acc: 0.02\n",
      "sin i=22500 train loss:  4.45  train acc: 0.29  val loss:  4.34  valid acc: 0.02\n",
      "sin i=23000 train loss:  4.38  train acc: 0.28  val loss:  4.28  valid acc: 0.02\n",
      "sin i=23500 train loss:  3.86  train acc: 0.41  val loss:  4.25  valid acc: 0.02\n",
      "sin i=24000 train loss:  4.08  train acc: 0.41  val loss:  4.27  valid acc: 0.02\n",
      "sin i=24500 train loss:  4.21  train acc: 0.34  val loss:  4.31  valid acc: 0.02\n",
      "sin i=25000 train loss:  3.89  train acc: 0.38  val loss:  4.27  valid acc: 0.02\n",
      "sin i=25500 train loss:  4.17  train acc: 0.36  val loss:  4.28  valid acc: 0.02\n",
      "sin i=26000 train loss:  4.13  train acc: 0.35  val loss:  4.28  valid acc: 0.02\n",
      "sin i=26500 train loss:  4.31  train acc: 0.30  val loss:  4.26  valid acc: 0.03\n",
      "sin i=27000 train loss:  4.61  train acc: 0.24  val loss:  4.28  valid acc: 0.02\n",
      "sin i=27500 train loss:  4.06  train acc: 0.38  val loss:  4.27  valid acc: 0.02\n",
      "sin i=28000 train loss:  3.97  train acc: 0.35  val loss:  4.27  valid acc: 0.02\n",
      "sin i=28500 train loss:  4.15  train acc: 0.34  val loss:  4.26  valid acc: 0.02\n",
      "sin i=29000 train loss:  4.07  train acc: 0.31  val loss:  4.30  valid acc: 0.02\n",
      "sin i=29500 train loss:  4.22  train acc: 0.27  val loss:  4.28  valid acc: 0.02\n",
      "sin i=30000 train loss:  3.92  train acc: 0.36  val loss:  4.27  valid acc: 0.02\n",
      "sin i=30500 train loss:  3.94  train acc: 0.38  val loss:  4.27  valid acc: 0.02\n",
      "sin i=31000 train loss:  4.11  train acc: 0.27  val loss:  4.28  valid acc: 0.02\n",
      "sin i=31500 train loss:  4.05  train acc: 0.35  val loss:  4.28  valid acc: 0.02\n",
      "sin i=32000 train loss:  4.22  train acc: 0.26  val loss:  4.28  valid acc: 0.02\n",
      "sin i=32500 train loss:  4.05  train acc: 0.35  val loss:  4.28  valid acc: 0.02\n",
      "sin i=33000 train loss:  3.70  train acc: 0.41  val loss:  4.29  valid acc: 0.02\n",
      "sin i=33500 train loss:  4.02  train acc: 0.33  val loss:  4.28  valid acc: 0.02\n",
      "sin i=34000 train loss:  4.16  train acc: 0.28  val loss:  4.28  valid acc: 0.02\n",
      "sin i=34500 train loss:  3.87  train acc: 0.35  val loss:  4.27  valid acc: 0.02\n",
      "sin i=35000 train loss:  3.98  train acc: 0.34  val loss:  4.28  valid acc: 0.02\n",
      "sin i=35500 train loss:  4.13  train acc: 0.34  val loss:  4.28  valid acc: 0.02\n",
      "sin i=36000 train loss:  4.27  train acc: 0.25  val loss:  4.28  valid acc: 0.02\n",
      "sin i=36500 train loss:  3.83  train acc: 0.37  val loss:  4.28  valid acc: 0.02\n",
      "sin i=37000 train loss:  4.13  train acc: 0.29  val loss:  4.28  valid acc: 0.02\n",
      "sin i=37500 train loss:  3.65  train acc: 0.42  val loss:  4.28  valid acc: 0.02\n",
      "sin i=38000 train loss:  3.96  train acc: 0.34  val loss:  4.28  valid acc: 0.02\n",
      "sin i=38500 train loss:  4.01  train acc: 0.34  val loss:  4.28  valid acc: 0.02\n",
      "sin i=39000 train loss:  4.26  train acc: 0.27  val loss:  4.27  valid acc: 0.02\n",
      "sin i=39500 train loss:  3.75  train acc: 0.41  val loss:  4.28  valid acc: 0.02\n",
      "sin i=40000 train loss:  3.89  train acc: 0.38  val loss:  4.27  valid acc: 0.02\n",
      "sin i=40500 train loss:  4.05  train acc: 0.37  val loss:  4.29  valid acc: 0.02\n",
      "sin i=41000 train loss:  4.11  train acc: 0.35  val loss:  4.28  valid acc: 0.02\n",
      "sin i=41500 train loss:  4.29  train acc: 0.28  val loss:  4.28  valid acc: 0.02\n",
      "sin i=42000 train loss:  3.84  train acc: 0.38  val loss:  4.28  valid acc: 0.02\n",
      "sin i=42500 train loss:  4.01  train acc: 0.36  val loss:  4.28  valid acc: 0.02\n",
      "sin i=43000 train loss:  4.18  train acc: 0.30  val loss:  4.28  valid acc: 0.02\n",
      "sin i=43500 train loss:  4.11  train acc: 0.30  val loss:  4.27  valid acc: 0.02\n",
      "sin i=44000 train loss:  4.07  train acc: 0.30  val loss:  4.28  valid acc: 0.02\n",
      "sin i=44500 train loss:  4.23  train acc: 0.30  val loss:  4.28  valid acc: 0.02\n",
      "sin i=45000 train loss:  4.14  train acc: 0.31  val loss:  4.28  valid acc: 0.02\n",
      "sin i=45500 train loss:  4.15  train acc: 0.29  val loss:  4.28  valid acc: 0.02\n",
      "sin i=46000 train loss:  3.91  train acc: 0.36  val loss:  4.28  valid acc: 0.02\n",
      "sin i=46500 train loss:  4.25  train acc: 0.30  val loss:  4.28  valid acc: 0.02\n",
      "sin i=47000 train loss:  3.96  train acc: 0.34  val loss:  4.28  valid acc: 0.02\n",
      "sin i=47500 train loss:  4.12  train acc: 0.32  val loss:  4.27  valid acc: 0.02\n",
      "sin i=48000 train loss:  4.02  train acc: 0.38  val loss:  4.27  valid acc: 0.02\n",
      "sin i=48500 train loss:  3.88  train acc: 0.42  val loss:  4.28  valid acc: 0.02\n",
      "sin i=49000 train loss:  4.09  train acc: 0.28  val loss:  4.27  valid acc: 0.02\n",
      "sin i=49500 train loss:  3.79  train acc: 0.40  val loss:  4.28  valid acc: 0.02\n",
      "sin i=50000 train loss:  3.98  train acc: 0.33  val loss:  4.28  valid acc: 0.02\n",
      "-> sin layer idx: 18 , best valid accuracy: 0.04, test accuracy: 0.02\n",
      "sin i=    0 train loss: 62.44  train acc: 0.00  val loss: 14.05  valid acc: 0.00\n",
      "sin i=  500 train loss:  4.83  train acc: 0.34  val loss:  4.59  valid acc: 0.01\n",
      "sin i= 1000 train loss:  5.22  train acc: 0.23  val loss:  4.47  valid acc: 0.02\n",
      "sin i= 1500 train loss:  4.83  train acc: 0.35  val loss:  4.47  valid acc: 0.03\n",
      "sin i= 2000 train loss:  4.83  train acc: 0.29  val loss:  4.46  valid acc: 0.02\n",
      "sin i= 2500 train loss:  5.12  train acc: 0.23  val loss:  4.46  valid acc: 0.02\n",
      "sin i= 3000 train loss:  5.09  train acc: 0.27  val loss:  4.44  valid acc: 0.03\n",
      "sin i= 3500 train loss:  4.87  train acc: 0.34  val loss:  4.45  valid acc: 0.01\n",
      "sin i= 4000 train loss:  4.88  train acc: 0.30  val loss:  4.42  valid acc: 0.03\n",
      "sin i= 4500 train loss:  4.87  train acc: 0.30  val loss:  4.51  valid acc: 0.02\n",
      "sin i= 5000 train loss:  4.93  train acc: 0.27  val loss:  4.55  valid acc: 0.02\n",
      "sin i= 5500 train loss:  4.83  train acc: 0.30  val loss:  4.44  valid acc: 0.01\n",
      "sin i= 6000 train loss:  4.88  train acc: 0.31  val loss:  4.34  valid acc: 0.01\n",
      "sin i= 6500 train loss:  4.79  train acc: 0.34  val loss:  4.41  valid acc: 0.01\n",
      "sin i= 7000 train loss:  4.86  train acc: 0.33  val loss:  4.39  valid acc: 0.02\n",
      "sin i= 7500 train loss:  4.88  train acc: 0.27  val loss:  4.42  valid acc: 0.01\n",
      "sin i= 8000 train loss:  4.77  train acc: 0.30  val loss:  4.38  valid acc: 0.02\n",
      "sin i= 8500 train loss:  4.82  train acc: 0.27  val loss:  4.39  valid acc: 0.02\n",
      "sin i= 9000 train loss:  5.05  train acc: 0.22  val loss:  4.39  valid acc: 0.02\n",
      "sin i= 9500 train loss:  4.90  train acc: 0.30  val loss:  4.43  valid acc: 0.01\n",
      "sin i=10000 train loss:  4.45  train acc: 0.40  val loss:  4.34  valid acc: 0.00\n",
      "sin i=10500 train loss:  4.91  train acc: 0.26  val loss:  4.41  valid acc: 0.00\n",
      "sin i=11000 train loss:  5.09  train acc: 0.25  val loss:  4.39  valid acc: 0.01\n",
      "sin i=11500 train loss:  4.70  train acc: 0.30  val loss:  4.37  valid acc: 0.01\n",
      "sin i=12000 train loss:  4.51  train acc: 0.33  val loss:  4.44  valid acc: 0.01\n",
      "sin i=12500 train loss:  4.70  train acc: 0.29  val loss:  4.38  valid acc: 0.01\n",
      "sin i=13000 train loss:  4.54  train acc: 0.32  val loss:  4.42  valid acc: 0.01\n",
      "sin i=13500 train loss:  4.51  train acc: 0.33  val loss:  4.35  valid acc: 0.00\n",
      "sin i=14000 train loss:  5.01  train acc: 0.25  val loss:  4.37  valid acc: 0.02\n",
      "sin i=14500 train loss:  4.59  train acc: 0.27  val loss:  4.36  valid acc: 0.02\n",
      "sin i=15000 train loss:  4.77  train acc: 0.26  val loss:  4.40  valid acc: 0.02\n",
      "sin i=15500 train loss:  4.53  train acc: 0.34  val loss:  4.39  valid acc: 0.01\n",
      "sin i=16000 train loss:  4.65  train acc: 0.30  val loss:  4.36  valid acc: 0.02\n",
      "sin i=16500 train loss:  4.30  train acc: 0.35  val loss:  4.40  valid acc: 0.02\n",
      "sin i=17000 train loss:  4.75  train acc: 0.26  val loss:  4.35  valid acc: 0.02\n",
      "sin i=17500 train loss:  4.43  train acc: 0.32  val loss:  4.37  valid acc: 0.01\n",
      "sin i=18000 train loss:  4.44  train acc: 0.36  val loss:  4.42  valid acc: 0.01\n",
      "sin i=18500 train loss:  4.70  train acc: 0.24  val loss:  4.44  valid acc: 0.02\n",
      "sin i=19000 train loss:  4.45  train acc: 0.33  val loss:  4.39  valid acc: 0.01\n",
      "sin i=19500 train loss:  4.31  train acc: 0.40  val loss:  4.39  valid acc: 0.02\n",
      "sin i=20000 train loss:  4.40  train acc: 0.32  val loss:  4.44  valid acc: 0.00\n",
      "sin i=20500 train loss:  4.52  train acc: 0.31  val loss:  4.43  valid acc: 0.00\n",
      "sin i=21000 train loss:  4.29  train acc: 0.33  val loss:  4.35  valid acc: 0.03\n",
      "sin i=21500 train loss:  4.73  train acc: 0.21  val loss:  4.35  valid acc: 0.01\n",
      "sin i=22000 train loss:  4.44  train acc: 0.34  val loss:  4.36  valid acc: 0.02\n",
      "sin i=22500 train loss:  4.58  train acc: 0.26  val loss:  4.40  valid acc: 0.02\n",
      "sin i=23000 train loss:  4.47  train acc: 0.29  val loss:  4.36  valid acc: 0.02\n",
      "sin i=23500 train loss:  3.95  train acc: 0.34  val loss:  4.33  valid acc: 0.01\n",
      "sin i=24000 train loss:  4.22  train acc: 0.37  val loss:  4.35  valid acc: 0.02\n",
      "sin i=24500 train loss:  4.26  train acc: 0.31  val loss:  4.38  valid acc: 0.02\n",
      "sin i=25000 train loss:  3.95  train acc: 0.38  val loss:  4.35  valid acc: 0.02\n",
      "sin i=25500 train loss:  4.24  train acc: 0.29  val loss:  4.38  valid acc: 0.02\n",
      "sin i=26000 train loss:  4.29  train acc: 0.34  val loss:  4.37  valid acc: 0.02\n",
      "sin i=26500 train loss:  4.47  train acc: 0.27  val loss:  4.34  valid acc: 0.02\n",
      "sin i=27000 train loss:  4.74  train acc: 0.22  val loss:  4.38  valid acc: 0.01\n",
      "sin i=27500 train loss:  4.15  train acc: 0.34  val loss:  4.36  valid acc: 0.02\n",
      "sin i=28000 train loss:  4.05  train acc: 0.38  val loss:  4.35  valid acc: 0.02\n",
      "sin i=28500 train loss:  4.29  train acc: 0.27  val loss:  4.35  valid acc: 0.01\n",
      "sin i=29000 train loss:  4.16  train acc: 0.30  val loss:  4.38  valid acc: 0.01\n",
      "sin i=29500 train loss:  4.38  train acc: 0.27  val loss:  4.36  valid acc: 0.02\n",
      "sin i=30000 train loss:  4.08  train acc: 0.32  val loss:  4.35  valid acc: 0.02\n",
      "sin i=30500 train loss:  3.99  train acc: 0.32  val loss:  4.35  valid acc: 0.02\n",
      "sin i=31000 train loss:  4.21  train acc: 0.28  val loss:  4.36  valid acc: 0.02\n",
      "sin i=31500 train loss:  4.12  train acc: 0.29  val loss:  4.36  valid acc: 0.01\n",
      "sin i=32000 train loss:  4.33  train acc: 0.30  val loss:  4.36  valid acc: 0.01\n",
      "sin i=32500 train loss:  4.08  train acc: 0.34  val loss:  4.36  valid acc: 0.02\n",
      "sin i=33000 train loss:  3.76  train acc: 0.41  val loss:  4.36  valid acc: 0.01\n",
      "sin i=33500 train loss:  4.11  train acc: 0.34  val loss:  4.36  valid acc: 0.02\n",
      "sin i=34000 train loss:  4.30  train acc: 0.33  val loss:  4.36  valid acc: 0.02\n",
      "sin i=34500 train loss:  4.02  train acc: 0.33  val loss:  4.35  valid acc: 0.02\n",
      "sin i=35000 train loss:  4.07  train acc: 0.32  val loss:  4.36  valid acc: 0.02\n",
      "sin i=35500 train loss:  4.29  train acc: 0.30  val loss:  4.35  valid acc: 0.02\n",
      "sin i=36000 train loss:  4.32  train acc: 0.24  val loss:  4.36  valid acc: 0.02\n",
      "sin i=36500 train loss:  3.96  train acc: 0.35  val loss:  4.36  valid acc: 0.02\n",
      "sin i=37000 train loss:  4.19  train acc: 0.30  val loss:  4.36  valid acc: 0.02\n",
      "sin i=37500 train loss:  3.74  train acc: 0.40  val loss:  4.36  valid acc: 0.02\n",
      "sin i=38000 train loss:  4.13  train acc: 0.30  val loss:  4.36  valid acc: 0.02\n",
      "sin i=38500 train loss:  4.10  train acc: 0.32  val loss:  4.35  valid acc: 0.02\n",
      "sin i=39000 train loss:  4.35  train acc: 0.23  val loss:  4.35  valid acc: 0.02\n",
      "sin i=39500 train loss:  3.80  train acc: 0.38  val loss:  4.36  valid acc: 0.02\n",
      "sin i=40000 train loss:  4.02  train acc: 0.33  val loss:  4.35  valid acc: 0.02\n",
      "sin i=40500 train loss:  4.08  train acc: 0.29  val loss:  4.36  valid acc: 0.02\n",
      "sin i=41000 train loss:  4.18  train acc: 0.34  val loss:  4.35  valid acc: 0.02\n",
      "sin i=41500 train loss:  4.43  train acc: 0.23  val loss:  4.36  valid acc: 0.02\n",
      "sin i=42000 train loss:  3.93  train acc: 0.38  val loss:  4.36  valid acc: 0.02\n",
      "sin i=42500 train loss:  4.09  train acc: 0.30  val loss:  4.36  valid acc: 0.02\n",
      "sin i=43000 train loss:  4.25  train acc: 0.30  val loss:  4.35  valid acc: 0.02\n",
      "sin i=43500 train loss:  4.24  train acc: 0.27  val loss:  4.36  valid acc: 0.02\n",
      "sin i=44000 train loss:  4.24  train acc: 0.33  val loss:  4.36  valid acc: 0.02\n",
      "sin i=44500 train loss:  4.31  train acc: 0.28  val loss:  4.36  valid acc: 0.01\n",
      "sin i=45000 train loss:  4.26  train acc: 0.33  val loss:  4.35  valid acc: 0.01\n",
      "sin i=45500 train loss:  4.33  train acc: 0.27  val loss:  4.36  valid acc: 0.02\n",
      "sin i=46000 train loss:  3.99  train acc: 0.32  val loss:  4.36  valid acc: 0.02\n",
      "sin i=46500 train loss:  4.38  train acc: 0.27  val loss:  4.36  valid acc: 0.02\n",
      "sin i=47000 train loss:  4.08  train acc: 0.29  val loss:  4.36  valid acc: 0.02\n",
      "sin i=47500 train loss:  4.21  train acc: 0.31  val loss:  4.35  valid acc: 0.02\n",
      "sin i=48000 train loss:  4.17  train acc: 0.33  val loss:  4.35  valid acc: 0.02\n",
      "sin i=48500 train loss:  3.96  train acc: 0.34  val loss:  4.36  valid acc: 0.02\n",
      "sin i=49000 train loss:  4.20  train acc: 0.27  val loss:  4.36  valid acc: 0.02\n",
      "sin i=49500 train loss:  3.95  train acc: 0.38  val loss:  4.36  valid acc: 0.02\n",
      "sin i=50000 train loss:  4.02  train acc: 0.30  val loss:  4.36  valid acc: 0.02\n",
      "-> sin layer idx: 17 , best valid accuracy: 0.03, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.45  train acc: 0.00  val loss: 13.63  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.54  train acc: 0.28  val loss:  6.85  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.30  train acc: 0.16  val loss:  6.84  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.59  train acc: 0.28  val loss:  6.85  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.67  train acc: 0.27  val loss:  6.91  valid acc: 0.00\n",
      "sin i= 2500 train loss:  5.96  train acc: 0.21  val loss:  6.82  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.23  train acc: 0.18  val loss:  6.88  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.44  train acc: 0.30  val loss:  6.84  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.74  train acc: 0.25  val loss:  6.80  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.57  train acc: 0.28  val loss:  6.81  valid acc: 0.00\n",
      "sin i= 5000 train loss:  5.88  train acc: 0.23  val loss:  6.89  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.68  train acc: 0.26  val loss:  6.77  valid acc: 0.00\n",
      "sin i= 6000 train loss:  5.63  train acc: 0.23  val loss:  6.82  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.55  train acc: 0.27  val loss:  6.79  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.49  train acc: 0.27  val loss:  6.75  valid acc: 0.00\n",
      "sin i= 7500 train loss:  5.90  train acc: 0.21  val loss:  6.82  valid acc: 0.00\n",
      "sin i= 8000 train loss:  5.83  train acc: 0.23  val loss:  6.84  valid acc: 0.00\n",
      "sin i= 8500 train loss:  5.84  train acc: 0.21  val loss:  6.85  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.03  train acc: 0.18  val loss:  6.77  valid acc: 0.00\n",
      "sin i= 9500 train loss:  5.79  train acc: 0.22  val loss:  6.75  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.24  train acc: 0.30  val loss:  6.85  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.08  train acc: 0.18  val loss:  6.79  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.03  train acc: 0.19  val loss:  6.78  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.63  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.34  train acc: 0.28  val loss:  6.81  valid acc: 0.00\n",
      "sin i=12500 train loss:  5.78  train acc: 0.21  val loss:  6.84  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.54  train acc: 0.24  val loss:  6.79  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.47  train acc: 0.26  val loss:  6.80  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.29  train acc: 0.13  val loss:  6.83  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.67  train acc: 0.21  val loss:  6.84  valid acc: 0.00\n",
      "sin i=15000 train loss:  5.94  train acc: 0.20  val loss:  6.82  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.61  train acc: 0.23  val loss:  6.84  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.56  train acc: 0.23  val loss:  6.82  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.16  train acc: 0.29  val loss:  6.78  valid acc: 0.00\n",
      "sin i=17000 train loss:  5.78  train acc: 0.19  val loss:  6.84  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.38  train acc: 0.25  val loss:  6.79  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.05  train acc: 0.32  val loss:  6.81  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.72  train acc: 0.20  val loss:  6.81  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.43  train acc: 0.25  val loss:  6.81  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.15  train acc: 0.28  val loss:  6.83  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.38  train acc: 0.25  val loss:  6.83  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.39  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.26  train acc: 0.26  val loss:  6.79  valid acc: 0.00\n",
      "sin i=21500 train loss:  5.98  train acc: 0.15  val loss:  6.80  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.51  train acc: 0.22  val loss:  6.82  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.64  train acc: 0.20  val loss:  6.86  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.35  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=23500 train loss:  4.89  train acc: 0.31  val loss:  6.84  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.01  train acc: 0.30  val loss:  6.81  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.36  train acc: 0.23  val loss:  6.84  valid acc: 0.00\n",
      "sin i=25000 train loss:  4.75  train acc: 0.33  val loss:  6.84  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.32  train acc: 0.23  val loss:  6.84  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.32  train acc: 0.23  val loss:  6.83  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.56  train acc: 0.20  val loss:  6.83  valid acc: 0.00\n",
      "sin i=27000 train loss:  5.92  train acc: 0.15  val loss:  6.85  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.03  train acc: 0.28  val loss:  6.83  valid acc: 0.00\n",
      "sin i=28000 train loss:  4.97  train acc: 0.28  val loss:  6.85  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.29  train acc: 0.23  val loss:  6.84  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.46  train acc: 0.21  val loss:  6.84  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.53  train acc: 0.19  val loss:  6.85  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.03  train acc: 0.27  val loss:  6.85  valid acc: 0.00\n",
      "sin i=30500 train loss:  4.92  train acc: 0.28  val loss:  6.85  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.26  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.24  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.51  train acc: 0.19  val loss:  6.85  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.20  train acc: 0.24  val loss:  6.86  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.63  train acc: 0.32  val loss:  6.86  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.26  train acc: 0.24  val loss:  6.86  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.15  train acc: 0.25  val loss:  6.85  valid acc: 0.00\n",
      "sin i=34500 train loss:  4.93  train acc: 0.28  val loss:  6.85  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.14  train acc: 0.24  val loss:  6.86  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.38  train acc: 0.20  val loss:  6.86  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.66  train acc: 0.16  val loss:  6.85  valid acc: 0.00\n",
      "sin i=36500 train loss:  4.82  train acc: 0.30  val loss:  6.85  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.45  train acc: 0.20  val loss:  6.85  valid acc: 0.00\n",
      "sin i=37500 train loss:  4.72  train acc: 0.30  val loss:  6.86  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.06  train acc: 0.25  val loss:  6.85  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.02  train acc: 0.27  val loss:  6.85  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.62  train acc: 0.18  val loss:  6.85  valid acc: 0.00\n",
      "sin i=39500 train loss:  4.71  train acc: 0.30  val loss:  6.86  valid acc: 0.00\n",
      "sin i=40000 train loss:  4.96  train acc: 0.27  val loss:  6.85  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.23  train acc: 0.25  val loss:  6.85  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.08  train acc: 0.24  val loss:  6.85  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.61  train acc: 0.17  val loss:  6.85  valid acc: 0.00\n",
      "sin i=42000 train loss:  4.87  train acc: 0.29  val loss:  6.85  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.15  train acc: 0.25  val loss:  6.85  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.38  train acc: 0.20  val loss:  6.85  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.23  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.39  train acc: 0.21  val loss:  6.85  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.33  train acc: 0.22  val loss:  6.85  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.33  train acc: 0.21  val loss:  6.85  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.42  train acc: 0.21  val loss:  6.85  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.01  train acc: 0.26  val loss:  6.85  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.50  train acc: 0.19  val loss:  6.86  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.14  train acc: 0.24  val loss:  6.85  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.26  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.22  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=48500 train loss:  4.91  train acc: 0.27  val loss:  6.85  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.23  train acc: 0.22  val loss:  6.85  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.66  train acc: 0.32  val loss:  6.85  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.10  train acc: 0.26  val loss:  6.85  valid acc: 0.00\n",
      "-> sin layer idx: 16 , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.51  train acc: 0.00  val loss: 12.17  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.54  train acc: 0.28  val loss:  6.87  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.30  train acc: 0.16  val loss:  6.83  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.60  train acc: 0.28  val loss:  6.87  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.70  train acc: 0.27  val loss:  6.94  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.00  train acc: 0.20  val loss:  6.83  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.30  train acc: 0.18  val loss:  6.89  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.46  train acc: 0.30  val loss:  6.86  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.77  train acc: 0.25  val loss:  6.82  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.60  train acc: 0.28  val loss:  6.84  valid acc: 0.00\n",
      "sin i= 5000 train loss:  5.89  train acc: 0.23  val loss:  6.90  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.69  train acc: 0.25  val loss:  6.83  valid acc: 0.00\n",
      "sin i= 6000 train loss:  5.69  train acc: 0.23  val loss:  6.87  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.59  train acc: 0.27  val loss:  6.82  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.50  train acc: 0.27  val loss:  6.81  valid acc: 0.00\n",
      "sin i= 7500 train loss:  5.94  train acc: 0.21  val loss:  6.87  valid acc: 0.00\n",
      "sin i= 8000 train loss:  5.86  train acc: 0.23  val loss:  6.90  valid acc: 0.00\n",
      "sin i= 8500 train loss:  5.88  train acc: 0.21  val loss:  6.91  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.09  train acc: 0.18  val loss:  6.83  valid acc: 0.00\n",
      "sin i= 9500 train loss:  5.83  train acc: 0.21  val loss:  6.79  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.27  train acc: 0.30  val loss:  6.90  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.12  train acc: 0.18  val loss:  6.84  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.03  train acc: 0.19  val loss:  6.83  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.66  train acc: 0.23  val loss:  6.85  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.38  train acc: 0.28  val loss:  6.84  valid acc: 0.00\n",
      "sin i=12500 train loss:  5.81  train acc: 0.21  val loss:  6.88  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.54  train acc: 0.24  val loss:  6.81  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.49  train acc: 0.26  val loss:  6.85  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.35  train acc: 0.13  val loss:  6.86  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.72  train acc: 0.21  val loss:  6.87  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.00  train acc: 0.20  val loss:  6.87  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.63  train acc: 0.23  val loss:  6.89  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.60  train acc: 0.23  val loss:  6.86  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.20  train acc: 0.29  val loss:  6.80  valid acc: 0.00\n",
      "sin i=17000 train loss:  5.80  train acc: 0.19  val loss:  6.86  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.38  train acc: 0.25  val loss:  6.84  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.07  train acc: 0.32  val loss:  6.85  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.76  train acc: 0.20  val loss:  6.85  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.46  train acc: 0.25  val loss:  6.87  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.16  train acc: 0.28  val loss:  6.86  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.38  train acc: 0.25  val loss:  6.87  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.43  train acc: 0.23  val loss:  6.88  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.28  train acc: 0.26  val loss:  6.83  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.03  train acc: 0.15  val loss:  6.83  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.53  train acc: 0.22  val loss:  6.86  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.66  train acc: 0.20  val loss:  6.89  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.38  train acc: 0.23  val loss:  6.88  valid acc: 0.00\n",
      "sin i=23500 train loss:  4.91  train acc: 0.30  val loss:  6.88  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.01  train acc: 0.30  val loss:  6.86  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.40  train acc: 0.23  val loss:  6.87  valid acc: 0.00\n",
      "sin i=25000 train loss:  4.81  train acc: 0.33  val loss:  6.87  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.36  train acc: 0.23  val loss:  6.87  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.33  train acc: 0.23  val loss:  6.86  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.60  train acc: 0.20  val loss:  6.87  valid acc: 0.00\n",
      "sin i=27000 train loss:  5.95  train acc: 0.15  val loss:  6.89  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.06  train acc: 0.28  val loss:  6.87  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.00  train acc: 0.28  val loss:  6.89  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.31  train acc: 0.23  val loss:  6.87  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.48  train acc: 0.21  val loss:  6.88  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.58  train acc: 0.19  val loss:  6.88  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.08  train acc: 0.27  val loss:  6.88  valid acc: 0.00\n",
      "sin i=30500 train loss:  4.94  train acc: 0.28  val loss:  6.89  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.29  train acc: 0.23  val loss:  6.88  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.26  train acc: 0.23  val loss:  6.88  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.55  train acc: 0.19  val loss:  6.89  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.24  train acc: 0.24  val loss:  6.89  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.65  train acc: 0.32  val loss:  6.89  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.27  train acc: 0.24  val loss:  6.89  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.15  train acc: 0.25  val loss:  6.89  valid acc: 0.00\n",
      "sin i=34500 train loss:  4.96  train acc: 0.28  val loss:  6.89  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.21  train acc: 0.24  val loss:  6.89  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.39  train acc: 0.20  val loss:  6.89  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.71  train acc: 0.16  val loss:  6.89  valid acc: 0.00\n",
      "sin i=36500 train loss:  4.86  train acc: 0.30  val loss:  6.89  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.49  train acc: 0.20  val loss:  6.89  valid acc: 0.00\n",
      "sin i=37500 train loss:  4.75  train acc: 0.30  val loss:  6.89  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.09  train acc: 0.25  val loss:  6.89  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.02  train acc: 0.27  val loss:  6.89  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.61  train acc: 0.18  val loss:  6.88  valid acc: 0.00\n",
      "sin i=39500 train loss:  4.75  train acc: 0.30  val loss:  6.89  valid acc: 0.00\n",
      "sin i=40000 train loss:  4.99  train acc: 0.27  val loss:  6.88  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.24  train acc: 0.25  val loss:  6.88  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.11  train acc: 0.24  val loss:  6.88  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.65  train acc: 0.17  val loss:  6.88  valid acc: 0.00\n",
      "sin i=42000 train loss:  4.90  train acc: 0.29  val loss:  6.89  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.17  train acc: 0.25  val loss:  6.88  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.42  train acc: 0.20  val loss:  6.89  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.26  train acc: 0.23  val loss:  6.88  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.41  train acc: 0.21  val loss:  6.88  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.35  train acc: 0.22  val loss:  6.89  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.34  train acc: 0.21  val loss:  6.89  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.47  train acc: 0.21  val loss:  6.88  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.04  train acc: 0.26  val loss:  6.88  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.55  train acc: 0.19  val loss:  6.89  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.22  train acc: 0.24  val loss:  6.89  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.30  train acc: 0.23  val loss:  6.88  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.24  train acc: 0.23  val loss:  6.89  valid acc: 0.00\n",
      "sin i=48500 train loss:  4.98  train acc: 0.27  val loss:  6.88  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.26  train acc: 0.22  val loss:  6.88  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.72  train acc: 0.32  val loss:  6.89  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.13  train acc: 0.26  val loss:  6.88  valid acc: 0.00\n",
      "-> sin layer idx: 15 , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.47  train acc: 0.00  val loss: 11.31  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.65  train acc: 0.28  val loss:  7.06  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.55  train acc: 0.16  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.71  train acc: 0.28  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.86  train acc: 0.27  val loss:  7.15  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.21  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.45  train acc: 0.18  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.58  train acc: 0.30  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.91  train acc: 0.25  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.69  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.02  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.90  train acc: 0.25  val loss:  7.05  valid acc: 0.00\n",
      "sin i= 6000 train loss:  5.95  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.80  train acc: 0.26  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.66  train acc: 0.27  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.11  train acc: 0.21  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 8000 train loss:  5.97  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.02  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.25  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.03  train acc: 0.21  val loss:  7.06  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.44  train acc: 0.30  val loss:  7.12  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.30  train acc: 0.18  val loss:  7.07  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.21  train acc: 0.19  val loss:  7.08  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.86  train acc: 0.23  val loss:  7.04  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.52  train acc: 0.28  val loss:  7.05  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.02  train acc: 0.21  val loss:  7.05  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.74  train acc: 0.24  val loss:  7.06  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.62  train acc: 0.26  val loss:  7.09  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.50  train acc: 0.13  val loss:  7.08  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.87  train acc: 0.21  val loss:  7.08  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.21  train acc: 0.18  val loss:  7.06  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.75  train acc: 0.23  val loss:  7.04  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.81  train acc: 0.23  val loss:  7.07  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.32  train acc: 0.29  val loss:  7.03  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.02  train acc: 0.19  val loss:  7.08  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.57  train acc: 0.25  val loss:  7.06  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.17  train acc: 0.32  val loss:  7.04  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.92  train acc: 0.20  val loss:  7.03  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.61  train acc: 0.25  val loss:  7.02  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.36  train acc: 0.28  val loss:  7.04  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.58  train acc: 0.25  val loss:  7.06  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.62  train acc: 0.23  val loss:  7.06  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.50  train acc: 0.26  val loss:  7.00  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.17  train acc: 0.15  val loss:  6.87  valid acc: 0.02\n",
      "sin i=22000 train loss:  5.65  train acc: 0.22  val loss:  6.82  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.77  train acc: 0.20  val loss:  6.83  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.53  train acc: 0.23  val loss:  6.83  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.10  train acc: 0.30  val loss:  6.82  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.11  train acc: 0.30  val loss:  6.81  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.53  train acc: 0.23  val loss:  6.83  valid acc: 0.00\n",
      "sin i=25000 train loss:  4.92  train acc: 0.33  val loss:  6.82  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.51  train acc: 0.23  val loss:  6.81  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.45  train acc: 0.23  val loss:  6.81  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.67  train acc: 0.21  val loss:  6.81  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.01  train acc: 0.16  val loss:  6.80  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.14  train acc: 0.28  val loss:  6.80  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.17  train acc: 0.28  val loss:  6.81  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.44  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.55  train acc: 0.22  val loss:  6.80  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.72  train acc: 0.18  val loss:  6.80  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.23  train acc: 0.26  val loss:  6.80  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.06  train acc: 0.29  val loss:  6.80  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.44  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.38  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.71  train acc: 0.19  val loss:  6.80  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.48  train acc: 0.24  val loss:  6.80  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.76  train acc: 0.32  val loss:  6.80  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.40  train acc: 0.24  val loss:  6.80  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.26  train acc: 0.25  val loss:  6.80  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.06  train acc: 0.29  val loss:  6.80  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.30  train acc: 0.24  val loss:  6.80  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.59  train acc: 0.20  val loss:  6.80  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.86  train acc: 0.16  val loss:  6.80  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.00  train acc: 0.30  val loss:  6.80  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.61  train acc: 0.20  val loss:  6.80  valid acc: 0.00\n",
      "sin i=37500 train loss:  4.95  train acc: 0.31  val loss:  6.80  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.23  train acc: 0.25  val loss:  6.80  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.12  train acc: 0.27  val loss:  6.80  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.74  train acc: 0.18  val loss:  6.80  valid acc: 0.00\n",
      "sin i=39500 train loss:  4.88  train acc: 0.30  val loss:  6.80  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.13  train acc: 0.27  val loss:  6.80  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.31  train acc: 0.25  val loss:  6.80  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.22  train acc: 0.24  val loss:  6.80  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.80  train acc: 0.18  val loss:  6.80  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.06  train acc: 0.29  val loss:  6.80  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.39  train acc: 0.26  val loss:  6.80  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.54  train acc: 0.20  val loss:  6.80  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.36  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.55  train acc: 0.21  val loss:  6.80  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.50  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.46  train acc: 0.21  val loss:  6.80  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.58  train acc: 0.21  val loss:  6.80  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.20  train acc: 0.26  val loss:  6.80  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.68  train acc: 0.19  val loss:  6.80  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.29  train acc: 0.24  val loss:  6.80  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.41  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.37  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.09  train acc: 0.27  val loss:  6.80  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.37  train acc: 0.23  val loss:  6.80  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.79  train acc: 0.32  val loss:  6.80  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.21  train acc: 0.26  val loss:  6.80  valid acc: 0.00\n",
      "-> sin layer idx: 14 , best valid accuracy: 0.02, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.45  train acc: 0.00  val loss: 11.78  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.72  train acc: 0.28  val loss:  7.04  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.59  train acc: 0.16  val loss:  7.05  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.74  train acc: 0.28  val loss:  7.06  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.88  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.25  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.47  train acc: 0.18  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.63  train acc: 0.30  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.94  train acc: 0.25  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.73  train acc: 0.28  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.05  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.92  train acc: 0.25  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 6000 train loss:  5.97  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.82  train acc: 0.26  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.69  train acc: 0.27  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.15  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 8000 train loss:  5.99  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.01  train acc: 0.21  val loss:  7.17  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.28  train acc: 0.18  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.05  train acc: 0.21  val loss:  7.09  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.45  train acc: 0.30  val loss:  7.16  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.31  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.21  train acc: 0.19  val loss:  7.11  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.89  train acc: 0.23  val loss:  7.07  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.53  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.01  train acc: 0.21  val loss:  7.11  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.77  train acc: 0.24  val loss:  7.09  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.65  train acc: 0.26  val loss:  7.13  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.51  train acc: 0.13  val loss:  7.14  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.91  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.24  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.78  train acc: 0.23  val loss:  7.09  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.81  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.34  train acc: 0.29  val loss:  7.07  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.05  train acc: 0.19  val loss:  7.13  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.61  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.18  train acc: 0.32  val loss:  7.10  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.92  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.65  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.41  train acc: 0.28  val loss:  7.12  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.60  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.67  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.52  train acc: 0.26  val loss:  7.09  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.21  train acc: 0.15  val loss:  7.09  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.78  train acc: 0.22  val loss:  7.11  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.82  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.63  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.13  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.17  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.61  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=25000 train loss:  4.96  train acc: 0.33  val loss:  7.13  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.60  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.57  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.75  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.13  train acc: 0.15  val loss:  7.14  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.22  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.24  train acc: 0.28  val loss:  7.15  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.50  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.68  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.87  train acc: 0.18  val loss:  7.14  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.33  train acc: 0.26  val loss:  7.14  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.18  train acc: 0.28  val loss:  7.14  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.49  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.54  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.80  train acc: 0.19  val loss:  7.14  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.39  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.90  train acc: 0.32  val loss:  7.14  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.46  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.36  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.13  train acc: 0.28  val loss:  7.14  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.41  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.69  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.93  train acc: 0.16  val loss:  7.14  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.07  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.76  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=37500 train loss:  4.97  train acc: 0.30  val loss:  7.15  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.34  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.20  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.85  train acc: 0.18  val loss:  7.14  valid acc: 0.00\n",
      "sin i=39500 train loss:  4.95  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.17  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.40  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.42  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.92  train acc: 0.17  val loss:  7.14  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.09  train acc: 0.29  val loss:  7.14  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.34  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.73  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.51  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.66  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.55  train acc: 0.22  val loss:  7.14  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.66  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.63  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.30  train acc: 0.26  val loss:  7.14  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.80  train acc: 0.19  val loss:  7.14  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.42  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.52  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.45  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.19  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.53  train acc: 0.22  val loss:  7.14  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.85  train acc: 0.32  val loss:  7.14  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.35  train acc: 0.26  val loss:  7.14  valid acc: 0.00\n",
      "-> sin layer idx: 13 , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.39  train acc: 0.00  val loss: 12.29  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.74  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.63  train acc: 0.16  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.77  train acc: 0.28  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.89  train acc: 0.27  val loss:  7.15  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.27  train acc: 0.20  val loss:  7.15  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.48  train acc: 0.18  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.64  train acc: 0.30  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.96  train acc: 0.25  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.75  train acc: 0.28  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.07  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.93  train acc: 0.25  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 6000 train loss:  5.99  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.83  train acc: 0.26  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.72  train acc: 0.27  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.17  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 8000 train loss:  6.00  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.04  train acc: 0.21  val loss:  7.17  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.30  train acc: 0.18  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.06  train acc: 0.21  val loss:  7.07  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.47  train acc: 0.30  val loss:  7.16  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.32  train acc: 0.18  val loss:  7.08  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.22  train acc: 0.19  val loss:  7.10  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.90  train acc: 0.23  val loss:  7.05  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.55  train acc: 0.28  val loss:  7.07  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.03  train acc: 0.21  val loss:  7.10  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.78  train acc: 0.24  val loss:  7.07  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.68  train acc: 0.26  val loss:  7.12  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.53  train acc: 0.13  val loss:  7.13  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.93  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.28  train acc: 0.18  val loss:  7.08  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.80  train acc: 0.23  val loss:  7.09  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.84  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.37  train acc: 0.29  val loss:  7.07  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.06  train acc: 0.19  val loss:  7.11  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.63  train acc: 0.25  val loss:  7.11  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.21  train acc: 0.32  val loss:  7.08  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.95  train acc: 0.20  val loss:  7.11  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.66  train acc: 0.25  val loss:  7.11  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.42  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.62  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.69  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.53  train acc: 0.26  val loss:  7.08  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.22  train acc: 0.15  val loss:  7.10  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.79  train acc: 0.22  val loss:  7.10  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.85  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.65  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.14  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.19  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.62  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=25000 train loss:  4.98  train acc: 0.33  val loss:  7.12  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.62  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.59  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.77  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.13  train acc: 0.15  val loss:  7.13  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.25  train acc: 0.28  val loss:  7.12  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.24  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.52  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.69  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.88  train acc: 0.18  val loss:  7.13  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.35  train acc: 0.26  val loss:  7.13  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.19  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.51  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.55  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.81  train acc: 0.19  val loss:  7.13  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.41  train acc: 0.24  val loss:  7.13  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.91  train acc: 0.32  val loss:  7.13  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.45  train acc: 0.24  val loss:  7.13  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.36  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.15  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.43  train acc: 0.24  val loss:  7.13  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.71  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.94  train acc: 0.16  val loss:  7.13  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.08  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.78  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=37500 train loss:  5.00  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.36  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.21  train acc: 0.27  val loss:  7.13  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.87  train acc: 0.18  val loss:  7.13  valid acc: 0.00\n",
      "sin i=39500 train loss:  4.96  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.19  train acc: 0.27  val loss:  7.13  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.43  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.43  train acc: 0.24  val loss:  7.13  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.92  train acc: 0.17  val loss:  7.13  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.09  train acc: 0.29  val loss:  7.13  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.37  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.75  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.55  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.67  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.58  train acc: 0.22  val loss:  7.13  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.68  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.65  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.31  train acc: 0.26  val loss:  7.13  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.82  train acc: 0.19  val loss:  7.13  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.44  train acc: 0.24  val loss:  7.13  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.53  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.46  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.20  train acc: 0.27  val loss:  7.13  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.55  train acc: 0.22  val loss:  7.13  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.88  train acc: 0.32  val loss:  7.13  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.36  train acc: 0.26  val loss:  7.13  valid acc: 0.00\n",
      "-> sin layer idx: 12 , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.37  train acc: 0.00  val loss: 10.48  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.75  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.64  train acc: 0.16  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.81  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.91  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.30  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.49  train acc: 0.18  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.68  train acc: 0.30  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.99  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.78  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.09  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.96  train acc: 0.25  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 6000 train loss:  6.04  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.86  train acc: 0.26  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.73  train acc: 0.27  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.19  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 8000 train loss:  6.03  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.06  train acc: 0.21  val loss:  7.18  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.32  train acc: 0.18  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.10  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.50  train acc: 0.30  val loss:  7.16  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.36  train acc: 0.18  val loss:  7.08  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.27  train acc: 0.19  val loss:  7.10  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.93  train acc: 0.23  val loss:  7.08  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.57  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.06  train acc: 0.21  val loss:  7.11  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.81  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.69  train acc: 0.26  val loss:  7.13  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.57  train acc: 0.13  val loss:  7.14  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.97  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.28  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.83  train acc: 0.23  val loss:  7.09  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.86  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.38  train acc: 0.29  val loss:  7.08  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.10  train acc: 0.19  val loss:  7.11  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.65  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.23  train acc: 0.32  val loss:  7.09  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.97  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.70  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.44  train acc: 0.28  val loss:  7.11  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.65  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.71  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.56  train acc: 0.26  val loss:  7.09  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.25  train acc: 0.15  val loss:  7.11  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.83  train acc: 0.22  val loss:  7.11  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.88  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.67  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.18  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.21  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.65  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=25000 train loss:  5.01  train acc: 0.33  val loss:  7.14  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.66  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.61  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.81  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.16  train acc: 0.15  val loss:  7.14  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.27  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.26  train acc: 0.28  val loss:  7.14  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.56  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.73  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.91  train acc: 0.18  val loss:  7.14  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.38  train acc: 0.26  val loss:  7.14  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.23  train acc: 0.28  val loss:  7.14  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.55  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.57  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.85  train acc: 0.19  val loss:  7.14  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.44  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.94  train acc: 0.32  val loss:  7.14  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.51  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.40  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.17  train acc: 0.28  val loss:  7.14  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.46  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.74  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.98  train acc: 0.16  val loss:  7.14  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.11  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.81  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=37500 train loss:  5.02  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.38  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.24  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.89  train acc: 0.18  val loss:  7.14  valid acc: 0.00\n",
      "sin i=39500 train loss:  5.00  train acc: 0.30  val loss:  7.14  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.23  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.46  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.47  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.96  train acc: 0.17  val loss:  7.14  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.14  train acc: 0.29  val loss:  7.14  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.40  train acc: 0.25  val loss:  7.14  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.78  train acc: 0.20  val loss:  7.14  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.58  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.72  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.60  train acc: 0.22  val loss:  7.14  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.71  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.69  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.36  train acc: 0.26  val loss:  7.14  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.85  train acc: 0.19  val loss:  7.14  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.47  train acc: 0.24  val loss:  7.14  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.57  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.50  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.24  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.57  train acc: 0.22  val loss:  7.14  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.90  train acc: 0.32  val loss:  7.14  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.40  train acc: 0.26  val loss:  7.14  valid acc: 0.00\n",
      "-> sin layer idx: 11 , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.41  train acc: 0.00  val loss:  9.38  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.78  train acc: 0.28  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.65  train acc: 0.16  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.82  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.92  train acc: 0.27  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.31  train acc: 0.20  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.49  train acc: 0.18  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.67  train acc: 0.30  val loss:  7.06  valid acc: 0.00\n",
      "sin i= 4000 train loss:  5.98  train acc: 0.25  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.78  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.09  train acc: 0.23  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.96  train acc: 0.25  val loss:  7.05  valid acc: 0.00\n",
      "sin i= 6000 train loss:  6.04  train acc: 0.23  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.87  train acc: 0.26  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.73  train acc: 0.27  val loss:  7.06  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.20  train acc: 0.21  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 8000 train loss:  6.04  train acc: 0.23  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.07  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.33  train acc: 0.18  val loss:  7.06  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.10  train acc: 0.21  val loss:  7.09  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.52  train acc: 0.30  val loss:  7.12  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.35  train acc: 0.18  val loss:  7.03  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.27  train acc: 0.19  val loss:  7.04  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.94  train acc: 0.23  val loss:  7.02  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.58  train acc: 0.28  val loss:  7.05  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.07  train acc: 0.21  val loss:  7.06  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.81  train acc: 0.24  val loss:  7.06  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.69  train acc: 0.26  val loss:  7.11  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.57  train acc: 0.13  val loss:  7.11  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.96  train acc: 0.21  val loss:  7.11  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.29  train acc: 0.18  val loss:  7.05  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.83  train acc: 0.23  val loss:  7.06  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.86  train acc: 0.23  val loss:  7.08  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.40  train acc: 0.29  val loss:  7.05  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.11  train acc: 0.19  val loss:  7.08  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.66  train acc: 0.25  val loss:  7.11  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.22  train acc: 0.32  val loss:  7.06  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.96  train acc: 0.20  val loss:  7.09  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.69  train acc: 0.25  val loss:  7.08  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.46  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.66  train acc: 0.25  val loss:  7.11  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.71  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.57  train acc: 0.26  val loss:  7.05  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.26  train acc: 0.15  val loss:  7.07  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.84  train acc: 0.22  val loss:  7.06  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.87  train acc: 0.20  val loss:  7.10  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.69  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.18  train acc: 0.30  val loss:  7.09  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.21  train acc: 0.30  val loss:  7.10  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.65  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=25000 train loss:  5.03  train acc: 0.33  val loss:  7.10  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.66  train acc: 0.23  val loss:  7.09  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.62  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.81  train acc: 0.20  val loss:  7.10  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.15  train acc: 0.15  val loss:  7.10  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.28  train acc: 0.28  val loss:  7.09  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.26  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.56  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.74  train acc: 0.21  val loss:  7.10  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.92  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.39  train acc: 0.26  val loss:  7.10  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.23  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.54  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.57  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.86  train acc: 0.19  val loss:  7.10  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.44  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.95  train acc: 0.32  val loss:  7.10  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.51  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.41  train acc: 0.25  val loss:  7.10  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.16  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.46  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.74  train acc: 0.20  val loss:  7.10  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.98  train acc: 0.16  val loss:  7.10  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.12  train acc: 0.30  val loss:  7.10  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.80  train acc: 0.20  val loss:  7.10  valid acc: 0.00\n",
      "sin i=37500 train loss:  5.03  train acc: 0.30  val loss:  7.10  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.39  train acc: 0.25  val loss:  7.10  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.26  train acc: 0.27  val loss:  7.10  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.89  train acc: 0.18  val loss:  7.10  valid acc: 0.00\n",
      "sin i=39500 train loss:  5.00  train acc: 0.30  val loss:  7.10  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.23  train acc: 0.27  val loss:  7.10  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.47  train acc: 0.25  val loss:  7.10  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.45  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.98  train acc: 0.17  val loss:  7.10  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.13  train acc: 0.29  val loss:  7.10  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.41  train acc: 0.25  val loss:  7.10  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.78  train acc: 0.20  val loss:  7.10  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.58  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.73  train acc: 0.21  val loss:  7.10  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.59  train acc: 0.22  val loss:  7.10  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.71  train acc: 0.21  val loss:  7.10  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.68  train acc: 0.21  val loss:  7.10  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.34  train acc: 0.26  val loss:  7.10  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.86  train acc: 0.19  val loss:  7.10  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.47  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.57  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.51  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.25  train acc: 0.27  val loss:  7.10  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.57  train acc: 0.22  val loss:  7.10  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.92  train acc: 0.32  val loss:  7.10  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.40  train acc: 0.26  val loss:  7.10  valid acc: 0.00\n",
      "-> sin layer idx: 10 , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.40  train acc: 0.00  val loss:  8.69  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.82  train acc: 0.28  val loss:  7.06  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.69  train acc: 0.16  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.84  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.94  train acc: 0.27  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.32  train acc: 0.20  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.51  train acc: 0.18  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.69  train acc: 0.30  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 4000 train loss:  6.00  train acc: 0.25  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.80  train acc: 0.28  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.10  train acc: 0.23  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.96  train acc: 0.25  val loss:  7.07  valid acc: 0.00\n",
      "sin i= 6000 train loss:  6.03  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.88  train acc: 0.26  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.75  train acc: 0.27  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.19  train acc: 0.21  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 8000 train loss:  6.04  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.07  train acc: 0.21  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.32  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.10  train acc: 0.21  val loss:  7.10  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.51  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.37  train acc: 0.18  val loss:  7.07  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.27  train acc: 0.19  val loss:  7.07  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.96  train acc: 0.23  val loss:  7.05  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.59  train acc: 0.28  val loss:  7.08  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.07  train acc: 0.21  val loss:  7.08  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.83  train acc: 0.24  val loss:  7.07  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.71  train acc: 0.26  val loss:  7.11  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.57  train acc: 0.13  val loss:  7.11  valid acc: 0.00\n",
      "sin i=14500 train loss:  5.97  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.29  train acc: 0.18  val loss:  7.08  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.83  train acc: 0.23  val loss:  7.08  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.87  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.41  train acc: 0.29  val loss:  7.09  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.13  train acc: 0.19  val loss:  7.10  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.67  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.23  train acc: 0.32  val loss:  7.08  valid acc: 0.00\n",
      "sin i=18500 train loss:  5.97  train acc: 0.20  val loss:  7.11  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.71  train acc: 0.25  val loss:  7.10  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.47  train acc: 0.28  val loss:  7.10  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.66  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.72  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.58  train acc: 0.26  val loss:  7.07  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.26  train acc: 0.15  val loss:  7.09  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.85  train acc: 0.22  val loss:  7.10  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.89  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.69  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.18  train acc: 0.30  val loss:  7.12  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.22  train acc: 0.30  val loss:  7.12  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.66  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=25000 train loss:  5.04  train acc: 0.33  val loss:  7.12  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.68  train acc: 0.23  val loss:  7.11  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.63  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.81  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.17  train acc: 0.15  val loss:  7.13  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.28  train acc: 0.28  val loss:  7.11  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.27  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.58  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.75  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.93  train acc: 0.18  val loss:  7.12  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.40  train acc: 0.26  val loss:  7.12  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.23  train acc: 0.28  val loss:  7.12  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.56  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.59  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.86  train acc: 0.19  val loss:  7.12  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.44  train acc: 0.24  val loss:  7.12  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.97  train acc: 0.32  val loss:  7.13  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.52  train acc: 0.24  val loss:  7.12  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.42  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.18  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.47  train acc: 0.24  val loss:  7.13  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.74  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=36000 train loss:  5.99  train acc: 0.16  val loss:  7.12  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.12  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.81  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=37500 train loss:  5.03  train acc: 0.30  val loss:  7.13  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.39  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.26  train acc: 0.27  val loss:  7.12  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.91  train acc: 0.18  val loss:  7.12  valid acc: 0.00\n",
      "sin i=39500 train loss:  5.01  train acc: 0.30  val loss:  7.12  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.24  train acc: 0.27  val loss:  7.12  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.47  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.46  train acc: 0.24  val loss:  7.12  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.98  train acc: 0.17  val loss:  7.12  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.14  train acc: 0.29  val loss:  7.12  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.40  train acc: 0.25  val loss:  7.12  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.78  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.57  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.73  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.61  train acc: 0.22  val loss:  7.12  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.72  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.69  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.35  train acc: 0.26  val loss:  7.12  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.87  train acc: 0.19  val loss:  7.12  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.48  train acc: 0.24  val loss:  7.12  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.58  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.52  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.26  train acc: 0.27  val loss:  7.12  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.60  train acc: 0.22  val loss:  7.12  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.91  train acc: 0.32  val loss:  7.13  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.41  train acc: 0.26  val loss:  7.12  valid acc: 0.00\n",
      "-> sin layer idx: 9  , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.38  train acc: 0.00  val loss:  8.82  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.85  train acc: 0.28  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.71  train acc: 0.16  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.86  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 2000 train loss:  5.95  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.34  train acc: 0.20  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.52  train acc: 0.18  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.71  train acc: 0.30  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 4000 train loss:  6.00  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.81  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.12  train acc: 0.23  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 5500 train loss:  5.98  train acc: 0.25  val loss:  7.08  valid acc: 0.00\n",
      "sin i= 6000 train loss:  6.05  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.90  train acc: 0.26  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.77  train acc: 0.27  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.22  train acc: 0.21  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 8000 train loss:  6.05  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.09  train acc: 0.21  val loss:  7.18  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.35  train acc: 0.18  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 9500 train loss:  6.13  train acc: 0.21  val loss:  7.11  valid acc: 0.00\n",
      "sin i=10000 train loss:  5.53  train acc: 0.30  val loss:  7.17  valid acc: 0.00\n",
      "sin i=10500 train loss:  6.38  train acc: 0.18  val loss:  7.09  valid acc: 0.00\n",
      "sin i=11000 train loss:  6.28  train acc: 0.19  val loss:  7.10  valid acc: 0.00\n",
      "sin i=11500 train loss:  5.97  train acc: 0.23  val loss:  7.09  valid acc: 0.00\n",
      "sin i=12000 train loss:  5.60  train acc: 0.28  val loss:  7.12  valid acc: 0.00\n",
      "sin i=12500 train loss:  6.10  train acc: 0.21  val loss:  7.13  valid acc: 0.00\n",
      "sin i=13000 train loss:  5.82  train acc: 0.24  val loss:  7.10  valid acc: 0.00\n",
      "sin i=13500 train loss:  5.72  train acc: 0.26  val loss:  7.16  valid acc: 0.00\n",
      "sin i=14000 train loss:  6.59  train acc: 0.13  val loss:  7.15  valid acc: 0.00\n",
      "sin i=14500 train loss:  6.00  train acc: 0.21  val loss:  7.15  valid acc: 0.00\n",
      "sin i=15000 train loss:  6.31  train acc: 0.18  val loss:  7.10  valid acc: 0.00\n",
      "sin i=15500 train loss:  5.84  train acc: 0.23  val loss:  7.10  valid acc: 0.00\n",
      "sin i=16000 train loss:  5.89  train acc: 0.23  val loss:  7.13  valid acc: 0.00\n",
      "sin i=16500 train loss:  5.43  train acc: 0.29  val loss:  7.11  valid acc: 0.00\n",
      "sin i=17000 train loss:  6.14  train acc: 0.19  val loss:  7.11  valid acc: 0.00\n",
      "sin i=17500 train loss:  5.68  train acc: 0.25  val loss:  7.15  valid acc: 0.00\n",
      "sin i=18000 train loss:  5.25  train acc: 0.32  val loss:  7.12  valid acc: 0.00\n",
      "sin i=18500 train loss:  6.00  train acc: 0.20  val loss:  7.15  valid acc: 0.00\n",
      "sin i=19000 train loss:  5.73  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i=19500 train loss:  5.48  train acc: 0.28  val loss:  7.13  valid acc: 0.00\n",
      "sin i=20000 train loss:  5.67  train acc: 0.25  val loss:  7.16  valid acc: 0.00\n",
      "sin i=20500 train loss:  5.74  train acc: 0.23  val loss:  7.17  valid acc: 0.00\n",
      "sin i=21000 train loss:  5.59  train acc: 0.26  val loss:  7.11  valid acc: 0.00\n",
      "sin i=21500 train loss:  6.27  train acc: 0.15  val loss:  7.12  valid acc: 0.00\n",
      "sin i=22000 train loss:  5.85  train acc: 0.22  val loss:  7.13  valid acc: 0.00\n",
      "sin i=22500 train loss:  5.93  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i=23000 train loss:  5.71  train acc: 0.23  val loss:  7.17  valid acc: 0.00\n",
      "sin i=23500 train loss:  5.20  train acc: 0.30  val loss:  7.15  valid acc: 0.00\n",
      "sin i=24000 train loss:  5.25  train acc: 0.30  val loss:  7.15  valid acc: 0.00\n",
      "sin i=24500 train loss:  5.67  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=25000 train loss:  5.05  train acc: 0.33  val loss:  7.16  valid acc: 0.00\n",
      "sin i=25500 train loss:  5.68  train acc: 0.23  val loss:  7.14  valid acc: 0.00\n",
      "sin i=26000 train loss:  5.65  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=26500 train loss:  5.83  train acc: 0.20  val loss:  7.15  valid acc: 0.00\n",
      "sin i=27000 train loss:  6.18  train acc: 0.15  val loss:  7.16  valid acc: 0.00\n",
      "sin i=27500 train loss:  5.30  train acc: 0.28  val loss:  7.15  valid acc: 0.00\n",
      "sin i=28000 train loss:  5.29  train acc: 0.28  val loss:  7.16  valid acc: 0.00\n",
      "sin i=28500 train loss:  5.59  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=29000 train loss:  5.75  train acc: 0.21  val loss:  7.15  valid acc: 0.00\n",
      "sin i=29500 train loss:  5.94  train acc: 0.18  val loss:  7.15  valid acc: 0.00\n",
      "sin i=30000 train loss:  5.41  train acc: 0.26  val loss:  7.16  valid acc: 0.00\n",
      "sin i=30500 train loss:  5.24  train acc: 0.28  val loss:  7.15  valid acc: 0.00\n",
      "sin i=31000 train loss:  5.58  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=31500 train loss:  5.60  train acc: 0.23  val loss:  7.16  valid acc: 0.00\n",
      "sin i=32000 train loss:  5.87  train acc: 0.19  val loss:  7.15  valid acc: 0.00\n",
      "sin i=32500 train loss:  5.46  train acc: 0.24  val loss:  7.16  valid acc: 0.00\n",
      "sin i=33000 train loss:  4.99  train acc: 0.32  val loss:  7.16  valid acc: 0.00\n",
      "sin i=33500 train loss:  5.53  train acc: 0.24  val loss:  7.16  valid acc: 0.00\n",
      "sin i=34000 train loss:  5.43  train acc: 0.25  val loss:  7.16  valid acc: 0.00\n",
      "sin i=34500 train loss:  5.20  train acc: 0.28  val loss:  7.16  valid acc: 0.00\n",
      "sin i=35000 train loss:  5.50  train acc: 0.24  val loss:  7.16  valid acc: 0.00\n",
      "sin i=35500 train loss:  5.76  train acc: 0.20  val loss:  7.16  valid acc: 0.00\n",
      "sin i=36000 train loss:  6.00  train acc: 0.16  val loss:  7.16  valid acc: 0.00\n",
      "sin i=36500 train loss:  5.13  train acc: 0.30  val loss:  7.16  valid acc: 0.00\n",
      "sin i=37000 train loss:  5.82  train acc: 0.20  val loss:  7.16  valid acc: 0.00\n",
      "sin i=37500 train loss:  5.06  train acc: 0.30  val loss:  7.16  valid acc: 0.00\n",
      "sin i=38000 train loss:  5.41  train acc: 0.25  val loss:  7.16  valid acc: 0.00\n",
      "sin i=38500 train loss:  5.29  train acc: 0.27  val loss:  7.16  valid acc: 0.00\n",
      "sin i=39000 train loss:  5.92  train acc: 0.18  val loss:  7.16  valid acc: 0.00\n",
      "sin i=39500 train loss:  5.03  train acc: 0.30  val loss:  7.16  valid acc: 0.00\n",
      "sin i=40000 train loss:  5.26  train acc: 0.27  val loss:  7.16  valid acc: 0.00\n",
      "sin i=40500 train loss:  5.48  train acc: 0.25  val loss:  7.15  valid acc: 0.00\n",
      "sin i=41000 train loss:  5.48  train acc: 0.24  val loss:  7.15  valid acc: 0.00\n",
      "sin i=41500 train loss:  5.98  train acc: 0.17  val loss:  7.15  valid acc: 0.00\n",
      "sin i=42000 train loss:  5.15  train acc: 0.29  val loss:  7.16  valid acc: 0.00\n",
      "sin i=42500 train loss:  5.42  train acc: 0.25  val loss:  7.15  valid acc: 0.00\n",
      "sin i=43000 train loss:  5.80  train acc: 0.20  val loss:  7.16  valid acc: 0.00\n",
      "sin i=43500 train loss:  5.61  train acc: 0.23  val loss:  7.15  valid acc: 0.00\n",
      "sin i=44000 train loss:  5.76  train acc: 0.21  val loss:  7.15  valid acc: 0.00\n",
      "sin i=44500 train loss:  5.63  train acc: 0.22  val loss:  7.16  valid acc: 0.00\n",
      "sin i=45000 train loss:  5.73  train acc: 0.21  val loss:  7.16  valid acc: 0.00\n",
      "sin i=45500 train loss:  5.72  train acc: 0.21  val loss:  7.15  valid acc: 0.00\n",
      "sin i=46000 train loss:  5.36  train acc: 0.26  val loss:  7.15  valid acc: 0.00\n",
      "sin i=46500 train loss:  5.89  train acc: 0.19  val loss:  7.16  valid acc: 0.00\n",
      "sin i=47000 train loss:  5.50  train acc: 0.24  val loss:  7.16  valid acc: 0.00\n",
      "sin i=47500 train loss:  5.58  train acc: 0.23  val loss:  7.16  valid acc: 0.00\n",
      "sin i=48000 train loss:  5.53  train acc: 0.23  val loss:  7.16  valid acc: 0.00\n",
      "sin i=48500 train loss:  5.27  train acc: 0.27  val loss:  7.15  valid acc: 0.00\n",
      "sin i=49000 train loss:  5.60  train acc: 0.22  val loss:  7.16  valid acc: 0.00\n",
      "sin i=49500 train loss:  4.94  train acc: 0.32  val loss:  7.16  valid acc: 0.00\n",
      "sin i=50000 train loss:  5.43  train acc: 0.26  val loss:  7.15  valid acc: 0.00\n",
      "-> sin layer idx: 8  , best valid accuracy: 0.00, test accuracy: 0.00\n",
      "sin i=    0 train loss: 62.46  train acc: 0.00  val loss:  8.05  valid acc: 0.00\n",
      "sin i=  500 train loss:  5.92  train acc: 0.28  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 1000 train loss:  6.75  train acc: 0.16  val loss:  7.11  valid acc: 0.00\n",
      "sin i= 1500 train loss:  5.90  train acc: 0.28  val loss:  7.15  valid acc: 0.00\n",
      "sin i= 2000 train loss:  6.00  train acc: 0.27  val loss:  7.15  valid acc: 0.00\n",
      "sin i= 2500 train loss:  6.38  train acc: 0.20  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 3000 train loss:  6.55  train acc: 0.18  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 3500 train loss:  5.75  train acc: 0.30  val loss:  7.10  valid acc: 0.00\n",
      "sin i= 4000 train loss:  6.04  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 4500 train loss:  5.85  train acc: 0.28  val loss:  7.16  valid acc: 0.00\n",
      "sin i= 5000 train loss:  6.16  train acc: 0.23  val loss:  7.19  valid acc: 0.00\n",
      "sin i= 5500 train loss:  6.02  train acc: 0.25  val loss:  7.13  valid acc: 0.00\n",
      "sin i= 6000 train loss:  6.10  train acc: 0.23  val loss:  7.18  valid acc: 0.00\n",
      "sin i= 6500 train loss:  5.95  train acc: 0.26  val loss:  7.16  valid acc: 0.00\n",
      "sin i= 7000 train loss:  5.81  train acc: 0.27  val loss:  7.14  valid acc: 0.00\n",
      "sin i= 7500 train loss:  6.24  train acc: 0.21  val loss:  7.12  valid acc: 0.00\n",
      "sin i= 8000 train loss:  6.08  train acc: 0.23  val loss:  7.18  valid acc: 0.00\n",
      "sin i= 8500 train loss:  6.12  train acc: 0.21  val loss:  7.20  valid acc: 0.00\n",
      "sin i= 9000 train loss:  6.39  train acc: 0.18  val loss:  7.12  valid acc: 0.00\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mKeyboardInterrupt\u001b[39m                         Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[173]\u001b[39m\u001b[32m, line 33\u001b[39m\n\u001b[32m     31\u001b[39m \u001b[38;5;66;03m# add l1 regularization of all params to the loss\u001b[39;00m\n\u001b[32m     32\u001b[39m loss = torch.nn.functional.cross_entropy(logits, y)\n\u001b[32m---> \u001b[39m\u001b[32m33\u001b[39m loss += \u001b[32m0.01\u001b[39m * \u001b[38;5;28;43msum\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mabs\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43msum\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mprobe\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# L1-reg\u001b[39;00m\n\u001b[32m     34\u001b[39m loss.backward()\n\u001b[32m     35\u001b[39m optimizer.step()\n",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[173]\u001b[39m\u001b[32m, line 33\u001b[39m, in \u001b[36m<genexpr>\u001b[39m\u001b[34m(.0)\u001b[39m\n\u001b[32m     31\u001b[39m \u001b[38;5;66;03m# add l1 regularization of all params to the loss\u001b[39;00m\n\u001b[32m     32\u001b[39m loss = torch.nn.functional.cross_entropy(logits, y)\n\u001b[32m---> \u001b[39m\u001b[32m33\u001b[39m loss += \u001b[32m0.01\u001b[39m * \u001b[38;5;28msum\u001b[39m(p.abs().sum() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m probe.parameters())  \u001b[38;5;66;03m# L1-reg\u001b[39;00m\n\u001b[32m     34\u001b[39m loss.backward()\n\u001b[32m     35\u001b[39m optimizer.step()\n",
      "\u001b[31mKeyboardInterrupt\u001b[39m: "
     ]
    }
   ],
   "source": [
    "test_extracted = {}\n",
    "\n",
    "test_accuracies = {\"sin\": {}, \"bin\": {}, \"lin\": {}, \"log\": {}}\n",
    "\n",
    "basis_name = \"sin\"\n",
    "basis_embs = basis_embs_sin\n",
    "\n",
    "for layer_idx in reversed(range(len(train_hidden_states))):\n",
    "\n",
    "    torch.manual_seed(0)\n",
    "    probe = ClassifierProbe(\n",
    "        emb_dim=train_hidden_states[0].shape[-1],\n",
    "        hidden_dim=100,\n",
    "        basis=basis_embs,\n",
    "        heldout_mask=test_mask,\n",
    "    ).to(device)\n",
    "\n",
    "    optimizer = torch.optim.Adam(probe.parameters(), lr=1e-3)  # TODO: try with weight_decay=1e-3\n",
    "    scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.01, total_iters=30000)\n",
    "\n",
    "    rng = torch.Generator().manual_seed(0)\n",
    "    best_val_acc = -1\n",
    "    best_ckpt = None\n",
    "    for i in range(50000+1):\n",
    "        probe.train()\n",
    "        optimizer.zero_grad()\n",
    "        minibatch_idcs = torch.randint(len(train_labels), size=(128,), generator=rng)\n",
    "        x = train_hidden_states[layer_idx][minibatch_idcs].float().to(device)\n",
    "        y = train_labels[minibatch_idcs].to(device)\n",
    "        logits = probe(x, holdout_eval_tokens=False)\n",
    "        # add l1 regularization of all params to the loss\n",
    "        loss = torch.nn.functional.cross_entropy(logits, y)\n",
    "        loss += 0.01 * sum(p.abs().sum() for p in probe.parameters())  # L1-reg\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        scheduler.step()\n",
    "        if i % 500 == 0:\n",
    "            train_acc = (logits.argmax(dim=-1) == y).float().mean().item()\n",
    "            probe.eval()\n",
    "            with torch.no_grad():\n",
    "                valid_logits = probe(valid_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)  # TODO: holdout_eval_tokens switched to False -- incompatible with using model's own predictions as labels!\n",
    "                valid_loss = torch.nn.functional.cross_entropy(valid_logits, valid_labels)\n",
    "                valid_accuracy = (valid_logits.argmax(dim=-1) == valid_labels).float().mean().item()\n",
    "                if valid_accuracy > best_val_acc:\n",
    "                    best_val_acc = valid_accuracy\n",
    "                    best_ckpt = probe.state_dict()\n",
    "            print(f\"{basis_name} {i=:>5} train loss: {loss.item():5.2f}  train acc: {train_acc:.2f}  val loss: {valid_loss.item():5.2f}  valid acc: {valid_accuracy:.2f}\")\n",
    "    probe.load_state_dict(best_ckpt)\n",
    "    probe.eval()\n",
    "    with torch.no_grad():\n",
    "        test_logits = probe(test_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)\n",
    "        test_extracted[layer_idx] = test_logits.argmax(dim=-1)\n",
    "        test_accuracy = (test_extracted[layer_idx] == test_labels).float().mean().item()\n",
    "\n",
    "    test_accuracies[basis_name][layer_idx] = test_accuracy\n",
    "    print(f\"-> {basis_name} layer idx: {layer_idx:<3}, best valid accuracy: {best_val_acc:.2f}, test accuracy: {test_accuracy:.2f}\")\n",
    "    # best test_accuracy so far=0.45"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "id": "48a64b28-a3f0-42d3-abff-22add531a611",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train loss: 64.2337875366211 train acc: 0.0009765625 LR: [0.0009999505]\n",
      "step=0        0.0% \n",
      "Train loss: 3.203324317932129 train acc: 0.6171875 LR: [0.0009504505000000151]\n",
      "step=1000    37.1% \n",
      "Train loss: 3.225451946258545 train acc: 0.6845703125 LR: [0.0009009505000000161]\n",
      "step=2000    49.6% \n",
      "Train loss: 3.116640090942383 train acc: 0.6923828125 LR: [0.0008514505000000177]\n",
      "step=3000    51.1% \n",
      "Train loss: 3.18668532371521 train acc: 0.673828125 LR: [0.0008019505000000194]\n",
      "step=4000    51.7% \n",
      "Train loss: 3.021243095397949 train acc: 0.744140625 LR: [0.0007524505000000225]\n",
      "step=5000    54.5% \n",
      "Train loss: 3.089547634124756 train acc: 0.720703125 LR: [0.0007029505000000275]\n",
      "step=6000    52.2% \n",
      "Train loss: 2.987320899963379 train acc: 0.724609375 LR: [0.0006534505000000315]\n",
      "step=7000    48.1% \n",
      "Train loss: 2.9233715534210205 train acc: 0.724609375 LR: [0.0006039505000000363]\n",
      "step=8000    51.7% \n",
      "Train loss: 2.9112982749938965 train acc: 0.7109375 LR: [0.0005544505000000445]\n",
      "step=9000    58.0% \n",
      "Train loss: 2.858201026916504 train acc: 0.7373046875 LR: [0.0005049505000000547]\n",
      "step=10000   52.8% \n",
      "Train loss: 2.807520627975464 train acc: 0.7509765625 LR: [0.0004554505000000548]\n",
      "step=11000   51.1% \n",
      "Train loss: 2.818568229675293 train acc: 0.73046875 LR: [0.0004059505000000473]\n",
      "step=12000   55.8% \n",
      "Train loss: 2.7056360244750977 train acc: 0.7431640625 LR: [0.0003564505000000392]\n",
      "step=13000   50.9% \n",
      "Train loss: 2.5240285396575928 train acc: 0.7841796875 LR: [0.000306950500000029]\n",
      "step=14000   57.8% \n",
      "Train loss: 2.6640219688415527 train acc: 0.7509765625 LR: [0.00025745050000001773]\n",
      "step=15000   53.0% \n",
      "Train loss: 2.56095027923584 train acc: 0.7568359375 LR: [0.0002079505000000123]\n",
      "step=16000   57.1% \n",
      "Train loss: 2.543759346008301 train acc: 0.7548828125 LR: [0.00015845050000001058]\n",
      "step=17000   56.0% \n",
      "Train loss: 2.46024227142334 train acc: 0.7783203125 LR: [0.00010895050000000908]\n",
      "step=18000   55.2% \n",
      "Train loss: 2.447075843811035 train acc: 0.7607421875 LR: [5.945050000000361e-05]\n",
      "step=19000   57.3% \n",
      "Train loss: 2.419114589691162 train acc: 0.7529296875 LR: [1.000000000000058e-05]\n",
      "step=20000   55.2% \n",
      "Train loss: 2.437913417816162 train acc: 0.78125 LR: [1.000000000000058e-05]\n",
      "step=21000   55.8% \n",
      "Train loss: 2.3444290161132812 train acc: 0.763671875 LR: [1.000000000000058e-05]\n",
      "step=22000   56.9% \n",
      "Train loss: 2.484499216079712 train acc: 0.736328125 LR: [1.000000000000058e-05]\n",
      "step=23000   55.6% \n",
      "Train loss: 2.4394032955169678 train acc: 0.7509765625 LR: [1.000000000000058e-05]\n",
      "step=24000   57.8% \n",
      "Train loss: 2.321702480316162 train acc: 0.791015625 LR: [1.000000000000058e-05]\n",
      "step=25000   55.2% \n",
      "Train loss: 2.3407979011535645 train acc: 0.7783203125 LR: [1.000000000000058e-05]\n",
      "step=26000   57.5% \n",
      "Train loss: 2.3601112365722656 train acc: 0.755859375 LR: [1.000000000000058e-05]\n",
      "step=27000   56.5% \n",
      "Train loss: 2.4831812381744385 train acc: 0.7412109375 LR: [1.000000000000058e-05]\n",
      "step=28000   56.9% \n",
      "Train loss: 2.353656530380249 train acc: 0.7763671875 LR: [1.000000000000058e-05]\n",
      "step=29000   55.0% \n",
      "Train loss: 2.388251304626465 train acc: 0.7509765625 LR: [1.000000000000058e-05]\n",
      "step=30000   57.3% \n",
      "Test accuracy: 0.42278480529785156\n"
     ]
    }
   ],
   "source": [
    "rng = torch.Generator().manual_seed(0)\n",
    "rng_py = random.Random(0)\n",
    "\n",
    "assert list(train_hidden_states.keys()) == list(range(len(train_hidden_states)))\n",
    "train_hidden_states_tensor = torch.stack(list(train_hidden_states.values()), dim=0)\n",
    "\n",
    "histories = []\n",
    "\n",
    "probe = ClassifierProbe(\n",
    "    emb_dim=train_hidden_states[0].shape[-1],\n",
    "    hidden_dim=100,\n",
    "    basis=basis_embs_sin,\n",
    "    heldout_mask=test_mask,\n",
    ").to(device)\n",
    "\n",
    "optimizer = torch.optim.Adam(probe.parameters(), lr=1e-3, weight_decay=0)\n",
    "scheduler = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.01, total_iters=20000)\n",
    "\n",
    "train_layers = list(range(len(train_hidden_states)-2, len(train_hidden_states)))\n",
    "\n",
    "for step in range(30000+1):\n",
    "    probe.train()\n",
    "    optimizer.zero_grad()\n",
    "    layer_idcs = torch.tensor(random.choices(train_layers, k=1024))\n",
    "    minibatch_idcs = torch.randint(len(train_labels), size=(1024,), generator=rng)\n",
    "    x = train_hidden_states_tensor[layer_idcs, minibatch_idcs].float().to(device)\n",
    "    y = train_labels[minibatch_idcs].to(device)\n",
    "    train_logits = probe(x, holdout_eval_tokens=False)\n",
    "    loss = torch.nn.functional.cross_entropy(train_logits, y)\n",
    "    loss += 1e-2 * sum(p.abs().sum() for p in probe.parameters()) # L1 regularization\n",
    "    loss.backward()\n",
    "    optimizer.step()\n",
    "    scheduler.step()\n",
    "\n",
    "    best_val_acc = -1\n",
    "    best_ckpt = probe.state_dict()\n",
    "\n",
    "    if step % 1000 == 0:\n",
    "        print(\"Train loss: %s train acc: %s LR: %s\" % (loss.item(), \n",
    "                                                       sum(train_logits.argmax(-1) == y).item() / len(y),\n",
    "                                                       scheduler.get_last_lr()))\n",
    "        probe.eval()\n",
    "        valid_accs = []\n",
    "        with torch.no_grad():\n",
    "            print(f\"{step=:<5}\", end=\"  \")\n",
    "            # for layer_idx in range(0, len(train_hidden_states)):\n",
    "            layer_idx = len(train_hidden_states)-1\n",
    "            valid_logits = probe(valid_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)\n",
    "            valid_acc = (valid_logits.argmax(dim=-1) == valid_labels).float().mean().item()\n",
    "            valid_accs.append(valid_acc)\n",
    "            histories.append({\"step\": step, \"eval_layer\": layer_idx, \"valid_acc\": valid_acc})\n",
    "            acc_out = f\"{valid_acc:>6.1%}\"\n",
    "            if layer_idx not in train_layers:\n",
    "                print('\\033[94m' + acc_out + '\\033[0m', end=\" \")\n",
    "            else:\n",
    "                print(acc_out, end=\" \")\n",
    "            print()\n",
    "            valid_acc = sum(valid_accs) / len(valid_accs)\n",
    "            if valid_acc > best_val_acc:\n",
    "                best_val_acc = valid_acc\n",
    "                best_ckpt = probe.state_dict()\n",
    "\n",
    "probe.load_state_dict(best_ckpt)\n",
    "probe.eval()\n",
    "with torch.no_grad():\n",
    "    layer_idx = len(train_hidden_states)-1\n",
    "    test_logits = probe(test_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)\n",
    "    test_extracted[layer_idx] = test_logits.argmax(dim=-1)\n",
    "    test_accuracy = (test_extracted[layer_idx] == test_labels).float().mean().item()\n",
    "    print(\"Test accuracy: %s\" % test_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "0177b63b-b198-4246-aed7-4be6fc2b6b13",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test accuracy: 0.4379746913909912\n"
     ]
    }
   ],
   "source": [
    "\n",
    "probe.load_state_dict(best_ckpt)\n",
    "probe.eval()\n",
    "with torch.no_grad():\n",
    "    layer_idx = len(train_hidden_states)-1\n",
    "    test_logits = probe(test_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)\n",
    "    test_extracted[layer_idx] = test_logits.argmax(dim=-1)\n",
    "    test_accuracy = (test_extracted[layer_idx] == test_labels).float().mean().item()\n",
    "    print(\"Test accuracy: %s\" % test_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "63664ed2-695f-42da-9ec3-5ec17f8743f9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test accuracy: 0.43544304370880127\n"
     ]
    }
   ],
   "source": [
    "# last 2 layers, multi L1=5e-3\n",
    "\n",
    "probe.load_state_dict(best_ckpt)\n",
    "probe.eval()\n",
    "with torch.no_grad():\n",
    "    layer_idx = len(train_hidden_states)-1\n",
    "    test_logits = probe(test_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)\n",
    "    test_extracted[layer_idx] = test_logits.argmax(dim=-1)\n",
    "    test_accuracy = (test_extracted[layer_idx] == test_labels).float().mean().item()\n",
    "    print(\"Test accuracy: %s\" % test_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "bb93204b-cabe-4c20-889e-0e885ca7a6c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test accuracy: 0.28354430198669434\n"
     ]
    }
   ],
   "source": [
    "# last 3 layers, add+multi\n",
    "\n",
    "probe.load_state_dict(best_ckpt)\n",
    "probe.eval()\n",
    "with torch.no_grad():\n",
    "    layer_idx = len(train_hidden_states)-1\n",
    "    test_logits = probe(test_hidden_states[layer_idx].float().to(device), holdout_eval_tokens=False)\n",
    "    test_extracted[layer_idx] = test_logits.argmax(dim=-1)\n",
    "    test_accuracy = (test_extracted[layer_idx] == test_labels).float().mean().item()\n",
    "    print(\"Test accuracy: %s\" % test_accuracy)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "id": "f8349f5c7a383567",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-20T22:04:30.698955Z",
     "start_time": "2025-09-20T22:04:30.577310Z"
    }
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.DataFrame({\"probe_l%s\" % k: v.cpu() for k, v in test_extracted.items()})\n",
    "df[\"model_predictions\"] = test_preds_t.cpu()\n",
    "df[\"inputs\"] = [make_str_input(op) for op in test_inputs]\n",
    "df[\"labels\"] = test_labels_ref.cpu()\n",
    "df.to_csv(\"/home/xstefan3/tmp/pycharm_project_437/notebooks/logs/model_vs_probes_preds_llama3b_multi_210925_2.csv\", index=False)  # TODO: visualize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 176,
   "id": "9f1525c48a64f3ee",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(device(type='cpu'), device(type='cuda', index=0))"
      ]
     },
     "execution_count": 176,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_preds_t.device, test_labels.device"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 177,
   "id": "e253b515feab91e9",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T10:18:23.312638Z",
     "start_time": "2025-09-21T10:18:23.287473Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ever computed correctly internally and NOT correctly returned (out of incorrect): tensor(0.2105)\n",
      "NOT ever computed correctly internally and correctly returned: (out of correct) tensor(0.2717)\n",
      "Ever computed correctly internally and correctly returned (out of correct): tensor(0.7283)\n",
      "NOT ever computed correctly internally and NOT correctly returned (out of incorrect): tensor(0.7895)\n",
      "Ever computed as returned: tensor(0.6734)\n"
     ]
    }
   ],
   "source": [
    "is_result_computed_per_l = torch.vstack([test_extracted[l_key] == test_labels_ref for l_key in test_extracted])\n",
    "is_result_computed_internally = torch.any(is_result_computed_per_l, dim=0).cpu()\n",
    "returned_val_is_computed = torch.vstack([test_extracted[l_key].cpu() == test_preds_t for l_key in test_extracted])\n",
    "is_result_returned = (test_preds_t == test_labels_ref.cpu())\n",
    "print(\n",
    "      \"Ever computed correctly internally and NOT correctly returned (out of incorrect): %s\\n\"\n",
    "      \"NOT ever computed correctly internally and correctly returned: (out of correct) %s\\n\"\n",
    "      \"Ever computed correctly internally and correctly returned (out of correct): %s\\n\"\n",
    "      \"NOT ever computed correctly internally and NOT correctly returned (out of incorrect): %s\\n\"\n",
    "      \"Ever computed as returned: %s\"\n",
    "      % (\n",
    "         torch.sum(is_result_computed_internally & ~is_result_returned).item() / (~is_result_returned).sum(),\n",
    "         torch.sum(~is_result_computed_internally & is_result_returned).item() / is_result_returned.sum(),\n",
    "         torch.sum(is_result_computed_internally & is_result_returned).item() / is_result_returned.sum(),\n",
    "         torch.sum(~is_result_computed_internally & ~is_result_returned).item() / (~is_result_returned).sum(),\n",
    "         returned_val_is_computed.any(dim=0).sum() / len(returned_val_is_computed[0]))\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "a3f7ba0a324efb68",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-21T10:19:37.230223Z",
     "start_time": "2025-09-21T10:19:37.219407Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(140, tensor(30))"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 15 out of 25 correct contains multiplication involving \"1\" or \"2\" --> effectively solvable by addition\n",
    "# only 30 out of 140 in the case of Llama 3B\n",
    "(len(test_labels_ref[~is_result_computed_internally & is_result_returned]),\n",
    "torch.isin(test_inputs_t[~is_result_computed_internally & is_result_returned], torch.tensor([1, 2])).any(dim=1).sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c6a0ac0e3171398",
   "metadata": {},
   "outputs": [],
   "source": [
    "def solve_linear_layer(x: Tensor, y: Tensor) -> torch.nn.Linear:\n",
    "    if y.ndim == 1:\n",
    "        y = y.unsqueeze(-1)\n",
    "    if not y.is_floating_point():\n",
    "        y = y.float()\n",
    "   \n",
    "    lin = torch.nn.Linear(x.shape[-1], y.shape[-1], device=x.device)\n",
    "    x_aug = torch.cat([x, torch.ones(len(x), 1, device=x.device)], dim=1)\n",
    "    coeffs = torch.linalg.lstsq(x_aug, y).solution\n",
    "    w, b = coeffs[:-1], coeffs[-1]\n",
    "    with torch.no_grad():\n",
    "        lin.weight[:] = w.T\n",
    "        lin.bias[:] = b\n",
    "    return lin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "743ac235337625af",
   "metadata": {},
   "outputs": [],
   "source": [
    "for layer_idx in range(len(train_hidden_states)):\n",
    "    lin_probe = solve_linear_layer(\n",
    "        train_hidden_states[layer_idx].float().to(device),\n",
    "        train_labels.to(device),\n",
    "    )\n",
    "    log_probe = solve_linear_layer(\n",
    "        train_hidden_states[layer_idx].float().to(device),\n",
    "        train_labels.log1p().to(device),\n",
    "    )\n",
    "    lin_test_pred = lin_probe(test_hidden_states[layer_idx].float().to(device)).flatten().round().int()\n",
    "    lin_test_accuracy = (lin_test_pred == test_labels).float().mean().item()\n",
    "    \n",
    "    log_test_pred = log_probe(test_hidden_states[layer_idx].float().to(device)).flatten().exp().add(1).round().int()\n",
    "    log_test_accuracy = (log_test_pred == test_labels).float().mean().item()\n",
    "    \n",
    "    test_accuracies[\"lin\"][layer_idx] = lin_test_accuracy\n",
    "    test_accuracies[\"log\"][layer_idx] = log_test_accuracy\n",
    "\n",
    "    print(f\"layer idx: {layer_idx:<3}, linear probe acc: {lin_test_accuracy:.2f}, log probe acc: {log_test_accuracy:.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "265eb99ebd08e3d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "for name, accs in test_accuracies.items():\n",
    "    print(f\"{name} accs: | \" + \" | \".join([f\"{x:.0%}\" for layer, x in sorted(accs.items())]) + \" |\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
