{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys; sys.path.append('..')\n",
    "import torch\n",
    "from dual_attention import DualAttention\n",
    "from symbol_retrieval import PositionRelativeSymbolRetriever, SymbolicAttention"
   ]
  },
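  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Documentation\n",
    "\n",
    "The next cell uses IPython's `?` help to show the constructor signature and docstrings of `DualAttention`."
   ]
  },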
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[0;31mInit signature:\u001b[0m\n",
      "\u001b[0mDualAttention\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0md_model\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0mn_heads_sa\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0mn_heads_ra\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0mdropout\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0msa_kwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0mra_kwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m    \u001b[0mra_type\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'relational_attention'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
      "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mDocstring:\u001b[0m     \n",
      "Base class for all neural network modules.\n",
      "\n",
      "Your models should also subclass this class.\n",
      "\n",
      "Modules can also contain other Modules, allowing to nest them in\n",
      "a tree structure. You can assign the submodules as regular attributes::\n",
      "\n",
      "    import torch.nn as nn\n",
      "    import torch.nn.functional as F\n",
      "\n",
      "    class Model(nn.Module):\n",
      "        def __init__(self):\n",
      "            super().__init__()\n",
      "            self.conv1 = nn.Conv2d(1, 20, 5)\n",
      "            self.conv2 = nn.Conv2d(20, 20, 5)\n",
      "\n",
      "        def forward(self, x):\n",
      "            x = F.relu(self.conv1(x))\n",
      "            return F.relu(self.conv2(x))\n",
      "\n",
      "Submodules assigned in this way will be registered, and will have their\n",
      "parameters converted too when you call :meth:`to`, etc.\n",
      "\n",
      ".. note::\n",
      "    As per the example above, an ``__init__()`` call to the parent class\n",
      "    must be made before assignment on the child.\n",
      "\n",
      ":ivar training: Boolean represents whether this module is in training or\n",
      "                evaluation mode.\n",
      ":vartype training: bool\n",
      "\u001b[0;31mInit docstring:\u001b[0m\n",
      "An implementation of Dual Attention.\n",
      "\n",
      "The DualAttention module is a form of multi-head attention involving a composition of two distinct types of attention heads.\n",
      "The first type is standard self-attention, which captures object-level (i.e., sensory) features, and\n",
      "the second type is relational attention, which captures relational features.\n",
      "\n",
      "\n",
      "Parameters\n",
      "----------\n",
      "d_model : int\n",
      "    model dimension\n",
      "n_heads_sa : int\n",
      "    number of self-attention heads\n",
      "n_heads_ra : int\n",
      "    number of relational attention heads\n",
      "dropout : float\n",
      "    dropout rate\n",
      "sa_kwargs : dict, optional\n",
      "    self-attention kwargs, by default None\n",
      "ra_kwargs : dict, optional\n",
      "    relational attention kwargs, by default None\n",
      "ra_type : str, optional\n",
      "    type of relational attention module (e.g., whether to use RCA for an ablation experiment).\n",
      "    by default 'relational_attention'.\n",
      "\n",
      "Raises\n",
      "------\n",
      "ValueError\n",
      "    _description_\n",
      "\u001b[0;31mFile:\u001b[0m           ~/projects/abstract_transformer/dual_attention.py\n",
      "\u001b[0;31mType:\u001b[0m           type\n",
      "\u001b[0;31mSubclasses:\u001b[0m     "
     ]
    }
   ],
   "source": [
    "DualAttention?"
   ]
  },
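  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dual Attention with Symbolic Attention\n",
    "\n",
    "Symbols are retrieved from the input with `SymbolicAttention` and passed to `DualAttention` together with the input."
   ]
  },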
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model width (passed as d_model below) and a length constant.\n",
    "d = 64\n",
    "l = 10\n",
    "\n",
    "# The symbol retriever is constructed before the attention module.\n",
    "symbol_retriever = SymbolicAttention(\n",
    "    d_model=d,\n",
    "    n_heads=4,\n",
    "    n_symbols=10,\n",
    ")\n",
    "\n",
    "# ra_kwargs carries the relational-attention options;\n",
    "# relative positional symbols are switched off for this variant.\n",
    "relational_attn = DualAttention(\n",
    "    d_model=d,\n",
    "    n_heads_sa=4,\n",
    "    n_heads_ra=4,\n",
    "    dropout=0.,\n",
    "    ra_kwargs={\n",
    "        'symmetric_rels': True,\n",
    "        'use_relative_positional_symbols': False,\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: torch.Size([1, 10, 64]), s: torch.Size([1, 10, 64]), y: torch.Size([1, 10, 64])\n"
     ]
    }
   ],
   "source": [
    "# Random input of shape (1, l, d); the length comes from `l` (set next to `d`\n",
    "# in the setup cell) rather than a repeated literal.\n",
    "x = torch.randn(1, l, d)\n",
    "\n",
    "# Retrieve the symbols for this input.\n",
    "s = symbol_retriever(x)\n",
    "\n",
    "# Only the first element returned by the attention module is kept.\n",
    "y, *_ = relational_attn(x, s)\n",
    "\n",
    "# Sanity check: the attended output is expected to match the input shape.\n",
    "assert y.shape == x.shape, f'unexpected output shape {tuple(y.shape)}'\n",
    "print(f'x: {x.shape}, s: {s.shape}, y: {y.shape}')"
   ]
  },
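  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dual Attention with Position-Relative Symbols\n",
    "\n",
    "Symbols come from `PositionRelativeSymbolRetriever`, and `DualAttention` is built with `use_relative_positional_symbols=True`."
   ]
  },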
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model width (d_model / symbol_dim) and the maximum relative position.\n",
    "d = 64\n",
    "l = 10\n",
    "\n",
    "# The symbol retriever is constructed before the attention module.\n",
    "symbol_retriever = PositionRelativeSymbolRetriever(\n",
    "    symbol_dim=d,\n",
    "    max_rel_pos=l,\n",
    ")\n",
    "\n",
    "# ra_kwargs carries the relational-attention options;\n",
    "# relative positional symbols are switched on for this variant.\n",
    "relational_attn = DualAttention(\n",
    "    d_model=d,\n",
    "    n_heads_sa=4,\n",
    "    n_heads_ra=4,\n",
    "    dropout=0.,\n",
    "    ra_kwargs={\n",
    "        'symmetric_rels': True,\n",
    "        'use_relative_positional_symbols': True,\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x: torch.Size([1, 10, 64]), s: torch.Size([10, 10, 64]), y: torch.Size([1, 10, 64])\n"
     ]
    }
   ],
   "source": [
    "# Random input of shape (1, l, d); using `l` keeps the input length tied to the\n",
    "# max_rel_pos the retriever was built with, instead of a repeated literal.\n",
    "x = torch.randn(1, l, d)\n",
    "\n",
    "# Retrieve the position-relative symbols for this input.\n",
    "s = symbol_retriever(x)\n",
    "\n",
    "# Only the first element returned by the attention module is kept.\n",
    "y, *_ = relational_attn(x, s)\n",
    "\n",
    "# Sanity check: the attended output is expected to match the input shape.\n",
    "assert y.shape == x.shape, f'unexpected output shape {tuple(y.shape)}'\n",
    "print(f'x: {x.shape}, s: {s.shape}, y: {y.shape}')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "abstract_transformer",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
