[
  {
    "label": "Events Input Sequence"
  },
  {
    "label": "Self-Attention Layer"
  },
  {
    "label": "Multi-Layer Perceptron"
  },
  {
    "label": "Image Input Sequence"
  },
  {
    "label": "Spatial Transformer Layer"
  },
  {
    "label": "Feedforward Layer"
  },
  {
    "label": "Conv 1×1"
  },
  {
    "label": "LayerNorm"
  },
  {
    "label": "Conv 3×3"
  },
  {
    "label": "Relu"
  },
  {
    "label": "BN"
  },
  {
    "label": "Spatiotemporal Context Feature Representation"
  }
]