[
  {
    "label": "Events Input Sequence"
  },
  {
    "label": "Image Input Sequence"
  },
  {
    "label": "Temporal Context Encoder Block"
  },
  {
    "label": "Spatial Context Encoder Block"
  },
  {
    "label": "Concat/Attention"
  },
  {
    "label": "Conv 1x1"
  },
  {
    "label": "Conv 3x3"
  },
  {
    "label": "ReLU"
  },
  {
    "label": "Conv 1x1"
  },
  {
    "label": "BatchNorm"
  },
  {
    "label": "Residual Add"
  },
  {
    "label": "Sum"
  },
  {
    "label": "MLP Mixer"
  },
  {
    "label": "Dropout"
  },
  {
    "label": "Conv 1x1"
  },
  {
    "label": "LayerNorm"
  }
]