{
  "schema_version": "2.0",
  "metadata": {
    "diagram_id": "swin_transformer_decoder_block_01"
  },
  "global_properties": {
    "topic": "Swin Transformer Decoder Block",
    "purpose": "architecture_overview",
    "target_audience": "researcher",
    "complexity_level": "publication_quality",
    "domain": "Computer Vision",
    "visual_format": "block_diagram",
    "diagram_type": "architecture_diagram",
    "layout_grid": "2x1",
    "style_theme": "professional_light",
    "background_color": "#AEECEF",
    "font_family": "Helvetica, Arial, sans-serif",
    "title": {
      "text": "Swin Transformer Decoder Block",
      "is_present": true
    }
  },
  "components": [
    {
      "id": "container_left",
      "type": "group_container",
      "label": "",
      "geometry": {
        "shape": "rounded_rectangle"
      },
      "styling": {
        "fill_color": "#D6EAF8",
        "border_color": "#000000",
        "border_width": 2,
        "border_style": "solid"
      }
    },
    {
      "id": "container_right",
      "type": "group_container",
      "label": "",
      "geometry": {
        "shape": "rounded_rectangle"
      },
      "styling": {
        "fill_color": "#E8DAEF",
        "border_color": "#000000",
        "border_width": 2,
        "border_style": "solid"
      }
    },
    {
      "id": "input_query",
      "type": "shape_node",
      "label": "Single frame Query Qᵢ",
      "geometry": {
        "shape": "rounded_rectangle"
      },
      "styling": {
        "fill_color": "#B7E1FA",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    },
    {
      "id": "encoder_output_left",
      "type": "text_node",
      "label": "Encoder output",
      "geometry": {
        "shape": "none"
      },
      "styling": {
        "fill_color": "#FFFFFF",
        "border_color": "#FFFFFF",
        "border_width": 0,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "encoder_output_right",
      "type": "text_node",
      "label": "Encoder output",
      "geometry": {
        "shape": "none"
      },
      "styling": {
        "fill_color": "#FFFFFF",
        "border_color": "#FFFFFF",
        "border_width": 0,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "w_msa",
      "type": "shape_node",
      "label": "W-MSA",
      "sub_label": "V   K   Q",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#B7E1B7",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    },
    {
      "id": "ln1_left",
      "type": "shape_node",
      "label": "LN",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#F7E7CE",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "w_mca",
      "type": "shape_node",
      "label": "W-MCA",
      "sub_label": "V'   K'   Q'",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#B7E1B7",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    },
    {
      "id": "ln2_left",
      "type": "shape_node",
      "label": "LN",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#F7E7CE",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "ln3_left",
      "type": "shape_node",
      "label": "LN",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#F7E7CE",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "mlp_left",
      "type": "shape_node",
      "label": "MLP",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#B7D3F7",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    },
    {
      "id": "sw_msa",
      "type": "shape_node",
      "label": "SW-MSA",
      "sub_label": "V   K   Q",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#B7E1B7",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    },
    {
      "id": "ln1_right",
      "type": "shape_node",
      "label": "LN",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#F7E7CE",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "sw_mca",
      "type": "shape_node",
      "label": "SW-MCA",
      "sub_label": "V'   K'   Q'",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#B7E1B7",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    },
    {
      "id": "ln2_right",
      "type": "shape_node",
      "label": "LN",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#F7E7CE",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "ln3_right",
      "type": "shape_node",
      "label": "LN",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#F7E7CE",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "normal",
        "text_color": "#000000"
      }
    },
    {
      "id": "mlp_right",
      "type": "shape_node",
      "label": "MLP",
      "geometry": {
        "shape": "rectangle"
      },
      "styling": {
        "fill_color": "#B7D3F7",
        "border_color": "#000000",
        "border_width": 1,
        "border_style": "solid"
      },
      "text_properties": {
        "font_weight": "bold",
        "text_color": "#000000"
      }
    }
  ],
  "connections": [
    {
      "id": "conn_input_left",
      "from_id": "input_query",
      "to_id": "ln1_left",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_ln1_wmsa",
      "from_id": "ln1_left",
      "to_id": "w_msa",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_wmsa_ln2",
      "from_id": "w_msa",
      "to_id": "ln2_left",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_encoder_left_ln2",
      "from_id": "encoder_output_left",
      "to_id": "ln2_left",
      "label": {
        "text": "",
        "position": "start",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "small"
      }
    },
    {
      "id": "conn_ln2_wmca",
      "from_id": "ln2_left",
      "to_id": "w_mca",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_wmca_ln3",
      "from_id": "w_mca",
      "to_id": "ln3_left",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_ln3_mlp_left",
      "from_id": "ln3_left",
      "to_id": "mlp_left",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_mlp_left_skip",
      "from_id": "mlp_left",
      "to_id": "ln1_left",
      "label": {
        "text": "",
        "position": "end",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "none",
        "size": "small"
      }
    },
    {
      "id": "conn_ln2_left_skip",
      "from_id": "ln2_left",
      "to_id": "w_msa",
      "label": {
        "text": "",
        "position": "end",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "none",
        "size": "small"
      }
    },
    {
      "id": "conn_ln3_left_skip",
      "from_id": "ln3_left",
      "to_id": "w_mca",
      "label": {
        "text": "",
        "position": "end",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "none",
        "size": "small"
      }
    },
    {
      "id": "conn_input_right",
      "from_id": "input_query",
      "to_id": "ln1_right",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_ln1_swmsa",
      "from_id": "ln1_right",
      "to_id": "sw_msa",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_swmsa_ln2",
      "from_id": "sw_msa",
      "to_id": "ln2_right",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_encoder_right_ln2",
      "from_id": "encoder_output_right",
      "to_id": "ln2_right",
      "label": {
        "text": "",
        "position": "start",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "small"
      }
    },
    {
      "id": "conn_ln2_swmca",
      "from_id": "ln2_right",
      "to_id": "sw_mca",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_swmca_ln3",
      "from_id": "sw_mca",
      "to_id": "ln3_right",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_ln3_mlp_right",
      "from_id": "ln3_right",
      "to_id": "mlp_right",
      "label": {
        "text": "",
        "position": "middle_above",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "straight",
        "style": "solid",
        "color": "#000000",
        "width": 2
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "solid_triangle",
        "size": "medium"
      }
    },
    {
      "id": "conn_mlp_right_skip",
      "from_id": "mlp_right",
      "to_id": "ln1_right",
      "label": {
        "text": "",
        "position": "end",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "none",
        "size": "small"
      }
    },
    {
      "id": "conn_ln2_right_skip",
      "from_id": "ln2_right",
      "to_id": "sw_msa",
      "label": {
        "text": "",
        "position": "end",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "none",
        "size": "small"
      }
    },
    {
      "id": "conn_ln3_right_skip",
      "from_id": "ln3_right",
      "to_id": "sw_mca",
      "label": {
        "text": "",
        "position": "end",
        "text_color": "#000000"
      },
      "line_properties": {
        "type": "orthogonal",
        "style": "solid",
        "color": "#000000",
        "width": 1.5
      },
      "arrowhead": {
        "start_type": "none",
        "end_type": "none",
        "size": "small"
      }
    }
  ],
  "layout_constraints": [
    {
      "type": "containment",
      "container_id": "container_left",
      "element_ids": [
        "ln1_left",
        "w_msa",
        "ln2_left",
        "w_mca",
        "ln3_left",
        "mlp_left"
      ],
      "padding": "medium"
    },
    {
      "type": "containment",
      "container_id": "container_right",
      "element_ids": [
        "ln1_right",
        "sw_msa",
        "ln2_right",
        "sw_mca",
        "ln3_right",
        "mlp_right"
      ],
      "padding": "medium"
    },
    {
      "type": "alignment",
      "alignment_type": "vertical_center",
      "element_ids": [
        "container_left",
        "container_right"
      ]
    },
    {
      "type": "distribution",
      "distribution_type": "horizontal_equal_spacing",
      "element_ids": [
        "container_left",
        "container_right"
      ]
    },
    {
      "type": "alignment",
      "alignment_type": "bottom_edge",
      "element_ids": [
        "input_query",
        "container_left",
        "container_right"
      ]
    }
  ]
}