% Pseudocode listing for the StRL procedure: two randomly transformed views
% of one observation sequence are encoded by two different encoders, expanded,
% and compared with a VICReg loss.
% NOTE(review): \LineComment and \StateComment are not defined in this
% fragment -- presumably custom macros from the preamble; confirm they exist.
\centering
% 8pt font on a 6pt baseline skip: deliberately tight line spacing for the listing.
\fontsize{8pt}{6pt}\selectfont % Adjust the font size as needed
% The optional argument [1] asks for a number on every line.
\begin{algorithmic}[1]
% --- Legend: symbols and components used by the procedure below. ---
\LineComment{ $T$: Sequence Length}
% \textbar{} instead of a bare `|': in text mode under OT1 a bare `|' prints as an em-dash.
\LineComment{ $\mathbf{o}^{[1:T]}$: Sequence of Observations \textbar{} $o^{1}, o^{2}, \ldots, o^{T}$}
\LineComment{ $\mathbf{RandomTransformation}$:  Spatiotemporal Augmenter}
\LineComment{ $\mathbf{ConvEncoder}$: ResNet34-Based Encoder}
\LineComment{ $\mathbf{TransformerEncoder}$: Video-Swin Transformer with CSW-MSA}
\LineComment{ $\mathbf{ConvExpander}$: Expander Network for ResNet34-Based Representations}
\LineComment{ $\mathbf{TransformerExpander}$: Expander Network for Video-Swin Transformer Representations}
% Sequence-level quantities are set in bold both where they are introduced
% and where they are consumed, so each symbol looks the same throughout.
\Procedure{StRL}{$\mathbf{o}^{[1:T]}$}
    \StateComment{Obtaining varying viewpoints of the same sequence of observations.}
    \State $\mathbf{x}^{[1:T]} \gets \mathbf{RandomTransformation}(\mathbf{o}^{[1:T]})$
    \State $\mathbf{\hat{x}}^{[1:T]} \gets \mathbf{RandomTransformation}(\mathbf{o}^{[1:T]})$
    \StateComment{Encoding observational data into state representations.}
    \State $\mathbf{s_{x}} \gets \mathbf{ConvEncoder}(\mathbf{x}^{[1:T]})$
    \State $\mathbf{s_{\hat{x}}} \gets \mathbf{TransformerEncoder}(\mathbf{\hat{x}}^{[1:T]})$
    \StateComment{Expanding state representations into a higher-dimensional space.}
    \State $\mathbf{z} \gets \mathbf{ConvExpander}(\mathbf{s_{x}})$
    \State $\mathbf{\hat{z}} \gets \mathbf{TransformerExpander}(\mathbf{s_{\hat{x}}})$
    \StateComment{VICReg Loss: Optimize State Representation}
    \State $\mathbf{loss_{strl}} \gets \mathbf{VICReg}(\mathbf{z}, \mathbf{\hat{z}})$
\EndProcedure
\end{algorithmic}

