\begin{algorithm*}
\caption{ScalaBL Training Procedure}
\label{alg:scalabl}
\begin{algorithmic}[1]
\Require Pretrained weights $\W_0 \in \R^{n\times d}$, fine-tuning dataset $\D$, prior distribution $P(\s)$
\Require Number of training epochs $E$, batch size $B$, learning rate $\eta$
\Require KL divergence weight $\beta$, variance initialization parameter $\rho$
\State $\mathbf{Z} \sim \mathcal{U}(-\sqrt{\frac{1}{d}},\sqrt{\frac{1}{d}})$ \Comment{Sample random matrix}
\State $\textbf{\_}, \s_{\mu}, \A \gets \text{SVD}(\mathbf{Z})$  \Comment{Initialize using SVD}
\State $\B \gets 0$ \Comment{Initialize as in LoRA}
\State $\s_{\sigma} \sim \mathcal{U}(\frac{\rho}{\sqrt{2}}, \rho)$ \Comment{Initialize as in BLoB}
\For{epoch $e \gets 1 \dots E$}
\For{batch $\D_t \sim \D$}
\State $\eps_t \sim \N(0,1)$ \Comment{Sample noise}
\State $\W_t \gets \W_0 + \B \diag{\s_{\mu}+\s_{\sigma} \cdot \eps_t}\A$ \Comment{Reparameterization trick (Equation~\ref{eq:reparam})}
\State $\mathcal{L}_t \gets -\frac{1}{B} \log P(\D_t \mid \W_t) + \beta D_{\mathrm{KL}}(\N(\s_{\mu}, \diag{\s_{\sigma}}) \,\|\, P(\s))$ \Comment{Compute negative ELBO loss}
\State $\btheta \gets [\s_{\mu},\s_{\sigma},\A,\B]$ \Comment{Collect trainable parameters}
\State $\btheta \gets \btheta - \eta \frac{\partial \mathcal{L}_t}{\partial \btheta}$\Comment{Compute gradient and update parameters}
\EndFor
\EndFor
\end{algorithmic}
\end{algorithm*}