% Lion-PMA pseudocode (algorithm2e).
% Symbols, as named in the legacy input list kept below: gamma = learning rate,
% beta_1/beta_2 = betas, theta = parameters, f = objective, lambda = weight
% decay, K = accumulate iterations. m is the momentum buffer.
% Each iteration computes a gradient, forms a sign-based direction u_t from the
% momentum and the gradient, applies decoupled weight decay, and then steps.
% The step size is gamma on iterations where t mod K = 0 and gamma/K otherwise.
\begin{algorithm}[htbp]
% \caption{AGMA extension to Lion}\label{alg:agma-lion}
\caption{Lion-PMA}\label{alg:agma-lion}
\scriptsize
% \SetKwInOut{Input}{input}\SetKwInOut{Output}{output}
% \Input{$\gamma$(lr), $\beta_1, \beta_2$(betas), $\theta_0$(params), $f(\theta)$(objective), $\lambda$(weight decay), $K$(accumulate iterations)}
% \KwData{$m_0\gets 0$}
% % \Output{$\theta_t$}
% \BlankLine
% $\mathcal{S}=\varnothing$\;
\For{$t=1\to \ldots$}{
    $g_t\gets \nabla_{\theta}f_t(\theta_{t-1})$\;
    % tau is the position of iteration t inside the current window of K iterations.
    $\tau\gets t \bmod K$\;
    % tau = 0: full step size gamma, gradient contribution scaled by 1/K.
    \If{$\tau = 0$ and $t>0$}{
        $\gamma_t\gets\gamma$\;
        $u_t\gets \beta_1 m_{t-1} + (1-\beta_1)g_t/K$\;
        $u_t\gets \mathrm{sign}(u_t)$\;
        % NOTE(review): this branch never assigns m_t, although later iterations
        % read m_{t-1}. Confirm whether a momentum update is missing here.
    }
    \Else{
        % tau != 0: reduced step size gamma/K.
        $\gamma_t\gets\gamma/K$\;
        % Uses the previous momentum m_{t-1}; m_t is only assigned at the end of this branch.
        $u_t\gets \frac{\tau}{\tau+1} m_{t-1} + \frac{1-\beta_1}{\tau+1}g_t$\;
        $u_t\gets \mathrm{sign}(u_t)$\;
        % Momentum update with beta_2. Lion keeps a single first-moment buffer, so this
        % reads m_{t-1} and the raw gradient (no second moment v, no squared gradient).
        % NOTE(review): confirm the tau/(tau+1) weighting against the intended derivation.
        $m_t\gets \frac{\tau}{\tau+1} m_{t-1} + \frac{1-\beta_2}{\tau+1}g_t$\;
    }
    % $\hat{m}_t\gets m_t/(1-\beta_1^{t//K})$\tcp*[c]{Debias. "//" refers to division with remainder.}
    % $\sqrt{\hat{v}_t}\gets \sqrt{v_t/(1-\beta_2^{t//K})}+\epsilon$\;
    % Decoupled weight decay first, then the signed step, both scaled by gamma_t.
    $\hat{\theta}_t\gets (1-\gamma_t\lambda)\theta_{t-1}$\;
    $\theta_t\gets \hat{\theta}_t - \gamma_t u_t$\;
    \If{$\tau = 0$ and $t>0$}{
        % NOTE(review): \hat{m}_t is not defined by any active line (only by the
        % commented-out debias step above). Presumably m_t is meant; confirm before changing.
        $\hat{m}_t\gets K\hat{m}_t$\;
    }
}
\Return $\theta_t$\;
\normalsize
\end{algorithm}
