\label{pseudocode}

% Algorithm float: sparse IRM trained with iterative hard-thresholding.
% Every iteration takes one gradient step of size $\eta$ on each of $\vv$ and
% $\Phi$; only the $\vv$ step is passed through $\operatorname{proj}_{d_\inv}$.
% NOTE(review): the label below is kept as-is because it may be referenced
% elsewhere; an `alg:`-prefixed name would be more descriptive.
\begin{algorithm}
\caption{Sparse IRM with Iterative Hard-Thresholding}
\label{euclid}
    \begin{algorithmic}[1]
    \State \textbf{Input:} target number of nonzero features $d_\inv < d$, step size $\eta$, number of iterations $T$, $\cD = \{\cD^e\}_{e\in \cE}$ and $\cD^e \coloneqq \{(\vx^e_i, y^e_i)\}^{n_e}_{i=1}$.
    % Iterates carry an explicit index so that the first loop pass ($t = 1$)
    % reads well-defined quantities.
    \State Initialize weights $(\vv^1, \Phi^1)$.
    \For {training iteration $t = 1, 2, \dots, T$}
        % Disabled variant, kept for reference: update $\Phi$ only when
        % $t \bmod P = 0$ and apply just the projected $\vv$ step otherwise.
        % NOTE(review): $\hat \cL$ is written with both arguments in both
        % updates -- confirm this matches the definition used in the main text.
        \State $\vv^{t+1} \gets \operatorname{proj}_{d_\inv}(\vv^t - \eta \nabla_{\vv} \hat \cL(\vv^t, \Phi^t))$
        \State $\Phi^{t+1} \gets \Phi^t - \eta \nabla_{\Phi} \hat \cL(\vv^t, \Phi^t)$
        % No explicit counter update: the \For header already advances $t$.
    \EndFor
    \end{algorithmic}
\end{algorithm}
% The theory applies to the linear case in which $\Phi (\vx^e) = \vx^e$. However, this algorithm can be applied more generally to more complex models
% \end{algorithm}
