Note that within this section, we assume finiteness of the state space ($|\mathcal{S}|<\infty$) and that the transition operator has rank $\tilde{d}$ for all time steps, that is, $\textnormal{rank}(\mathcal{P}_{h}^{\star})=\tilde{d}$ for all $h\in[H]$. Recall that \(\mathcal{X}_{h}^{\star} :=\{(s,a)\in\mathcal{S}\times\mathcal{A} |  d_{\mathcal{P}^{\star}, h}^{\pi^{\star}}(s,a) > 0\}\) denotes the set of state-action pairs reachable by the optimal policy at time step \(h\in[H]\). Similarly, we define \(\mathcal{X}_{h} :=\{(s,a)|\exists\pi:  d_{\mathcal{P}^{\star}, h}^{\pi}(s,a) > 0\}\) as the set of state-action pairs reachable by any policy at time step \(h\in[H]\). In the following, we provide the proofs for Section~\ref{sec:more_on_good_representations}.


%Let $\mathcal{X}_{h}:=\{(s,a)\in\mathcal{S}\times\mathcal{A}:d_{\mathcal{P}^{\star},h}^{\pi^{\star}}\}$ be the set of state-action pairs reachable by the optimal policy at time step $h$.

%\unisoftex*

Let us start by constructing a full rank factorization of \(\mathcal{P}_{h}^{\star}\). Note that \(\mathcal{P}_{h}^{\star}\) has rank \(\tilde{d}\) by assumption and hence we can select \(\tilde{d}\) columns of \(\mathcal{P}_{h}^{\star}\) such that they form a basis for the column space of \(\mathcal{P}_{h}^{\star}\). We collect them in a matrix \(\Phi\in\mathbb{R}^{|\mathcal{S}||\mathcal{A}|\times \tilde{d}}\), placing them in the same order as they appear in \(\mathcal{P}_{h}^{\star}\).  Now each column of \(\mathcal{P}_{h}^{\star}\) can be expressed as a linear combination of the columns of \(\Phi\) and we identify the row $\Phi_{sa,\cdot}$ with the feature $\phi(s,a)$. We denote by \(\Psi\in\mathbb{R}^{\tilde{d}\times |\mathcal{S}|}\) the matrix uniquely determined by the coefficients in the linear combinations such that \(\mathcal{P}_{h}^{\star} = \Phi\Psi\) and identify the column $\Psi_{\cdot,s}$ with $\mu(s)$. Then, the following holds.

\begin{lemma}
     Let $d\geq \tilde{d}$. Then, the following statements are equivalent:
        
        $(1)$ \(\textnormal{span}\{\mathcal{P}_{h}^{\star}(\cdot|s,a) |  (s,a)\in\mathcal{X}_{h}^{\star}\}=\textnormal{span}(\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\mathcal{X}_{h}\})\)
        
        $(2)$ there exists a UniSOFT representation \(\langle\tilde{\phi}_{h}, \tilde{\mu}_{h}\rangle_{\mathbb{R}^{d}}=\mathcal{P}_{h}^{\star}\).
\end{lemma}
\begin{proof}
    
    \((1) \Rightarrow (2)\). By construction, $\textnormal{span}(\{\phi(s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\})=\textnormal{span}(\{ \phi(s,a)|(s,a)\in\mathcal{X}_{h}\})$. After extending $\Phi$ and $\Psi$ with $d-\tilde{d}$ columns and rows of zero vectors, respectively, we see that \(\Phi\Psi\) is a UniSOFT representation of \(\mathcal{P}_{h}^{\star}\).

    \((2) \Rightarrow (1)\). Let $\tilde{\Phi}\tilde{\Psi}$ be a UniSOFT representation. Then, we easily observe that,
    \begin{align*}
        \textnormal{span}(\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\}) &=
        \textnormal{span}(\{\tilde{\phi}(s,a)^{T}\tilde{\Psi}|(s,a)\in\mathcal{X}_{h}^{\star}\}) \\ &\stackrel{(i)}{=} \textnormal{span}(\{\tilde{\phi}(s,a)^{T}\tilde{\Psi}|(s,a)\in\mathcal{X}_{h}\}) \\ &= \textnormal{span}(\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\mathcal{X}_{h}\}),
    \end{align*}
    where $(i)$ follows from the UniSOFT property of $\tilde{\Phi}$.
\end{proof}

\begin{restatable}[Existence of good representations, Lemma \ref{lemma:unisoft_existance}]{lemma}{unisoftexformal}\label{lemma:unisoft_existance_formal}
Let $d\geq \tilde{d}$. Then, the following statements are equivalent:
        
        $(1)$ \(\textnormal{span}\{\mathcal{P}_{h}^{\star}(\cdot|s,a) |  (s,a)\in\mathcal{X}_{h}^{\star}\}=\mathbb{R}^{\tilde{d}}\) and $|\mathcal{X}_{h}^{\star}|\geq d$,
        
        $(2)$ there exists a non-redundant UniSOFT representation \(\langle\tilde{\phi}_{h}, \tilde{\mu}_{h}\rangle_{\mathbb{R}^{d}}=\mathcal{P}_{h}^{\star}\),

        $(3)$ if $d=\tilde{d}$, any representation \(\langle\phi_{h},\mu_{h}\rangle_{\mathbb{R}^{d}}=\mathcal{P}_{h}^{\star}\) is UniSOFT.
\end{restatable}

\begin{proof}   
    \((1) \Rightarrow (2)\). By construction, the rows of \(\Phi\) corresponding to elements in \(\mathcal{X}_{h}^{\star}\), i.e. the vectors $\{\phi(s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\}$, form a basis of \(\mathbb{R}^{\tilde{d}}\). As $|\mathcal{X}_{h}^{\star}|\geq d$ holds, we can extend $\Phi$ with $d-\tilde{d}$ columns of unit vectors, such that $\Phi\in\mathbb{R}^{|\mathcal{S}||\mathcal{A}|\times d}$ and $\textnormal{span}\{\phi(s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\}=\mathbb{R}^{d}$. Hence, after appending $d-\tilde{d}$ rows of zero vectors to $\Psi$, we see that \(\Phi\Psi\) is a non-redundant and UniSOFT representation of \(\mathcal{P}_{h}^{\star}\).

    \((2) \Rightarrow (1)\). First, note that $|\mathcal{X}_{h}^{\star}|\geq d$ must hold, in order to find $d$ features $\phi$ that span the feature space $\mathbb{R}^{d}$. Second, note that $\textnormal{rank}\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\} \leq \tilde{d}$ must hold, as otherwise $\textnormal{rank}(\mathcal{P}_{h}^{\star})>\tilde{d}$.
    We provide a proof by contradiction. Let $\tilde{\Phi}\tilde{\Psi}$ be any non-redundant UniSOFT representation. Suppose that $\textnormal{rank}\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\} <\tilde{d}$ holds. Since $\tilde{\Phi}$ is UniSOFT and non-redundant by assumption, we have that $\textnormal{rank}(\tilde{\Phi}\tilde{\Psi})=\textnormal{rank}(\tilde{\Psi})$, which implies that $\textnormal{rank}(\tilde{\Psi})=\tilde{d}$ must be true, to match the rank of $\mathcal{P}_{h}^{\star}$. However, this further implies that
    \[\tilde{d}\stackrel{(i)}{=}\textnormal{rank}\{\tilde{\phi}(s,a)^{T}\tilde{\Psi}|(s,a)\in\mathcal{X}_{h}^{\star}\}=\textnormal{rank}\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\mathcal{X}_{h}^{\star}\} \stackrel{(ii)}{<} \tilde{d}\]
    holds, where $(i)$ follows from $\tilde{\Phi}$ being UniSOFT and non-redundant and $(ii)$ follows by assumption. This is, of course, absurd.
    
    \((2) \Rightarrow (3)\). \textbf{(Case $d=\tilde{d}$)} Let \(\mathcal{P}_{h}^{\star}=\Phi^{\star}\Psi^{\star}\) such that the representation is non-redundant and UniSOFT. By Theorem \ref{thm:low_rank_factorization} there exists an invertible matrix \(R\in\mathbb{R}^{d \times d}\) such that \(\bar{\Phi}=\Phi^{\star}R\) and \(\bar{\Psi}=R^{-1}\Psi^{\star}\) for any other full rank factorization \(\mathcal{P}_{h}^{\star}=\bar{\Phi}\bar{\Psi}\). Therefore, rows in \(\Phi^{\star}\) that form a basis of \(\mathbb{R}^{d}\) also form a basis of \(\mathbb{R}^{d}\) in \(\bar{\Phi}\).
    
    \((3) \Rightarrow (1)\). The claim follows by the construction of \(\Phi\).
    
\end{proof}

\begin{corollary}\label{cor:unisoft_negative}
        Let $d\geq \tilde{d}$ and let $\mathcal{Y}_{h}:=\{s'\in\mathcal{S}|\exists (s,a)\in\mathcal{S}\times\mathcal{A}:\mathcal{P}_{h}^{\star}(s'|s,a)>0\}$ be the set of states reachable from some state-action pair at time step $h$ (loops included). If there exists a state $s\in\mathcal{Y}_{h}$ s.t. $d_{\mathcal{P}^{\star},h+1}^{\pi^{\star}}(s) = 0$, then there exists no factorization $\mathcal{P}_{h}^{\star}=\Phi\Psi$ such that $\Phi$ is UniSOFT and non-redundant, where $\Phi\in\mathbb{R}^{|\mathcal{S}||\mathcal{A}|\times d}$ and $\Psi\in\mathbb{R}^{d\times|\mathcal{S}|}$.
\end{corollary}


\begin{proof}
    First, note that \[\textnormal{rank}(\{\mathcal{P}_{h}^{\star}(\cdot|s,a) |  (s,a)\in\mathcal{X}_{h}^{\star}\}) < \textnormal{rank}(\{\mathcal{P}_{h}^{\star}(\cdot|s,a) |  (s,a)\in\mathcal{S}\times\mathcal{A}\})=\tilde{d}\]
    must be true, since by assumption, there exists a state-action pair $(\tilde{s},\tilde{a})$ such that $\mathcal{P}_{h}^{\star}(\cdot|\tilde{s},\tilde{a})\notin\textnormal{span}(\{\mathcal{P}_{h}^{\star}(\cdot|s,a) |  (s,a)\in\mathcal{X}_{h}^{\star}\})$.
    Now, suppose that there exists a non-redundant UniSOFT representation. Then, by Lemma~\ref{lemma:unisoft_existance}, we know that \[\textnormal{span}\{\mathcal{P}_{h}^{\star}(\cdot|s,a) |  (s,a)\in\mathcal{X}_{h}^{\star}\}=\mathbb{R}^{\tilde{d}}\] must hold, which, however, contradicts the inequality derived above. 
\end{proof}

\begin{lemma}\label{lemma:linear_independence_perturbation}
    Suppose $(X, \Vert\cdot\Vert)$ is some normed space. Let \(\{v_{i}\}_{i=1}^{d}\) be a set of linearly independent vectors in $X$. Then, there exists some \(\epsilon > 0\), such that any set of vectors \(\{u_{i}\}_{i=1}^{d}\) in $X$ with \(\Vert v_{i} - u_{i}\Vert \leq \epsilon\) for all $i\in[d]$ is linearly independent as well. In particular, any \(\epsilon\) with \(0 < \epsilon < \min_{(\alpha_{1}, \dots, \alpha_{d}):\sum_{i}|\alpha_{i}|=1}\Vert\sum_{i=1}^{d}\alpha_{i}v_{i}\Vert/2\) suffices.
\end{lemma}

\begin{proof}
    We provide a proof by contradiction. Let \(S:=\{(\alpha_{1}, \dots, \alpha_{d})\in\mathbb{R}^{d}|\sum_{i=1}^{d}|\alpha_{i}|=1\}\). Suppose \(\{u_{i}\}_{i=1}^{d}\) are linearly dependent, that is, there exists some nonzero vector \((\alpha_{1}, \dots, \alpha_{d})\in\mathbb{R}^{d}\) such that
    \[0=\Vert\sum_{i=1}^{d}\alpha_{i}u_{i}\Vert.
    \]
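    In particular, after rescaling, we can assume w.l.o.g.\ that \((\alpha_{1}, \dots, \alpha_{d})\in S\). Let \(0 < \epsilon < \min_{(\alpha_{1}, \dots, \alpha_{d}):\sum_{i}|\alpha_{i}|=1}\Vert\sum_{i=1}^{d}\alpha_{i}v_{i}\Vert/2\); the minimum is attained and strictly positive, since \(S\) is compact and the \(v_{i}\) are linearly independent. But then, 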
    \begin{align*}
        0=\Vert\sum_{i=1}^{d}\alpha_{i}u_{i}\Vert &= \Vert\sum_{i=1}^{d}\alpha_{i}v_{i} + \sum_{i=1}^{d}\alpha_{i}(u_{i} - v_{i})\Vert \\ &\stackrel{(i)}{\geq} 
        \Vert \sum_{i=1}^{d}\alpha_{i}v_{i}\Vert - \Vert\sum_{i=1}^{d}\alpha_{i}(u_{i} - v_{i})\Vert \\
        &\stackrel{(ii)}{>} 2\epsilon - \epsilon\sum_{i=1}^{d}|\alpha_{i}| = \epsilon,
    \end{align*}
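    leads to a contradiction, where \((i)\) follows from the reverse triangle inequality and \((ii)\) follows from the choice of \(\epsilon\) together with the triangle inequality and \(\Vert u_{i} - v_{i}\Vert\leq\epsilon\) for all \(i\in[d]\).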
\end{proof}

\unisoftexmiss*

\begin{proof}
    Since \(\mathcal{P}^{\star}\) is assumed to admit a non-redundant UniSOFT representation, by Lemma~\ref{lemma:unisoft_existance}, there exist \(\tilde{d}\) state-action pairs in \(\mathcal{X}_{h}^{\star}\) such that their transition vectors in model \(\mathcal{P}_{h}^{\star}\) span \(\mathbb{R}^{\tilde{d}}\). Denote by \(\tilde{\mathcal{X}}_{h}^{\star}\) the set that contains those \(\tilde{d}\) state-action pairs. Let $\epsilon>0$ be arbitrary such that
    \[\epsilon < \min_{(\alpha_{1}, \dots, \alpha_{\tilde{d}}):\sum_{i}|\alpha_{i}|=1}\Vert\sum_{i=1}^{\tilde{d}}\alpha_{i}v_{i}\Vert_{\textnormal{TV}} \frac{d_{\textnormal{min}}^{\star}}{2}, \]
     where \(\{v_{i}\}_{i=1}^{\tilde{d}}=\{\mathcal{P}_{h}^{\star}(\cdot|s,a)|(s,a)\in\tilde{\mathcal{X}}_{h}^{\star}\}\).
    Then, by continuity of norms and integrals, we can find an \(\alpha^{\star}\)-approximate representation with induced transition operator \(\mathcal{P}\), such that for any \(h\in[H]\) and \((s',a')\in\tilde{\mathcal{X}}_{h}^{\star}\),
    \begin{align*}
    \epsilon &=
        \mathbb{E}_{(s,a)\sim d_{\mathcal{P}^{\star},h}^{\pi^{\star}}}[\Vert\mathcal{P}_{h}(\cdot|s,a) - \mathcal{P}_{h}^{\star}(\cdot|s,a)\Vert_{\textnormal{TV}}] \\ &= \sum_{(s,a)\in\mathcal{S}\times\mathcal{A}}d_{\mathcal{P}^{\star},h}^{\pi^{\star}}(s,a)\Vert\mathcal{P}_{h}(\cdot|s,a) - \mathcal{P}_{h}^{\star}(\cdot|s,a)\Vert_{\textnormal{TV}} \\
        &\geq  d_{\textnormal{min}}^{\star}\Vert\mathcal{P}_{h}(\cdot|s',a') - \mathcal{P}_{h}^{\star}(\cdot|s',a')\Vert_{\textnormal{TV}}.
    \end{align*}
     Then, by Lemma~\ref{lemma:linear_independence_perturbation}, the vectors in \(\{\mathcal{P}_{h}(\cdot|s,a)|(s,a)\in\tilde{\mathcal{X}}_{h}^{\star}\}\) are linearly independent and, by Lemma~\ref{lemma:unisoft_existance_formal}, there exists a non-redundant UniSOFT representation inducing \(\mathcal{P}\). In particular, the existence of one good representation implies the existence of an infinite number of good representations. As $\epsilon$ was chosen arbitrarily, we conclude the proof.
     
\end{proof}
