% \begin{figure}[ht]
% \centering
% \input{Contents/figs/linear_iiefg_0127.pdf}
% \caption{Experiments on .}\label{fig:lower_bound_instance}
% \end{figure}\noindent

% \begin{figure*}
% \centering
% \includegraphics[width=0.9\linewidth]{Contents/figs/linear_iiefg_0127.pdf}
% \caption{\label{fig:exp}This frog was uploaded via the file-tree menu.}
% \end{figure*}

% \begin{figure*}
% \centering
% \includegraphics[width=0.9\linewidth]{Contents/figs/linear_iiefg_0127.pdf}
% \caption{\label{fig:exp}Experiment results of \LSOMD, \LSFTRL and baseline methods on two linear POMG environments. The curves show the value of Eq. \eqref{equation:regret} against the number of episodes and are averaged over $10$ different seeds, where the shaded areas denote the $1$ standard deviation.
% }
% \end{figure*}

% \begin{figure*}
% \centering
% \includegraphics[width=0.95\linewidth]{Contents/figs/linear_iiefg_0520.pdf}
% % \vspace{-0.4cm}
% \caption{\label{fig:exp}Experiment results of baseline methods and \LSFTRL on two linear IIEFG environments. The curves show the value of Eq. \eqref{equation:regret} against the number of episodes and are averaged over $10$ different seeds, where the shaded areas denote the $1$ standard error.
% }
% \end{figure*}

% \vspace{-0.4cm}
\section{Experiments}\label{sec:exp}
% \vspace{-0.1cm}
This section presents the empirical evaluations of our \LSFTRL algorithm as well as previous methods.%
\footnote{The code for the experiments is available at \url{https://github.com/AnonymousXX-XX/Linear-IIEFG}.}
% \footnote{Please see the supplementary material for all the codes of experiments. We adopt the codes of all the baselines implemented by \citet{Fiegel2023adapting}: \url{https://github.com/anon17893/IIG-tree-adaptation}}

% \begin{figure*}
% \centering
% \includegraphics[width=0.95\linewidth]{Contents/figs/linear_iiefg_0520.pdf}
% % \vspace{-0.4cm}
% \caption{\label{fig:exp}Experiment results of baseline methods and \LSFTRL on two linear IIEFG environments. The curves show the value of Eq. \eqref{equation:regret} against the number of episodes and are averaged over $10$ different seeds, where the shaded areas denote the $1$ standard error.
% }
% \end{figure*}

% \vspace{-0.2cm}
\paragraph{Environments} 
% Since we are not aware of existing POMGs with linear function approximation, 
We construct two $A$-ary tree IIEFG environments with linear structures,
% \footnote{Our environments are implemented based on OpenSpiel library \citep{OpenSpiel}.} 
both of which exactly follow the construction of the hard-to-learn IIEFG instances used to prove the regret lower bound (please see Appendix~\ref{app:sec:lower_proof} for details of such instances).
The IIEFG instance in the first environment involves $H=3$ steps and $A=10$ actions at each infoset of the max-player (hence there are $1110$ infoset-action pairs of the max-player in total), 
while the second IIEFG instance has $H=5$ steps and $A=5$ actions at each infoset of the max-player (hence $3905$ infoset-action pairs in total). 
In both environments, the rewards for all state-action pairs $(s,a)\in\bigcup_{h\in[H-1]}\gS_{h}\times\gA$ are set to be $0$ and the mean of the reward for each $(s,a)\in\gS_H\times\gA$ is set as $\bar{r}_H(s,a)=\langle\vphi(s,a), \vtheta\rangle$. In particular, the feature $\vphi(s,a)$ has dimension $d=10$, with each entry first sampled uniformly from $[-1,1]$ and the resulting vector then normalized by its $L^2$-norm, and $\vtheta$ is constructed by the same procedure.
% \zhao{TBA: compute source}
% Both the two POMG environments are implemented based on OpenSpiel \citep{OpenSpiel}.

% We consider an $A$-ary tree POMG instance, in which 
% \begin{itemize}
%     \item $B=1$ so that there is actually no opponent effectively (and hence the dependence on the opponent's action $b$ is omitted in what follows);
%     \item $X_h=S_h=A^{h-1}$ for all $h\in[H]$, which means that $\gX_h=\gS_h$ and there is actually no partial observability;
%     \item $r_h(s,a)=0$ for all $h\in[H-1]$, and $r_H(s,a)$ is a reward sampled from Bernoulli distribution $\operatorname{Ber}(\bar{r}_H(s,a))$ with mean $\bar{r}_H(s,a)=\langle\vphi(s,a), \vtheta\rangle$.
% \end{itemize}
% By the construction, there exists a unique action sequence $(a_1,\ldots,a_{h-1})$ that determines $s_h$ (and hence $x_h$) and the transition is deterministic.


% , while there is only one pair of infoset-action for the min-player at each state. Further, the reward $\bar{r}_h\left(s_h, a_h, b_h\right)$
% \vspace{-0.2cm}
\paragraph{Baselines} We incorporate the algorithms from the most closely related works as baselines, including \IXOMD \citep{kozuno2021learning}, \BalancedOMD \citep{bai2022nearoptimal}, as well as \BalancedFTRL and \AdaptiveFTRL \citep{Fiegel2023adapting}.%
\footnote{We adopt the implementations of all the baselines provided by \citet{Fiegel2023adapting}: \url{https://github.com/anon17893/IIG-tree-adaptation}.}
Following \citet{Fiegel2023adapting}, we conduct a (logarithmic) grid search over the learning rate of each algorithm in each environment.
% , and take the base IX parameter was taken as 1/20 of this global learning rate

% \vspace{-0.2cm}
\paragraph{Results} As shown in Figure~\ref{fig:exp}, the baseline methods other than \AdaptiveFTRL perform similarly in both environments, while \AdaptiveFTRL converges more slowly than the other baselines.
Further, \LSFTRL outperforms all the baselines with significantly faster convergence in both environments, as it leverages the linear structures of the games.
Besides, all the algorithms empirically suffer more regret in the second environment than in the first one, since the second environment involves a longer horizon $H$ and more infoset-action pairs to learn.
% Further, \LSFTRL outperforms \LSOMD in the first environment but is slightly outperformed by \LSOMD in the second one and both \LSFTRL and \LSOMD enjoy a faster convergence than all the baselines, due to the leverage of the linear structures of the games.
% Besides, all the algorithms empirically suffer more regret in the second environment than in the first one, since the second environment involves a longer horizon length $H$ and more infoset-action pairs to learn than the first environment. 