


\begin{figure}[t!]
    \centering
    \includegraphics[width=0.99\linewidth]{figures/pairwise_sims_satclip.pdf}
    \caption{\textbf{Pairwise cosine similarity of SatCLIP embeddings used to form the auxiliary ViT token: Frozen (Left) vs.\ Fine-Tuned (Right).} On the BigEarthNetv2.0 land-cover classification task, the fine-tuned SatCLIP token exhibits significantly greater pairwise disagreement between countries than the frozen token, for the countries covered by the training split of BigEarthNetv2.0.}
    \label{fig:pairwise_sims}
\end{figure}
% \begin{figure}[t]
%     \centering
%     \includegraphics[width=\textwidth]{figures/satclip_embeddings_v8.pdf}%
%     \quad
%     \caption{\textbf{Qualitative result: Frozen vs Fine-Tuned SatCLIP auxiliary ViT token on the BigEarthNetv2.0 land-cover classification task:}  
%       Maps: PCA embeddings of the SatCLIP tokens: frozen (left) vs finetuned (right) on 10 European countries covered by the BigEarthNetv2.0 dataset.}
%     \label{fig:ft_satclip}
% \end{figure}

% \begin{figure*}[!t]
%   \centering
%   % first panel, one column wide
%   \begin{subfigure}[t]{0.49\textwidth}
%     \centering
%     \includegraphics[width=\linewidth]{figures/pairwise_sims_satclip.pdf}
%     \caption{\textbf{Pairwise cosine similarity of SatCLIP embeddings: Frozen (Left) vs Fine-Tuned (Right) SatCLIP auxiliary ViT token: } On the BigEarthNetv2.0 land cover classification task, the fine-tuned SatCLIP token exhibits significantly greater pairwise disagreement between countries compared to the frozen token and loses the global context preserved by a frozen SatCLIP encoder.}
%     \label{fig:pairwise_sims}
%   \end{subfigure}%
%   \hfill
%   % second panel, one column wide
%   \begin{subfigure}[t]{0.49\textwidth}
%     \centering
%     \includegraphics[width=\linewidth]{figures/satclip_embeddings_v8.pdf}
%     \caption{\textbf{Frozen vs Fine-Tuned SatCLIP auxiliary ViT token on the BigEarthNetv2.0 land-cover classification task:}  
%       Maps: PCA embeddings of the SatCLIP tokens: frozen (left) vs finetuned (right) on 10 European countries covered by the BigEarthNetv2.0 dataset.}
%     \label{fig:ft_satclip}
%   \end{subfigure}
%   %
%   \caption{%
%     \textbf{Qualitative Results with \texttt{TOKEN-FUSE} with Frozen and Finetuned SatCLIP auxiliary tokens: }  
%     (\subref{fig:pairwise_sims}) Pairwise similarity matrices between SatCLIP embeddings for 10 European countries spanning the spatial extent of the BigEarthNetv2.0 dataset.
%     (\subref{fig:ft_satclip}) Geographic PCA maps of frozen vs finetuned SatCLIP auxiliary tokens fused with \texttt{TOKEN-FUSE}.%
%   }
%   \label{fig:satclip_comparison}
% \end{figure*}