\documentclass{midl}

\usepackage{booktabs}
\usepackage{multirow}
\usepackage{longtable}

\jmlryear{2025}
\jmlrworkshop{Full Paper -- MIDL 2025}
\jmlrvolume{-- nnn}
\editors{Accepted for publication at MIDL 2025}

\title[Improving Brain Disorder Diagnosis with Advanced Brain Function Representation]{Improving Brain Disorder Diagnosis with Advanced Brain Function Representation and Kolmogorov-Arnold Networks}

\midlauthor{\Name{Tyler Ward} \Email{tyler.ward@uky.edu}\\
\Name{Abdullah-Al-Zubaer Imran} \Email{aimran@uky.edu}\\
\addr University of Kentucky, Lexington, KY 40506, USA
}

\begin{document}

\maketitle

\begin{abstract}
Quantifying functional connectivity (FC), a vital metric for the diagnosis of various brain disorders, traditionally relies on the use of a pre-defined brain atlas. However, using such atlases can lead to issues regarding selection bias and lack of regard for specificity. 
Addressing this, we propose a \textit{novel} transformer-based classification network (ABFR-KAN) with effective brain function representation to aid in diagnosing autism spectrum disorder (ASD). ABFR-KAN leverages Kolmogorov-Arnold Network (KAN) blocks replacing traditional multi-layer perceptron (MLP) components. Thorough experimentation reveals the effectiveness of ABFR-KAN in improving the diagnosis of ASD under various configurations of the model architecture. Our code is available at \url{https://github.com/tbwa233/ABFR-KAN}.
\end{abstract}

\begin{keywords}
ASD, Brain MRI, classification, functional connectivity, Kolmogorov-Arnold Network, Transformer
\end{keywords}

\section{Introduction}
Diagnoses of autism spectrum disorder (ASD) are becoming increasingly prevalent across the world \cite{ge2024prevalence}. As such, research into effective methods to improve the diagnosis of this brain disorder is vital. Traditional methods of diagnosing ASD have relied on the analysis of functional connectivity (FC) in the brain, quantified from blood-oxygen-level-dependent (BOLD) signals obtained during resting-state functional magnetic resonance imaging (rs-fMRI), but this approach has several flaws. 

FC analysis performed in this manner typically relies on regions-of-interest (ROIs) produced by registering a subject's brain with a pre-defined atlas. This approach can lead to subjective selection bias, disregard for individual specificity, and a lack of interaction between brain regions and FC analysis \cite{liu2024randomizing}. Despite research into various methods of addressing these issues, such as data-driven \cite{jensen2024addressing}, individualized \cite{li2022atlas}, and multi-atlas \cite{xu2024multi} setups, a definitive resolution to all of the challenges associated with atlas-based parcellation techniques has not yet emerged.

Given that one of the largest drawbacks of traditional FC analysis is the high dimensionality and complexity of the functional representations, solutions that address this particular issue are desired. Recently, Kolmogorov-Arnold Networks (KANs) \cite{liu2024kan} have emerged as an alternative to traditional multi-layer perceptrons (MLPs), leveraging learnable activation functions on edges rather than fixed activation functions on nodes. Inspired by the Kolmogorov-Arnold representation theorem, KANs replace conventional weight matrices with univariate functions parameterized as splines, offering improved expressiveness and flexibility in function approximation. This design enables KANs to model complex transformations more efficiently while maintaining better interpretability and scaling properties compared to MLPs. Additionally, KANs have demonstrated potential in computer vision-related tasks \cite{cheon2024demonstrating, pal2024understanding}. Based on this, we hypothesize that replacing MLPs with KANs in brain disorder diagnosis models can better capture intricate relationships in FC patterns, leading to more robust and individualized ASD diagnoses.

In this paper, we propose \textit{ABFR-KAN}, a novel workflow for brain disorder diagnosis. Building upon state-of-the-art methods, we propose novel sampling and function representation strategies and investigate the impact of KANs under various configurations in transformer networks. Our specific contributions are summarized as:
\begin{itemize}
  \item Randomized anchor patch selection, which helps avoid structural bias, boosts individual-specific representations, and increases robustness and generalizability by reducing dependence on atlas-based parcellation.
  \item Iterative sampling of patches from a subject's brain, aimed to create multiple function representations for the same subject, introducing variance while preserving meaningful FC information. 
  \item Extensive experimentation demonstrating the effectiveness of replacing traditional MLP components in two transformer networks (ViT and DeiT).
\end{itemize} 

\begin{figure}
\centering
\includegraphics[width=\linewidth]{figures/abfrkanarch.png}
\caption{Proposed ABFR-KAN model: The transformer network is fed fMRI-derived patches that are embedded with spatial position information and passed through the encoder. The binary classification prediction (control or ASD) is produced by the KAN head. The encoder is in the ViT style, with a KAN block replacing the MLP block. The KAN block is similar to an MLP block but with DropPath regularization and KAN layers to handle nonlinear transformations. In the KAN layer, input $I$ passes through multiple learnable nonlinear functions ($\phi_n$) that are combined in a structured manner to form the final transformations. In our KAN implementation, a reflectional switch function is used as a basis function in the KAN layer.}
\label{fig:arch}
\end{figure}

\section{Related Work}
There generally exist three different setups for brain disorder analysis using an atlas: single-atlas, multi-atlas, and individual-specific atlas. An example of a model constructed from a single-atlas approach is BrainGNN \cite{li2021braingnn}, a graph neural network (GNN) based on the Desikan-Killiany \cite{desikan2006automated} atlas that is capable of analyzing fMRI images and discovering neurological biomarkers. Another group employed multiple atlases \cite{kennedy1998gyri, craddock2012whole, rolls2020automated} to build a spectral GNN that enabled the identification of potential disease-related patterns associated with major depressive disorder \cite{lee2024spectral}. PFC-DBGNN-STAA \cite{cui2023personalized} was proposed as a method for identifying mild cognitive impairment (MCI) based on individual-specific FC features.

As an alternative to using pre-defined atlases for ROI parcellation, several data-driven approaches have been proposed. For example, attention-guided hybrid deep learning networks have been used to localize discriminative brain regions automatically for Alzheimer's disease and MCI diagnosis \cite{lian2020attention}. RandomFR \cite{liu2024randomizing} is an innovative approach for brain function representation and operates via a randomized selection of brain patches as well as the use of novel function and position description methods. RandomFR serves as the main inspiration for the research presented in this paper.

Given the early stage of research into KANs, there exist few studies on the use of KANs for tasks similar to the one we address in this paper. One study explores the use of KANs as deep feature extractors for MRI reconstruction, finding that incorporating Chebyshev polynomials into KANs \cite{ss2024chebyshev} led to both improved convergence and MRI reconstruction quality based on total variation and peak signal-to-noise ratio \cite{penkin2024kolmogorov}. Another study demonstrated the usefulness of KANs for chemical exchange saturation transfer (CEST) MRI analysis of the human brain \cite{wang2024cest}, and another found that a model integrating the learnable spline activation functions of KAN into convolution layers, ConvKAN, outperformed traditional convolutional neural network (CNN) and graph convolution network (GCN) approaches at classifying Parkinson's disease \cite{patel20242d}. To our knowledge, we are the first to investigate the efficacy of KANs for FC analysis and ASD diagnosis.

\section{Methods}
In this work, we follow the workflow structure described by \cite{liu2024randomizing} for brain function representation, which is divided into three stages: sampling, function representation, and transformer network. In the sampling stage, anchor patches are selected from the gray matter region of rs-fMRI scans. Each patch is defined by its average BOLD signal and its spatial position in the brain. In the function representation stage, sampled patches are characterized using a combination of function descriptions measuring FC and position descriptions, which encode their spatial locations in a standardized brain coordinate system. Function descriptions are computed as the Pearson correlation between the BOLD signal of the sampled patch and the anchor patches, forming a functional representation matrix. In the transformer network, embeddings based on the fusion of the function and position descriptions are passed to a transformer network for classification.

\subsection{Random Anchor Selection}
\label{subsec:random}
Previous works \cite{liu2024randomizing} have explored selecting anchor patches using a grid-based method, where a grid of coordinates to sample anchor patches from is constructed from ROIs in a unified parcellation of gray matter, along with stride and offset values. This approach, which has proven effective, does limit models in terms of flexibility and adaptability because the same grid is used for every subject in a dataset, imposing a structural bias and a disregard for individual specificity, as a subject may have functionally distinct regions that do not align well with predefined anchor patches.

To address this flaw, we propose an alternate approach, a randomized one, for anchor patch selection. Our method works as follows. First, bounding boxes encompassing ROIs in the gray matter are calculated, and the starting coordinates are randomly sampled using:
\begin{equation}
x_{start} \sim \text{Uniform}(x_{min}, x_{max} - p_s),
\end{equation}
where $p_s$ is the patch size. Similar equations are used to calculate $y_{start}$ and $z_{start}$. Once the anchor patches are sampled, they are validated to ensure sufficient overlap with the gray matter mask, $g_m$:
\begin{equation}
\sum(p_m \cdot g_m) \geq \tau,
\end{equation}
where $p_m$ is the patch mask and $\tau$ is a threshold. If the condition is not met, resample until a valid sample is found. This process repeats until the desired number of anchor patches is sampled. A visualization of both the grid-based anchor selection and our random anchor selection can be seen in Figure~\ref{fig:anchor}.

\subsection{Iterative Patch Sampling}
\label{subsec:iterative}

Given an fMRI volume \( V \in \mathbb{R}^{T \times X \times Y \times Z} \), where \( T \) represents the number of timepoints and \( X, Y, Z \) denote spatial dimensions, we define a gray matter mask \( M \in \{0,1\}^{X \times Y \times Z} \), where \( M(i,j,l) = 1 \) if the voxel belongs to gray matter. To extract representative features, we randomly sample a set of candidate patch centers \( S = \{ (x_k, y_k, z_k) \}_{k=1}^{N} \) from a uniform distribution within the fMRI volume.

Each selected center is used to define a cubic patch, $P_k$, of size \( p \), extending symmetrically around the center along all three spatial dimensions. The patch is constrained within the gray matter by applying an element-wise mask with \( M \), ensuring only voxels, $V_k$, belonging to gray matter are retained.

For each valid patch, we compute the mean fMRI signal over its gray-matter voxels at each timepoint:

\begin{equation}
\bar{V}_k = \frac{\sum_{i,j,l} V_k(i,j,l)}{\sum_{i,j,l} P_k(i,j,l) \cdot M(i,j,l)}.
\end{equation}

\noindent Additionally, the patch's spatial position is normalized relative to the fMRI volume dimensions.

To establish functional connectivity relationships, we compute correlation coefficients between sampled patches and predefined anatomical anchor regions. Each anchor region \( A_m \) is represented as a binary mask corresponding to a known brain region. The mean signal for each anchor is computed as:

\begin{equation}
\bar{V}_{A_m} = \frac{\sum_{i,j,l} V(i,j,l) \cdot A_m(i,j,l)}{\sum_{i,j,l} A_m(i,j,l)}.
\end{equation}

Pairwise functional connectivity (FC) correlations between patch signals and anchor regions are computed using the Pearson correlation coefficient, $C_{km} = \text{corr}(\bar{V}_k, \bar{V}_{A_m})$.

To capture fMRI features at varying spatial resolutions, the process is repeated across multiple patch sizes (\( p \in \{8, 12, 16\} \)). Each iteration samples new patches, extracts features, and computes FC matrices. The final aggregated FC matrix is obtained by averaging correlation values across iterations, and the final aggregated feature matrix consists of concatenated patch feature vectors across all iterations, capturing local brain activity features across multiple scales.

This iterative approach enhances the robustness of extracted fMRI features by reducing the impact of single-scale patch selection biases and improving anatomical coverage.

\subsection{Transformer Network}
In this work, we explore KAN integration in two popular transformer networks: vision transformer (ViT) \cite{dosovitskiy2020image} and data-efficient image transformer (DeiT) \cite{touvron2021training}. A visual depiction of our implementation of the KAN-based ViT is shown in Figure~\ref{fig:arch}. Both networks have two main locations that traditionally are constructed with MLP components: in the encoder and the classification head. We experiment with three different configurations of KAN integration: KAN-KAN, where the MLPs in both the encoder and classification head are replaced with KANs; KAN-MLP, where only the MLP in the encoder is replaced; and MLP-KAN, where only the MLP in the classification head is replaced.

\section{Experiments and Results}
\subsection{Data}
We evaluated our proposed ABFR-KAN using pre-processed neuroimaging data from the Autism Brain Imaging Data Exchange (ABIDE) \cite{craddock2013neuro, di2014autism}. The preprocessed ABIDE repository contains data collected from a total of 1,112 patients at various sites, preprocessed using a variety of methods. To initially train our ABFR-KAN model, we selected data from 171 patients that were collected from the New York University (NYU) Langone Medical Center site that had been processed using the Data Processing Assistant for Resting-State fMRI (DPARSF) \cite{yan2010dparsf} method. In total, 64 male (age range: 7--39) and 9 female (age range: 10--38) patients with ASD diagnoses were selected, along with 72 male (age range: 6--31) and 26 female (age range: 8--29) patients from the control group. To further validate the performance of ABFR-KAN on other ABIDE sites, as well as to explore its cross-domain performance, we selected additional data from 110 patients collected from the University of Michigan (UM) Functional MRI Center. This time, the dataset was balanced, containing information from 55 patients in the control group (46 male aged 8 to 18, 9 female aged 9 to 18) and 55 in the ASD group (38 male aged 8 to 18, 17 female aged 9 to 19).

\subsection{Implementation Details}
The ABFR-KAN model was implemented with PyTorch and trained on an \emph{Intel (R) Xeon (R) w7-2475X, 2600MHz} machine with a dual \emph{NVIDIA A4000X2} GPU (32GB). A 5-fold cross-validation strategy was used to assess the model's performance. For classification, we minimize the cross-entropy loss. The model was trained for 100 epochs, using the Adam optimizer with a learning rate of 0.0009. The model's performance is gauged using traditional metrics for classification tasks, namely accuracy (ACC), area under curve (AUC), F1 score (F1), precision (PRE), sensitivity (SEN), and specificity (SPE).

\begin{table}[t]
\centering
\caption{ABIDE NYU Site Performance: Classification performance of ABFR-KAN under different anchor selection and patch sampling strategies. The best and second best results are \textbf{bolded} and \underline{underlined}, respectively.}
\label{tab:abfr-kan-NYU}
\resizebox{\linewidth}{!}{
\begin{tabular}{cccccccc}
\toprule
\multicolumn{8}{c}{\textbf{Grid-based anchor selection, random patch sampling}} \\
\midrule
Backbone & Model & ACC & AUC & F1 & PRE & SEN & SPE \\
\midrule
\multirow{4}{*}{ViT} 
& MLP-MLP & \underline{0.731$\pm$0.064} & 0.695$\pm$0.114 & \underline{0.778$\pm$0.054} & 0.745$\pm$0.057 & \underline{0.899$\pm$0.062} & \underline{0.718$\pm$0.094} \\
& KAN-KAN & 0.708$\pm$0.092 & \underline{0.706$\pm$0.080} & 0.768$\pm$0.039 & \underline{0.762$\pm$0.145} & 0.867$\pm$0.121 & 0.692$\pm$0.124 \\
& KAN-MLP & 0.714$\pm$0.049 & \textbf{0.718$\pm$0.089} & 0.771$\pm$0.057 & \textbf{0.765$\pm$0.108} & 0.866$\pm$0.093 & 0.704$\pm$0.068 \\
& MLP-KAN & \textbf{0.737$\pm$0.050} & 0.686$\pm$0.096 & \textbf{0.785$\pm$0.050} & 0.741$\pm$0.070 & \textbf{0.910$\pm$0.058} & \textbf{0.719$\pm$0.049} \\
\midrule
\multirow{4}{*}{DeiT} 
& MLP-MLP & \underline{0.720$\pm$0.050} & \underline{0.696$\pm$0.097} & 0.762$\pm$0.050 & 0.724$\pm$0.080 & \underline{0.856$\pm$0.011} & \underline{0.693$\pm$0.057} \\
& KAN-KAN & \textbf{0.725$\pm$0.057} & \textbf{0.697$\pm$0.049} & \textbf{0.773$\pm$0.043} & \textbf{0.750$\pm$0.131} & 0.846$\pm$0.060 & \textbf{0.711$\pm$0.068} \\
& KAN-MLP & 0.696$\pm$0.053 & 0.663$\pm$0.097 & 0.756$\pm$0.048 & \underline{0.742$\pm$0.121} & 0.826$\pm$0.123 & 0.672$\pm$0.052 \\
& MLP-KAN & 0.679$\pm$0.058 & 0.662$\pm$0.025 & \underline{0.767$\pm$0.017} & 0.733$\pm$0.080 & \textbf{0.939$\pm$0.049} & 0.663$\pm$0.066 \\
\midrule
\multicolumn{8}{c}{\textbf{Random anchor selection, random patch sampling}} \\
\midrule
Backbone & Model & ACC & AUC & F1 & PRE & SEN & SPE \\
\midrule
\multirow{4}{*}{ViT} 
& MLP-MLP & 0.702$\pm$0.033 & 0.667$\pm$0.081 & 0.737$\pm$0.057 & 0.723$\pm$0.061 & 0.776$\pm$0.125 & \underline{0.704$\pm$0.039} \\
& KAN-KAN & \textbf{0.743$\pm$0.068} & \textbf{0.727$\pm$0.112} & \textbf{0.783$\pm$0.061} & \textbf{0.764$\pm$0.084} & \textbf{0.890$\pm$0.132} & \textbf{0.729$\pm$0.072} \\
& KAN-MLP & \underline{0.720$\pm$0.045} & \underline{0.704$\pm$0.041} & \underline{0.768$\pm$0.055} & \underline{0.734$\pm$0.082} & 0.825$\pm$0.116 & 0.699$\pm$0.037 \\
& MLP-KAN & 0.708$\pm$0.059 & 0.693$\pm$0.071 & 0.763$\pm$0.069 & 0.732$\pm$0.146 & \underline{0.876$\pm$0.143} & 0.688$\pm$0.057 \\
\midrule
\multirow{4}{*}{DeiT} 
& MLP-MLP & 0.696$\pm$0.072 & 0.671$\pm$0.092 & 0.755$\pm$0.065 & 0.725$\pm$0.080 & 0.865$\pm$0.139 & 0.682$\pm$0.085 \\
& KAN-KAN & \underline{0.708$\pm$0.058} & \underline{0.676$\pm$0.066} & \underline{0.775$\pm$0.014} & \underline{0.741$\pm$0.054} & 0.900$\pm$0.127 & \underline{0.682$\pm$0.059} \\
& KAN-MLP & 0.684$\pm$0.051 & 0.622$\pm$0.020 & 0.767$\pm$0.032 & 0.722$\pm$0.117 & \textbf{0.959$\pm$0.050} & 0.665$\pm$0.065 \\
& MLP-KAN & \textbf{0.713$\pm$0.058} & \textbf{0.689$\pm$0.060} & \textbf{0.779$\pm$0.038} & \textbf{0.762$\pm$0.114} & \underline{0.950$\pm$0.045} & \textbf{0.687$\pm$0.082} \\
\midrule
\multicolumn{8}{c}{\textbf{Grid-based anchor selection, iterative patch sampling}} \\
\midrule
Backbone & Model & ACC & AUC & F1 & PRE & SEN & SPE \\
\midrule
\multirow{4}{*}{ViT} 
& MLP-MLP & 0.643$\pm$0.047 & 0.612$\pm$0.040 & \underline{0.760$\pm$0.049} & \underline{0.725$\pm$0.043} & 0.839$\pm$0.124 & \underline{0.652$\pm$0.040} \\
& KAN-KAN & 0.673$\pm$0.043 & 0.621$\pm$0.043 & 0.757$\pm$0.033 & 0.702$\pm$0.108 & \underline{0.898$\pm$0.128} & 0.638$\pm$0.037 \\
& KAN-MLP & \textbf{0.690$\pm$0.048} & \textbf{0.674$\pm$0.090} & \textbf{0.772$\pm$0.035} & 0.683$\pm$0.045 & \textbf{0.918$\pm$0.068} & 0.651$\pm$0.051 \\
& MLP-KAN & \underline{0.684$\pm$0.029} & \underline{0.623$\pm$0.056} & 0.725$\pm$0.043 & \textbf{0.733$\pm$0.064} & 0.819$\pm$0.153 & \textbf{0.671$\pm$0.048} \\
\midrule
\multirow{4}{*}{DeiT} 
& MLP-MLP & 0.667$\pm$0.047 & 0.608$\pm$0.089 & 0.733$\pm$0.029 & 0.685$\pm$0.063 & 0.844$\pm$0.156 & 0.647$\pm$0.055 \\
& KAN-KAN & 0.673$\pm$0.020 & 0.588$\pm$0.095 & \underline{0.755$\pm$0.018} & 0.688$\pm$0.051 & \underline{0.888$\pm$0.081} & 0.646$\pm$0.023 \\
& KAN-MLP & \textbf{0.707$\pm$0.054} & \textbf{0.667$\pm$0.101} & \textbf{0.779$\pm$0.030} & \underline{0.706$\pm$0.075} & \textbf{0.898$\pm$0.085} & \textbf{0.674$\pm$0.064} \\
& MLP-KAN & \underline{0.673$\pm$0.060} & \underline{0.661$\pm$0.074} & 0.722$\pm$0.039 & \textbf{0.769$\pm$0.096} & 0.805$\pm$0.070 & \underline{0.660$\pm$0.061} \\
\midrule
\multicolumn{8}{c}{\textbf{Random anchor selection, iterative patch sampling}} \\
\midrule
Backbone & Model & ACC & AUC & F1 & PRE & SEN & SPE \\
\midrule
\multirow{4}{*}{ViT} 
& MLP-MLP & 0.679$\pm$0.080 & 0.664$\pm$0.052 & 0.739$\pm$0.048 & \underline{0.707$\pm$0.101} & 0.785$\pm$0.045 & 0.661$\pm$0.095 \\
& KAN-KAN & \underline{0.703$\pm$0.082} & \underline{0.669$\pm$0.094} & \underline{0.764$\pm$0.075} & 0.698$\pm$0.062 & \textbf{0.919$\pm$0.084} & \underline{0.678$\pm$0.079} \\
& KAN-MLP & 0.679$\pm$0.076 & 0.640$\pm$0.110 & 0.753$\pm$0.077 & 0.693$\pm$0.069 & 0.856$\pm$0.132 & 0.655$\pm$0.090 \\
& MLP-KAN & \textbf{0.743$\pm$0.088} & \textbf{0.734$\pm$0.131} & \textbf{0.786$\pm$0.069} & \textbf{0.780$\pm$0.151} & \underline{0.897$\pm$0.065} & \textbf{0.716$\pm$0.105} \\
\midrule
\multirow{4}{*}{DeiT} 
& MLP-MLP & 0.702$\pm$0.067 & \textbf{0.673$\pm$0.105} & 0.769$\pm$0.045 & \textbf{0.738$\pm$0.060} & 0.877$\pm$0.103 & \textbf{0.695$\pm$0.065} \\
& KAN-KAN & \textbf{0.714$\pm$0.047} & \underline{0.645$\pm$0.045} & \textbf{0.781$\pm$0.020} & 0.707$\pm$0.059 & 0.909$\pm$0.062 & \underline{0.683$\pm$0.064} \\
& KAN-MLP & \underline{0.702$\pm$0.047} & 0.644$\pm$0.133 & \underline{0.774$\pm$0.025} & \underline{0.707$\pm$0.052} & \underline{0.919$\pm$0.087} & 0.679$\pm$0.059 \\
& MLP-KAN & 0.690$\pm$0.058 & 0.645$\pm$0.102 & 0.763$\pm$0.042 & 0.687$\pm$0.061 & \textbf{0.928$\pm$0.051} & 0.658$\pm$0.070 \\
\bottomrule
\end{tabular}
}
\end{table}

\begin{table}[t]
\centering
\caption{ABIDE UM Site Performance: Classification performance of ABFR-KAN under the random anchor selection, random patch sampling setting. The best and second best results are \textbf{bolded} and \underline{underlined}, respectively.}
\label{tab:abfr-kan-UM}
\resizebox{\linewidth}{!}{
\begin{tabular}{cccccccc}
\toprule
Backbone & Model & ACC & AUC & F1 & PRE & SEN & SPE \\
\midrule
\multirow{4}{*}{ViT} 
& MLP-MLP & \underline{0.736$\pm$0.078} & \textbf{0.707$\pm$0.111} & 0.726$\pm$0.096 & \underline{0.727$\pm$0.028} & \underline{0.889$\pm$0.222} & \textbf{0.727$\pm$0.073} \\
& KAN-KAN & 0.727$\pm$0.050 & \underline{0.673$\pm$0.107} & \textbf{0.766$\pm$0.044} & \textbf{0.767$\pm$0.137} & \textbf{0.913$\pm$0.085} & \underline{0.723$\pm$0.066} \\
& KAN-MLP & 0.718$\pm$0.045 & 0.653$\pm$0.147 & 0.719$\pm$0.045 & 0.723$\pm$0.075 & 0.761$\pm$0.292 & 0.692$\pm$0.069 \\
& MLP-KAN & \textbf{0.736$\pm$0.053} & 0.667$\pm$0.056 & \underline{0.740$\pm$0.072} & 0.722$\pm$0.171 & 0.793$\pm$0.163 & 0.716$\pm$0.021 \\
\midrule
\multirow{4}{*}{DeiT} 
& MLP-MLP & \underline{0.727$\pm$0.076} & \textbf{0.689$\pm$0.112} & 0.715$\pm$0.096 & \underline{0.755$\pm$0.126} & 0.741$\pm$0.161 & \underline{0.710$\pm$0.084} \\
& KAN-KAN & \textbf{0.727$\pm$0.070} & 0.658$\pm$0.043 & \textbf{0.742$\pm$0.107} & \textbf{0.762$\pm$0.145} & \underline{0.836$\pm$0.199} & \textbf{0.711$\pm$0.026} \\
& KAN-MLP & 0.709$\pm$0.022 & 0.663$\pm$0.117 & \underline{0.733$\pm$0.024} & 0.752$\pm$0.129 & \textbf{0.838$\pm$0.057} & 0.682$\pm$0.041 \\
& MLP-KAN & 0.700$\pm$0.068 & \underline{0.687$\pm$0.127} & 0.721$\pm$0.068 & 0.675$\pm$0.074 & 0.805$\pm$0.155 & 0.678$\pm$0.095 \\
\bottomrule
\end{tabular}
}
\end{table}

\begin{table}[t]
\centering
\caption{Cross-site generalizability of the proposed ABFR-KAN in ASD detection under the random anchor selection, random patch sampling setting. The best and second best results are \textbf{bolded} and \underline{underlined}, respectively.}
\label{tab:abfr-kan-cross}
\resizebox{\linewidth}{!}{
\begin{tabular}{ll ccc c ccc}
\toprule
\multirow{2}{*}{Backbone} & \multirow{2}{*}{Model} & 
\multicolumn{3}{c}{Test: UM Site (Train: NYU Site)} & \phantom{} & \multicolumn{3}{c}{Test: NYU Site (Train: UM Site)}\\
\cmidrule{3-5} \cmidrule{7-9}
&& ACC & AUC & F1 && ACC & AUC & F1\\ 
\midrule
\multirow{4}{*}{ViT} & MLP-MLP & 0.639$\pm$0.030 & 0.628$\pm$0.076 & 0.675$\pm$0.052 & & \textbf{0.543$\pm$0.058} & \textbf{0.551$\pm$0.087} & 0.559$\pm$0.074\\
& KAN-KAN & \textbf{0.676$\pm$0.062} & \textbf{0.687$\pm$0.106} & \textbf{0.713$\pm$0.056} & & 0.536$\pm$0.037 & \underline{0.525$\pm$0.083} & \textbf{0.590$\pm$0.034}\\
& KAN-MLP & \underline{0.655$\pm$0.041} & \underline{0.665$\pm$0.039} & \underline{0.704$\pm$0.050} & & 0.528$\pm$0.033 & 0.509$\pm$0.115 & 0.557$\pm$0.035\\
& MLP-KAN & 0.644$\pm$0.054 & 0.655$\pm$0.067 & 0.701$\pm$0.063 & & \underline{0.538$\pm$0.039} & 0.520$\pm$0.044 & \underline{0.574$\pm$0.056}\\
\midrule
\multirow{4}{*}{DeiT} & MLP-MLP & \underline{0.564$\pm$0.058} & 0.621$\pm$0.085 & 0.522$\pm$0.045 & & \underline{0.553$\pm$0.058} & \textbf{0.568$\pm$0.092} & 0.567$\pm$0.076 \\
& KAN-KAN & \textbf{0.578$\pm$0.047} & \underline{0.623$\pm$0.061} & \underline{0.536$\pm$0.010} & & \textbf{0.559$\pm$0.054} & 0.546$\pm$0.036 & \textbf{0.586$\pm$0.085} \\
& KAN-MLP & 0.555$\pm$0.041 & 0.576$\pm$0.019 & 0.531$\pm$0.022 & & 0.543$\pm$0.017 & 0.547$\pm$0.097 & \underline{0.578$\pm$0.019}\\
& MLP-KAN & \textbf{0.578$\pm$0.047} & \textbf{0.636$\pm$0.055} & \textbf{0.542$\pm$0.026} & & 0.534$\pm$0.052 & \underline{0.564$\pm$0.104} & 0.575$\pm$0.054\\
\bottomrule
\end{tabular}
}
\end{table}

\begin{table}[t]
\centering
\caption{Performance comparison of ABFR-KAN against the baseline and state-of-the-art models in ASD detection under the random anchor selection, iterative patch sampling setting. The best and second best results are \textbf{bolded} and \underline{underlined}, respectively.}
\label{tab:abfr-kan-sota}
\resizebox{\linewidth}{!}{
\begin{tabular}{lcccc}
\toprule
Model & ACC & AUC & SEN & SPE \\
\midrule
SVM~\cite{cortes1995support} & 0.649$\pm$0.056 & 0.663$\pm$0.097 & 0.838$\pm$0.152 & 0.619$\pm$0.045 \\
BrainNetCNN~\cite{kawahara2017brainnetcnn} & 0.696$\pm$0.039 & 0.654$\pm$0.072 & 0.828$\pm$0.121 & 0.676$\pm$0.065 \\
GAT~\cite{velivckovic2017graph}& 0.661$\pm$0.041 & 0.640$\pm$0.073 & 0.817$\pm$0.068 & 0.636$\pm$0.046 \\
GCN~\cite{qin2022using} & 0.720$\pm$0.063 & \underline{0.705$\pm$0.078} & 0.848$\pm$0.104 & 0.700$\pm$0.075 \\
BrainGNN~\cite{li2021braingnn} & 0.719$\pm$0.030 & 0.663$\pm$0.048 & 0.784$\pm$0.108 & \underline{0.706$\pm$0.032} \\
MVS-GCN~\cite{wen2022mvs} & \underline{0.726$\pm$0.083} & 0.695$\pm$0.099 & \underline{0.888$\pm$0.074} & 0.695$\pm$0.099 \\
ABFR-KAN & \textbf{0.743$\pm$0.088} & \textbf{0.734$\pm$0.131} & \textbf{0.897$\pm$0.065} & \textbf{0.716$\pm$0.105}\\
\bottomrule
\end{tabular}
}
\end{table}

\subsection{Results}

From the results in Table~\ref{tab:abfr-kan-NYU}, it is apparent that ABFR-KAN consistently outperforms MLP-only methods across different configurations, demonstrating the benefits of KANs in FC analysis. In the grid-based anchor selection, random patch sampling experiment, MLP-KAN performed best for ViT models, while KAN-KAN performed best for DeiT models. This suggests that KANs can enhance representation without fully replacing MLP components. It should be noted for this experiment that the MLP-MLP variant with the ViT backbone represents the RandomFR \cite{liu2024randomizing} model.

For random anchor selection, random patch sampling, performance improved across most metrics, particularly for ViT models, indicating that removing structural bias allows for more subject-specific representations. KAN-KAN performed best for ViT models, while MLP-KAN was optimal for DeiT. The grid-based anchor selection, iterative patch sampling approach yielded more competitive results between ViT and DeiT, with KAN-MLP emerging as the top-performing configuration.

The random anchor selection, iterative patch sampling strategy achieved the highest classification performance, particularly with the ViT backbone and MLP-KAN configuration. This approach enhances robustness by introducing controlled variance while preserving subject specificity, effectively capturing individualized patterns in ASD diagnosis.

To assess generalizability, we evaluated the random anchor selection, random patch sampling approach on the ABIDE UM site (Table~\ref{tab:abfr-kan-UM}). Despite variations in data acquisition, ABFR-KAN demonstrated strong transferability, particularly for ViT-based models. Cross-domain evaluations, where models trained on one site (NYU) were tested on another (UM) and vice versa (Table~\ref{tab:abfr-kan-cross}), further confirmed its generalization capabilities.

Finally, we compared ABFR-KAN against traditional and state-of-the-art baselines (Table~\ref{tab:abfr-kan-sota}). Our method outperformed classical machine learning models like SVM and deep learning-based approaches such as BrainGNN and MVS-GCN. The most pronounced improvements were in sensitivity and AUC, highlighting ABFR-KAN’s ability to improve ASD classification accuracy while reducing the misclassification of ASD-positive cases.

\subsection{Discussion}

Our results demonstrate the effectiveness of ABFR-KAN for FC-based ASD classification, with particular emphasis on the benefits of random anchor selection, iterative patch sampling. This configuration yielded the best performance across multiple metrics. In this section, we break down the contributing factors to this performance and analyze why the best-performing configuration (KAN-KAN) performed as it did.

Traditional grid-based anchor selection introduces structural biases by imposing predefined spatial constraints on the extracted FC patches. Our random anchor selection method mitigates this issue, allowing for more individualized functional representations. Iterative patch sampling provides a multi-scale view of each subject’s FC patterns, introducing controlled variance while preserving meaningful functional signals. This is especially important in ASD classification, where inter-subject variability is high. The benefits of iterative patch sampling are evident in our best-performing configuration (MLP-KAN combined with random anchor selection), as it enables the model to refine functional representations over multiple iterations.

While hybrid models demonstrated strong performance in some cases, notably in our best-performing model, the fully KAN-based configuration ultimately achieved the best results, as validated by a Kruskal-Wallis test followed by a Dunn test on the random anchor selection, iterative patch sampling experiment. These tests uncovered a statistically significant p-value of 0.0018 for DeiT models with the KAN-KAN configuration. We propose several explanations for this. First, the combination of KAN layers in both the encoder and classification head enables ABFR-KAN to model functional connectivity patterns with greater expressivity. KANs inherently capture complex, nonlinear relationships better than MLPs, making them particularly well-suited for FC analysis.

Additionally, KANs provide a more adaptable function representation, and having them in both the encoder and classification head ensures that the entire model benefits from this increased flexibility. While hybrid configurations (KAN-MLP, MLP-KAN) still offer improvements over MLP-only architectures, they lack the full expressive power of KAN-KAN, which may explain why they perform slightly worse in our experiments.

Secondly, the number of trainable parameters varies across configurations, with KAN-KAN having the highest parameter count (190,112 for ViT and 205,072 for DeiT). While higher parameter counts can sometimes lead to overfitting, in this case, the added complexity appears to be beneficial. The learnable activation functions allow the model to adapt more flexibly to the underlying data structure rather than being constrained by fixed activation functions.

Although KAN-KAN was the best overall performer, our results also highlight scenarios where hybrid configurations offer advantages. Notably, MLP-KAN achieved strong performance in grid-based anchor selection experiments, suggesting that conventional MLP heads may still be beneficial in certain cases. This may be particularly relevant in settings where computational efficiency is a priority, as a model with fewer trainable parameters would run faster.

\section{Conclusions}
In this paper, we have introduced a novel architecture for improving ASD diagnosis, ABFR-KAN. We systematically evaluated multiple architectural configurations, identifying the impact of KAN integration at different stages of the model pipeline. The most notable result was that the KAN-KAN configuration, which fully replaces MLP components with KAN layers, achieved the best classification performance, highlighting the expressive power of KANs in modeling nonlinear relationships in brain connectivity data.  Additionally, we demonstrate that random anchor selection and iterative patch sampling provide substantial improvements in FC analysis by mitigating structural biases and introducing controlled variance.

Despite KAN-KAN’s superiority, hybrid models such as MLP-KAN and KAN-MLP also exhibited strong performance in certain settings, indicating that selectively integrating KANs can provide a balance between expressive function representation and computational efficiency. These results suggest that while full KAN integration is optimal in many cases, hybrid configurations remain a viable alternative, particularly when computational constraints are a concern.

\clearpage

\bibliography{midl25_115}

\clearpage
\appendix
\clearpage
\section{Dataset Samples}
\begin{figure}[h]
\centering
\includegraphics[width=0.9\linewidth, trim={0cm 1cm 0cm 0cm}, clip]{figures/scans.png}
\caption{Raw and preprocessed data for a single patient in the ABIDE I dataset.}  
\label{fig:scans}
\end{figure}
\noindent Figure~\ref{fig:scans} shows different patient-specific scans from the ABIDE I dataset. The first, MP-RAGE, is an MRI pulse sequence optimized for T1-weighted imaging, allowing easy identification of anatomical features with high gray/white matter contrast \cite{brant1992mp}. The second, rs-fMRI, is a noninvasive technique used to measure and analyze brain activity when a subject is at rest, i.e., not engaged in a specific task. rs-fMRI scans are widely used to study FC between brain regions \cite{santana2022rs}. The third, and the most relevant to this research, is the DPARSF pre-processed fMRI data. DPARSF is a toolkit that simplifies pre-processing tasks such as slice timing, realignment, normalization, and smoothing \cite{yan2010dparsf}.

\clearpage
\section{Anchor Patch Selection}
\begin{figure}[h]
\centering
\includegraphics[width=0.9\linewidth, trim={0cm 1cm 0cm 0cm}, clip]{figures/anchorselection.png}
\caption{(a) The gray matter mask from which our anchor patches are selected. (b) The baseline grid-based anchor selection process. Note how certain patches fall outside of the gray matter region entirely. (c) Our randomized anchor selection process, which captures the full scope of the gray matter region, reducing structural bias and enhancing individual specificity.}
\label{fig:anchor}
\end{figure}
\clearpage
\section{Patch Sampling}
\begin{figure}[h]
\centering
\includegraphics[width=\linewidth, trim={0cm 1cm 0cm 0cm}, clip]{figures/patchsampling.png}
\caption{(a) The random patch sampling process. Observe how the size of the patches is consistent. (b) The iterative patch sampling process, where each subject is processed three times as a form of data augmentation, with patch sizes of $8\times8$, $12\times12$, and $16\times16$.}
\label{fig:patchsampling}
\end{figure}
\noindent Figure~\ref{fig:patchsampling} shows the two different patch sampling processes used in our study. The first, random patch sampling, uses consistent patch sizes randomly selected from across the gray matter region, reducing structural bias while maintaining functional specificity. The second, the iterative sampling method, acts as a data augmentation technique with the aim of introducing variability while preserving meaningful FC information.

\clearpage

\section{Receiver Operating Characteristic (ROC) Curves}
\begin{figure}[h]
\centering
\includegraphics[width=0.75\linewidth]{figures/roc.png}
\caption{ROC curve comparison of our ABFR-KAN models vs. the baseline, which is reported as MLP-MLP. Note that our ABFR-KAN models generally achieve better curves compared to the MLP-MLP models.}
\label{fig:roc}
\end{figure}
\noindent Figure~\ref{fig:roc} presents a visual performance comparison between our ABFR-KAN models and the baseline. The ROC curves illustrate the trade-offs between true and false positive rates. Our ABFR-KAN models demonstrate consistent performance improvement over the baseline, as indicated by the higher ROC curves. This suggests that replacing traditional MLP components with KANs enhances the model's ability to distinguish between ASD and control subjects. The observed improvements highlight the effectiveness of our approach in capturing complex FC patterns within the brain.

\end{document}
