% \documentclass{uai2025} % for initial submission
\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{subcaption}
\usepackage{multirow}
\usepackage{hyperref}
\newcommand{\theHalgorithm}{\arabic{algorithm}}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{amsthm}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\usepackage{csquotes}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% THEOREMS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{corollary}[theorem]{Corollary}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{assumption}[theorem]{Assumption}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{SALSA: A Secure, Adaptive and Label-Agnostic Scalable Algorithm for Machine Unlearning}

\author[1]{\href{mailto:<makroo.owais@kgpian.iitkgp.ac.in>?Subject=Your UAI 2025 paper}{Owais Makroo$^*$}{}}
\author[2]{Atif Hassan$^*$}
\author[1]{Swanand Khare}
% Add affiliations after the authors
\affil[1]{%
    Department of Mathematics\\
    IIT Kharagpur\\
    Kharagpur, West Bengal, India
}
\affil[2]{%
    Department of Artificial Intelligence\\
    IIT Kharagpur\\
    Kharagpur, West Bengal, India
}
  \begin{document}
\maketitle

\begin{abstract}
Machine Learning as a Service (MLaaS) has simplified access to powerful machine learning models but faces challenges in complying with the “right to be forgotten” while resisting adversarial threats. 
Machine Unlearning (MU) addresses these issues by enabling selective data removal from models. 
However, existing methods are slow, label-dependent, vulnerable to black-box attacks, and computationally impractical for large-scale MLaaS deployments. 
We introduce SALSA, a Secure, Adaptive, Label-Agnostic, Scalable Algorithm for efficient and robust machine unlearning tailored to classification tasks in MLaaS. 
SALSA redistributes the class-wise predicted probabilities of data to be forgotten and optimizes a novel loss function that minimizes the divergence between redistributed and predicted probabilities while anchoring model parameters near their initialization. 
This ensures simultaneous unlearning and generalization.
SALSA requires neither labels nor access to the remaining data, making it ideal for MLaaS environments. 
It is exceptionally fast, achieving at least $25\times$ faster unlearning, on average, than the fastest baseline, while consistently outperforming five state-of-the-art MU techniques across eight metrics on benchmark datasets.
Experiments on synthetic data show that SALSA’s altered decision boundaries closely approximate exact unlearning. 
Rigorous evaluations against state-of-the-art black-box attacks demonstrate its resilience to security threats. 
Thus, SALSA redefines practical machine unlearning, offering a scalable and resilient solution for safeguarding privacy in modern MLaaS systems.
\end{abstract}
\begingroup\renewcommand{\thefootnote}{*}\footnotetext{These authors contributed equally to this work.}\endgroup%
\section{Introduction}\label{sec:intro}
Machine learning (ML), particularly deep learning (DL), has revolutionized data-driven services, achieving remarkable performance in domains such as computer vision \citep{DBLP:conf/cvpr/HeZRS16,DBLP:conf/iclr/DosovitskiyB0WZ21}, natural language processing \citep{DBLP:conf/nips/BrownMRSKDNSSAA20,DBLP:journals/corr/abs-2303-08774}, and speech recognition \citep{DBLP:conf/icml/RadfordKXBMS23,DBLP:conf/nips/BaevskiZMA20}. 
At the core of this progress lies the emergence of deep neural network foundation models that leverage billions of parameters to deliver exceptional performance across diverse tasks \citep{xi2023rise}.
ML's success has catalyzed the widespread adoption of cloud-based platforms known as Machine Learning as a Service (MLaaS) that democratize access to powerful predictive and analytic tools by allowing users to train, fine-tune, and deploy models without managing complex computational infrastructure.
These platforms abstract the complexities of machine learning through APIs, offering benefits such as scalability, cost-effectiveness, and enhanced privacy by separating user data from service providers during deployment \citep{shmueli2023machine}. 

However, MLaaS is not without its own security and vulnerability issues.
Models trained on sensitive data are susceptible to memorizing and exposing private information \citep{wu2022quantitative,carlini2023extracting}. 
Such vulnerabilities are particularly critical in cloud platforms, where deployed models interact with potentially malicious users, risking data leakage and exploitation through attacks like membership inference and model inversion \citep{DBLP:conf/sp/HuWDX24,7958568}.
In response, government regulations mandate the ``right to be forgotten,'' requiring the effective removal of personal data upon request. 
While straightforward in storage systems, enforcing this in trained ML models remains a formidable challenge \citep{thudi2022unrolling}. 
Machine Unlearning (MU) has emerged as a potential solution, aiming to erase the influence of specific data points while preserving model performance \citep{DBLP:journals/tetci/XuWWJ24,DBLP:conf/sp/BourtouleCCJTZL21}. 

\subsection{Motivation}
A na\"ive approach to machine unlearning (MU) involves retraining the model from scratch on the remaining data to guarantee complete unlearning. 
While effective, this approach is computationally prohibitive for modern deep learning models and impractical in Machine Learning as a Service (MLaaS) settings, where servers hosting models lack access to the original training data.  
To address these challenges, researchers have proposed efficient MU techniques categorized as exact \citep{DBLP:conf/sp/BourtouleCCJTZL21,DBLP:conf/ijcai/YanLG0L022} and approximate unlearning \citep{10113700,DBLP:conf/uss/WangH023,DBLP:conf/eurosp/ThudiDCP22}. 
However, most methods rely on access to remaining training data or labels, which is often unavailable in MLaaS environments due to privacy constraints \citep{DBLP:conf/iclr/000100BCX24}. 
Influence-function-based methods require computationally expensive Hessian inversions \citep{DBLP:conf/nips/ChenYXBHHFZWL23,DBLP:conf/ndss/WarneckePWR23}, while other approximate methods \citep{DBLP:journals/corr/abs-2409-19732,10113700} involve iterative fine-tuning, further limiting scalability.  

MU techniques must also address adversarial threats in MLaaS. 
Models are vulnerable to membership inference \citep{DING2025103947}, model inversion \citep{DBLP:conf/sp/HuWDX24}, and malicious unlearning, where attackers exploit unlearning requests to cause over-unlearning, degrading model utility \citep{DBLP:conf/ndss/Hu0CZ00ZX24}. 
These risks are particularly severe in classification tasks central to MLaaS applications, such as facial recognition \citep{10426386}, anomaly detection \citep{DBLP:conf/ccs/DuCLOS19}, and medical diagnosis \citep{zhou2023unified}, where breaches or malicious unlearning can have dire consequences.  
Addressing these computational and security challenges is critical for advancing practical and resilient machine unlearning solutions.  

\subsection{Our Contribution}
We propose SALSA, a Secure, Adaptive and Label-agnostic Scalable Algorithm for Machine Unlearning specifically designed for classification tasks. 
We introduce a new strategy to redistribute the class-wise predicted probabilities of a model for a given set of samples that need to be forgotten. 
A novel loss function is then employed to implement unlearning while maintaining generalization performance. 
This is achieved by simultaneously minimizing the divergence between the redistributed and predicted class-wise probabilities as well as the Euclidean distance between the original and current model parameters.
Iteratively fine-tuning a pre-trained model using this process results in a computationally efficient unlearning approach that we empirically find to converge within a few steps. 
Thus, unlike prior techniques, SALSA relies solely on the samples that need to be forgotten, requires no corresponding label information, and preserves the model's generalization on the remaining data without ever accessing that data.

We extensively evaluate SALSA under diverse settings. 
First, we observe that on non-linear synthetic datasets, across multiple unlearning paradigms (sample-wise, subclass-wise, and class-wise unlearning), the altered decision boundaries of our unlearned models closely approximate those of exact unlearning.
Second, we evaluate SALSA's efficacy on three benchmark datasets, CIFAR10, SVHN and TinyImageNet using ResNet18 and Swin Transformer models, under different unlearning paradigms (sample-wise and class-wise unlearning) against five state-of-the-art machine unlearning methods. 
SALSA consistently outperforms all considered baselines under eight different evaluation metrics across all datasets while being at least $25\times$ faster, on average, than the fastest baseline. 
Finally, to ensure the security of the unlearning process, we test SALSA against three prominent black-box attacks that are possible in the MLaaS setting, namely, membership inference, model inversion, and malicious unlearning (over-unlearning). 
The attacks consistently fail to recover any information about samples unlearned using our approach while also remaining unsuccessful in compromising the performance of the end model.
Thus, our proposed algorithm effectively mitigates threats encountered by MLaaS while maintaining high performance.

\section{Related Work}
\subsection{Machine Unlearning}
Introduced by \cite{cao2015towards}, machine unlearning focuses on removing specific data influences from trained models. 
While retraining without the samples to be forgotten ensures complete unlearning, its computational cost is prohibitive for large-scale models like GPT-3 \citep{brown2020language}, which requires $34$ days on $1024$ GPUs for retraining \citep{narayanan2021efficient}. 
To address this, efficient unlearning strategies have emerged, categorized as exact and approximate.

\textbf{Exact Unlearning:} 
Exact unlearning methods, such as SISA \citep{DBLP:conf/sp/BourtouleCCJTZL21}, retrain only on affected data shards while DaRE \citep{brophy2021machine} selectively retrains parts of random forests, reducing overhead but requiring access to the training dataset, an issue in privacy-centric MLaaS environments. 

\textbf{Approximate Unlearning:} 
Approximate methods adjust model parameters without full retraining. 
Amnesiac ML \citep{graves2021amnesiac} removes gradient updates corresponding to the samples that need to be forgotten to achieve unlearning but risks residual influence. 
Influence-based approaches \citep{DBLP:conf/icml/GuoGHM20,izzo2021approximate,DBLP:conf/nips/ChenYXBHHFZWL23,DBLP:conf/ndss/WarneckePWR23} use influence functions but face scalability issues due to costly inverse Hessian computations. 
Gradient-based techniques offer practical alternatives.
\cite{DBLP:conf/ndss/WarneckePWR23} overwrite unlearned data contributions, while methods like SalUn \citep{DBLP:conf/iclr/FanLZ0W024}, SFTC \citep{10.1145/3655693.3655697}, and LAF \citep{DBLP:conf/iclr/000100BCX24} refine model weights or rely on biased labeling strategies for efficiency. 
Recent works, including FEMU \citep{10113700} and SFRon \citep{DBLP:journals/corr/abs-2409-19732} achieve scalable and practical unlearning, aligning well with MLaaS requirements.


\subsection{Black-box Threats in MLaaS}
Black-box attacks present significant challenges to privacy and security in MLaaS.

\textbf{Membership Inference Attacks (MIAs):} 
MIAs determine training data membership by exploiting overfitting patterns in model outputs \citep{shokri2017membership}. 
Advanced methods infer unlearned data membership by analyzing confidence vectors before and after unlearning \citep{hu2024eraser,chen2021machine,lu2022label,gao2022deletion}, with top-1 confidence scores enhancing efficacy \citep{lu2022label}.

\textbf{Model Inversion:} 
Model inversion reconstructs training data from model outputs, and such attacks have progressed from white-box \citep{fredrikson2014privacy,fredrikson2015model} to black-box settings. 
Approaches like LBMI \citep{yang2019neural} leverage autoencoders, while MIRROR \citep{an2022mirror} and BREP-MI \citep{kahla2022label} exploit residual data influence using GANs and hard-label outputs, respectively.

\textbf{Malicious Unlearning:} 
Malicious unlearning degrades model performance during the unlearning phase. 
\cite{DBLP:conf/ndss/Hu0CZ00ZX24} showed that by pushing data closer to the decision boundary, over-unlearning increases misclassification risks. 
This threat is amplified in black-box MLaaS settings, where limited transparency exposes models to exploitation.

\section{Preliminary}
\textbf{Notations:}
Let $\mathcal{D}=\{z_i\}_{i=1}^n$ be a dataset containing $n$ data points where each sample is $z_i=(x_i,y_i)$. 
Here, $x_i\in\mathbb{R}^d$ is a feature vector assumed to be sampled from an underlying distribution $\mathcal{P}$, i.e., $x_i\sim\mathcal{P}$, while $y_i\in\{1,\cdots,c\}$ is the target/label and $c$ is the number of classes. 
Let $\mathcal{D}=\mathcal{D}_\text{train}\cup\mathcal{D}_\text{test}$ where $\mathcal{D}_\text{train}$ is the train set and $\mathcal{D}_\text{test}$ is the test set used for model training and evaluation, respectively. 
Let $\mathcal{D}_u=\{z_i\}_{i=1}^{n_u}\subset \mathcal{D}_\text{train}$ denote a subset of training samples to be unlearned, termed the \textit{forget set}. 
Here, $n_u$ is the number of samples to be unlearned.
The remaining samples, termed the \textit{retain set}, are denoted as $\mathcal{D}_r=\mathcal{D}_\text{train}\setminus\mathcal{D}_u=\{z_i\}_{i=1}^{n_r}$, where $n_r=\lvert\mathcal{D}_\text{train}\rvert-n_u$. 
Let a machine learning model, trained on $\mathcal{D}_\text{train}$, referred to as the original pre-trained model, be $f_\theta:\mathbb{R}^d\to \mathbb{R}^c$ parameterized by $\theta=[\theta_1,\cdots,\theta_L]$ where $L$ is the depth/number of layers of the model. 
Retraining the original model from scratch on $\mathcal{D}_r$ yields $f_{\theta_*}$ which is considered as the oracle for unlearning. 

\begin{figure}[t]
    \centering
    \includegraphics[width=0.9\linewidth]{uai2025-template/All Required Images/machine_unlearning_MLaaS.png}
    \caption{Overview of the MLaaS framework and potential black-box attacks that the unlearning algorithm must defend against to safeguard the model.}
    \label{fig:MLaaS_framework}
\end{figure}

\textbf{Evaluation Metrics:}
We assess unlearning using eight metrics: forgetting and retain accuracies on the train set ($\text{FA}_\text{tr}$ and $\text{RA}_\text{tr}$, respectively) and on the test set ($\text{FA}_\text{te}$ and $\text{RA}_\text{te}$, respectively) to measure unlearning and generalization performance, test accuracy (TA) for overall generalization, average discrepancy (Avg. D), defined as the average disparity in metrics between the unlearned and retrained model, to compare their overall ``closeness'' \citep{DBLP:journals/corr/abs-2409-19732}, robustness to membership inference attacks (MIA), and computational efficiency through the number of iterations (Iters) required for unlearning. 
Ideally, the MIA score for any unlearning method should be close to $50\%$ \citep{hu2022membership}.

\textbf{Assumptions:}
We assume the deployed model is well-trained, achieving high accuracy on $\mathcal{D}_{\text{train}}$ and reliable predictions on unseen data, consistent with the MLaaS setting.
In this work, we focus on classification tasks.
We also consider three distinct unlearning paradigms: sample-wise forgetting, which entails unlearning a random subset of data points from $\mathcal{D}_\text{train}$; subclass-wise forgetting, which requires unlearning all samples from $\mathcal{D}_\text{train}$ that belong to a particular subclass within a class; and class-wise forgetting, which unlearns all samples from $\mathcal{D}_\text{train}$ that belong to a single class.
% Sample-wise and class-wise forgetting are evaluated on synthetic and benchmark datasets, while subclass-wise forgetting is used exclusively in synthetic experiments.

\textbf{MLaaS Framework:}
In the MLaaS paradigm, developers train proprietary models and deploy them on servers for commercialization. 
While the server handles model maintenance, including periodic updates, it lacks access to $\mathcal{D}_\text{train}$ and relies on $\mathcal{D}_\text{test}$ for monitoring model performance. 
To comply with data protection regulations, developers pre-select an unlearning method executed by the server when authorized users submit data revocation requests for instances $x_i\in\mathcal{D}_u$. 
However, this opens avenues for malicious attacks. 
Black-box attacks, such as membership inference and model inversion, target $\mathcal{D}_\text{train}$ while authorized malicious users aim to degrade model utility through the exploitation of revocation rights by submitting corrupted inputs post-deployment.
Fig. \ref{fig:MLaaS_framework} illustrates the MLaaS framework and potential attack channels.

\section{Methodology}
Our proposed approach comprises two key components: \textit{(i)} a probability redistribution module that redistributes the predicted output $f_\theta(x_i)$ for each $x_i\in\mathcal{D}_u$ to simulate the removal of sample influence from $f_\theta$ and \textit{(ii)} a regularized loss function that balances the performance across the retained dataset $\mathcal{D}_r$ and test data $\mathcal{D}_\text{test}$ while ensuring unlearning occurs effectively on $\mathcal{D}_u$.

\subsection{Adaptive Probability Redistribution}
We formulate unlearning for a given sample as reducing the probability mass assigned to the predicted class while redistributing it proportionally among the remaining classes. 
Let $p_i=\sigma\left(f_{\theta}(x_i)\right)\in\mathbb{R}^c$ where $\sigma(\cdot)$ is the Softmax function.
\begin{equation}
    \begin{split}
        \widehat{p}_i^j &= \begin{cases}
        (1-\alpha)\,p_i^j & \text{if }j=\widehat{y}_i,\\
        p_i^j+\frac{p_i^j}{\sum_{k\neq\widehat{y}_i}p_i^k}\,\alpha\,p_i^{\widehat{y}_i} & \text{otherwise,}
    \end{cases}\\
    \widehat{y}_i &= \argmax_{j\in\{1,\cdots,c\}}~p_i^j
    \end{split}
\end{equation}
where $\widehat{y}_i$ is the predicted class for $x_i$ while $\alpha\in[0,1]$ is a hyper-parameter controlling the extent of unlearning. 

\subsection{Loss Function}
Achieving effective unlearning requires fine-tuning $\theta$ to minimize the influence of samples in $\mathcal{D}_u$, while simultaneously preserving its performance on $\mathcal{D}_r$ and $\mathcal{D}_\text{test}$.
To this end, we propose the following regularized loss function,
\begin{equation}
    \text{KL}\left(\widehat{p}_i\parallel f_{\theta^t}(x_i)\right) + \sum_{l=1}^L\lambda_l\lVert\theta_l-\theta^t_l\rVert_F^2
    \label{eqn:loss_fn}
\end{equation}
Here $\lambda_l$ is a layer-wise regularization hyper-parameter while $\theta^t$ represents the parameters of the unlearned model at iteration $t$ with $\theta^0=\theta$. 
Each layer is assigned a unique $\lambda$ to account for the heterogeneous importance of layers in large deep neural networks \citep{DBLP:journals/jmlr/ZhangBS22}. Section \ref{sec:implementation_details} outlines a straightforward method for determining $\lambda$ values based on layer depth.
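The term $\text{KL}\left(\widehat{p}_i\parallel f_{\theta^t}(x_i)\right)$, in which the model output $f_{\theta^t}(x_i)$ is passed through the Softmax $\sigma(\cdot)$ so that both arguments are probability vectors, ensures that $f_\theta$ is updated to align with the modified target distribution $\widehat{p}_i$, effectively reducing the influence of samples in $\mathcal{D}_u$.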
The term $\lambda_l\lVert\theta_l-\theta^t_l\rVert_F^2$ is a regularizer on the distance between the original pre-trained network $\theta$ and the updated parameters $\theta^t$ at iteration $t$ in Euclidean space, which preserves model generalization on $\mathcal{D}_r$. 

\subsection{Understanding and Optimizing Hyper-parameter Dynamics}
The balance between unlearning and retention is controlled by the hyper-parameters $\alpha$ and $\lambda_l$, respectively. 
Therefore, effective unlearning hinges on a precise understanding of hyper-parameter selection and their dynamic interplay.

\textbf{Balancing Stability and Unlearning:}
The hyper-parameter $\alpha$ governs the extent of unlearning on $\mathcal{D}_u$.
Initializing $\alpha$ close to $1$ causes significant reductions in the predicted probabilities of the target class, resulting in a large deviation from the original predictions. 
This results in large losses and significant weight updates that push the model parameters far from their initial state, thereby degrading performance on $\mathcal{D}_r$. 
Conversely, initializing $\alpha$ close to $0$ results in negligible changes to the predicted probabilities, rendering unlearning ineffective. 
To strike an effective balance, we propose a dynamic update rule for $\alpha$, inspired by cosine annealing \citep{DBLP:conf/iclr/LoshchilovH17}, that gradually increases its value during training, enabling controlled unlearning while maintaining stability. 
Specifically, $\alpha$ is updated at each training iteration $t$ as follows,
\begin{equation}
    \alpha^t = \alpha_{\scriptscriptstyle\max} - \frac{1}{2}\left(\alpha_{\scriptscriptstyle\max}-\alpha_{\scriptscriptstyle\min}\right) \left(1+\cos\left( \frac{t}{T-1}\pi\right)\right)
    \label{eqn:cosine_annealing_alpha}
\end{equation}
Here $\alpha^t$ represents the value of $\alpha$ at iteration $t$, $\alpha_{\min}$ and $\alpha_{\max}$ are hyper-parameters defining the range of $\alpha$ and $T$ denotes the total number of iterations. 
The proposed schedule begins with $\alpha^0=\alpha_{\min}$ and gradually increases as training progresses until $\alpha^{T-1}=\alpha_{\max}$ at the final iteration.

\textbf{Preserving Generalization:} 
The hyper-parameter $\lambda_l$ controls the regularization that preserves model performance on $\mathcal{D}_r$.
If initialized too high, $\lambda_l$ overly constrains parameter updates, preventing effective unlearning of $\mathcal{D}_u$. 
Conversely, initializing it too low allows the parameters to drift excessively, leading to significant degradation in generalization.
To ensure a smooth balance, we dynamically adjust $\lambda_l$ during training. 
The update rule for $\lambda_l$ at iteration $t$ is,
{\small
\begin{equation}
    \lambda_{\scriptscriptstyle l}^t = \lambda_{\scriptscriptstyle l_{\max}} - \frac{1}{2}\left(\lambda_{\scriptscriptstyle l_{\max}}-\lambda_{\scriptscriptstyle l_{\min}}\right) \left(1+\cos\left( \frac{t}{T-1}\pi\right)\right)
    \label{eqn:cosine_annealing_lambda}
\end{equation}
}
Here, $\lambda^t_l$ represents the value of $\lambda_l$ at iteration $t$, $\lambda_{l_{\min}}$ and $\lambda_{l_{\max}}$ are hyper-parameters defining the range of $\lambda_l$ and $T$ denotes the total number of iterations. 
Starting with $\lambda_l^0=\lambda_{l_{\min}}$, this schedule allows flexibility in the early stages of training to prioritize unlearning. 
By the final iteration, $\lambda_l^{T-1}=\lambda_{l_{\max}}$ reinforces constraints, pulling parameters closer to their original state and recovering performance on $\mathcal{D}_r$.

\textbf{Interplay of $\alpha^t$ and $\lambda_l^t$:} 
The dynamic interaction between $\alpha^t$ and $\lambda_l^t$ is crucial for balancing unlearning and performance.
Early in training, the gradual increase in $\alpha^t$ allows the network to incrementally update its weights. 
During this phase, $\lambda_l^t$ exerts minimal influence, allowing the network to slowly unlearn $\mathcal{D}_u$. 
Even near the end of training, when $\alpha^t$ is close to $\alpha_{\max}$, the relative change in the class-wise target probabilities, in between iterations, is small, ensuring that parameter updates due to unlearning are never aggressive. 
Meanwhile, the increase in $\lambda_l^t$ forces the model weights to move closer to the original parameters, thus allowing the network to recover performance on $\mathcal{D}_r$. 
This synergy between $\alpha^t$ and $\lambda_l^t$ ensures that the model achieves effective unlearning without compromising its overall utility.


\begin{figure*}[t]
    \centering
    \setlength\tabcolsep{0.5pt}
    \begin{tabular}{cccccc}
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Class_Moons/ground_truth.png}\label{fig:d1_gt}} &
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Class_Moons/train_dec_bound.png}\label{fig:d1_t_db}} & \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Class_Moons/retrain_dec_bound.png}\label{fig:d1_cl_ret}} & 
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Class_Moons/unlearn_dec_bound.png}\label{fig:d1_cl_unl}} & \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Sample_Moons/retrain_dec_bound.png}\label{fig:d1_ss_ret}} &
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Sample_Moons/unlearn_dec_bound.png}\label{fig:d1_ss_unl}}\\
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/SubClass_Blobs/ground_truth.png}\label{fig:d2_gt}} &
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/SubClass_Blobs/train_dec_bound.png}\label{fig:d2_t_db}} & \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/SubClass_Blobs/retrain_dec_bound.png}\label{fig:d2_sc_ret}} &
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/SubClass_Blobs/unlearn_dec_bound.png}\label{fig:d2_sc_unl}} & \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Sample_Blobs/retrain_dec_bound.png}\label{fig:d2_ss_ret}} &
        \subfloat[]{\includegraphics[width=0.165\textwidth,height=0.14\textwidth]{uai2025-template/All Required Images/Decision Boundaries/Sample_Blobs/unlearn_dec_bound.png}\label{fig:d2_ss_unl}}\\
    \end{tabular}
    \caption{The non-linear synthetic datasets generated for the controlled simulations and their corresponding decision boundaries across multiple tasks. 
    Fig.~\ref{fig:d1_gt} is the Moons dataset and Fig.~\ref{fig:d2_gt} is the Blobs dataset.
    Figs.~\ref{fig:d1_t_db} and \ref{fig:d2_t_db} visualize the corresponding decision boundaries. 
    Figs. \ref{fig:d1_cl_ret} and \ref{fig:d1_cl_unl} are the retrained and unlearned models' decision boundaries, respectively, for class-wise forgetting on the Moons dataset.
    Figs. \ref{fig:d1_ss_ret} and \ref{fig:d1_ss_unl} are the retrained and unlearned models' decision boundaries, respectively, for sample-wise forgetting on the Moons dataset.
    Figs. \ref{fig:d2_sc_ret} and \ref{fig:d2_sc_unl} are the retrained and unlearned models' decision boundaries, respectively, for subclass-wise forgetting on the Blobs dataset.
    Figs. \ref{fig:d2_ss_ret} and \ref{fig:d2_ss_unl} are the retrained and unlearned models' decision boundaries, respectively, for sample-wise forgetting on the Blobs dataset.}
    \label{fig:simulation_ground_truth}
\end{figure*}



\section{Controlled Simulations on Synthetic Data}
Existing metrics for machine unlearning offer valuable insights into post-forgetting performance but fail to fully capture how unlearning reshapes a model's decision-making behaviour. 
Since a model's classification performance fundamentally depends on its decision boundary, analyzing it directly reveals the impact of unlearning on generalization. 
Thus, we conduct controlled experiments on two synthetic datasets with known ground truths, applying SALSA under sample-wise, subclass-wise, and class-wise unlearning. 
This setup allows us to precisely evaluate how unlearning transforms the model's decision boundary, providing a deeper understanding of its generalization dynamics.


\subsection{Dataset and Models}
We use two non-linear synthetic datasets, Moons \citep{moons} and Blobs \citep{blobs}, to evaluate SALSA across different unlearning paradigms.
The Moons dataset comprises three classes, each with $5000$ training and $500$ test samples. 
A three-layer MLP, trained to convergence, learns the decision boundary shown in Fig. \ref{fig:d1_t_db}. 
This dataset is used for class-wise and sample-wise unlearning.
The Blobs dataset includes two classes, each with two distinct subclasses, reflecting hierarchical structures in real-world data. 
It contains $7500$ training and $500$ test samples per class. 
A three-layer MLP captures the decision boundary (Fig. \ref{fig:d2_t_db}). 
This dataset is used for subclass-wise and sample-wise unlearning. 
 

\subsection{Implementation Details}
On the Moons dataset, class $3$ is chosen for class-wise unlearning, while the smaller subclass of class $1$ is chosen for subclass-wise unlearning on the Blobs dataset. 
For sample-wise unlearning, we unlearn a random $10\%$ subset of examples from both datasets. 
We report all results by averaging over three different runs for each experiment.
Further details for reproducibility are provided in Section \ref{sec:hyperparams} of the Appendix.

\begin{table}[!h]
    \centering
    \small
    \setlength\tabcolsep{3pt}
    \caption{Simulation results on the Moons and Blobs datasets for sample-wise, subclass-wise and class-wise unlearning. 
    Results have been averaged over three different runs.}
    \label{tab:simulation_results}
    \begin{tabular}{cccccccc}
        \toprule
        Dataset & Task & Method & $\text{FA}_\text{tr}$ & $\text{RA}_\text{tr}$ & $\text{FA}_\text{te}$ & $\text{RA}_\text{te}$ & TA\\
        \midrule
        \multirow{4}{*}{Moons} & \multirow{2}{*}{\shortstack{Class\\ wise}} & RT & $00.0$ & $100.0$ & $00.0$ & $100.0$ & -\\
         &  & SALSA & $00.0$ & $98.8$ & $00.6$ & $98.4$ & - \\
         \cline{2-8}\noalign{\vspace{3pt}}
         &  \multirow{2}{*}{\shortstack{Sample\\ wise}} & RT & $100.0$ & $100.0$ & - & - & $100.0$\\
         &  & SALSA & $100.0$ & $100.0$ & - & - & $100.0$\\
        \midrule
        \multirow{4}{*}{Blobs} & \multirow{2}{*}{\shortstack{Subclass\\ wise}} & RT & $00.0$ & $100.0$ & $00.0$ & $100.0$ & -\\
         &  & SALSA & $00.0$ & $97.6$ & $00.0$ & $97.5$ & - \\
         \cline{2-8}\noalign{\vspace{3pt}}
         &  \multirow{2}{*}{\shortstack{Sample\\ wise}} & RT & $98.5$ & $100.0$ & - & - & $98.0$\\
         &  & SALSA & $96.9$ & $95.8$ & - & - & $95.3$\\
        \bottomrule
    \end{tabular}
\end{table}

\subsection{Results}
Across all scenarios, SALSA consistently matches retrained models in forgetting targeted information while preserving generalization (see Table \ref{tab:simulation_results} for fine-grained results).

\textbf{Class-wise Unlearning:} 
The ideal decision boundary learned by the retrained model is visualized in Fig. \ref{fig:d1_cl_ret}. 
SALSA closely replicates this boundary (Fig. \ref{fig:d1_cl_unl}), effectively forgetting the class while maintaining overall generalization. 
The Average Discrepancy is $0.85\%$.

\textbf{Subclass-wise Unlearning:} 
The retrained boundary is visualized in Fig. \ref{fig:d2_sc_ret}. 
SALSA reproduces this boundary (Fig. \ref{fig:d2_sc_unl}) with a low Average Discrepancy of $1.23\%$.

\textbf{Sample-wise Unlearning:} 
Randomly removing $10\%$ of training samples minimally affects the model’s decision boundary. 
SALSA produces boundaries (Figs. \ref{fig:d1_ss_unl}, \ref{fig:d2_ss_unl}) nearly identical to those of the retrained models (Figs. \ref{fig:d1_ss_ret}, \ref{fig:d2_ss_ret}), with Average Discrepancies of $0\%$ (Moons) and $2.8\%$ (Blobs).


\section{Experiments on Standard Data}
We assess the performance of our proposed approach on benchmark datasets, focusing on class-wise and sample-wise unlearning. 
% Comparisons are made against state-of-the-art techniques using multiple evaluation metrics.

\subsection{Dataset and Models}
Our experiments leverage three widely used image classification datasets with varying sizes, resolutions, and class distributions.
\textbf{CIFAR10} \citep{krizhevsky2009learning} comprises $50{,}000$ training images and $10{,}000$ test images distributed uniformly over $10$ classes.
We use the ResNet18 model \citep{DBLP:conf/cvpr/HeZRS16} for training on this dataset.
The Street View House Numbers or \textbf{SVHN} \citep{netzer2011reading} is a real-world dataset with $73{,}257$ training images and $26{,}032$ test images across $10$ classes.
ResNet18 is employed for training on this dataset.
The \textbf{TinyImageNet} \citep{le2015tiny} dataset consists of $200$ classes, each comprising $500$ training images and $50$ test images, totaling $100{,}000$ training samples and $10{,}000$ test samples.
Swin-T transformer \citep{DBLP:conf/cvpr/Liu0LYXWN000WG22} is chosen for training on this dataset.

\subsection{Baselines}
We regard the retrained model (RT) as the oracle of approximate machine unlearning and compare SALSA against five state-of-the-art machine unlearning methods. 
These include SFTC \citep{10.1145/3655693.3655697}, SalUn \citep{DBLP:conf/iclr/FanLZ0W024}, FEMU \citep{10113700}, LAF \citep{DBLP:conf/iclr/000100BCX24} and SFRon \citep{DBLP:journals/corr/abs-2409-19732}.
We also consider Fine-tuning (FT), a strong baseline, where $f_\theta$ is fine-tuned on $\mathcal{D}_r$. 
This is akin to catastrophic forgetting, where fine-tuning without $\mathcal{D}_u$ may lead to unlearning. 

\subsection{Implementation Details}\label{sec:implementation_details}
According to Eqn. \ref{eqn:cosine_annealing_lambda}, determining $\lambda_{l_{\min}}$ and $\lambda_{l_{\max}}$ is essential to balance unlearning across network layers effectively. 
Recognizing that the initial layers of a neural network play a more critical role in learning \citep{DBLP:journals/jmlr/ZhangBS22}, we assign higher weights to these layers during unlearning, which is achieved using a linear function.
\begin{equation}
\begin{split}
    \lambda_{l_{\min}} &= m\times \left(L-l+1\right) + c~~~~~~~\forall l\in\{1,\cdots,L\}\\
    \lambda_{l_{\max}} &= \lambda_{l_{\min}} + \gamma 
\end{split}
\end{equation}
where $m$ and $c$ are the scale and shift hyper-parameters that dictate how $\lambda_{l_{\min}}$ varies across the network depth. 
Meanwhile, $\gamma$ determines how much $\lambda_{l}^t$ increases as training progresses. 
We choose to forget the best-performing class in each dataset (classes $1,4$ and $23$ for CIFAR10, SVHN and TinyImageNet, respectively) as it typically causes the largest drop in performance, challenging all unlearning algorithms to match the performance of the oracle.
Following \citet{DBLP:conf/iclr/000100BCX24}, we forget a random $40\%$ subset of samples from the last $50\%$ of classes for sample-wise unlearning in each dataset.
On the CIFAR10 and SVHN datasets, the ResNet18 model is trained from scratch, while we fine-tune an ImageNet \citep{ILSVRC15} pre-trained Swin-T transformer on TinyImageNet, which closely follows the MLaaS practice of fine-tuning strong foundation models on custom datasets. 
We report all results by averaging over three different runs for each experiment.
Further implementation details for reproducibility are provided in Section \ref{sec:hyperparams} of the Appendix.


\begin{table*}[h]
    \centering
    \setlength\tabcolsep{3.4pt}
    \caption{Combined class-wise and sample-wise unlearning performance comparison.
    For class-wise unlearning, the class with the highest training accuracy is unlearned.
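    % TODO(review): the Implementation Details text states that 40% of samples from the last 50% of classes are forgotten; confirm whether 10% here is correct.
    For sample-wise unlearning, a random subset of 10\% of the examples is unlearned.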
    Values closest to RT under each metric are bolded, with the second best underlined.
    For MIA, scores nearest to 50\% are bolded, with the second nearest underlined.
    For Iters, the method with the fewest iterations is bolded, with the second best underlined.
    Results are averaged over three runs.}
    \label{tab:combined_unlearning}
    \begin{tabular}{clcccccccccccccc}
        \toprule
        & & \multicolumn{7}{c}{Class-wise Unlearning} & & \multicolumn{6}{c}{Sample-wise Unlearning}\\
        \cmidrule(lr){3-9} \cmidrule(lr){11-16}
        Dataset & Methods & $\text{FA}_\text{tr}$ & $\text{RA}_\text{tr}$ & $\text{FA}_\text{te}$ & $\text{RA}_\text{te}$ & Avg. D & MIA & \multicolumn{1}{c}{Iters} & & $\text{FA}_\text{tr}$ & $\text{RA}_\text{tr}$ & TA & Avg. D & MIA & \multicolumn{1}{c}{Iters}\\
        \toprule
        \multirow{8}{*}{\shortstack{CIFAR10~~}} & RT & $00.0$ & $100$ & $00.0$ & $95.6$ & $-$ & $46.6$ & $70$K & & $93.3$ & $100$ & $92.6$ & $-$ & $60.4$ & $56$K\\
        \cline{2-16}\noalign{\vspace{3pt}}
         & FT & $\mathbf{00.0}$ & \underline{$92.7$} & $\mathbf{00.0}$ & $88.5$ & \underline{$03.6$} & $60.6$ & $14$K & & \underline{$99.8$} & \underline{$99.9$} & \underline{$95.0$} & \underline{$3.0$} & $64.2$ & $11$K\\
         & SFTC & $\mathbf{00.0}$ & \underline{$92.7$} & $\mathbf{00.0}$ & $88.6$ & \underline{$03.6$} & $59.0$ & $16$K & & $\mathbf{100}$ & $\mathbf{100}$ & $\mathbf{95.4}$ & $3.2$ & $66.4$ & $15$K\\
         & SalUn & $\mathbf{00.0}$ & $85.5$ & $\mathbf{00.0}$ & $82.8$ & $06.8$ & $63.1$ & $4$K & & $\mathbf{100}$ & $\mathbf{100}$ & $95.3$ & $3.1$ & $66.2$ & $3$K\\
         & LAF & $\mathbf{00.0}$ & $60.2$ & $\mathbf{00.0}$ & $\mathbf{93.3}$ & $10.5$ & $62.4$ & $5$K & & $94.1$ & \underline{$99.9$} & $93.9$ & $0.7$ & \underline{$55.6$} & $109$K\\
         & SFRon & $\mathbf{00.0}$ & $74.5$ & $\mathbf{00.0}$ & $73.2$ & $12.0$ & $62.0$ & \underline{$2$K} & & $\mathbf{100}$ & \underline{$99.9$} & $95.2$ & $3.1$ & $65.5$ & \underline{$4$K}\\
         & SALSA & \underline{$00.3$} & $\mathbf{98.7}$ & \underline{$00.5$} & \underline{$92.0$} & $\mathbf{01.4}$ & $\mathbf{50.2}$ & $\mathbf{0.1}$K & & $96.9$ & $97.9$ & $\mathbf{95.4}$ & $\mathbf{2.9}$ & $\mathbf{51.1}$ & $\mathbf{0.02}$K\\
        \midrule
        \multirow{8}{*}{SVHN} & RT & $00.0$ & $99.4$ & $00.0$ & $96.7$ & $-$ & $62.6$ & $70$K & & $92.9$ & $99.9$ & $95.8$ & $-$ & $54.8$ & $664$K\\
        \cline{2-16}\noalign{\vspace{3pt}}
         & FT & $\mathbf{00.0}$ & \underline{$96.0$} & $\mathbf{00.0}$ & \underline{$95.6$} & \underline{$01.1$} & $56.7$ & $374$K & & $97.8$ & $99.4$ & $\mathbf{96.4}$ & $\mathbf{2.0}$ & \underline{$55.3$} & $8$K\\
         & SFTC & $\mathbf{00.0}$ & $95.7$ & $\mathbf{00.0}$ & $95.4$ & $01.3$ & $59.5$ & $20$K & & $\mathbf{99.8}$ & $\mathbf{99.8}$ & $\mathbf{96.4}$ & $2.6$ & $55.6$ & $33$K\\
         & SalUn & $\mathbf{00.0}$ & $93.3$ & $\mathbf{00.0}$ & $93.0$ & $02.4$ & $63.2$ & $22$K & & \underline{$99.7$} & \underline{$99.7$} & $\mathbf{96.4}$ & $2.5$ & $55.7$ & $33$K\\
         & LAF & $04.1$ & $65.2$ & $01.3$ & $\mathbf{96.4}$ & $10.0$ & $64.5$ & $595$K & & $00.0$ & $98.7$ & $60.2$ & $43.2$ & $33.5$ & $595$K\\
         & SFRon & $\mathbf{00.0}$ & $90.7$ & $\mathbf{00.0}$ & $90.4$ & $03.8$ & $\mathbf{53.1}$ & $2$K & & $97.8$ & $99.1$ & \underline{$96.2$} & $\mathbf{2.0}$ & $55.7$ & \underline{$2$K}\\
         & SALSA & \underline{$00.7$} & $\mathbf{99.6}$ & \underline{$00.7$} & $95.3$ & $\mathbf{00.7}$ & \underline{$45.7$} & $\mathbf{0.2}$K & & $97.1$ & $98.1$ & $91.2$ & \underline{$3.5$} & $\mathbf{51.1}$ & $\mathbf{0.03}$K\\
        \midrule
        \multirow{8}{*}{\shortstack{Tiny\\Image\\Net}} & RT & $00.0$ & $92.0$ & $00.0$ & $85.3$ & $-$ & $63.4$ & $78$K & & $85.81$ & $97.8$ & $85.81$ & $-$ & $58.06$ & $78$K\\
        \cline{2-16}\noalign{\vspace{3pt}}
         & FT & $82.8$ & $\mathbf{92.3}$ & \underline{$84.0$} & $84.4$ & $42.0$ & \underline{$60.0$} & $4$K & & $67.8$ & \underline{$77.6$} & $69.3$ & $18.0$ & \underline{$55.3$} & $3$K\\
         & SFTC & $98.2$ & $91.4$ & $84.0$ & $83.4$ & $49.8$ & $63.2$ & $4$K & & \underline{$76.6$} & \underline{$77.6$} & $70.1$ & \underline{$14.8$} & $56.9$ & $4$K\\
         & SalUn & $97.4$ & \underline{$91.5$} & $98.0$ & $84.0$ & $49.3$ & $62.3$ & $4$K & & $75.2$ & $76.8$ & \underline{$70.4$} & $15.4$ & $56.9$ & $4$K\\
         & LAF & $-$ & $-$ & $-$ & $-$ & $-$ & $-$ & $-$ & & $-$ & $-$ & $-$ & $-$ & $-$ & $-$\\
         & SFRon & $\mathbf{00.0}$ & $91.4$ & $\mathbf{00.0}$ & \underline{$84.7$} & \underline{$00.3$} & $39.3$ & $1$K & & $48.3$ & $65.7$ & $59.3$ & $31.8$ & $55.8$ & \underline{$1$K}\\
         & SALSA & \underline{$00.2$} & \underline{$91.5$} & $\mathbf{00.0}$ & $\mathbf{85.3}$ & $\mathbf{00.2}$ & $\mathbf{47.8}$ & $\mathbf{0.02}$K & & $\mathbf{89.7}$ & $\mathbf{91.2}$ & $\mathbf{84.9}$ & $\mathbf{3.6}$ & $\mathbf{49.1}$ & $\mathbf{0.08}$K\\
        \bottomrule
    \end{tabular}
\end{table*}



\subsection{Results}
Under the metrics, $\text{FA}_{\text{tr}},\text{RA}_{\text{tr}},\text{FA}_{\text{te}},\text{RA}_{\text{te}}$ and TA, the algorithm with the smallest discrepancy from the oracle (RT) is considered the best. 
Additionally, for the MIA and Iters metrics, the method achieving scores closest to $50\%$ and $1$, respectively, is considered optimal. 

\textbf{Class-wise and Sample-wise Unlearning}: 
Table \ref{tab:combined_unlearning} showcases both class-wise and sample-wise unlearning performance across all datasets.



For \textbf{class-wise unlearning}, SALSA consistently outperforms all considered baselines across each dataset, achieving the lowest average discrepancy ($<1.5\%$) from exact unlearning. 
This near-perfect approximation demonstrates that SALSA effectively emulates retraining without access to the original data, ensuring the preservation of model generalization. 
Moreover, SALSA achieves remarkable efficiency, being up to $25\times$ faster than the next fastest baseline, SFRon.
Note that, due to LAF's high resource requirements, we were unable to evaluate it on TinyImageNet.
In terms of privacy preservation, SALSA's MIA scores are consistently close to $50\%$, aligning with the gold standard for privacy. 
For instance, on SVHN and TinyImageNet, even RT exhibits slight membership leakage, with MIA scores deviating from $50\%$. 
In contrast, SALSA's near-ideal MIA scores showcase robust privacy preservation while maintaining high utility.



For \textbf{sample-wise unlearning}, SALSA outperforms all considered baselines on CIFAR10 and TinyImageNet, achieving the lowest average discrepancy ($<4\%$) from exact unlearning. 
On SVHN, SFRon achieves the lowest average discrepancy. 
However, our approach is $84\times$ faster, on average, than SFRon, the fastest considered baseline.
Moreover, SALSA's MIA scores are near $50\%$ on all datasets, aligning with the gold standard for privacy. 
The combined results demonstrate SALSA's superior efficiency across both unlearning scenarios while maintaining competitive performance metrics and robust privacy preservation.


\begin{figure}[t]
    \centering
    \small
    \addtolength{\tabcolsep}{-0.4em}
    \begin{tabular}{cc}  % two centered columns: forgetting class (left), non-forgetting class (right)
    \toprule
        Forgetting class & Non-forgetting class\\
    \toprule
        \subfloat[]{\includegraphics[width=0.225\textwidth]{uai2025-template/All Required Images/Face_Unlearn/63_gt.png}\label{fig:51_ground_truth}} &
        \subfloat[]{\includegraphics[width=0.225\textwidth]{uai2025-template/All Required Images/Face_Unlearn/66_gt.png}\label{fig:0_ground_truth}} \\

        \subfloat[]{\includegraphics[width=0.225\textwidth]{uai2025-template/All Required Images/Face_Unlearn/63_not_unlearned.png}\label{fig:51_before_unlearning}} &
        \subfloat[]{\includegraphics[width=0.225\textwidth]{uai2025-template/All Required Images/Face_Unlearn/66_not_unlearned.png}\label{fig:0_before_unlearning}} \\

        \subfloat[]{\includegraphics[width=0.225\textwidth]{uai2025-template/All Required Images/Face_Unlearn/63_unlearned.png}\label{fig:51_after_unlearning}} &
        \subfloat[]{\includegraphics[width=0.225\textwidth]{uai2025-template/All Required Images/Face_Unlearn/66_unlearned.png}\label{fig:0_after_unlearning}} \\
    \bottomrule
    \end{tabular}
    \caption{The black box model inversion attack variant of MIRROR.
    Figs. \ref{fig:51_ground_truth} and \ref{fig:0_ground_truth} are images taken from two visually similar classes in the VGGFace2 dataset and represent the ground truth.
    Figs. \ref{fig:51_before_unlearning} and \ref{fig:0_before_unlearning} are randomly selected batches of output generated by the inversion attack on the pre-trained network for each class before unlearning. 
    Figs. \ref{fig:51_after_unlearning} and \ref{fig:0_after_unlearning} are randomly selected batches of output generated by the same attack model on the unlearned network. }
    \label{fig:model_inversion}
\end{figure}

\section{Model Inversion Attack}

\subsection{Setup}
To assess the robustness of SALSA, we evaluate its effectiveness against MIRROR \citep{DBLP:conf/ndss/TaoXLSAX0022}, a state-of-the-art model inversion attack. 
We adopt the experimental setup of MIRROR, where a StyleGAN \citep{DBLP:conf/cvpr/KarrasLA19} pre-trained on the CelebA dataset \citep{DBLP:conf/iccv/LiuLWT15} serves as the generator for the attack. 
An InceptionResNet-v1 \citep{szegedy2017inception} pre-trained on the VGGFace2 dataset \citep{DBLP:conf/fgr/CaoSXPZ18} is targeted in the attack. 
The StyleGAN iteratively optimizes its generated images using a genetic algorithm \citep{bhandari1996genetic}, aiming to infer private data from the model in a black-box setting.
The hyperparameters for unlearning are detailed in Section \ref{sec:hyperparams} of the Appendix.

\subsection{Results}
To demonstrate the efficacy of SALSA against a state-of-the-art black-box model inversion attack, we select two visually similar classes from the VGGFace2 dataset (Figs. \ref{fig:51_ground_truth} and \ref{fig:0_ground_truth}).
Without unlearning, MIRROR reconstructs facial and hair features of private training samples with striking fidelity, as shown in Figs. \ref{fig:51_before_unlearning} and \ref{fig:0_before_unlearning}. 
However, after unlearning with SALSA, MIRROR struggles to recover even rudimentary information about the forgotten class, as demonstrated in Fig. \ref{fig:51_after_unlearning}.
A critical goal of unlearning is to ensure that data from retained classes remains unaffected. 
Fig. \ref{fig:0_after_unlearning} confirms this, as the images generated by MIRROR for the retained class are nearly indistinguishable from the originals.
Our results highlight SALSA's robustness against model inversion attacks, successfully erasing private data while maintaining the integrity of retained classes.


\section{Malicious Unlearning}
\subsection{Setup}
Malicious Unlearning (Over-unlearning) simulates a black-box attack in MLaaS, where the server has no knowledge of the unlearning request.
We specifically implement subset over-unlearning II as proposed by \cite{DBLP:conf/ndss/Hu0CZ00ZX24}.
For each dataset, we shift $50\%$ of the best-performing class's samples just across the model’s decision boundary, pushing them toward the second-highest-performing class.
These adversarially modified samples form the unlearning request.

\subsection{Results}
\cite{DBLP:conf/ndss/Hu0CZ00ZX24} showed that over-unlearning can significantly degrade test accuracy (TA).
However, SALSA remains remarkably robust, experiencing nearly no drop in performance on CIFAR10 and TinyImageNet (Table \ref{tab:over_unlearning}).
On SVHN, however, TA drops by over $10\%$.
We attribute this to the high visual similarity between the top two performing classes, digits $1$ and $4$, making decision boundaries more susceptible to perturbations.
These results highlight SALSA’s resilience against adversarial unlearning in most MLaaS settings.

\begin{table}[t]
    \centering
    % \setlength\tabcolsep{1.8pt}
    \caption{Test accuracy (TA) for Malicious Unlearning (over-unlearning) in contrast to normal (benign) unlearning. }
    \label{tab:over_unlearning}
    \begin{tabular}{lcc}
    \toprule
        Dataset & Benign Unlearn & Malicious Unlearn\\
    \toprule
        CIFAR10 & 88.57 & 88.42 \\
        SVHN & 86.71 & 74.06 \\
        TinyImageNet & 86.64 & 86.71 \\
    \bottomrule
    \end{tabular}
\end{table}


\section{Analysis Study}
We investigate different aspects of SALSA through the lens of class-wise unlearning.

\textbf{Visualizing the Unlearning:}
We leverage GradCAM \citep{selvaraju2017grad} to visualize how the Swin-T transformer attends to images from both unlearned and retained classes in TinyImageNet.
Fig. \ref{fig:attention_map} shows activation maps before and after applying SALSA, highlighting the forget set (class $23$) and three random retained samples.
Post-unlearning, the model no longer focuses on key regions, indicating the effective removal of class-specific information.
This visualization provides intuitive evidence of SALSA’s ability to successfully unlearn without compromising generalization.

\begin{figure}[h]
    \centering
    \small
    \addtolength{\tabcolsep}{-0.4em}
    \begin{tabular}{cc}  % two centered columns: forgetting class (left), non-forgetting class (right)
    \toprule
        Forgetting class & Non-forgetting class\\
    \toprule
        \subfloat[]{\includegraphics[width=0.23\textwidth]{uai2025-template/All Required Images/GradCamMaps/unlearn_orig.png}\label{fig:ground_truth_att_unlearn}} &
        \subfloat[]{\includegraphics[width=0.23\textwidth]{uai2025-template/All Required Images/GradCamMaps/retain_orig.png}\label{fig:ground_truth_attn_orig}}\\

        \subfloat[]{\includegraphics[width=0.23\textwidth]{uai2025-template/All Required Images/GradCamMaps/unlearn_no_unlearn.png}\label{fig:before_unlearning_att_unlearn}} &
        \subfloat[]{\includegraphics[width=0.23\textwidth]{uai2025-template/All Required Images/GradCamMaps/retain_no_unlearn.png}\label{fig:before_unlearning_att_orig}} \\

        \subfloat[]{\includegraphics[width=0.23\textwidth]{uai2025-template/All Required Images/GradCamMaps/unlearn_unlearn.png}\label{fig:after_unlearning_att_unlearn}} &
        \subfloat[]{\includegraphics[width=0.23\textwidth]{uai2025-template/All Required Images/GradCamMaps/retain_unlearn.png}\label{fig:after_unlearning_att_orig}} \\
    \bottomrule
    \end{tabular}
    \caption{Swin-T transformer activation maps on TinyImageNet.
    Fig. \ref{fig:ground_truth_att_unlearn} shows a random batch from the forget set, while Fig. \ref{fig:ground_truth_attn_orig} shows random images from the retained set.
    Figs. \ref{fig:before_unlearning_att_unlearn} and \ref{fig:before_unlearning_att_orig} depict activation maps before unlearning, whereas Figs. \ref{fig:after_unlearning_att_unlearn} and \ref{fig:after_unlearning_att_orig} show the same maps after unlearning.}
    \label{fig:attention_map}
\end{figure}

\textbf{Effect of varying $\alpha_{\min}$ and $\alpha_{\max}$:}
Fixing $\alpha_{\max}=1$, we expect $\text{FA}_\text{te}$ to start low and approach $0$ as $\alpha_{\min}$ increases from $0$ to $1$. 
This is because smaller $\alpha_{\min}$ values delay most of the probability mass redistribution until the final stages of unlearning.
Similarly, fixing $\alpha_{\min}=0$ and varying $\alpha_{\max}$ in $[0,1]$ should amplify this effect, as class-wise unlearning demands $\alpha_{\max}\to 1$.
Fig. \ref{fig:alpha_graphs} confirms this trend for ResNet18 on CIFAR10.
Notably, SALSA remains robust, i.e., $\text{RA}_\text{te}$ remains stable while $\text{FA}_\text{te}$ steadily declines.

\begin{figure}[t]
    \centering
    \begin{minipage}{0.41\textwidth} % Adjust width for single column
        \centering
        \includegraphics[width=\textwidth]{uai2025-template/All Required Images/Graphs/cunl_alpha_fa.png}
        \subcaption{Change in forgetting accuracy on the test set, $(\text{FA}_\text{te})$}
    \end{minipage}
    
    \vspace{0.5cm} % Add vertical space between images
    
    \begin{minipage}{0.41\textwidth}
        \centering
        \includegraphics[width=\textwidth]{uai2025-template/All Required Images/Graphs/cunl_alpha_ra.png}
        \subcaption{Change in retained accuracy on the test set, $(\text{RA}_\text{te})$}
    \end{minipage}
    
    \caption{Accuracy trends for retain and forget classes on test sets with varying alpha values.}
    \label{fig:alpha_graphs}
\end{figure}


\textbf{Effect of varying $m,c,\gamma$:} 
Increasing $m$ amplifies weight penalization near the input layers, while higher $c$ enforces stronger regularization across all layers. 
Larger $\gamma$ further intensifies weight penalization as unlearning progresses. 
In all cases, we expect the amount of unlearning to reduce, i.e.,  $\text{FA}_{\text{te}}$ should rise due to increased regularization.  
Figs. \ref{fig:lambda_slope}, \ref{fig:lambda_shift}, and \ref{fig:lambda_gamma} validate this trend for ResNet18 on CIFAR10.
Once again, SALSA remains robust to changes in $\text{RA}_\text{te}$ with respect to the variation in the regularization hyperparameters.

\begin{figure}[t]
    \centering
    \includegraphics[width=0.41\textwidth]{uai2025-template/All Required Images/Graphs/cunl_alpha_slope.png}
    \caption{Accuracy variation with the slope $(m)$, showing its effect on retention and forgetting.}
    \label{fig:lambda_slope}
\end{figure}

\begin{figure}[h]
    \centering
    \includegraphics[width=0.41\textwidth]{uai2025-template/All Required Images/Graphs/cunl_alpha_shift.png}
    \caption{Accuracy dependence on the shift $(c)$, illustrating how offset adjustments influence performance.}
    \label{fig:lambda_shift}
\end{figure}

\begin{figure}[!h]
    \centering
    \includegraphics[width=0.41\textwidth]{uai2025-template/All Required Images/Graphs/cunl_alpha_gamma.png}
    \caption{Accuracy trends with respect to $\gamma$, highlighting its role in modulating class retention and forgetting.}
    \label{fig:lambda_gamma}
\end{figure}



\section{Conclusion}
In this work, we present SALSA, a Secure, Adaptive and Label-Agnostic Scalable Algorithm for machine unlearning tailored for classification tasks in the MLaaS scenario. 
SALSA redistributes model output probabilities for samples that need to be forgotten. 
Thereafter, it employs a novel loss function that minimizes the divergence between predicted and redistributed probabilities while maintaining minimum distance from model initialization.
This ensures simultaneous unlearning and generalization.
Our approach is label-independent and requires only the samples to be forgotten for unlearning, which makes SALSA exceptionally fast, achieving at least $25\times$ and $84\times$ faster class-wise and sample-wise unlearning, respectively, than the fastest considered baseline. 
Extensive experiments on benchmark and synthetic datasets show that SALSA achieves the closest approximation to exact unlearning.
Through rigorous evaluations against state-of-the-art black box attacks, we demonstrate SALSA's resilience to privacy and security threats.
% Our strategy delivers the dual benefits of performance and privacy with minimal trade-offs while being at least $25\times$ faster than its competitors. 
By balancing utility and privacy at scale, SALSA marks a significant step forward in practical, privacy-preserving unlearning for MLaaS and sensitive data management.

% References
\bibliography{bibliography}

\newpage

\onecolumn

\title{SALSA: A Secure, Adaptive and Label-Agnostic Scalable Algorithm for Machine Unlearning\\(Supplementary Material)}
\maketitle

\appendix

\section{Choice of hyper-parameters}\label{sec:hyperparams}
\subsection{Unlearning}
\begin{table}[h]
\centering
\caption{Hyperparameters for different datasets and model combinations for both class-wise and sample-wise unlearning.}\label{tab:unlearning_hyperparams}
\begin{tabular}{lllcclccccl}
\toprule
Dataset & Model & Task & Epochs & Batch Size & \multicolumn{1}{c}{lr} & $\alpha_\text{min}$ & $\alpha_\text{max}$ & slope (m) & shift (c) & $\gamma$ \\
\toprule
\multirow{2}{*}{CIFAR10} & \multirow{2}{*}{ResNet18} & Class & 10 & 512 & 2.0e-4 & 0.1 & 0.9 & 0.1 & 1 & 0.4 \\
& & Sample & 1 & 512 & 1.0e-5 & 0.1 & 0.3 & 0.01 & 0.1 & 0.2 \\
\midrule
\multirow{2}{*}{SVHN} & \multirow{2}{*}{ResNet18} & Class & 10 & 512 & 2.0e-4 & 0.1 & 0.9 & 0.1 & 1 & 0.4 \\
& & Sample & 1 & 512 & 1.0e-5 & 0.1 & 0.3 & 0.01 & 0.1 & 0.2 \\
\midrule
\multirow{2}{*}{TinyImageNet} & \multirow{2}{*}{SwinT} & Class & 10 & 32 & 1.0e-3 & 1-1e-6 & 1 & 4.0e-3 & 1.0e-2 & 3.0e-2 \\
& & Sample & 1 & 32 & 9.0e-4 & 0.3 & 0.6 & 5.0e-4 & 5.0e-3 & 1.0e-2 \\
\midrule
\multirow{2}{*}{Blobs} & \multirow{2}{*}{MLP} & Subclass & 50 & 64 & 2.0e-4 & 0.3 & 1 & 0.09 & 0.05 & 0.04 \\
& & Sample & 20 & 64 & 2.0e-6 & 0.01 & 0.05 & 0.01 & 0.05 & 0.01 \\
\midrule
\multirow{2}{*}{Moons} & \multirow{2}{*}{MLP} & Class & 20 & 64 & 2.0e-4 & 0.3 & 1 & 0.07 & 0.01 & 0 \\
& & Sample & 20 & 64 & 2.0e-6 & 0.01 & 0.05 & 0.01 & 0.05 & 0.01 \\
\midrule
VGGFace2 & \shortstack{Inception\\ ResNet-v1} & Class & 10 & 16 & 1.0e-3 & 1-1e-8 & 1 & 1.0e-5 & 1.0e-4 & 1.0e-2 \\
\bottomrule
\end{tabular}
\end{table}

\subsection{Original Training}
\begin{table}[ht]
\centering
\caption{Hyperparameters for different dataset and model combinations.}
\label{tab:learning_hyperparams}
\begin{tabular}{ccccc}
\toprule
Dataset & Model & Epochs & Batch Size & lr \\
\toprule
CIFAR10 & ResNet18 & 200 & 128 & 0.1 \\
SVHN & ResNet18 & 200 & 128 & 0.1 \\
TinyImageNet & SwinT & 200 & 128 & 0.1 \\
Blobs & MLP & 75 & 64 & 0.01 \\
Moons & MLP & 50 & 64 & 0.01 \\
\bottomrule
\end{tabular}
\end{table}
\end{document}
