% \documentclass{uai2025} % for initial submission
\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}



%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

% \documentclass[sigconf,anonymous,review]{acmart}
\usepackage{subcaption}
\usepackage[linesnumbered,ruled,vlined]{algorithm2e}
% \usepackage{algorithm2e}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{multirow} 
\usepackage{enumitem}
\renewcommand{\algorithmicrequire}{\textbf{Input:}} % Use Input in the format of Algorithm
\renewcommand{\algorithmicensure}{\textbf{Output:}} % Use Output in the format of Algorithm

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{\textsc{Vadis:} Investigating Inter-View Representation Biases for Multi-View Partial Multi-Label Learning}

% The standard author block has changed for UAI 2025 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors



\author[1]{\href{mailto:<wangjie2022@seu.edu.cn>?Subject=Your UAI 2025 paper}{Jie Wang}{}}
% \author[1]{Jie Wang}
\author[1]{Ning Xu}
\author[1]{Xin Geng}

% Add affiliations after the authors
\affil[1]{%
    School of Computer Science and Engineering, Southeast University, Nanjing, China
}

% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%     …
%   }
  
  \begin{document}


\maketitle

\begin{abstract}
    Multi-view partial multi-label learning (MVPML) deals with training data where each example is represented by multiple feature vectors and associated with a set of candidate labels, only a subset of which are correct. The diverse representation biases present in different views complicate the annotation process in MVPML, leading to the inclusion of incorrect labels in the candidate label set. Existing methods typically merge features from different views to identify the correct labels in the training data without addressing the representation biases inherent in different views. In this paper, we propose a novel MVPML method called \textsc{Vadis}, which investigates view-aware representations for disambiguation and predictive model learning. Specifically, we exploit the global common representation shared by all views, aligning it with a local semantic similarity matrix to estimate ground-truth labels via a low-rank mapping matrix. Additionally, to identify incorrect labels, the view-specific inconsistent representation is recovered by leveraging the sparsity assumption. Experiments on real-world datasets validate the superiority of our approach over other state-of-the-art methods.
\end{abstract}

\begin{figure}
  \includegraphics[width=0.9\columnwidth]{example}
  \caption{An example multi-view partial multi-label scenario. The news webpage can be represented from different views such as text, audio, and video. Among the candidate label set given by the crowdsourced annotators, only ‘Argentina’, ‘FIFA World Cup’, and ‘France’ are correct.}
  \label{fig1}
\end{figure}

\section{Introduction}
Partial multi-label learning has gained significant research attention as a means of modeling objects with imprecise semantics \cite{xie2018partial}. In this paradigm, each example is represented by a single feature vector associated with a candidate label set, of which only a subset is deemed valid. In recent years, this framework has been widely employed in many real-world scenarios with inaccurate supervision \cite{xie2018partial, yu2018feature, sun2019partial, zhang2020partial}.

However, due to the intricate nature of real-world scenarios, objects often encompass descriptions from multiple perspectives, resulting in complex properties. Furthermore, the intricate representations stemming from these diverse viewpoints intensify the challenge of annotation, thereby elevating the likelihood of incorrect labeling. As illustrated in Figure \ref{fig1}, a news webpage can contain multiple views, such as text, audio, and video, each accompanied by numerous candidate labels contributed by crowdsourced annotators. Among these labels, only \textit{Argentina}, \textit{FIFA World Cup}, and \textit{France} are correct.

To deal with the task under these circumstances, multi-view partial multi-label learning has emerged \cite{chen2020multi}, where each example is represented by multiple feature vectors associated with a candidate label set, of which only a subset is correct. Several works have been proposed to address the MVPML problem. One previous attempt \cite{chen2020multi} induces a predictive model by simply fusing the similarity matrices over each view, followed by label propagation to disambiguate the candidate label set. Another method \cite{wu2020feature} leverages the aggregated manifold structure of each view, and then maps the manifold structure to the label space for disambiguation. Furthermore, the latent label distribution is also extracted from the candidate labels by incorporating the graph-fusion-based topological structure of the feature space to obtain a predictive model \cite{xu2022multi}. 

However, the aforementioned works simply fuse the features from separate views to facilitate disambiguation, overlooking the consideration of representation biases existing across these views. Actually, the diverse representation biases from different views increase the annotation challenge in MVPML, resulting in the fact that the incorrect labels are selected into the candidate label set in the training data. As the underlying cause of the generation of incorrect labels in the candidate label set is the foundation of inferring the true labels to train a reasonable classifier on MVPML data, the representation biases across different views should be considered for solving the MVPML problem.

To alleviate this issue, a novel approach named \textsc{Vadis}, i.e., \textit{View-Aware DISambiguation for multi-view partial multi-label learning}, is proposed, which explores the feature representation biases by leveraging the properties of different views to identify the true labels and induce the predictive model. Specifically, we utilize a global common feature representation shared across all views. This representation, which corresponds to the local similarity matrix in the semantic space, is employed to estimate the ground-truth labels by introducing a low-rank mapping matrix. Moreover, we recover the view-specific feature representation influenced by inconsistencies, to identify incorrect labels using the sparsity assumption. Extensive experiments on real-world datasets validate the superiority of \textsc{Vadis} over other state-of-the-art methods for solving the MVPML problem.

\section{Related Works}
In this section, two learning frameworks, namely \textit{partial multi-label learning} (PML) \cite{xie2018partial} and \textit{multi-view multi-label learning} (MVML) \cite{luo2013multiview} are introduced, which are closely related to the multi-view partial multi-label learning (MVPML).

MVPML degenerates into the well-known PML problem when each instance is represented by a single feature vector from a single view in the input space. The PML problem aims to learn from the data where each instance is associated with a set of candidate labels, among which only a subset is considered correct. Several approaches have been proposed in this domain, involving the use of the confidence scores associated with each candidate label to determine the correct labels \cite{xie2018partial, xu2020partial}. Additionally, the low-rank assumption is employed to identify noise labels for disambiguation \cite{sun2019partial,yu2018feature}. The credible label elicitation method is used to construct the final prediction, with correct labels being detected from each candidate label set \cite{zhang2020partial}. Furthermore, noisy label identification \cite{xie2021partial} is proposed to tackle the noise labels and ground-truth labels simultaneously.


On the other hand, MVPML can degenerate into the MVML problem \cite{luo2013multiview} when the interference of false-positive labels is absent in the label space. MVML aims to learn a multi-label classifier from the training data where each example is represented with multiple feature vectors and associated with multiple correct labels simultaneously. Previous works detect the informative subspaces over different views to learn a predictive model. These low-dimensional shared subspaces are designed to handle multi-label image classification, using constraints like consistency regularization \cite{luo2013multiview, zhu2015block, liu2015low}. Additionally, some papers propose the co-training framework \cite{blum1998combining,zhou2010semi} to explore the reliable labeling information communication over different views by confidence-rated filtering \cite{xing2018multi} and diversity maximization \cite{zhan2017inductive}. The Hilbert-Schmidt Independence Criterion \cite{zhang2018latent} and matrix factorization \cite{zhu2018multi} based on the measurement of multi-view correlations are used to discover the shared subspaces. Furthermore, the view-specific information is also utilized to learn the classification model \cite{wu2019multi}. 


Both MVML and PML can be viewed as degenerated versions of MVPML, which makes the task of learning from MVPML data more challenging to solve. One previous attempt towards MVPML \cite{chen2020multi} induces a predictive model by simply fusing the similarity matrices from each view and employing label propagation to disambiguate the candidate label set. Another approach \cite{wu2020feature} deploys the aggregated manifold structure of each view to disambiguate the candidate label set by mapping the manifold structure to the label space. The latent label distribution is also learned from the candidate labels by graph-fusion-based incorporation of the topological structure within the feature space, to induce a predictive model \cite{xu2022multi}. Nonetheless, these methods neglect the impact of the feature representation biases in different views on the label space. 

In the next section, a novel MVPML approach named \textsc{Vadis} with strong generalization performance is proposed, where the representation biases in different views are considered to learn a predictive model. 

\section{The Proposed Approach}
\subsection{Problem Formulation}
Formally, let $\mathcal{X}=\mathbb{R}^{d_1} \times \mathbb{R}^{d_2} \times \cdots \times \mathbb{R}^{d_V}$ denote the input space consisting of $V$ views, where each view $v$ has a dimensionality of $d_v$ ($1 \leq v \leq V$). Furthermore, let $\mathcal{Y}=\left\{ y_1, \ldots, y_c\right\}$ denote the label space consisting of $c$ possible class labels. Let $\mathcal{D}=\left\{\left(\boldsymbol{x}_i, Y_i\right) \mid 1 \leq i \leq n\right\}$ denote the MVPML training set, where $\boldsymbol{x}_i=\left[\boldsymbol{x}_i^1; \boldsymbol{x}_i^2; \ldots; \boldsymbol{x}_i^V\right] \in \mathcal{X} $ is the $\left(d = \sum_{v=1}^V d_v\right)$-dimensional multi-view instance and $Y_i \subseteq \mathcal{Y}$ is the candidate label set associated with $\boldsymbol{x}_i$. Here, the ground-truth label set $\widetilde{Y}_i \subseteq \mathcal{Y}$ for $\boldsymbol{x}_i$ is concealed in its candidate label set (i.e., $\widetilde{Y}_i \subseteq Y_i$) and thus not directly accessible. Accordingly, the task of MVPML is to learn a multi-label classification model $h: \mathcal{X} \rightarrow 2^{\mathcal{Y}}$ from $\mathcal{D}$ which is capable of predicting the proper labels for unseen instances.

In this paper, let $\mathbf{X}=\left[\boldsymbol{x}_1, \boldsymbol{x}_2, \ldots, \boldsymbol{x}_n\right] \in \mathbb{R}^{d \times n}$ denote the feature matrix, where $\mathbf{X}^v=
\left[\boldsymbol{x}_1^v, \boldsymbol{x}_2^v, \ldots, \boldsymbol{x}_n^v\right] \in \mathbb{R}^{d_v \times n}$ ($1 \leq v \leq V$) is the feature matrix of the $v$-th view. Furthermore, let $\mathbf{L}=\left[\boldsymbol{l}_1, \boldsymbol{l}_2, \ldots, \boldsymbol{l}_n\right] ^\top$ denote the partial multi-label matrix, where  $\boldsymbol{l}_i=\left[l_{i}^{y_1}; l_{i}^{y_2}; \ldots; l_{i}^{y_c}\right] \in\{0,1\}^{c}$ is the observed label vector of $\boldsymbol{x}_i$, i.e., $l_{i}^{y_j}=1$ if $y_j \in Y_i$, otherwise $l_{i}^{y_j}=0$.

\subsection{The \textsc{Vadis} Framework}
In order to capture the view-aware feature representations from diverse views, we employ the self-representation approach where an instance can be represented as a linear combination of other instances. This enables us to learn the following self-representation $\mathbf{Z}^v$ of $v$-th ($1 \leq v \leq V$) view:
\begin{equation}
    \mathbf{X}^v=\mathbf{X}^v\mathbf{Z}^v +\mathbf{E}^v,
    \label{1}
\end{equation}
where $ \mathbf{Z}^v\in \mathbb{R}^{n \times n}$ is the learned self-representation matrix and $\mathbf{E}^v \in \mathbb{R}^{d_v \times n}$ is the error term. In Eq. (\ref{1}), the feature representation of each view is reconstructed to the same dimension, which facilitates subsequent learning. Additionally, the reconstruction objective effectively alleviates the potential loss of representation associated with each view.



Due to the diverse properties inherent in different views, biases arise in the representation of views, alongside the shared common representation. These biases are caused by inconsistencies from different views, prompting us to recover the view-specific-inconsistent representation, denoted as $\mathbf{O}^v$. Subsequently, the view-aware self-representation $\mathbf{Z}^v$ can be decomposed into the global common representation $\mathbf{C}$ shared by all views, and the view-specific-inconsistent representation $\mathbf{O}^v$.
\begin{equation}
    \mathbf{Z}^v = \mathbf{C} + \mathbf{O}^v.
    \label{l2}
\end{equation}
In Eq. (\ref{l2}), the common feature representation matrix $\mathbf{C}$ serves as a global representation, assumed to be related to the local similarity among instances within each view in the semantic space. To this end, we define the local similarity matrix of the $v$-th view, denoted as $\mathbf{S}^v$. Notably, as the feature representations of distinct instances grow more alike, the common representation becomes richer in the specific view. Intuitively, the common feature representation should exhibit greater similarity to $S_{i, j}^v$ when the $i$-th and $j$-th instances within the $v$-th view share stronger correlations. Each element $S_{i, j}^v$ can be calculated by $S_{i, j}^v = \exp\left(-\frac{\left\|\boldsymbol{x}_i^v-\boldsymbol{x}_j^v\right\|^2}{2}\right)$ if $\boldsymbol{x}_i^v$ is among the $K$-nearest neighbors of $\boldsymbol{x}_j^v$, and $S_{i, j}^v = 0$ otherwise. Here, $K$ represents a preset parameter.


Furthermore, it is essential to note that inconsistent representations are inherently not universal, making it reasonable to assume sparsity in the view-specific-inconsistent representation matrix $\mathbf{O}^v$. Therefore, we can formulate the following objective function:
\begin{equation}
\centering
\begin{aligned}
 \min _{\mathbf{C}, \mathbf{O}^v} &\sum_{v=1}^{V}\left\|\mathbf{X}^v-\mathbf{X}^v(\mathbf{C}+\mathbf{O}^v)\right\|_F^2 +\left\|\mathbf{O}^v\right\|_1 +\left\|\mathbf{C}-\mathbf{S}^v\right\|_F^2  \\
& \textit { s.t. } ~\mathbf{X}^v=\mathbf{X}^v \mathbf{Z}^v+\mathbf{E}^v, \mathbf{Z}^v=\mathbf{C}+\mathbf{O}^v.
% & ~~~~~~~~~~~~~~~~~~~~~~~~Z^v=C+O^v. \\
\end{aligned}
\end{equation}
Here, the first part represents the self-representation loss, the second part enforces a sparse constraint on the view-specific-inconsistent representation, and the final term adds a constraint based on local similarity.

The observed label matrix $\mathbf{L}$ can be divided into two distinct parts: the ground-truth label matrix $\mathbf{\widetilde{L}}$ and the remaining incorrect label matrix $\mathbf{N}$.
\begin{equation}
       \mathbf{L} =  \mathbf{\widetilde{L}} + \mathbf{N}.
\end{equation}

To estimate the ground-truth labels from the observed labels, a matrix $\mathbf{P} \in \mathbb{R}^{n \times c}$ is constructed to map the common feature representation $\mathbf{C}$ to the label space. Furthermore, the incorrect labels are influenced by representation biases from different views, suggesting a certain connection between the view-specific-inconsistent feature representation and the incorrect labels. Therefore, we introduce a mapping matrix $\mathbf{Q} \in \mathbb{R}^{n \times c}$ to identify the incorrect labels. As a result, we obtain the following formula:
\begin{equation}
       \mathbf{\widetilde{L}} \approx  \mathbf{CP}, ~\mathbf{N} \approx \mathbf{OQ}.
\end{equation}
Here, $\mathbf{O}$ denotes the comprehensive inconsistent representation acquired through the fusion of view-specific representations from various views.

It is important to acknowledge that there are well-established label correlations among different labels in multi-label learning \cite{zhang2013review}, which leads to the assumption that $\mathbf{P}$ is linearly dependent to effectively capture such label correlations, implying $\mathbf{P}$ is low-rank. Since the rank function poses optimization challenges due to its discrete nature, we opt for the nuclear norm as a replacement \cite{sun2019partial}. Furthermore, the incorrect labels in partial multi-label learning tend to be sparse within the candidate label set. To simplify the optimization,  we impose a sparse constraint on the mapping matrix $\mathbf{Q}$. Consequently, we can formulate the following objective function:
\begin{equation}
\begin{aligned}
 \min _{\mathbf{C},\mathbf{O}^v, \mathbf{P}, \mathbf{Q}} &\|\mathbf{L}-\mathbf{CP}-\mathbf{OQ}\|_F^2 + \gamma_1\|\mathbf{P}\|_*+\gamma_2\|\mathbf{Q}\|_1 \\
\textit { s.t. } & \mathbf{X}^v=\mathbf{X}^v \mathbf{Z}^v+\mathbf{E}^v \\
& \mathbf{Z}^v=\mathbf{C}+\mathbf{O}^v, \mathbf{O}=\frac{1}{V} \sum\nolimits_{v=1}^{V} \mathbf{O}^v. 
\end{aligned}
\end{equation}
Here, the first part ensures that the learned labels match the observed labels. The last two components represent low-rank and sparse constraints applied to different mapping matrices, where $\gamma_1$ and $\gamma_2$ are the trade-off parameters.



Subsequently, the candidate label set is disambiguated by identifying the correct labels, and a linear predictive model $\mathbf{W}$ is induced. The overall optimization problem can be formulated as follows:
\begin{equation}{\label{final}}
\centering
\begin{aligned}
\min _{\substack{\mathbf{C},\mathbf{P},\mathbf{W}, \\ \mathbf{O}^v,\mathbf{Q}}}&\sum_{v=1}^{V}\left\|\mathbf{X}^v-\mathbf{X}^v (\mathbf{C}+\mathbf{O}^v)\right\|_F^2+\left\|\mathbf{C}-\mathbf{S}^v\right\|_F^2\\&~~~~~~~~~~~~+\left\|\mathbf{O}^v\right\|_1 +\|\mathbf{L}-\mathbf{CP}-\mathbf{OQ}\|_F^2  + \gamma_1\|\mathbf{P}\|_*\\ &~~~~~~~~~~~~ +\gamma_2\|\mathbf{Q}\|_1 +\|\mathbf{L-OQ-X^\top W}\|_F^2+\|\mathbf{W}\|_F^2\\
\textit { s.t. } &\mathbf{X}^v=\mathbf{X}^v \mathbf{Z}^v+\mathbf{E}^v \\
& \mathbf{Z}^v=\mathbf{C}+\mathbf{O}^v, \mathbf{O}=\frac{1}{V} \sum\nolimits_{v=1}^{V} \mathbf{O}^v. 
\end{aligned}
\end{equation}
Here, $\|\mathbf{W}\|_F^2$ is a regularization term to control the model complexity.

\begin{table*}[h]
\centering
\caption{Characteristics of the multi-view partial multi-label datasets.}
\resizebox{0.95\textwidth}{!}{%
\begin{tabular}{@{}c|c|c|c|c|c|c|c|c@{}}
\toprule \toprule
\textit{Datasets}    & $|S|$   & $V(S)$                                      & $VDim(S)$          & $CL(S)$ & $LCard(S)$ & $\textit{Domain}$ & \textit{Description} & \textit{Controlling Parameters} \\ \hline
\text{Emotions}    & \text{593}   & \text{2}                      & \text{8 / 64}               & \text{6}     & \text{1.869}    & \text{Music}  & \text{Rhythm, Timbre}  &  \multirow{5}{*}{\begin{tabular}[c]{@{}c@{}}$p \in~\{0.3, 0.7\}$\\ $r~\in~\{1, 2, 3\}$\end{tabular}} \\ \cline{1-8}
\text{Yeast}       & \text{2,417} & \text{2}  & \text{24 / 79}              & \text{14}    & \text{4.237}    & \text{Biology} & \text{Genetic Expression, Phylogenetic Profile} & \\ \cline{1-8}
\text{Pascal}      & 9,963 & 5    & \text{42 / 100 / 196 / 370 / 310} & 20    & 1.465    & \text{Images}  & \text{DenseHue, Gist, DenseSift, HSV, Tag} & \\ \cline{1-8}
\text{EspGame5k}   & 5,192 & 4         & 48 / 91 / 519 / 368      & 268   & 4.679    & \text{Images}  & \text{DenseHue, Gist, DenseSift, HSV} & \\ \cline{1-8}
\text{Mirflickr5k} & \text{5,000} & \text{5}    & \text{48 / 93 / 112 / 359 / 318}  & \text{38}    & \text{4.711}    & \text{Images} & \text{DenseHue, Gist, DenseSift, HSV, Tag}  & \\ \bottomrule \bottomrule
\end{tabular}%
}
\label{ds}
\end{table*}







\subsection{Optimization}
In this section, an iterative strategy is utilized to address the final optimization problem in Eq. (\ref{final}). When $\mathbf{O}^v, \mathbf{P}, \mathbf{Q}$ and $\mathbf{W}$ are fixed, $\mathbf{C}$ could be updated by the following ordinary least squares problem:
\begin{equation}
\begin{aligned}
 \min _{\mathbf{C}} &\sum_{v=1}^{V} \left\|\mathbf{X}^v-\mathbf{X}^v (\mathbf{C} + \mathbf{O}^v)\right\|_F^2+\left\|\mathbf{C}-\mathbf{S}^v\right\|_F^2\\
&+\|\mathbf{L-C P-O Q}\|_F^{2}.
% &\text { s.t. } ~Z^v=C+O^v \\
% & ~~~~~~~X^v=X^v Z^v+E^v \\
% & ~~~~~~~O=\frac{1}{V} \sum_{v=1}^{V} O^v,
\end{aligned}
\end{equation}

When $\mathbf{C}, \mathbf{O}^v, \mathbf{P}$ and $\mathbf{Q}$ are fixed, $\mathbf{W}$ could be updated by the following ridge regression problem:
\begin{equation}
\begin{aligned}
 \min _{\mathbf{W}} &\|\mathbf{L-O Q-X^\top W}\|_F^2 +\|\mathbf{W}\|_F^{2}.
\end{aligned}
\end{equation}
Note that solving this problem directly can be computationally demanding. Following \cite{wang2019adaptive}, we adopt BFGS as an alternative optimization strategy for large-scale datasets. 

When $\mathbf{C}, \mathbf{O}^v, \mathbf{Q}$ and $\mathbf{W}$ are fixed, $\mathbf{P}$ could be updated by the following problem:
\begin{equation}
    \min _{\mathbf{P}} \|\mathbf{L-C P-O Q}\|_F^2 + \gamma_1\|\mathbf{P}\|_{*}.
\label{9}
\end{equation}
To solve Eq. (\ref{9}), we introduce an auxiliary variable $\mathbf{Z} \in \mathbb{R}^{n \times c}$ and reformulate it into the following equivalent form:
\begin{equation}
\begin{aligned}
    \min _{\mathbf{P}, \mathbf{Z}} &\|\mathbf{L-CP-OQ}\|_F^2+\gamma_1\|\mathbf{Z}\|_* \\
    \textit { s.t. } &\mathbf{Z=P},\nonumber
\end{aligned}
\label{10}
\end{equation}
which can be solved by the popular alternating direction method of multipliers (ADMM) \cite{boyd2011distributed}. Firstly, the augmented Lagrangian function is constructed as follows:
\begin{equation}
\begin{aligned}
     \mathcal{L}\left(\mathbf{P, Z, U}; \rho\right) &= \|\mathbf{L-CP-O Q}\|_F^2+\gamma_1\|\mathbf{Z}\|_* \\
     &~~ +\langle \mathbf{U, P-Z}\rangle +\frac{\rho}{2}\|\mathbf{P-Z}\|_F^2,
\end{aligned}
\label{14}
\end{equation}
where $\mathbf{U}$ is a Lagrange multiplier matrix, and $\rho$ is a penalty parameter. Then the alternating update steps are as follows:
\begin{equation}
\left\{\begin{aligned}
\mathbf{P}^{k+1}  &=\underset{\mathbf{P}}{\operatorname{argmin}}\|\mathbf{L-CP-O Q}\|_F^2 
+\frac{\rho}{2}\left\|\mathbf{P}-\mathbf{Z}^k+\frac{\mathbf{U}^k}{\rho}\right\|_F^2 \\
\mathbf{Z}^{k+1}  &=\underset{\mathbf{Z}}{\operatorname{argmin}} \gamma_1\|\mathbf{Z}\|_*
+\frac{\rho}{2}\left\|\mathbf{Z}-\mathbf{P}^{k+1}-\frac{\mathbf{U}^k}{\rho}\right\|_F^2 \\
\mathbf{U}^{k+1}  &=\mathbf{U}^k+\rho \left(\mathbf{P}^{k+1}-\mathbf{Z}^{k+1}\right).
\end{aligned}\right.
\end{equation}
The optimization of $\mathbf{P}^{k+1}$ is an ordinary least squares problem that is easily solved by Eq. (\ref{16}). Furthermore, inspired by \cite{cai2010singular}, the optimization of $\mathbf{Z}^{k+1}$ can be solved by Singular Value Thresholding (SVT).
\begin{equation}
\begin{aligned}
\mathbf{P}^{k+1}=&\left(2 \mathbf{C}^{\top} \mathbf{C}+\rho \mathbf{I}\right)^{-1}\left(2\mathbf{C}^{\top}(\mathbf{L-OQ})+\rho \mathbf{Z}^k-\mathbf{U}^k\right),
\end{aligned}
\label{16}
\end{equation}
\begin{equation}
\mathbf{Z}^{k+1}=\mathcal{D}_{\frac{\gamma_1}{\rho}}\left(\mathbf{P}^{k+1}+\frac{\mathbf{U}^k}{\rho}\right).
\label{17}
\end{equation}
Specifically, $\mathcal{D}_\tau(\mathbf{X})$ denotes the singular value thresholding operator given by $\mathcal{D}_\tau(\mathbf{X})=\mathbf{U} \mathcal{S}_\tau(\mathbf{\Sigma}) \mathbf{V}^*$, where $\mathbf{X}=\mathbf{U} \mathbf{\Sigma} \mathbf{V}^*$ is any singular value decomposition. $\mathcal{S}_\tau: \mathbb{R} \rightarrow \mathbb{R}$ is the shrinkage operator $\mathcal{S}_\tau[x]=\operatorname{sgn}(x) \max (|x|-\tau, 0)$ \cite{zhuang2012non}.

When $\mathbf{C}, \mathbf{O}^v, \mathbf{P}$ and $\mathbf{W}$ are fixed, $\mathbf{Q}$ could be reformulated as follows:
\begin{equation}
\centering
\begin{aligned}
 \min _{\mathbf{Q}}\|\mathbf{L-C P-OQ}\|_F^2 +\gamma_2\|\mathbf{Q}\|_1 +\|\mathbf{L-O Q-X^\top W}\|_F^2 ,
\end{aligned}
\end{equation}
which can also be solved by ADMM. The difference is that the optimization for $\mathbf{Z}_{1}^{k+1}$ can be achieved by employing the shrinkage operator \cite{zhuang2012non}:
\begin{equation}
\mathbf{Z}_{1}^{k+1}=\mathcal{S}_{\frac{\gamma_2}{\rho_1}}\left(\mathbf{Q}^{k+1}+\frac{\mathbf{U}_{1}^k}{\rho_{1}}\right),
\label{18}
\end{equation}
where $\mathbf{Z}_{1}$ is an auxiliary variable, $\mathbf{U}_{1}$ is a Lagrange multiplier matrix, and $\rho_1$ is a penalty parameter.

When $\mathbf{C}, \mathbf{P}, \mathbf{Q}$ and $\mathbf{W}$ are fixed, $\mathbf{O}^v$ could be updated by the following problem:
\begin{equation}
\centering
\begin{aligned}
 \min _{\mathbf{O}^v} &\sum_{v=1}^{V}\left\|\mathbf{X}^v-\mathbf{X}^v (\mathbf{C} + \mathbf{O}^v)\right\|_F^2+\left\|\mathbf{C}-\mathbf{S}^v\right\|_F^2 \\ &~~~+\left\|\mathbf{O}^v\right\|_1  +\|\mathbf{L-C P-OQ}\|_F^2 \\
 &~~~ +\|\mathbf{L-O Q-X^\top W}\|_F^2\\
% & ~~~~~~~~~~~~\text { s.t. } ~Z^v=C+O^v \\
% & ~~~~~~~~~~~~~~~~~~~~X^v=X^v Z^v+E^v \\
& \textit { s.t. } ~\mathbf{O}=\frac{1}{V} \sum\nolimits_{v=1}^{V} \mathbf{O}^v,
\label{20}
\end{aligned}
\end{equation}
which can also be solved by the ADMM techniques.

\begin{table*}[t]
\centering
\caption{Predictive performance of each comparing method on five datasets in terms of \textit{Ranking
Loss} (mean $\pm$ std). The best performance is marked in bold (the smaller the better).}
\resizebox{0.8\textwidth}{!}{%
\begin{tabular}{@{}cccccccccc@{}}
\toprule\toprule
\multirow{2}{*}{Datasets} & \multirow{2}{*}{\begin{tabular}[c]{@{}c@{}}Controlling\\ Parameters\end{tabular}}  & \multicolumn{8}{c}{Comparing Approaches} \\ \cmidrule(l){3-10} 
                             &         & \textsc{Vadis}                     &\textsc{Glade} &\textsc{F2l2if}      & \textsc{Fiman}             & \textsc{Fpml}        & \textsc{Gradis}           & \textsc{Pml-lrs}       & \textsc{Lsamml}            \\ \midrule
\multirow{6}{*}{Emotions}   & $r=1,p=0.3$ & \textbf{0.143$\pm$0.027} & 0.162$\pm$0.020 & 0.232$\pm$0.032 & 0.175$\pm$0.022 & 0.217$\pm$0.031 & 0.184$\pm$0.025 & 0.218$\pm$0.030 & 0.178$\pm$0.028 \\
 & $r=1,p=0.7$ & \textbf{0.153$\pm$0.030} &  0.167$\pm$0.025 & 0.233$\pm$0.019 & 0.186$\pm$0.025 & 0.217$\pm$0.027 & 0.231$\pm$0.032 & 0.217$\pm$0.026 & 0.213$\pm$0.047 \\
 & $r=2,p=0.3$ & \textbf{0.149$\pm$0.027} &  0.173$\pm$0.027 & 0.238$\pm$0.027 & 0.179$\pm$0.023 & 0.224$\pm$0.031 & 0.204$\pm$0.025 & 0.228$\pm$0.030 & 0.194$\pm$0.023 \\
 & $r=2,p=0.7$ & \textbf{0.172$\pm$0.033} &  0.189$\pm$0.027 & 0.254$\pm$0.028 & 0.194$\pm$0.016 & 0.237$\pm$0.021 & 0.298$\pm$0.034 & 0.250$\pm$0.035 & 0.217$\pm$0.033 \\
 & $r=3,p=0.3$ & \textbf{0.144$\pm$0.026} &  0.174$\pm$0.027 & 0.240$\pm$0.031 & 0.176$\pm$0.024 & 0.230$\pm$0.028 & 0.207$\pm$0.041 & 0.227$\pm$0.030 & 0.189$\pm$0.024 \\
 & $r=3,p=0.7$ & \textbf{0.186$\pm$0.030} & 0.189$\pm$0.033 & 0.285$\pm$0.036 &  0.188$\pm$0.026 & 0.267$\pm$0.024 & 0.333$\pm$0.043 & 0.316$\pm$0.029 & 0.229$\pm$0.030 \\ \midrule
\multirow{6}{*}{Yeast}      & $r=1,p=0.3$ & \textbf{0.164$\pm$0.008} &   0.167$\pm$0.009 & 0.362$\pm$0.015 & 0.183$\pm$0.011 & 0.212$\pm$0.011 & 0.183$\pm$0.008 & 0.214$\pm$0.011 & 0.502$\pm$0.020 \\
 & $r=1,p=0.7$ & \textbf{0.167$\pm$0.008} &  0.170$\pm$0.009 & 0.366$\pm$0.014 & 0.186$\pm$0.011 & 0.212$\pm$0.011 & 0.214$\pm$0.008 & 0.213$\pm$0.011 & 0.500$\pm$0.030 \\
 & $r=2,p=0.3$ & \textbf{0.168$\pm$0.009} &  \textbf{0.168$\pm$0.008} & 0.364$\pm$0.014 & 0.185$\pm$0.012 & 0.211$\pm$0.011 & 0.198$\pm$0.010 & 0.213$\pm$0.011 & 0.505$\pm$0.022 \\
 & $r=2,p=0.7$ & \textbf{0.168$\pm$0.011} &   0.171$\pm$0.008 & 0.370$\pm$0.014 & 0.186$\pm$0.013 & 0.212$\pm$0.011 & 0.244$\pm$0.012 & 0.214$\pm$0.011 & 0.499$\pm$0.021 \\
 & $r=3,p=0.3$ & \textbf{0.169$\pm$0.010} &   \textbf{0.169$\pm$0.008} & 0.365$\pm$0.014 & 0.185$\pm$0.011 & 0.211$\pm$0.011 & 0.208$\pm$0.013 & 0.214$\pm$0.011 & 0.501$\pm$0.025 \\
 & $r=3,p=0.7$ & \textbf{0.170$\pm$0.010} &   \textbf{0.170$\pm$0.010} & 0.367$\pm$0.014 & 0.187$\pm$0.013 & 0.213$\pm$0.011 & 0.266$\pm$0.011 & 0.213$\pm$0.011 & 0.505$\pm$0.020 \\  \midrule
\multirow{6}{*}{EspGame5k}  & $r=1,p=0.3$ & \textbf{0.182$\pm$0.005} &   0.185$\pm$0.007 & 0.234$\pm$0.007 & 0.225$\pm$0.007 & 0.289$\pm$0.011 & 0.234$\pm$0.010 & 0.254$\pm$0.007 & 0.232$\pm$0.005 \\
 & $r=1,p=0.7$ & \textbf{0.185$\pm$0.004} &   0.188$\pm$0.005 & 0.241$\pm$0.007 & 0.229$\pm$0.007 & 0.289$\pm$0.011 & 0.272$\pm$0.008 & 0.259$\pm$0.008 & 0.233$\pm$0.005 \\
 & $r=2,p=0.3$ & \textbf{0.184$\pm$0.005} &   0.188$\pm$0.006 & 0.238$\pm$0.008 & 0.229$\pm$0.007 & 0.289$\pm$0.012 & 0.253$\pm$0.006 & 0.259$\pm$0.010 & 0.235$\pm$0.007 \\
 & $r=2,p=0.7$ & \textbf{0.191$\pm$0.006} &   0.194$\pm$0.006 & 0.253$\pm$0.008 & 0.241$\pm$0.009 & 0.290$\pm$0.012 & 0.314$\pm$0.005 & 0.272$\pm$0.011 & 0.241$\pm$0.006 \\
 & $r=3,p=0.3$ & \textbf{0.188$\pm$0.004} &   0.191$\pm$0.006 & 0.243$\pm$0.006 & 0.233$\pm$0.006 & 0.289$\pm$0.010 & 0.265$\pm$0.005 & 0.265$\pm$0.009 & 0.236$\pm$0.004 \\
 & $r=3,p=0.7$ & \textbf{0.196$\pm$0.006} &   0.198$\pm$0.008 & 0.256$\pm$0.009 & 0.243$\pm$0.009 & 0.289$\pm$0.012 & 0.336$\pm$0.008 & 0.272$\pm$0.010 & 0.242$\pm$0.006 \\ \midrule
\multirow{6}{*}{Pascal}     & $r=1,p=0.3$ & \textbf{0.085$\pm$0.004} &   0.089$\pm$0.003 & 0.217$\pm$0.008 & 0.116$\pm$0.005 & 0.267$\pm$0.010 & 0.102$\pm$0.008 & 0.329$\pm$0.006 & 0.244$\pm$0.008 \\
 & $r=1,p=0.7$ & \textbf{0.089$\pm$0.004} &   0.091$\pm$0.004 & 0.237$\pm$0.007 & 0.131$\pm$0.007 & 0.268$\pm$0.013 & 0.113$\pm$0.007 & 0.330$\pm$0.010 & 0.253$\pm$0.009 \\
 & $r=2,p=0.3$ & \textbf{0.087$\pm$0.004} &   0.090$\pm$0.003 & 0.229$\pm$0.010 & 0.124$\pm$0.005 & 0.264$\pm$0.012 & 0.112$\pm$0.008 & 0.328$\pm$0.008 & 0.249$\pm$0.008 \\
 & $r=2,p=0.7$ & \textbf{0.096$\pm$0.005} &   \textbf{0.096$\pm$0.004} & 0.258$\pm$0.008 & 0.145$\pm$0.006 & 0.275$\pm$0.013 & 0.130$\pm$0.005 & 0.326$\pm$0.009 & 0.264$\pm$0.007 \\
 & $r=3,p=0.3$ & \textbf{0.089$\pm$0.004} &   0.091$\pm$0.004 & 0.242$\pm$0.009 & 0.128$\pm$0.005 & 0.268$\pm$0.012 & 0.114$\pm$0.007 & 0.329$\pm$0.008 & 0.253$\pm$0.007 \\
 & $r=3,p=0.7$ & \textbf{0.101$\pm$0.001} &   \textbf{0.101$\pm$0.005} & 0.278$\pm$0.009 & 0.159$\pm$0.007 & 0.279$\pm$0.013 & 0.142$\pm$0.010 & 0.318$\pm$0.008 & 0.270$\pm$0.009 \\\midrule
\multirow{6}{*}{Mirflickr5k} & $r=1,p=0.3$ & \textbf{0.102$\pm$0.007} &   0.113$\pm$0.009 & 0.140$\pm$0.010 & 0.138$\pm$0.007 & 0.193$\pm$0.008 & 0.148$\pm$0.007 & 0.225$\pm$0.009 & 0.189$\pm$0.010 \\
 & $r=1,p=0.7$ & \textbf{0.104$\pm$0.008} &   0.116$\pm$0.009 & 0.142$\pm$0.010 & 0.139$\pm$0.007 & 0.194$\pm$0.007 & 0.190$\pm$0.007 & 0.224$\pm$0.009 & 0.191$\pm$0.010 \\
 & $r=2,p=0.3$ & \textbf{0.104$\pm$0.007} &   0.116$\pm$0.009 & 0.142$\pm$0.009 & 0.140$\pm$0.007 & 0.193$\pm$0.008 & 0.167$\pm$0.009 & 0.224$\pm$0.009 & 0.191$\pm$0.010 \\
 & $r=2,p=0.7$ & \textbf{0.108$\pm$0.008} &   0.124$\pm$0.009 & 0.149$\pm$0.009 & 0.142$\pm$0.007 & 0.194$\pm$0.008 & 0.227$\pm$0.006 & 0.223$\pm$0.009 & 0.196$\pm$0.010 \\
 & $r=3,p=0.3$ & \textbf{0.105$\pm$0.008} &   0.118$\pm$0.009 & 0.144$\pm$0.009 & 0.142$\pm$0.007 & 0.194$\pm$0.007 & 0.176$\pm$0.008 & 0.223$\pm$0.009 & 0.192$\pm$0.009 \\
 & $r=3,p=0.7$ & \textbf{0.110$\pm$0.007} &   0.135$\pm$0.009 & 0.155$\pm$0.010 & 0.145$\pm$0.008 & 0.193$\pm$0.007 & 0.250$\pm$0.009 & 0.216$\pm$0.010 & 0.200$\pm$0.010 \\ \bottomrule\bottomrule
\end{tabular}%
}
\label{ranking loss}
\end{table*}

Due to page limitations, the complete pseudo-code of the algorithm can be found in the Appendix. In addition, we also analyze the time complexity of the \textsc{Vadis} algorithm, which can be found in the Supplementary Material.

\subsection{Prediction}
To predict the appropriate label set for a new instance $\boldsymbol{x}$, we employ a virtual label bipartition. Specifically, an additional virtual label $y_0$ is introduced as a threshold to categorize the labels as either relevant or irrelevant. Consequently, the label space $\mathcal{Y}$ is expanded to $\mathcal{Y}^{\prime}=\mathcal{Y} \cup\left\{y_0\right\}=\left\{y_0, y_1, \ldots, y_c\right\}$. In this paper, $l_{\boldsymbol{x}}^{y_0}$ is set to be $0.5$. Let $\mathbf{W}^*=\left[\boldsymbol{w}_0^*, \boldsymbol{w}_1^*, \ldots, \boldsymbol{w}_c^*\right] $ be the final predictive model, providing the following outputs for each class $y_j$ ($0 \leq j \leq c$):
\begin{equation}
    \forall_{j=0}^c: f\left(y_j \mid \boldsymbol{x}\right)=\boldsymbol{x}^{\top} {\boldsymbol{w}_j^{*}}.
\end{equation}
Then, the predicted labels for $\boldsymbol{x}$ are obtained via splitting the outputs:
\begin{equation}
    \zeta(\boldsymbol{x})=\left\{y_j \mid f\left(y_j \mid \boldsymbol{x}\right)>f\left(y_0 \mid \boldsymbol{x}\right), 1 \leq j \leq c\right\}.
\end{equation}


\begin{table*}[t]
\centering
\caption{Predictive performance of each comparing method on five datasets in terms of \textit{Average Precision} (mean $\pm$ std). The best performance is marked in bold (the larger the better).}
\resizebox{0.88\textwidth}{!}{%
\begin{tabular}{@{}cccccccccc@{}}
\toprule\toprule
\multirow{2}{*}{Datasets} & \multirow{2}{*}{\begin{tabular}[c]{@{}c@{}}Controlling\\ Parameters\end{tabular}}  & \multicolumn{8}{c}{Comparing Approaches} \\ \cmidrule(l){3-10} 
                             &         & \textsc{Vadis}                     &\textsc{Glade} &\textsc{F2l2if}      & \textsc{Fiman}             & \textsc{Fpml}        & \textsc{Gradis}           & \textsc{Pml-lrs}       & \textsc{Lsamml}            \\ \midrule
\multirow{6}{*}{Emotions} & $ r=1,p=0.3$  & \textbf{0.821$\pm$0.032} & 0.800$\pm$0.031 & 0.725$\pm$0.030 & 0.792$\pm$0.028 & 0.748$\pm$0.027 &   0.806$\pm$0.027 & 0.739$\pm$0.028 & 0.785$\pm$0.034 \\
 & $ r=1,p=0.7$  & \textbf{0.810$\pm$0.041} & 0.786$\pm$0.030 & 0.725$\pm$0.022 & 0.786$\pm$0.032 & 0.748$\pm$0.025 &   0.801$\pm$0.033 & 0.742$\pm$0.019 & 0.753$\pm$0.046 \\
 & $ r=2,p=0.3$  & \textbf{0.819$\pm$0.032} & 0.784$\pm$0.032 & 0.721$\pm$0.024 & 0.792$\pm$0.028 & 0.743$\pm$0.025 &   0.817$\pm$0.033 & 0.739$\pm$0.024 & 0.771$\pm$0.029 \\
 & $ r=2,p=0.7$  &   0.802$\pm$0.032 & 0.775$\pm$0.033 & 0.720$\pm$0.027 & 0.775$\pm$0.018 & 0.737$\pm$0.019 & \textbf{0.815$\pm$0.026} & 0.729$\pm$0.025 & 0.752$\pm$0.031 \\
 & $ r=3,p=0.3$  & \textbf{0.819$\pm$0.031} & 0.784$\pm$0.028 & 0.724$\pm$0.027 & 0.792$\pm$0.024 & 0.734$\pm$0.027 &   0.807$\pm$0.038 & 0.744$\pm$0.022 & 0.780$\pm$0.028 \\
 & $ r=3,p=0.7$  & \textbf{0.779$\pm$0.035} & 0.769$\pm$0.043 & 0.684$\pm$0.033 &   0.772$\pm$0.027 & 0.703$\pm$0.027 & 0.730$\pm$0.042 & 0.654$\pm$0.030 & 0.734$\pm$0.034 \\ \midrule
\multirow{6}{*}{Yeast} & $ r=1,p=0.3$  & \textbf{0.767$\pm$0.012} & 0.763$\pm$0.011 & 0.597$\pm$0.014 & 0.756$\pm$0.013 & 0.703$\pm$0.012 &   0.764$\pm$0.011 & 0.701$\pm$0.012 & 0.437$\pm$0.025 \\
 & $ r=1,p=0.7$  & \textbf{0.762$\pm$0.008} &   0.760$\pm$0.012 & 0.592$\pm$0.012 & 0.753$\pm$0.013 & 0.703$\pm$0.013 & 0.749$\pm$0.011 & 0.702$\pm$0.013 & 0.435$\pm$0.029 \\
 & $ r=2,p=0.3$  & \textbf{0.767$\pm$0.011} &   0.763$\pm$0.012 & 0.595$\pm$0.011 & 0.754$\pm$0.013 & 0.703$\pm$0.013 & 0.763$\pm$0.013 & 0.702$\pm$0.013 & 0.440$\pm$0.019 \\
 & $ r=2,p=0.7$  & \textbf{0.761$\pm$0.015} &   \textbf{0.761$\pm$0.012} & 0.588$\pm$0.012 & 0.754$\pm$0.013 & 0.702$\pm$0.012 & 0.749$\pm$0.012 & 0.701$\pm$0.012 & 0.427$\pm$0.013 \\
 & $ r=3,p=0.3$  &   0.764$\pm$0.014 & 0.761$\pm$0.011 & 0.592$\pm$0.012 & 0.754$\pm$0.014 & 0.703$\pm$0.013 & \textbf{0.768$\pm$0.013} & 0.702$\pm$0.012 & 0.441$\pm$0.027 \\
 & $ r=3,p=0.7$  &   0.759$\pm$0.011 & \textbf{0.762$\pm$0.011} & 0.588$\pm$0.012 & 0.752$\pm$0.013 & 0.703$\pm$0.013 & 0.758$\pm$0.010 & 0.702$\pm$0.013 & 0.417$\pm$0.021 \\ \midrule
\multirow{6}{*}{EspGame5k} & $ r=1,p=0.3$  & \textbf{0.425$\pm$0.011} &   0.415$\pm$0.010 & 0.387$\pm$0.007 & 0.397$\pm$0.008 & 0.267$\pm$0.011 & 0.375$\pm$0.008 & 0.346$\pm$0.007 & 0.378$\pm$0.008 \\
 & $ r=1,p=0.7$  & \textbf{0.424$\pm$0.010} &   0.415$\pm$0.010 & 0.379$\pm$0.008 & 0.391$\pm$0.006 & 0.267$\pm$0.011 & 0.349$\pm$0.008 & 0.332$\pm$0.011 & 0.376$\pm$0.008 \\
 & $ r=2,p=0.3$  & \textbf{0.423$\pm$0.012} &   0.414$\pm$0.010 & 0.381$\pm$0.005 & 0.393$\pm$0.006 & 0.267$\pm$0.011 & 0.365$\pm$0.007 & 0.332$\pm$0.010 & 0.376$\pm$0.009 \\
 & $ r=2,p=0.7$  & \textbf{0.420$\pm$0.104} &   0.412$\pm$0.008 & 0.367$\pm$0.007 & 0.380$\pm$0.007 & 0.266$\pm$0.010 & 0.324$\pm$0.005 & 0.302$\pm$0.010 & 0.371$\pm$0.008 \\
 & $ r=3,p=0.3$  & \textbf{0.422$\pm$0.010} &   0.412$\pm$0.008 & 0.375$\pm$0.005 & 0.389$\pm$0.007 & 0.267$\pm$0.010 & 0.358$\pm$0.008 & 0.325$\pm$0.011 & 0.373$\pm$0.008 \\
 & $ r=3,p=0.7$  & \textbf{0.415$\pm$0.011} &   0.408$\pm$0.011 & 0.353$\pm$0.009 & 0.371$\pm$0.009 & 0.266$\pm$0.011 & 0.315$\pm$0.008 & 0.295$\pm$0.011 & 0.368$\pm$0.007 \\ \midrule
\multirow{6}{*}{Pascal} & $ r=1,p=0.3$  & \textbf{0.746$\pm$0.009} & 0.700$\pm$0.008 & 0.548$\pm$0.013 &   0.726$\pm$0.009 & 0.487$\pm$0.013 & 0.719$\pm$0.011 & 0.437$\pm$0.010 & 0.471$\pm$0.011 \\
 & $ r=1,p=0.7$  & \textbf{0.741$\pm$0.010} & 0.698$\pm$0.009 & 0.507$\pm$0.008 & 0.700$\pm$0.010 & 0.486$\pm$0.019 &   0.704$\pm$0.012 & 0.429$\pm$0.011 & 0.466$\pm$0.010 \\
 & $ r=2,p=0.3$  & \textbf{0.742$\pm$0.009} & 0.698$\pm$0.010 & 0.521$\pm$0.011 &   0.713$\pm$0.012 & 0.495$\pm$0.013 & 0.702$\pm$0.013 & 0.433$\pm$0.010 & 0.469$\pm$0.011 \\
 & $ r=2,p=0.7$  & \textbf{0.734$\pm$0.009} &   0.693$\pm$0.009 & 0.476$\pm$0.009 & 0.676$\pm$0.010 & 0.476$\pm$0.015 & 0.683$\pm$0.010 & 0.428$\pm$0.010 & 0.458$\pm$0.009 \\
 & $ r=3,p=0.3$  & \textbf{0.743$\pm$0.010} & 0.698$\pm$0.009 & 0.505$\pm$0.011 &   0.705$\pm$0.011 & 0.481$\pm$0.011 & 0.697$\pm$0.009 & 0.431$\pm$0.009 & 0.465$\pm$0.010 \\
 & $ r=3,p=0.7$  & \textbf{0.725$\pm$0.007} &   0.688$\pm$0.009 & 0.443$\pm$0.011 & 0.644$\pm$0.010 & 0.461$\pm$0.012 & 0.661$\pm$0.013 & 0.429$\pm$0.010 & 0.452$\pm$0.010 \\ \midrule
\multirow{6}{*}{Mirflickr5k} & $ r=1,p=0.3$  & \textbf{0.694$\pm$0.015} & 0.649$\pm$0.014 & 0.617$\pm$0.014 &   0.654$\pm$0.014 & 0.444$\pm$0.010 & 0.617$\pm$0.011 & 0.417$\pm$0.013 & 0.524$\pm$0.016 \\
 & $ r=1,p=0.7$  & \textbf{0.692$\pm$0.015} & 0.649$\pm$0.013 & 0.613$\pm$0.014 &   0.653$\pm$0.012 & 0.441$\pm$0.010 & 0.581$\pm$0.013 & 0.417$\pm$0.013 & 0.522$\pm$0.015 \\
 & $ r=2,p=0.3$  & \textbf{0.692$\pm$0.016} &   0.650$\pm$0.013 & 0.614$\pm$0.013 & 0.649$\pm$0.013 & 0.444$\pm$0.009 & 0.605$\pm$0.013 & 0.417$\pm$0.013 & 0.523$\pm$0.014 \\
 & $ r=2,p=0.7$  & \textbf{0.688$\pm$0.015} &   0.647$\pm$0.013 & 0.606$\pm$0.014 & 0.646$\pm$0.012 & 0.442$\pm$0.010 & 0.555$\pm$0.010 & 0.418$\pm$0.012 & 0.517$\pm$0.017 \\
 & $ r=3,p=0.3$  & \textbf{0.690$\pm$0.017} &   0.649$\pm$0.013 & 0.611$\pm$0.013 & 0.647$\pm$0.013 & 0.444$\pm$0.011 & 0.598$\pm$0.011 & 0.418$\pm$0.013 & 0.522$\pm$0.016 \\
 & $ r=3,p=0.7$  & \textbf{0.683$\pm$0.013} &   0.646$\pm$0.012 & 0.598$\pm$0.015 & 0.638$\pm$0.012 & 0.441$\pm$0.011 & 0.543$\pm$0.010 & 0.419$\pm$0.013 & 0.513$\pm$0.015 \\ \bottomrule\bottomrule
\end{tabular}%
}
\label{ap}
\end{table*}


\section{EXPERIMENTS}
\subsection{Experimental Setup}
\textbf{Datasets:} Following \cite{chen2020multi, wu2020feature, xu2022multi}, five popular real-world multi-view multi-label datasets from different domains are selected for the experiments, i.e., \texttt{Emotions} \cite{trohidis2008multi}, \texttt{Yeast} \cite{elisseeff2001kernel}, \texttt{Pascal} \cite{everingham2010pascal}, \texttt{EspGame5k} \cite{von2004labeling} and \texttt{Mirflickr5k} \cite{huiskes2008mir}. Table~\ref{ds} summarizes the characteristics of the multi-view partial multi-label datasets. More details on the datasets can be found in the Appendix.


This paper employs a widely-used approach \cite{cour2011learning, liu2012conditional, yu2016maximum} to construct MVPML examples through the introduction of candidate labels. The generation process, as shown in Table \ref{ds}, relies on two controlling parameters, $p$ and $r$. Here, $p \in (0,1)$ is responsible for controlling the proportion of partially labeled samples within the dataset, while $r \in \mathbb{N}$ is employed to regulate the quantity of false-positive labels in the candidate label set. Let $(\boldsymbol{x}, \widetilde{Y})$ represent a multi-view multi-label sample, where $\widetilde{Y}$ denotes the ground-truth label set. To construct an MVPML sample $(\boldsymbol{x}, Y)$, $r$ false-positive labels $\Delta_r \subseteq \mathcal{Y} ~\backslash ~\widetilde{Y}$ are randomly inserted into $\widetilde{Y}$, i.e., $Y = \widetilde{Y} \cup \Delta_r$. This construction process is executed using six distinct parameter control settings, with $p \in \{0.3,0.7\}$ and $r \in \{1,2,3\}$ for each real-world dataset in Table \ref{ds}.

\textbf{Comparing Algorithms:}
The performance evaluation of \textsc{Vadis} involves a comprehensive comparison with various state-of-the-art approaches. This comparative analysis considers multiple methodologies, each meticulously selected based on recommendations provided in the pertinent literature:
\begin{itemize}[leftmargin=*]
    \item \textsc{Glade} \cite{xu2022multi} where the latent label distribution is exploited from the candidate labels via the graph-fusion-based incorporation of the topological structure in the feature space to induce a predictive model [recommended configuration: $\lambda=0.001$, $\gamma_1=5$, $\gamma_2=20$].
    \item \textsc{Fiman} \cite{wu2020feature} where the aggregated manifold structure is learned to disambiguate the candidate label set [recommended configuration: $t_d=0.4$, $t_p=0.6$, $k=10$, and $\eta=1$].
    \item \textsc{Gradis} \cite{chen2020multi} where the graph-based label propagation is adopted to learn multi-view representation and disambiguate the candidate label set [recommended configuration: $\eta=0.1$, $\alpha=0.95$, $k=10$].
    \item \textsc{Pml-lrs} \cite{sun2019partial} where the sparse and low-rank decomposition strategy is employed to learn from partial multi-label examples [recommended configuration: $\beta=1$, $\gamma=0.1$, $\eta=1$].
    \item \textsc{Lsamml} \cite{zhang2018latent} where the Hilbert-Schmidt Independence Criterion is used for multi-view representation [recommended configuration: grid search for $\gamma, \beta \in \left\{10^{-2}, 10^{-1}, \ldots, 10^2\right\}$].
    \item \textsc{Fpml} \cite{yu2018feature} where noisy-label estimation is utilized to learn from partial multi-label samples via low-rank approximation [recommended configuration: $\lambda_1=1$, $\lambda_2=1$, $\lambda_3=10$].
    \item \textsc{F2l2if} \cite{zhu2015block} where the block-row sparse regularization is adopted to learn a shared subspace [recommended configuration: grid search for $\lambda_1, \lambda_2 \in \left\{10^{-2}, 10^{-1}, \ldots, 10^2\right\}$].
\end{itemize}

We employ five popular multi-label metrics \cite{zhang2013review} for performance evaluation, i.e., \textit{Ranking Loss}, \textit{Coverage}, \textit{Average Precision}, \textit{Hamming Loss} and \textit{One Error}. For \textit{Average Precision}, larger values are better, while for the other metrics, smaller values are better. Ten-fold cross-validation is performed on every dataset, and the mean $\pm$ std values are reported for each comparing approach. Details on the experimental settings can be found in the Appendix.






\begin{table}[t]
\centering
\caption{Friedman statistics $F_F$ according to each evaluation metric and the critical value at the significance level of 0.05.}
\resizebox{0.68\columnwidth}{!}{
\begin{tabular}{@{}ccc@{}}
\toprule\toprule
~~Evaluation metric & $F_F$ & Critical value~~ \\ \midrule
$\textit{Ranking Loss}$ & 54.3549 & \multirow{5}{*}{2.0549} \\
$\textit {Coverage}$ & 61.9823 &  \\
$\textit {Average Precision}$ & 95.7738 &  \\
$\textit {Hamming Loss}$ & 23.8863 &  \\
$\textit {One Error}$ & 117.9339 &  \\ \bottomrule\bottomrule
\end{tabular}
}

\label{friedman}
\end{table}

\begin{figure*}[htbp]
  \centering
  \begin{subfigure}[b]{0.3\textwidth}
    \includegraphics[width=\textwidth]{AP.pdf}
    \caption{\textit{Average Precision}}
  \end{subfigure}
  \hfill
  \begin{subfigure}[b]{0.3\textwidth}
    \centering
    \includegraphics[width=\textwidth]{CV.pdf}
    \caption{\textit{Coverage}}
  \end{subfigure}
  \hfill
  \begin{subfigure}[b]{0.3\textwidth}
    \centering
    \includegraphics[width=\textwidth]{RK.pdf}
    \caption{\textit{Ranking Loss}}
  \end{subfigure}
  \begin{subfigure}[b]{0.3\textwidth}
    \centering
    \includegraphics[width=\textwidth]{HM.pdf}
    \caption{\textit{Hamming Loss}}
  \end{subfigure}
  ~~~~~
  \begin{subfigure}[b]{0.3\textwidth}
    \centering
    \includegraphics[width=\textwidth]{OE.pdf}
    \caption{\textit{One Error}}
  \end{subfigure}
  \caption{Comparison of \textsc{Vadis} (control algorithm) with other comparing approaches using the \textit{Bonferroni-Dunn test}. In the CD diagrams, approaches not connected to \textsc{Vadis} are considered to perform significantly differently from the control algorithm (CD=1.7013 at the significance level of 0.05).}
  \label{cd}
\end{figure*}

\begin{figure}[t]
\centering
    \begin{subfigure}{0.46\linewidth}
      \centering
      \includegraphics[width=\linewidth]{g1.pdf}
      \caption{Varying $\gamma_1$}
      \label{delta}
      \end{subfigure}
    \begin{subfigure}{0.46\linewidth}
      \centering
      \includegraphics[width=\linewidth]{g2.pdf}
      \caption{Varying $\gamma_2$}
      \label{sensi:gamma2}
      \end{subfigure}

\caption{Performance sensitivity analysis on \texttt{Emotions} and \texttt{Yeast} (with controlling configuration: $p = 0.3, r = 1$): (a) Performance of $\textsc{Vadis}$ changes as $\gamma_1$ increases from 0.001 to 1000 ($\gamma_2$ is fixed as 15); (b) Performance of $\textsc{Vadis}$ changes as $\gamma_2$ increases from 10 to 70 ($\gamma_1$ is fixed as 5).}
 \label{sensi}
\end{figure}

\subsection{Experimental Results}
The experimental results are displayed in Tables~\ref{ranking loss} and~\ref{ap}. For \textit{Ranking Loss}, \textsc{Vadis} demonstrates outstanding performance across all datasets. Regarding \textit{Average Precision}, our method achieves the best performance in 27 out of 30 settings. Specifically, for \textit{Average Precision}, it outperforms all other approaches on the \texttt{EspGame5k}, \texttt{Pascal} and \texttt{Mirflickr5k} datasets. More results on other metrics can be found in the Appendix.




To analyze the relative performance among the comparing approaches in a systematic way, the Friedman test \cite{demvsar2006statistical} is employed for performance comparison. Table~\ref{friedman} reports the Friedman statistics $F_F$ and the corresponding critical value in terms of each evaluation metric. For every metric, $F_F$ exceeds the critical value, so the null hypothesis of equal performance is rejected at the significance level of 0.05. Accordingly, the post-hoc \textit{Bonferroni-Dunn test} \cite{demvsar2006statistical} is performed to compare the relative performance among the comparing approaches. Here, \textsc{Vadis} is treated as the control approach, where the difference in average rank (over all datasets) between \textsc{Vadis} and one comparing approach is calibrated with the critical difference (CD).
The critical difference (CD) diagrams \cite{demvsar2006statistical} are presented in Figure~\ref{cd}, where the average rank of each approach is marked along the axis (the smaller the better). Based on the results, the following observations can be made:
\begin{itemize}[leftmargin=*]
    \item As illustrated in Figure \ref{cd}, \textsc{Vadis} demonstrates superior performance compared to other comparative approaches. Furthermore, \textsc{Vadis} achieves the lowest average rank across all evaluation metrics.

    \item When comparing \textsc{Vadis} to the MVPML approach \textsc{Gradis}, \textsc{Vadis} exhibits superior performance in terms of \textit{Ranking Loss}, \textit{Hamming Loss}, \textit{Average Precision}, and \textit{Coverage}. Similarly, when compared to the MVPML approach \textsc{Fiman}, \textsc{Vadis} achieves superior results in \textit{Average Precision}, \textit{Hamming Loss}, and \textit{Coverage}. Furthermore, \textsc{Vadis} outperforms the MVPML approach \textsc{Glade} in terms of \textit{Hamming Loss} and \textit{One Error}.

    \item When compared to the two degenerated MVML approaches, \textsc{Lsamml} and \textsc{F2l2if}, \textsc{Vadis} consistently achieves superior performance across all evaluation metrics. Moreover, \textsc{Vadis} surpasses the two degenerated PML approaches, \textsc{Pml-lrs} and \textsc{Fpml}, in terms of all evaluation metrics.

    \item As indicated in Tables~\ref{ranking loss} and~\ref{ap}, the performance of \textsc{Vadis} over competing techniques remains consistent across various controlling parameter choices for $p$ and $r$.

    % \item As indicated in Tables 2 and 3, the performance of \textsc{Vadis} over competing techniques remains consistent across various controlling parameter choices for $p$ and $r$.

    % \item As indicated in Tables 2 and 3, the performance of \textsc{Vadis} over competing techniques remains consistent across various controlling parameter choices for $p$ and $r$.
\end{itemize}




\begin{table}[t]
\centering
\caption{The reliability of the inconsistent representations (with controlling configuration: $p = 0.7, r = 3$) on \texttt{Emotions} and \texttt{Yeast}. The
best performance is marked in bold ($\downarrow / \uparrow$ indicates the smaller / larger, the better).}
\resizebox{0.8\columnwidth}{!}{%
\begin{tabular}{@{}ccccc@{}}
\toprule\toprule
\multirow{2}{*}{Evaluation metric} & \multicolumn{2}{c}{Emotions} & \multicolumn{2}{c}{Yeast} \\ \cmidrule(l){2-5} 
 & \textsc{Vadis} & \textsc{Vadis-c} & \multicolumn{1}{c}{\textsc{Vadis}} & \multicolumn{1}{c}{\textsc{Vadis-c}} \\ \midrule
$\textit{Ranking Loss} \downarrow$ & \textbf{0.186} & 0.220 & \textbf{0.176} & 0.246 \\
$\textit {Coverage}\downarrow$ & \textbf{0.319} & 0.351 & \textbf{0.472} & 0.564 \\
$\textit {Average Precision} \uparrow$ & \textbf{0.779} & 0.744 & \textbf{0.759} & 0.670 \\
$\textit {Hamming Loss}\downarrow$ & \textbf{0.300} & 0.349 & \textbf{0.226} & 0.306 \\
$\textit {One Error}\downarrow$ & \textbf{0.230} & 0.248 & \textbf{0.206} & 0.240 \\ \bottomrule\bottomrule
\end{tabular}%
}
\label{ablation}
\end{table}

\subsection{Sensitivity Analysis}
In this section, we analyze the sensitivity of \textsc{Vadis}'s performance with respect to its parameters. Figure~\ref{sensi} shows the performance of \textsc{Vadis} across different parameter settings on the \texttt{Emotions} and \texttt{Yeast} datasets, measured in terms of \textit{Average Precision}. Similar patterns of performance are observed on the other datasets as well.

One notable observation is the consistent stability of  \textsc{Vadis}'s performance across a broad spectrum of parameter values. This stability is a crucial feature as it enables the robust application of  \textsc{Vadis} without the necessity for fine-tuning its parameters. Consequently, this characteristic ensures the delivery of reliable classification results, enhancing the utility and effectiveness of  \textsc{Vadis} in practical applications.


\subsection{Ablation Study}
\textsc{Vadis} leverages the feature representation biases that underlie the generation of incorrect labels in the candidate label set, enabling the identification of true labels through the learning of view-specific-inconsistent representations for disambiguation. To show the reliability of the inconsistent representations, a variant of \textsc{Vadis} (dubbed \textsc{Vadis-c}) is investigated. Table~\ref{ablation} reports the detailed results of \textsc{Vadis} and \textsc{Vadis-c} in terms of each evaluation metric on \texttt{Emotions} and \texttt{Yeast}. These results substantiate the efficacy of the view-specific-inconsistent representations in \textsc{Vadis}.


In addition, since \textsc{Vadis} uses the local semantic similarity to align the global consistency representations, we conduct a further ablation experiment to examine its effect. \textsc{Vadis-s} refers to the variant obtained by removing the use of the local similarity matrix for aligning the global consistency representations. The detailed experimental results on \texttt{Emotions} and \texttt{Yeast} in terms of the five metrics are reported in Table~\ref{ablation-2}.

Additionally, we analyze the learning of false-positive labels by comparing the estimated false-positive labels $\mathbf{OQ}$ with the generated false-positive labels on \texttt{Emotions}. We use a vanilla variant of \textsc{Vadis} (dubbed \textsc{Vadis-o}) that eliminates the regularizer of $\mathbf{Q}$, and report the estimation of $\mathbf{OQ}$ in Table~\ref{ablation-3}.

\begin{table}[]
\centering
\caption{The effect of local semantic similarity aligning (with controlling configuration: $p = 0.7, r = 3$) on \texttt{Emotions} and \texttt{Yeast}. The
best performance is marked in bold ($\downarrow / \uparrow$ indicates the smaller / larger, the better).}
\resizebox{0.8\columnwidth}{!}{%
\begin{tabular}{@{}ccccc@{}}
\toprule\toprule
\multirow{2}{*}{Evaluation metric} & \multicolumn{2}{c}{Emotions} & \multicolumn{2}{c}{Yeast} \\ \cmidrule(l){2-5} 
 & \textsc{Vadis} & \textsc{Vadis-s} & \multicolumn{1}{c}{\textsc{Vadis}} & \multicolumn{1}{c}{\textsc{Vadis-s}} \\ \midrule
$\textit{Ranking Loss} \downarrow$ & \textbf{0.186} & 0.214 & \textbf{0.176} & 0.218 \\
$\textit {Coverage}\downarrow$ & \textbf{0.319} & 0.339 & \textbf{0.472} & 0.502 \\
$\textit {Average Precision} \uparrow$ & \textbf{0.779} & 0.757 & \textbf{0.759} & 0.734 \\
$\textit {Hamming Loss}\downarrow$ & \textbf{0.300} & 0.310 & \textbf{0.226} & 0.254 \\
$\textit {One Error}\downarrow$ & \textbf{0.230} & 0.242 & \textbf{0.206} & 0.222 \\ \bottomrule\bottomrule
\end{tabular}%
}
\label{ablation-2}
\end{table}



\begin{table}[t]
\centering
\caption{The estimation of false-positive labels on \texttt{Emotions} during iterations (with controlling configuration: $p = 0.7, r = 3$).}
\label{ablation-3}
\resizebox{0.9\columnwidth}{!}{%
\begin{tabular}{@{}ccccccccccc@{}}
\toprule\toprule
\multirow{2}{*}{\textbf{Method}} & \multirow{2}{*}{\textbf{Metrics}} & \multicolumn{9}{c}{Iteration} \\ \cmidrule(l){3-11} 
 &  & 1 & 5 & 10 & 15 & 20 & 25 & 30 & 35 & 40 \\ \midrule
\multirow{2}{*}{\textsc{Vadis}} & \textit{Hamming loss} & 0.265 & 0.263 & 0.268 & 0.266 & 0.265 & 0.265 & 0.263 & 0.261 & 0.261 \\ \cmidrule(l){2-11} 
 & \textit{Average precision} & 0.543 & 0.547 & 0.571 & 0.580 & 0.588 & 0.592 & 0.595 & 0.596 & 0.596 \\ \midrule
\multirow{2}{*}{\textsc{Vadis-o}} & \textit{Hamming loss} & 0.294 & 0.277 & 0.278 & 0.273 & 0.271 & 0.270 & 0.269 & 0.270 & 0.269 \\ \cmidrule(l){2-11} 
 & \textit{Average precision} & 0.589 & 0.573 & 0.571 & 0.570 & 0.572 & 0.570 & 0.571 & 0.570 & 0.570 \\ \bottomrule\bottomrule
\end{tabular}%
}
\end{table}


\section{Conclusion}

In this paper, we introduce a novel approach called \textsc{Vadis} to investigate feature representation biases by leveraging the characteristics of various views for identifying true labels. Specifically, we employ the global common representation, corresponding to the local similarity matrix in the semantic space, to estimate true labels using a low-rank mapping matrix. Additionally, for identifying incorrect labels, we recover the view-specific inconsistent representation based on the sparsity assumption. A substantial number of experiments have demonstrated the effectiveness of \textsc{Vadis}.


\begin{contributions} % will be removed in pdf for initial submission 
					  % (without ‘accepted’ option in \documentclass)
                      % so you can already fill it to test with the
                      % ‘accepted’ class option
    Briefly list author contributions. 
    This is a nice way of making clear who did what and to give proper credit.
    This section is optional.

    H.~Q.~Bovik conceived the idea and wrote the paper.
    Coauthor One created the code.
    Coauthor Two created the figures.
\end{contributions}

\begin{acknowledgements} % will be removed in pdf for initial submission,
						 % (without ‘accepted’ option in \documentclass)
                         % so you can already fill it to test with the
                         % ‘accepted’ class option
    Briefly acknowledge people and organizations here.

    \emph{All} acknowledgements go in this section.
\end{acknowledgements}

% References
\bibliography{uai2025-template}

\newpage

\onecolumn

\title{Supplementary Material of \\\textsc{Vadis:} Investigating Inter-View Representation Biases for Multi-View Partial Multi-Label Learning}
\maketitle


\appendix

\section{Appendix}
In this appendix, we provide additional details that supplement the main text.
\subsection{Details of Datasets}\label{app1}
The details of the datasets are as follows. For the \texttt{Emotions} dataset, the two views of each sample correspond to the rhythm features and timbre features of a piece of music. For the \texttt{EspGame5k} dataset, the four views of each sample correspond to the DenseHue, Gist, DenseSift, and HSV features of an image. In addition to the four views used by \texttt{EspGame5k}, the \texttt{Pascal} and \texttt{Mirflickr5k} datasets adopt the tag features to represent each sample. For the \texttt{Yeast} dataset, the two views of each sample correspond to the genetic expression and phylogeny profile of a gene.

Furthermore, the specific symbols are explained below:
\begin{itemize}[leftmargin=*]
    \item $|\mathcal{S}|$: shows the number of samples in each dataset.
    \item $V(\mathcal{S})$: shows the number and the details of views in each dataset.
    \item $V \operatorname{Dim}(\mathcal{S})$: shows the dimensionality of each view in each dataset.
    \item $C L(\mathcal{S})$: shows the number of class labels in each dataset.
    \item $LCard(\mathcal{S})$: shows the average number of ground-truth labels corresponding to each sample (i.e. label cardinality) in each dataset.
    \item $\textit{Domain}$: shows the domain associated with each view in each dataset.
    \item $\textit{Description}$: shows the feature description associated with each view in each dataset.
    
\end{itemize}

\subsection{Pseudo-code of \textsc{Vadis}}
The complete pseudo-code of the algorithm is shown in Algorithm~\ref{alg:algorithm}.
\begin{algorithm}[tb]

\caption{The pseudo-code of \textsc{Vadis}}
\label{alg:algorithm}
% \textbf{Input}: 
% \\
% $\mathcal{D}:$  the multi-view partial multi-label training set\\
% $\gamma_1, \gamma_2: $  the trade-off parameters in objective function Eq. (7)\\
% $S^v : $  the local similarity of the $v$-th view ($1 \leq v \leq V$)\\
% $\boldsymbol{x}: $ the unseen instance \\
% \textbf{Outputs}:\\
% $Y: \quad$ the predicted label set for $\boldsymbol{x}$\\
% \textbf{Process}: 
\begin{algorithmic}[1] %[1] enables line numbers
\REQUIRE 
the multi-view partial multi-label training set $\mathcal{D}$, the trade-off parameters $\gamma_1, \gamma_2$ in objective function Eq. (7), the local similarity of the $v$-th view $S^v$ ($1 \leq v \leq V$), the unseen instance $\boldsymbol{x}$;
\BlankLine 
\STATE Calculate the self-representation matrix $\mathbf{Z}^{v}$ by Eq. (1) and  initialize $\mathbf{C}^{(0)}, \mathbf{O}^{v(0)}, \mathbf{P}^{(0)}, \mathbf{Q}^{(0)}$ and $\mathbf{W}^{(0)}$;
\STATE \textbf{repeat}
\STATE$\quad$ Update $\mathbf{C}^{(t+1)}$ by solving problem Eq. (8) with the BFGS strategy;
\STATE $\quad$ Update $\mathbf{W}^{(t+1)}$ by solving problem Eq. (9) with the ridge regression problem;
\STATE $\quad$ Update $\mathbf{P}^{(t+1)}$ by solving problem Eq. (12) with the Alternating Direction Method of Multiplier;
\STATE $\quad$ Update $\mathbf{Q}^{(t+1)}$ by solving problem Eq. (15) with the Alternating Direction Method of Multiplier;
\STATE $\quad$ Update $\mathbf{O}^{v(t+1)}$ by solving problem Eq. (17) with the Alternating Direction Method of Multiplier;
\STATE $\quad$ $t = t + 1$;

\STATE \textbf{until} convergence
\STATE The final predictive model is obtained by setting $\mathbf{W}^*=$ $\mathbf{W}^{(t)}$;
\STATE Return a proper label set $Y$ according to Eq. (19).
\ENSURE
the predicted label set $Y$ for $\boldsymbol{x}$ 
\end{algorithmic}

\end{algorithm}


\subsection{Details of Experimental Settings}\label{app2}
The code implementation is based on PyTorch, and all the experiments are conducted on NVIDIA RTX 3090. 
In the experiments, we set the $K$-nearest neighbor parameter $K$ to 10. As for the trade-off parameters $\gamma_1$ and $\gamma_2$, we consistently fix them at 5 and 15, respectively, across all experiments. Moreover, the number of iterations for optimization in the experiments is fixed at 200. The stopping threshold for the optimization algorithm is set to $10^{-4}$. The pseudo-code for \textsc{Vadis} is shown in Algorithm 1, where the initialization of $\mathbf{P}$ and $\mathbf{Q}$ employs \texttt{torch.rand}, while the initialization of $\mathbf{W}$ and $\mathbf{C}$ adopts \texttt{torch.zeros}.
Ten-fold cross-validation is conducted on each dataset, and the mean and the standard deviation (mean $\pm$ std) values are recorded for each comparative approach. 

\begin{table*}
\centering
\caption{Predictive performance of each comparing method on five datasets in terms of \textit{Hamming
Loss} (mean $\pm$ std). The best performance is marked in bold (the smaller the better).}
\resizebox{0.9\textwidth}{!}{%
\begin{tabular}{@{}cccccccccc@{}}
\toprule
\multirow{2}{*}{Datasets} & \multirow{2}{*}{\begin{tabular}[c]{@{}c@{}}Controlling\\ Parameters\end{tabular}}  & \multicolumn{8}{c}{Comparing Approaches} \\ \cmidrule(l){3-10} 
                             &         & \textsc{Vadis}                     &\textsc{Glade} &\textsc{F2l2if}      & \textsc{Fiman}             & \textsc{Fpml}        & \textsc{Gradis}           & \textsc{Pml-lrs}       & \textsc{Lsamml}            \\ \midrule
\multirow{6}{*}{Emotions} &
  $r=1,p=0.3$ &
  \textbf{0.195$\pm$0.018} &
  0.207$\pm$0.017 &
  0.422$\pm$0.024 &
  0.228$\pm$0.019 &
  0.244$\pm$0.013 &
  0.218$\pm$0.016 &
  0.256$\pm$0.018 &
  0.214$\pm$0.022 \\
 & $r=1,p=0.7$ & \textbf{0.204$\pm$0.027} & 0.208$\pm$0.018 & 0.425$\pm$0.017 & 0.235$\pm$0.016 & 0.236$\pm$0.015 & 0.272$\pm$0.021          & 0.237$\pm$0.018 & 0.264$\pm$0.035 \\
 & $r=2,p=0.3$ & \textbf{0.193$\pm$0.022} & 0.206$\pm$0.022 & 0.428$\pm$0.022 & 0.227$\pm$0.017 & 0.233$\pm$0.016 & 0.258$\pm$0.019          & 0.243$\pm$0.017 & 0.235$\pm$0.024 \\
 & $r=2,p=0.7$ & \textbf{0.214$\pm$0.018} & 0.227$\pm$0.015 & 0.440$\pm$0.024 & 0.238$\pm$0.011 & 0.404$\pm$0.033 & 0.358$\pm$0.022          & 0.400$\pm$0.024 & 0.333$\pm$0.026 \\
 & $r=3,p=0.3$ & \textbf{0.199$\pm$0.017} & 0.212$\pm$0.016 & 0.430$\pm$0.025 & 0.233$\pm$0.019 & 0.250$\pm$0.017 & 0.290$\pm$0.037          & 0.236$\pm$0.018 & 0.263$\pm$0.020 \\
 & $r=3,p=0.7$ & \textbf{0.230$\pm$0.017} & 0.236$\pm$0.019 & 0.450$\pm$0.030 & 0.248$\pm$0.015 & 0.679$\pm$0.012 & 0.353$\pm$0.025          & 0.675$\pm$0.012 & 0.536$\pm$0.026 \\ \midrule
\multirow{6}{*}{Yeast} &
  $r=1,p=0.3$ &
  \textbf{0.203$\pm$0.010} &
  0.229$\pm$0.012 &
  0.310$\pm$0.009 &
  0.211$\pm$0.008 &
  0.232$\pm$0.008 &
  0.209$\pm$0.007 &
  0.236$\pm$0.007 &
  0.303$\pm$0.005 \\
 & $r=1,p=0.7$ & \textbf{0.204$\pm$0.010} & 0.234$\pm$0.010 & 0.314$\pm$0.010 & 0.211$\pm$0.007 & 0.232$\pm$0.008 & 0.235$\pm$0.007          & 0.253$\pm$0.007 & 0.303$\pm$0.004 \\
 & $r=2,p=0.3$ & \textbf{0.204$\pm$0.010} & 0.230$\pm$0.009 & 0.315$\pm$0.009 & 0.213$\pm$0.008 & 0.232$\pm$0.008 & 0.224$\pm$0.010          & 0.250$\pm$0.007 & 0.303$\pm$0.005 \\
 & $r=2,p=0.7$ & \textbf{0.206$\pm$0.009} & 0.233$\pm$0.012 & 0.317$\pm$0.008 & 0.211$\pm$0.008 & 0.236$\pm$0.011 & 0.273$\pm$0.011          & 0.377$\pm$0.008 & 0.303$\pm$0.005 \\
 & $r=3,p=0.3$ & \textbf{0.203$\pm$0.008} & 0.228$\pm$0.011 & 0.313$\pm$0.009 & 0.209$\pm$0.007 & 0.232$\pm$0.008 & 0.244$\pm$0.011          & 0.279$\pm$0.010 & 0.303$\pm$0.004 \\
 &
  $r=3,p=0.7$ &
  \textbf{0.206$\pm$0.011} &
  0.239$\pm$0.009 &
  0.317$\pm$0.009 &
  0.211$\pm$0.008 &
  0.254$\pm$0.010 &
  0.312$\pm$0.009 &
  0.590$\pm$0.007 &
  0.303$\pm$0.004 \\ \midrule
\multirow{6}{*}{EspGame5k} &
  $r=1,p=0.3$ &
  \textbf{0.053$\pm$0.001} &
  \textbf{0.053$\pm$0.001} &
  0.081$\pm$0.001 &
  0.111$\pm$0.002 &
  \textbf{0.053$\pm$0.001} &
  0.057$\pm$0.001 &
  \textbf{0.053$\pm$0.001} &
  0.056$\pm$0.002 \\
 & $r=1,p=0.7$ & \textbf{0.053$\pm$0.001} & \textbf{0.053$\pm$0.001} & 0.082$\pm$0.001 & 0.119$\pm$0.003 & \textbf{0.053$\pm$0.001} & 0.063$\pm$0.001          & \textbf{0.053$\pm$0.001} & 0.056$\pm$0.002 \\
 & $r=2,p=0.3$ & \textbf{0.053$\pm$0.001} & \textbf{0.053$\pm$0.001} & 0.082$\pm$0.001 & 0.116$\pm$0.003 & \textbf{0.053$\pm$0.001} & 0.061$\pm$0.001          & \textbf{0.053$\pm$0.001} & 0.056$\pm$0.002 \\
 & $r=2,p=0.7$ & \textbf{0.053$\pm$0.001} & \textbf{0.053$\pm$0.001} & 0.083$\pm$0.002 & 0.130$\pm$0.004 & \textbf{0.053$\pm$0.001} & 0.075$\pm$0.001          & \textbf{0.053$\pm$0.001} & 0.057$\pm$0.002 \\
 & $r=3,p=0.3$ & \textbf{0.053$\pm$0.001} & \textbf{0.053$\pm$0.001} & 0.083$\pm$0.001 & 0.118$\pm$0.003 & \textbf{0.053$\pm$0.001} & 0.067$\pm$0.001          & \textbf{0.053$\pm$0.001} & 0.056$\pm$0.002 \\
 & $r=3,p=0.7$ & \textbf{0.053$\pm$0.001} & \textbf{0.053$\pm$0.001} & 0.085$\pm$0.001 & 0.141$\pm$0.004 & \textbf{0.053$\pm$0.001} & 0.087$\pm$0.002          & \textbf{0.053$\pm$0.001} & 0.058$\pm$0.002 \\ \midrule
\multirow{6}{*}{Pascal} &
  $r=1,p=0.3$ &
  \textbf{0.051$\pm$0.002} &
  0.121$\pm$0.004 &
  0.174$\pm$0.003 &
  0.109$\pm$0.004 &
  0.067$\pm$0.001 &
  \textbf{0.051$\pm$0.001} &
  0.083$\pm$0.007 &
  0.074$\pm$0.001 \\
 & $r=1,p=0.7$ & \textbf{0.052$\pm$0.002}          & 0.129$\pm$0.005 & 0.176$\pm$0.002 & 0.132$\pm$0.003 & 0.067$\pm$0.002 & \textbf{0.052$\pm$0.002} & 0.082$\pm$0.007 & 0.075$\pm$0.001 \\
 & $r=2,p=0.3$ & \textbf{0.052$\pm$0.002}          & 0.126$\pm$0.005 & 0.175$\pm$0.002 & 0.121$\pm$0.005 & 0.067$\pm$0.001 & \textbf{0.052$\pm$0.001} & 0.083$\pm$0.007 & 0.075$\pm$0.001 \\
 & $r=2,p=0.7$ & \textbf{0.053$\pm$0.002}          & 0.143$\pm$0.005 & 0.177$\pm$0.002 & 0.161$\pm$0.005 & 0.068$\pm$0.001 & \textbf{0.053$\pm$0.001} & 0.087$\pm$0.007 & 0.077$\pm$0.001 \\
 & $r=3,p=0.3$ & \textbf{0.053$\pm$0.002}          & 0.131$\pm$0.004 & 0.176$\pm$0.002 & 0.127$\pm$0.005 & 0.067$\pm$0.001 & \textbf{0.053$\pm$0.001} & 0.084$\pm$0.006 & 0.076$\pm$0.001 \\
 & $r=3,p=0.7$ & \textbf{0.055$\pm$0.002}          & 0.155$\pm$0.005 & 0.178$\pm$0.002 & 0.191$\pm$0.009 & 0.069$\pm$0.001 & \textbf{0.055$\pm$0.001} & 0.108$\pm$0.009 & 0.080$\pm$0.001 \\ \midrule
\multirow{6}{*}{Mirflickr5k} &
  $r=1,p=0.3$ &
  \textbf{0.107$\pm$0.003} &
  0.163$\pm$0.006 &
  0.121$\pm$0.002 &
  0.112$\pm$0.003 &
  0.124$\pm$0.004 &
  0.111$\pm$0.004 &
  0.186$\pm$0.004 &
  0.125$\pm$0.004 \\
 & $r=1,p=0.7$ & \textbf{0.108$\pm$0.004} & 0.167$\pm$0.006 & 0.122$\pm$0.003 & 0.114$\pm$0.003 & 0.124$\pm$0.004 & 0.122$\pm$0.003          & 0.214$\pm$0.003 & 0.126$\pm$0.004 \\
 & $r=2,p=0.3$ & \textbf{0.107$\pm$0.003} & 0.166$\pm$0.006 & 0.121$\pm$0.002 & 0.113$\pm$0.003 & 0.124$\pm$0.004 & 0.120$\pm$0.004          & 0.208$\pm$0.003 & 0.125$\pm$0.004 \\
 & $r=2,p=0.7$ & \textbf{0.110$\pm$0.004} & 0.170$\pm$0.006 & 0.122$\pm$0.002 & 0.117$\pm$0.003 & 0.124$\pm$0.004 & 0.142$\pm$0.004          & 0.271$\pm$0.003 & 0.127$\pm$0.004 \\
 & $r=3,p=0.3$ & \textbf{0.109$\pm$0.003} & 0.166$\pm$0.006 & 0.122$\pm$0.003 & 0.114$\pm$0.003 & 0.124$\pm$0.004 & 0.129$\pm$0.005          & 0.231$\pm$0.002 & 0.126$\pm$0.004 \\
 & $r=3,p=0.7$ & \textbf{0.112$\pm$0.004} & 0.174$\pm$0.006 & 0.124$\pm$0.002 & 0.120$\pm$0.003 & 0.124$\pm$0.004 & 0.162$\pm$0.003          & 0.353$\pm$0.006 & 0.129$\pm$0.004 \\ \bottomrule
\end{tabular}%
}
\label{hm}
\end{table*}


\subsection{More Results of MVPML Datasets}\label{app3}
Tables~\ref{hm}, \ref{coverage}, and~\ref{oe} show the predictive performance of \textsc{Vadis} and the other comparing methods on five datasets in terms of the other evaluation metrics (i.e., \textit{Hamming Loss}, \textit{Coverage}, and \textit{One Error}). The best performance is marked in bold (the smaller the better).


 Compared with the other methods in terms of \textit{Hamming Loss}, our approach delivers comparable performance across all datasets. Similarly, in the CD Figures (b) and (d) in our paper, both the \textit{Coverage} and \textit{Hamming Loss} metrics reflect the excellent performance of \textsc{Vadis}. Regarding \textit{One Error}, \textsc{Vadis} exhibits strong performance on the \texttt{EspGame5k}, \texttt{Pascal}, and \texttt{Mirflickr5k} datasets, and in CD Figure (e) in the paper, our method is positioned furthest to the right, indicating overall superior performance compared to the other approaches.

\begin{table*}[t]
\centering
\caption{Predictive performance of each comparing method on five datasets in terms of \textit{Coverage} (mean $\pm$ std). The best performance is marked in bold (the smaller the better).}
\resizebox{0.9\textwidth}{!}{%
\begin{tabular}{@{}cccccccccc@{}}
\toprule
\multirow{2}{*}{Datasets} & \multirow{2}{*}{\begin{tabular}[c]{@{}c@{}}Controlling\\ Parameters\end{tabular}}  & \multicolumn{8}{c}{Comparing Approaches} \\ \cmidrule(l){3-10} 
                             &         & \textsc{Vadis}                     &\textsc{Glade} &\textsc{F2l2if}      & \textsc{Fiman}             & \textsc{Fpml}        & \textsc{Gradis}           & \textsc{Pml-lrs}       & \textsc{Lsamml}            \\ \midrule
\multirow{6}{*}{Emotions} &
  $r=1,p=0.3$ &
  \textbf{0.284$\pm$0.023} &
  0.300$\pm$0.023 &
  0.357$\pm$0.029 &
  0.314$\pm$0.026 &
  0.350$\pm$0.031 &
  0.372$\pm$0.032 &
  0.346$\pm$0.028 &
  0.311$\pm$0.024 \\
 & $r=1,p=0.7$ & \textbf{0.291$\pm$0.022} & 0.300$\pm$0.023          & 0.358$\pm$0.021 & 0.323$\pm$0.021 & 0.350$\pm$0.027 & 0.494$\pm$0.030 & 0.345$\pm$0.025 & 0.340$\pm$0.043 \\
 & $r=2,p=0.3$ & \textbf{0.288$\pm$0.023} & 0.308$\pm$0.025          & 0.364$\pm$0.027 & 0.315$\pm$0.026 & 0.356$\pm$0.032 & 0.405$\pm$0.022 & 0.356$\pm$0.028 & 0.323$\pm$0.025 \\
 & $r=2,p=0.7$ & \textbf{0.313$\pm$0.030} & 0.324$\pm$0.028          & 0.385$\pm$0.024 & 0.335$\pm$0.020 & 0.370$\pm$0.026 & 0.593$\pm$0.029 & 0.384$\pm$0.034 & 0.344$\pm$0.030 \\
 & $r=3,p=0.3$ & \textbf{0.284$\pm$0.025} & 0.307$\pm$0.027          & 0.363$\pm$0.030 & 0.313$\pm$0.026 & 0.359$\pm$0.029 & 0.432$\pm$0.047 & 0.356$\pm$0.031 & 0.323$\pm$0.025 \\
 & $r=3,p=0.7$ & \textbf{0.319$\pm$0.024} & 0.320$\pm$0.019          & 0.400$\pm$0.025 & 0.321$\pm$0.023 & 0.394$\pm$0.018 & 0.655$\pm$0.030 & 0.430$\pm$0.020 & 0.352$\pm$0.025 \\ \midrule
\multirow{6}{*}{Yeast} &
  $r=1,p=0.3$ &
  \textbf{0.451$\pm$0.014} &
  0.455$\pm$0.013 &
  0.644$\pm$0.012 &
  0.481$\pm$0.016 &
  0.485$\pm$0.012 &
  0.516$\pm$0.014 &
  0.492$\pm$0.012 &
  0.747$\pm$0.016 \\
 & $r=1,p=0.7$ & \textbf{0.460$\pm$0.015} & \textbf{0.460$\pm$0.013} & 0.648$\pm$0.013 & 0.489$\pm$0.016 & 0.486$\pm$0.013 & 0.606$\pm$0.008 & 0.492$\pm$0.012 & 0.748$\pm$0.023 \\
 & $r=2,p=0.3$ & \textbf{0.458$\pm$0.014} & 0.461$\pm$0.013          & 0.646$\pm$0.012 & 0.486$\pm$0.016 & 0.485$\pm$0.013 & 0.545$\pm$0.018 & 0.492$\pm$0.012 & 0.750$\pm$0.020 \\
 & $r=2,p=0.7$ & 0.471$\pm$0.013          & \textbf{0.460$\pm$0.012} & 0.655$\pm$0.012 & 0.489$\pm$0.020 & 0.487$\pm$0.013 & 0.671$\pm$0.013 & 0.494$\pm$0.012 & 0.748$\pm$0.018 \\
 & $r=3,p=0.3$ & \textbf{0.462$\pm$0.015} & \textbf{0.462$\pm$0.014} & 0.646$\pm$0.013 & 0.484$\pm$0.018 & 0.484$\pm$0.013 & 0.561$\pm$0.021 & 0.490$\pm$0.012 & 0.745$\pm$0.015 \\
 & $r=3,p=0.7$ & \textbf{0.472$\pm$0.014} & \textbf{0.472$\pm$0.013} & 0.649$\pm$0.013 & 0.488$\pm$0.017 & 0.486$\pm$0.013 & 0.708$\pm$0.015 & 0.487$\pm$0.012 & 0.752$\pm$0.018 \\ \midrule
\multirow{6}{*}{EspGame5k} &
  $r=1,p=0.3$ &
  \textbf{0.379$\pm$0.008} &
  0.380$\pm$0.010 &
  0.456$\pm$0.011 &
  0.446$\pm$0.011 &
  0.526$\pm$0.012 &
  0.482$\pm$0.011 &
  0.462$\pm$0.012 &
  0.444$\pm$0.007 \\
 & $r=1,p=0.7$ & \textbf{0.383$\pm$0.007} & 0.387$\pm$0.009          & 0.464$\pm$0.010 & 0.453$\pm$0.009 & 0.527$\pm$0.012 & 0.569$\pm$0.010 & 0.472$\pm$0.013 & 0.445$\pm$0.005 \\
 & $r=2,p=0.3$ & \textbf{0.381$\pm$0.008} & 0.385$\pm$0.010          & 0.461$\pm$0.011 & 0.453$\pm$0.009 & 0.527$\pm$0.013 & 0.521$\pm$0.012 & 0.469$\pm$0.015 & 0.450$\pm$0.009 \\
 & $r=2,p=0.7$ & \textbf{0.396$\pm$0.009} & 0.400$\pm$0.009          & 0.481$\pm$0.012 & 0.470$\pm$0.014 & 0.527$\pm$0.013 & 0.657$\pm$0.004 & 0.484$\pm$0.013 & 0.460$\pm$0.009 \\
 & $r=3,p=0.3$ & \textbf{0.387$\pm$0.011} & 0.391$\pm$0.011          & 0.468$\pm$0.012 & 0.456$\pm$0.011 & 0.526$\pm$0.012 & 0.544$\pm$0.008 & 0.477$\pm$0.011 & 0.449$\pm$0.004 \\
 & $r=3,p=0.7$ & \textbf{0.403$\pm$0.012} & 0.405$\pm$0.011          & 0.482$\pm$0.013 & 0.471$\pm$0.015 & 0.527$\pm$0.016 & 0.698$\pm$0.013 & 0.486$\pm$0.012 & 0.459$\pm$0.008 \\ \midrule
\multirow{6}{*}{Pascal} &
  $r=1,p=0.3$ &
  \textbf{0.129$\pm$0.005} &
  0.132$\pm$0.003 &
  0.277$\pm$0.010 &
  0.171$\pm$0.006 &
  0.343$\pm$0.011 &
  0.152$\pm$0.010 &
  0.387$\pm$0.007 &
  0.302$\pm$0.007 \\
 & $r=1,p=0.7$ & \textbf{0.134$\pm$0.005} & 0.135$\pm$0.005          & 0.297$\pm$0.009 & 0.188$\pm$0.008 & 0.341$\pm$0.014 & 0.164$\pm$0.008 & 0.389$\pm$0.010 & 0.312$\pm$0.009 \\
 & $r=2,p=0.3$ & \textbf{0.132$\pm$0.005} & 0.134$\pm$0.004          & 0.290$\pm$0.013 & 0.181$\pm$0.006 & 0.337$\pm$0.012 & 0.165$\pm$0.010 & 0.388$\pm$0.011 & 0.307$\pm$0.007 \\
 & $r=2,p=0.7$ & \textbf{0.140$\pm$0.007}          & 0.142$\pm$0.006 & 0.317$\pm$0.009 & 0.204$\pm$0.008 & 0.348$\pm$0.013 & 0.184$\pm$0.005 & 0.390$\pm$0.010 & 0.327$\pm$0.007 \\
 & $r=3,p=0.3$ & \textbf{0.135$\pm$0.005} & \textbf{0.135$\pm$0.005}          & 0.302$\pm$0.011 & 0.186$\pm$0.007 & 0.343$\pm$0.014 & 0.166$\pm$0.008 & 0.391$\pm$0.010 & 0.312$\pm$0.006 \\
 & $r=3,p=0.7$ & \textbf{0.146$\pm$0.009}          & 0.148$\pm$0.007 & 0.337$\pm$0.010 & 0.219$\pm$0.008 & 0.355$\pm$0.014 & 0.197$\pm$0.011 & 0.390$\pm$0.007 & 0.332$\pm$0.009 \\ \midrule
\multirow{6}{*}{Mirflickr5k} &
  $r=1,p=0.3$ &
  \textbf{0.304$\pm$0.016} &
  0.310$\pm$0.016 &
  0.356$\pm$0.016 &
  0.396$\pm$0.020 &
  0.419$\pm$0.015 &
  0.412$\pm$0.015 &
  0.438$\pm$0.015 &
  0.418$\pm$0.018 \\
 & $r=1,p=0.7$ & \textbf{0.310$\pm$0.016} & 0.314$\pm$0.017          & 0.363$\pm$0.014 & 0.397$\pm$0.019 & 0.421$\pm$0.014 & 0.534$\pm$0.008 & 0.436$\pm$0.015 & 0.423$\pm$0.017 \\
 & $r=2,p=0.3$ & \textbf{0.310$\pm$0.014} & 0.313$\pm$0.015          & 0.362$\pm$0.015 & 0.401$\pm$0.018 & 0.420$\pm$0.015 & 0.458$\pm$0.016 & 0.437$\pm$0.015 & 0.423$\pm$0.017 \\
 & $r=2,p=0.7$ & \textbf{0.319$\pm$0.017} & \textbf{0.319$\pm$0.016}          & 0.378$\pm$0.014 & 0.405$\pm$0.017 & 0.421$\pm$0.015 & 0.621$\pm$0.015 & 0.437$\pm$0.014 & 0.434$\pm$0.017 \\
 & $r=3,p=0.3$ & 0.322$\pm$0.016 & \textbf{0.316$\pm$0.016}         & 0.367$\pm$0.014 & 0.405$\pm$0.020 & 0.420$\pm$0.014 & 0.478$\pm$0.022 & 0.436$\pm$0.015 & 0.426$\pm$0.015 \\
 & $r=3,p=0.7$ & 0.326$\pm$0.015          & \textbf{0.323$\pm$0.015} & 0.389$\pm$0.016 & 0.408$\pm$0.021 & 0.422$\pm$0.014 & 0.663$\pm$0.018 & 0.435$\pm$0.015 & 0.441$\pm$0.016 \\ \bottomrule
\end{tabular}%
}
\label{coverage}
\end{table*}


\begin{table*}
\centering
\caption{Predictive performance of each comparing method on five datasets in terms of \textit{One Error} (mean $\pm$ std). The best performance is marked in bold (the smaller the better).}
\resizebox{0.9\textwidth}{!}{%
\begin{tabular}{@{}cccccccccc@{}}
\toprule
\multirow{2}{*}{Datasets} & \multirow{2}{*}{\begin{tabular}[c]{@{}c@{}}Controlling\\ Parameters\end{tabular}}  & \multicolumn{8}{c}{Comparing Approaches} \\ \cmidrule(l){3-10} 
                             &         & \textsc{Vadis}                     &\textsc{Glade} &\textsc{F2l2if}      & \textsc{Fiman}             & \textsc{Fpml}        & \textsc{Gradis}           & \textsc{Pml-lrs}       & \textsc{Lsamml}            \\ \midrule
\multirow{6}{*}{Emotions} &
  $r=1,p=0.3$ &
  \textbf{0.231$\pm$0.057} &
  0.273$\pm$0.056 &
  0.396$\pm$0.054 &
  0.271$\pm$0.060 &
  0.340$\pm$0.046 &
  0.238$\pm$0.048 &
  0.368$\pm$0.036 &
  0.293$\pm$0.057 \\
 & $r=1,p=0.7$ & 0.258$\pm$0.076          & 0.314$\pm$0.057 & 0.391$\pm$0.050 & 0.284$\pm$0.065          & 0.337$\pm$0.049 & \textbf{0.219$\pm$0.064} & 0.363$\pm$0.024 & 0.341$\pm$0.078 \\
 & $r=2,p=0.3$ & \textbf{0.220$\pm$0.054}          & 0.309$\pm$0.060 & 0.400$\pm$0.052 & 0.276$\pm$0.052          & 0.345$\pm$0.045 & 0.229$\pm$0.055 & 0.362$\pm$0.035 & 0.323$\pm$0.059 \\
 & $r=2,p=0.7$ & 0.253$\pm$0.062          & 0.317$\pm$0.073 & 0.388$\pm$0.069 & 0.295$\pm$0.038          & 0.359$\pm$0.059 & \textbf{0.201$\pm$0.058} & 0.358$\pm$0.044 & 0.342$\pm$0.042 \\
 & $r=3,p=0.3$ & 0.244$\pm$0.067          & 0.304$\pm$0.051 & 0.398$\pm$0.055 & 0.283$\pm$0.039          & 0.373$\pm$0.050 & \textbf{0.176$\pm$0.049} & 0.354$\pm$0.030 & 0.300$\pm$0.038 \\
 & $r=3,p=0.7$ & 0.300$\pm$0.054          & 0.331$\pm$0.090 & 0.432$\pm$0.057 & 0.320$\pm$0.048          & 0.386$\pm$0.053 & \textbf{0.189$\pm$0.056} & 0.434$\pm$0.044 & 0.378$\pm$0.057 \\ \midrule
\multirow{6}{*}{Yeast} &
  $r=1,p=0.3$ &
  \textbf{0.211$\pm$0.028} &
  0.223$\pm$0.016 &
  0.386$\pm$0.028 &
  0.216$\pm$0.025 &
  0.249$\pm$0.022 &
  0.214$\pm$0.019 &
  0.249$\pm$0.022 &
  0.685$\pm$0.056 \\
 & $r=1,p=0.7$ & 0.213$\pm$0.019          & 0.224$\pm$0.017 & 0.395$\pm$0.026 & 0.218$\pm$0.024          & 0.251$\pm$0.022 & \textbf{0.209$\pm$0.016} & 0.249$\pm$0.022 & 0.694$\pm$0.068 \\
 & $r=2,p=0.3$ & \textbf{0.205$\pm$0.031}          & 0.220$\pm$0.021 & 0.383$\pm$0.018 & 0.220$\pm$0.026          & 0.250$\pm$0.022 & 0.206$\pm$0.023 & 0.250$\pm$0.022 & 0.677$\pm$0.039 \\
 & $r=2,p=0.7$ & 0.231$\pm$0.030          & 0.224$\pm$0.018 & 0.401$\pm$0.025 & 0.223$\pm$0.025          & 0.250$\pm$0.021 & \textbf{0.201$\pm$0.024} & 0.249$\pm$0.022 & 0.705$\pm$0.033 \\
 & $r=3,p=0.3$ & 0.224$\pm$0.029          & 0.224$\pm$0.022 & 0.386$\pm$0.025 & 0.220$\pm$0.027          & 0.249$\pm$0.021 & \textbf{0.198$\pm$0.023} & 0.249$\pm$0.022 & 0.681$\pm$0.062 \\
 & $r=3,p=0.7$ & 0.226$\pm$0.018          & 0.220$\pm$0.019 & 0.401$\pm$0.031 & 0.222$\pm$0.022          & 0.249$\pm$0.022 & \textbf{0.192$\pm$0.016} & 0.249$\pm$0.022 & 0.736$\pm$0.047 \\ \midrule
\multirow{6}{*}{EspGame5k} &
  $r=1,p=0.3$ &
  \textbf{0.549$\pm$0.027} &
  0.557$\pm$0.022 &
  0.574$\pm$0.014 &
  0.560$\pm$0.023 &
  0.758$\pm$0.019 &
  0.560$\pm$0.023 &
  0.633$\pm$0.018 &
  0.586$\pm$0.014 \\
 & $r=1,p=0.7$ & \textbf{0.554$\pm$0.022} & 0.555$\pm$0.021 & 0.583$\pm$0.014 & 0.567$\pm$0.017          & 0.758$\pm$0.019 & 0.560$\pm$0.018          & 0.670$\pm$0.023 & 0.588$\pm$0.015 \\
 & $r=2,p=0.3$ & \textbf{0.550$\pm$0.023} & 0.555$\pm$0.024 & 0.582$\pm$0.010 & 0.559$\pm$0.021          & 0.758$\pm$0.019 & 0.558$\pm$0.019          & 0.663$\pm$0.016 & 0.592$\pm$0.011 \\
 & $r=2,p=0.7$ & \textbf{0.555$\pm$0.211} & 0.559$\pm$0.025 & 0.585$\pm$0.014 & 0.567$\pm$0.017          & 0.758$\pm$0.019 & 0.559$\pm$0.016          & 0.703$\pm$0.016 & 0.595$\pm$0.016 \\
 & $r=3,p=0.3$ & \textbf{0.556$\pm$0.024} & 0.560$\pm$0.024 & 0.585$\pm$0.015 & 0.567$\pm$0.018          & 0.758$\pm$0.019 & 0.559$\pm$0.019          & 0.668$\pm$0.017 & 0.590$\pm$0.013 \\
 & $r=3,p=0.7$ & 0.560$\pm$0.023          & 0.562$\pm$0.026 & 0.606$\pm$0.021 & 0.584$\pm$0.017          & 0.758$\pm$0.019 & \textbf{0.555$\pm$0.017} & 0.718$\pm$0.021 & 0.599$\pm$0.011 \\ \midrule
\multirow{6}{*}{Pascal} &
  $r=1,p=0.3$ &
  \textbf{0.302$\pm$0.014} &
  0.390$\pm$0.015 &
  0.528$\pm$0.017 &
  \textbf{0.302$\pm$0.014} &
  0.550$\pm$0.018 &
  0.325$\pm$0.015 &
  0.575$\pm$0.021 &
  0.594$\pm$0.017 \\
 & $r=1,p=0.7$ & \textbf{0.306$\pm$0.014} & 0.390$\pm$0.016 & 0.577$\pm$0.010 & 0.328$\pm$0.014          & 0.553$\pm$0.024 & 0.338$\pm$0.018          & 0.585$\pm$0.021 & 0.597$\pm$0.017 \\
 & $r=2,p=0.3$ & \textbf{0.308$\pm$0.012} & 0.389$\pm$0.016 & 0.561$\pm$0.010 & 0.314$\pm$0.018          & 0.540$\pm$0.017 & 0.342$\pm$0.015          & 0.583$\pm$0.021 & 0.594$\pm$0.018 \\
 & $r=2,p=0.7$ & \textbf{0.310$\pm$0.012} & 0.389$\pm$0.018 & 0.615$\pm$0.012 & 0.354$\pm$0.012          & 0.570$\pm$0.023 & 0.358$\pm$0.017          & 0.584$\pm$0.022 & 0.599$\pm$0.015 \\
 & $r=3,p=0.3$ & \textbf{0.305$\pm$0.016} & 0.388$\pm$0.012 & 0.576$\pm$0.011 & 0.321$\pm$0.016          & 0.559$\pm$0.013 & 0.351$\pm$0.014          & 0.582$\pm$0.021 & 0.597$\pm$0.017 \\
 & $r=3,p=0.7$ & \textbf{0.317$\pm$0.009} & 0.391$\pm$0.013 & 0.654$\pm$0.012 & 0.397$\pm$0.014          & 0.585$\pm$0.016 & 0.384$\pm$0.021          & 0.583$\pm$0.021 & 0.605$\pm$0.016 \\ \midrule
\multirow{6}{*}{Mirflickr5k} &
  $r=1,p=0.3$ &
  \textbf{0.234$\pm$0.017} &
  0.301$\pm$0.020 &
  0.301$\pm$0.019 &
  \textbf{0.234$\pm$0.026} &
  0.564$\pm$0.021 &
  0.294$\pm$0.020 &
  0.576$\pm$0.019 &
  0.429$\pm$0.020 \\
 & $r=1,p=0.7$ & \textbf{0.237$\pm$0.019}          & 0.300$\pm$0.020 & 0.305$\pm$0.022 & \textbf{0.237$\pm$0.025} & 0.567$\pm$0.019 & 0.288$\pm$0.020          & 0.576$\pm$0.019 & 0.428$\pm$0.021 \\
 & $r=2,p=0.3$ & \textbf{0.237$\pm$0.018}          & 0.299$\pm$0.019 & 0.305$\pm$0.018 & \textbf{0.237$\pm$0.021} & 0.563$\pm$0.020 & 0.287$\pm$0.020          & 0.577$\pm$0.019 & 0.424$\pm$0.017 \\
 & $r=2,p=0.7$ & \textbf{0.236$\pm$0.020}          & 0.303$\pm$0.019 & 0.311$\pm$0.020 & \textbf{0.236$\pm$0.026} & 0.562$\pm$0.023 & 0.292$\pm$0.017          & 0.576$\pm$0.019 & 0.434$\pm$0.023 \\
 & $r=3,p=0.3$ & \textbf{0.237$\pm$0.023}          & 0.300$\pm$0.016 & 0.307$\pm$0.014 & 0.238$\pm$0.026 & 0.564$\pm$0.022 & 0.289$\pm$0.021          & 0.576$\pm$0.019 & 0.433$\pm$0.023 \\
 &
  $r=3,p=0.7$ &
  \textbf{0.248$\pm$0.020} &
  0.301$\pm$0.018 &
  0.317$\pm$0.027 &
  \textbf{0.248$\pm$0.019} &
  0.565$\pm$0.022 &
  0.289$\pm$0.017 &
  0.575$\pm$0.019 &
  0.430$\pm$0.022 \\ \bottomrule
\end{tabular}%
}
\label{oe}
\end{table*}




\subsection{Time Complexity Analysis}
In addition, we discuss the time complexity of our method, which encompasses two aspects: the initialization procedure and the optimization procedure.
The cost of initializing $\mathbf{C}^{(0)}$, $\mathbf{P}^{(0)}$, $\mathbf{Q}^{(0)}$, $\mathbf{W}^{(0)}$, and $\mathbf{O}^{v(0)}$ is $O\left( n^2+2nc+c \sum_{v=1}^V d_v+ Vn^2\right)$.
Then the time consumption of updating $\mathbf{C}$ is $O\left(T_1\left(n^2c + \sum_{v=1}^V d_v n^2\right)\right)$, where $T_1$ is the number of BFGS iterations in this step. The time consumption of updating $\mathbf{W}$ is $O\left(T_2\left(nc \sum_{v=1}^V d_v\right)\right)$, where $T_2$ is the number of iterations for solving the ridge regression problem in this step.
The time consumptions of updating $\mathbf{P}$, $\mathbf{Q}$, and $\mathbf{O}^{v}$ are $O\left(T_3\left(n^2c\right)\right)$, $O\left(T_4\left(n^2c\right)\right)$, and $O\left(T_5\left(\sum_{v=1}^V d_vn^2 + n^2c\right)\right)$, respectively, where $T_3$, $T_4$, and $T_5$ are the numbers of iterations of the Alternating Direction Method of Multipliers in the corresponding steps. Therefore, the total time complexity is the sum of the above components.





% \begin{algorithm}[tb]
% \caption{The pseudo-code of \textsc{Vadis}}
% \label{alg:algorithm}
% \flushleft{}
% \textbf{Input}: 
% \\
% $\mathcal{D}:$  the multi-view partial multi-label training set\\
% $\gamma_1, \gamma_2: $  the trade-off parameters in objective function Eq. (7)\\
% $\mathbf{S}^v : $  the local similarity matrix of the $v$-th view ($1 \leq v \leq V$)\\
% $\boldsymbol{x}: $ the unseen instance \\
% \textbf{Outputs}:\\
% $Y: \quad$ the predicted label set for $\boldsymbol{x}$\\
% \textbf{Process}: 
% \begin{algorithmic}[1] %[1] enables line numbers
% \STATE Calculate the self-representation matrix $\mathbf{Z}^{v}$ by Eq. (1) and  initialize $\mathbf{C}^{(0)}, \mathbf{O}^{v(0)}, \mathbf{P}^{(0)}, \mathbf{Q}^{(0)}$ and $\mathbf{W}^{(0)}$;
% \STATE \textbf{repeat}
% \STATE$\quad$ Update $\mathbf{C}^{(t+1)}$ by solving problem Eq. (8) with the BFGS strategy;
% \STATE $\quad$ Update $\mathbf{W}^{(t+1)}$ by solving problem Eq. (9) with the ridge regression problem;
% \STATE $\quad$ Update $\mathbf{P}^{(t+1)}$ by solving problem Eq. (12) with the Alternating Direction Method of Multiplier;
% \STATE $\quad$ Update $\mathbf{Q}^{(t+1)}$ by solving problem Eq. (15) with the Alternating Direction Method of Multiplier;
% \STATE $\quad$ Update $\mathbf{O}^{v(t+1)}$ by solving problem Eq. (17) with the Alternating Direction Method of Multiplier;
% \STATE $\quad$ $t = t + 1$;

% \STATE \textbf{until} convergence
% \STATE The final predictive model is obtained by setting $\mathbf{W}^*=$ $\mathbf{W}^{(t)}$;
% \STATE Return a proper label set $Y$ according to Eq. (19).


% \end{algorithmic}
% \end{algorithm}

\end{document}
