\documentclass[accepted]{uai2025} % for initial submission
%\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs,
% so please use T1 fonts in your manuscript whenever possible.
% Other font encondings may result in incorrect characters.
%
\usepackage{multirow}
% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
%
\usepackage{amsmath}
\usepackage{graphicx} 
\usepackage{color} % For text colors
\usepackage{amssymb,bbding,ulem}
\usepackage{graphicx}
\usepackage{subcaption}
\usepackage{tabularx}
\usepackage{adjustbox}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{MSCGrapher: Learning Multi-Scale Dynamic Correlations for Multivariate Time Series Forecasting}

% The standard author block has changed for UAI 2025 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:2023050075@ybu.edu.cn}{Xian Yang}}
\author[1]{\href{mailto:zgzhang@ybu.edu.cn}{Zhenguo Zhang}\thanks{Corresponding author.}}
\author[1]{\href{mailto:2023050070@ybu.edu.cn}{Shihao Lu}}

\affil[1]{%
    Department of Computer Science and Technology\\
    Yanbian University\\
    977 Gongyuan Road, Yanji, 133002, China
}

%
  
\begin{document}
\maketitle

\begin{abstract}

Efficiently learning intra-series and inter-series correlations is essential for multivariate time series forecasting (MTSF). 
However, in real-world scenarios, persistent and significant inter-series correlations are challenging to represent in a static way, and the strength of correlations varies across different time scales.
In this paper, we address this challenge by modeling the complex inter-series relationships through dynamic correlations, considering the varying strengths of correlations. 
We propose a novel MTSF model: MSCGrapher, which leverages an adaptive correlation learning block to uncover inter-series correlations across different scales.
Concretely, time series are first decomposed into different scales based on their periodicities. 
The graph representation of MTS is then constructed and an adaptive correlation learning method is introduced to capture the inter-series correlations across different scales. 
To quantify the strength of these correlations, we compute correlation scores based on the characteristics of the graph edges and classify correlations as either \textit{Strong} or \textit{Weak}. 
Finally, we employ a self-attention module to capture intra-series correlations and then fuse features from all scales to obtain the final representation. 
Extensive experiments on 12 real-world datasets show that MSCGrapher achieves significant gains in forecasting performance, highlighting the critical role of inter-series correlations in capturing implicit patterns for MTS. 



\end{abstract}

\section{Introduction}\label{sec:intro}
MTSF involves predicting the future based on multiple interrelated historical data, playing a significant role across various industries. Examples include predicting the prices of multiple assets in financial markets, multi-parameter weather in meteorology, equipment operation status in industrial manufacturing, and physiological indicators in healthcare \citep{chen2011, Wu2021, Fatima2024review, Thai2024}. Due to its substantial applications, MTSF attracts widespread research interest. In the last decade, various deep learning models, such as methods based on CNNs \citep{zeng2023financial, wang2023micn}, MLPs \citep{challu2023nhits, vijay2023tsmixer}, and Transformers \citep{zhou2021informer, Wu2021}, have been proposed to tackle the challenges of time series forecasting and have achieved outstanding performance. Although these methods have different architectures, they fundamentally utilize neural networks to capture correlations: inter-series correlations and intra-series correlations \citep{cai2024msgnet}. 


\begin{figure}[t]
\centering
\includegraphics[width=1.0\columnwidth]{case.pdf}
\caption{The relationships between different series vary at different time scales, resulting in different graph structures.}
\label{fig1}
\end{figure}


Early works primarily capture intra-series correlations but overlook inter-series correlations. This oversight significantly impacts the model's ability to capture complex dynamic relationships and prediction accuracy.
In recent years, an increasing number of studies \citep{zhang2023crossformer, yue2022ts2vec} have focused on modeling inter-series correlations to reveal and leverage the complex interactions within MTS. 
One promising approach uses graph learning \citep{wu2020connecting} to construct relationship graphs to model these correlations.
While these methods can capture inter-series dependencies, they still have significant shortcomings in fully addressing the dynamically changing correlations across different time scales.
% Current MTS analysis faces three critical limitations:
Current MTSF methods face three limitations:
(1) \textbf{Time Scale Insensitivity}: 
Most works primarily focus on single-scale correlation analysis, limiting their ability to reveal correlations across time scales and handle complex dynamic systems.
For example, in climate science, climate change and extreme weather events are influenced by a combination of factors that exhibit different correlations at different time scales. 
The long-term global warming interacts with mid-term variations such as El Niño and short-term fluctuations like sudden extreme weather events (e.g., hurricanes, heavy rainfall) \citep{heede2023towards}. 
Figure~\ref{fig1} illustrates an example where an MTS is divided into three different time scales. 
Clearly, the relationships between nodes change at these scales.
At scale $f_1$, the orange and blue series exhibit consistency. 
However, at scale $f_3$, they diverge, with the orange series affecting the green series, resulting in a different graph structure.
From the above example, we can clearly identify the limitations of existing deep learning models in dynamic modeling of relationships for MTS.
(2) \textbf{Dynamic Relationship Modeling}: 
While graph-based approaches represent MTS as nodes and their relationships as edges \citep{kipf2016semi}, current graph structure learning in GNNs often lacks the adaptability needed to model dynamic inter-series correlations, particularly when these relationships evolve across different time scales. This rigidity limits their effectiveness in capturing the temporal evolution of complex systems.
(3) \textbf{Correlation Strength Variability}: 
Another critical challenge lies in the scale-dependent variations in correlation strength, which significantly impact model performance. Existing methods often fail to account for these variations, further restricting their ability to accurately model and predict MTS behavior.
These limitations raise the question: Can graph learning accurately capture the correlations of MTS across different time scales? If so, what adjustments are needed to the GNN architecture?







To address the above issues, we propose MSCGrapher, which can effectively enhance graph learning's ability to capture dynamically varying correlations across various time scales in MTS, and accurately characterize the strength and weakness of correlations at different scales.
First, MSCGrapher encodes the temporal variations of each series into a high-dimensional space and represents them as nodes in a graph.
For the multi-periodic characteristic of time series, we use Fast Fourier Transform (FFT) to extract periodic components at different frequencies, which reveals the underlying patterns and trends. 
Next, we design an adaptive correlation graph learning block that uses an adaptive GNN to dynamically learn adjacency matrices for each time scale. It computes relationship strength scores from edge features and partitions the matrices based on these scores to identify correlations and capture the complex dynamic changes in the data.
For intra-series relationships, a multi-head attention module is employed to capture the dependencies at different time points by computing correlations between time steps.
Finally, after multiple layers of feature aggregation, we generate the final prediction results. 
Our contributions include the following key aspects: 

\begin{itemize} 
    \item \textbf{Overall Framework:} We propose the MSCGrapher framework, which effectively handles MTS and captures both multi-scale inter-series correlations and intra-series temporal correlations.
    \item \textbf{Effective Modules:} Our research shows that using an adaptive GNN can more accurately capture the complex dynamic changes hidden in MTS.
    \item \textbf{Performance:} Extensive experiments on various real-world datasets show that MSCGrapher outperforms existing models. Additionally, we perform transferability experiments with the correlation learning method, verifying its generalization capability across different datasets and models.
\end{itemize}

\section{Related Work}
\subsection{Time Series Forecasting}% Framework}

Early time series forecasting methods are generally based on traditional statistical or machine learning methods.
Recent advancements in deep learning architectures have shown significant advantages in time series forecasting \citep{miller2024survey}. 
CNNs have succeeded in MTSF, as seen in works like \citep{zeng2023financial,wang2023micn}.
TCNs, a type of CNN that prevents future value leakage, effectively preserve the temporal order of time series \citep{bai2018empirical}. 
MLPs encode temporal dependencies into their layers using the MLP structure \citep{vijay2023tsmixer,challu2023nhits}.
Transformers are used in MTSF due to their ability to capture long-range dependencies. Crossformer \citep{zhang2023crossformer} and Informer \citep{zhou2021informer} enhance model performance by employing cross-attention mechanisms and probabilistic sparse self-attention mechanisms to capture temporal dependencies.
However, these methods fail to consider inter-series correlations at different time scales in MTS. 
While some methods address periodicity as a key factor in time series \citep{wu2022timesnet,fan2022depts}, they still fall short in modeling complex correlations and multi-scale dependencies.





\subsection{Correlations Learning with GNNs}

Graph Neural Networks (GNNs) demonstrate their importance in various fields by effectively modeling complex interactions in graph-structured data.
Initially, GNNs were applied to tasks like traffic prediction \citep{wu2023spatiotemporal} and skeleton-based action recognition \citep{shi2019skeleton}.
In recent years, many studies start applying GNNs in MTS modeling to capture the dependencies between variables.
These methods \citep{yu2017spatio,li2017diffusion} often use fixed graph structures to model inter-series correlations. 
For example, in traffic prediction, a graph structure is constructed based on the spatial distance between sensors, with sensors as nodes and roads as edges connecting the nodes.
However, constructing a graph structure based on prior knowledge is challenging in MTS modeling.
To address this, researchers propose learnable graph structures to dynamically model relationships between series, offering new perspectives \citep{wu2020connecting}.
Recently, some approaches attempt to use dynamic or time-varying graph structures to model correlations \citep{zheng2020gman,chen2023multi,cai2024msgnet}, but they overlook a key factor: as time progresses, the inter-series correlations change dynamically across different time scales, and the strength of these correlations fluctuates. 
Failure to adequately consider the varying strength of inter-series correlations leads to insufficient accuracy in capturing these important dependencies.



\section{Preliminaries}
\subsection{Problem Definition}
Given MTS $\mathbf{X}_t = \{x_{t-L}, \ldots, x_{t-1}\} \in \mathbb{R}^{N \times L}$, where $L$ denotes the size of the historical review window and $N$ is the number of variables, the MTSF task is to predict the values of $N$ variables over the future $T$ time steps. 
The future values are denoted as $\mathbf{Y}_t = \{x_{t}, \ldots, x_{t+T-1}\} \in \mathbb{R}^{N \times T}$, where $T$ denotes the size of the future prediction window. 
Here,  
$\mathbf{X}_{:,t} \in \mathbb{R}^{N}$ denotes the values of all variables collected at time step $t$, and $\mathbf{X}_{n,:} \in \mathbb{R}^{L}$ represents the entire time series of the variable indexed by $n$.

\subsection{Graph Representation for MTS Inter-series Correlations}

We use graphs to represent the inter-series correlations of MTS at different scales, referred to as the strong correlation graph and the weak correlation graph.
The graph is defined as $G=\mathsf{(}\mathbf{V},\mathbf{E}\mathsf{)}$, where 
$\mathbf{V}$ represents the set of nodes, $\left| \boldsymbol{\mathbf{V}} \right| = N$ and $\mathbf{E}$ is the set of edges. 
We consider the $i$-th series as node $v_i$, and the weighted edges representing relationships between different time series are denoted by $E_i$. 
Strong correlation refers to variable pairs that exhibit consistently similar trends at the same time scale, with corresponding weights close to 1 in the learnable adjacency matrix. Weak correlation refers to dissimilar or noise-influenced trends, with weights close to 0. 
The strong correlation graph is denoted as $G_{\mathrm{strong}}$, and the weak correlation graph is denoted as $G_{\mathrm{weak}}$. 
Different time scales identified from the MTS are represented as $f=\{ f_1, \ldots, f_k \}$, assuming there are $k$ different scales. 
The adjacency matrix corresponding to each scale is denoted by $\{\mathbf{A}^1, \ldots, \mathbf{A}^k \}$, where $\mathbf{A}^i \in \mathbb{R}^{N \times N}$ for $i \in \{1, \ldots, k\}$. 
$\mathbf{A}^i_{\mathrm{strong}}$ and $\mathbf{A}^i_{\mathrm{weak}}$ represent the adjacency matrices of the strong correlation graph and weak correlation graph at scale $i$.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{MSCGrapher.pdf}

\caption{The overall framework of MSCGrapher. The core is the Multi-scale Correlation Learning Block, which includes the Multi-scale Segmentation Layer and Adaptive Correlation Graph Learning Layer.}
\label{fig2}
\end{figure*}

\section{MSCGrapher}
Our MSCGrapher, with residual connections, consists of an Embedding Layer, Multi-scale Correlation Learning Block (MSCL), Multi-head Attention Layer (MAL), Multi-scale Aggregation Layer, and Projection Layer.
The Embedding Layer processes time series into suitable representations, while MSCL and MAL capture inter-series correlations and intra-series dependencies, respectively. Finally, the multi-scale aggregation layer integrates features, and the projection layer outputs the final representation required for downstream tasks. The overall framework is illustrated in Figure~\ref{fig2}.



\subsection{Time Series Embedding Representation}

For each series of MTS, we treat it as a node of the graph. The first step is to integrate the temporal dynamics of each series into a proper embedding representation.

Local features in time series reflect short-term changes and behaviors. 
We use 1D convolution to transform the input MTS into high-dimensional embedded representations: 
$\mathbf{emb}_{\mathrm{Token}}= \mathrm{Conv}_{1d}({\mathbf{X}}_{t}, \mathbf{W})$, where $\mathbf{emb}_{\mathrm{Token}} \in \mathbb{R}^{c_{\dim} \times L}$, $c_{\dim}$ is the feature dimension and $\mathbf{W}$ is the weight matrix. 
Additionally, temporal features often contain important information that explains periodicity, trends, and other time-related patterns. 
We employ an embedding operation to enhance temporal context information: $\mathbf{emb}_{\mathrm{Temporal}} = \mathbf{E}_m + \mathbf{E}_d + \mathbf{E}_w + \mathbf{E}_h + \mathbf{E}_t$, where $\mathbf{E}_{i \in \{m, d, w, h, t\}} \in \mathbb{R}^{c_{\dim} \times L}$ represents embeddings for month, day, week, hour, and minute. 
In the forecasting scenario, position features are also crucial. Therefore, positional information is added to the series through position encoding: $\mathbf{emb}_{\mathrm{Position}} = \mathrm{PE}(L, i)$, where $i \in \{ 0, \ldots, c_{\dim}-1 \}$ represents the index of the dimension in the embedding vector. In summary, the Embedding Layer of MSCGrapher consists of three main parts:
\begin{equation}
\mathbf{H}_{\mathrm{emb}} = \mathbf{emb}_{\mathrm{Token}} + \mathbf{emb}_{\mathrm{Temporal}} + \mathbf{emb}_{\mathrm{Position}}
\end{equation}

\subsection{Multi-scale Inter-series Correlations Learning}

To effectively capture the correlations of MTS at different time scales, we design a Multi-scale Correlation Learning Block (named MSCL), which consists of Multi-scale Segmentation Layer and Adaptive Correlation Graph Learning Layer. 
The former divides time series into different time scales based on their periodic characteristics while the latter learns the dependencies between time series at the corresponding scales to capture correlations.
$k$ parallel blocks are used to learn correlations of $k$ time scales.

\subsubsection{Multi-scale Segmentation of MTS} 

Generally, different scales uncover various patterns. 
For example, in financial markets, short-term price fluctuations may be influenced by news events and trading behaviors, while long-term trends may be driven by economic cycles and policy.
To identify periodicities of time series as time scales, we transform the representation of MTS to frequency domain by using Fast Fourier Transform (FFT). The process is as follows:

\begin{equation}
\begin{aligned}
\mathbf{X}_f = \mathrm{FFT}(\mathbf{H}_{\mathrm{emb}}), \quad \mathbf{F} = \mathrm{Avg}(\mathrm{Amp}(\mathbf{X}_f)), \\
\mathop{\mathrm{argTopk}}_{\substack{f_* \in \{ 1, \ldots, \left[\frac{L}{2}\right] \}}}(\mathbf{F}) = \{f_1, \ldots, f_k \}, \quad p_i = \frac{L}{f_i}.
\end{aligned}
\label{eq2}
\end{equation}
where $\mathrm{FFT}(\cdot)$ and $\mathrm{Amp}(\cdot)$ represent FFT and amplitude calculations, and $p_i$ is the period corresponding to the $i$-th scale. 
We first extract the $k$ most significant frequency components $\{f_1, \ldots, f_k \}$, and compute their corresponding period $\{p_1, \ldots, p_k \}$. 
Then, we reshape the original input $\mathbf{X}_{\mathrm{input}}$ based on the extracted period $p_i$ and frequency $f_i$:

\begin{equation}
\mathbf{X}^i = \mathrm{Reshape}_{p_i, f_i}(\mathrm{Padding}(\mathbf{X}_{\mathrm{input}})),
\end{equation}
where $\mathrm{Padding}(\cdot)$ extends the time series with zero padding along the time dimension to fit $\mathrm{Reshape}_{p_i, f_i}(\cdot)$, $i \in \{1, \ldots, k\}$. 
Note that $\mathbf{X}^i \in \mathbb{R}^{c_{\dim} \times {p_i} \times {f_i}}$ represents the $i$-th reshaped time series for time scale $i$. 

\subsubsection{Adaptive Correlation Graph Learning}

Two trainable matrices, $\mathbf{E}^l_1 \in \mathbb{R}^{N \times c}$ and  $\mathbf{E}^l_2 \in \mathbb{R}^{N \times c}$, are employed to learn the adaptive adjacency matrix at time scale $l$: 

\begin{equation}
\mathbf{A}^l = \mathrm{SoftMax} \left( \mathrm{ReLU} \left( \mathbf{E}_1^l (\mathbf{E}_2^l)^T \right) \right),
\end{equation}
i.e., we learn a new adjacency matrix at each time scale to capture differences in correlations across different scales.
After obtaining the adjacency matrix $\mathbf{A}^l$ of the $l$-th time scale, we can generate new adjacency matrices $\mathbf{A}^l_{\mathrm{strong}}$ and $\mathbf{A}^l_{\mathrm{weak}}$ based on the changes in correlation strength.
The process is illustrated in Figure~\ref{fig3}. 


\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{correlation_block.pdf}
\caption{An overview of the Adaptive Correlation Graph Learning Layer. It utilizes a correlation graph learning method to obtain the strong and weak correlations between time series.}
\label{fig3}
\end{figure*}

We first construct the edge index matrix $\mathbf{E}^l_{\mathrm{index}}$ and edge attribute matrix $\mathbf{A}^l_{\mathrm{edge}}$ based on $\mathbf{A}^l$:


\begin{equation}
\begin{aligned}
\mathbf{E}_{\mathrm{index}}^l &= \mathrm{Transpose}(\mathrm{Nonzero}(\mathbf{A}^l)), \\
\mathbf{A}_{\mathrm{edge}}^l &= \mathrm{Reshape}_c(\mathbf{A}^l(\mathbf{A}^l \ne 0)).
\end{aligned}
\end{equation}
Then, we obtain graph representation by two parameters:

\begin{equation}
\begin{aligned}
\widehat{\mathbf{X}} &= \mathrm{Reshape}_{L, c_{\dim}}(\mathbf{X}_{\mathrm{input}}), \\
\mathbf{G}_{\mathrm{data}} &= \mathrm{Data}(\widehat{\mathbf{X}}, \mathbf{E}_{\mathrm{index}}^l, \mathbf{A}_{\mathrm{edge}}^l, \mathbf{B}^l).
\end{aligned}
\end{equation}
where $\mathbf{B}^l$ is a zero-filled batch tensor, and $\mathrm{Data}(\cdot)$ is used to create a graph object.

Next, we apply convolution and non-linear transformations to the node features in  $\mathbf{G}_{\mathrm{data}}$, and extract the start and end node indices, denoted as 
$\mathrm{r}$ and $\mathrm{c}$, respectively, from $\mathbf{E}^l_{\mathrm{index}}$, where $\mathrm{r}$ corresponds to the source node and 
$\mathrm{c}$ to the target node.
We then obtain representations for each edge and compute edge scores using an MLP:

\begin{equation}
\begin{aligned}
\widehat{\mathbf{G}}_{\mathrm{data}} = \mathrm{Conv}_2(\mathrm{ReLU}(\mathrm{Conv}_1(\mathbf{G}_{\mathrm{data}}))), \\
\mathrm{r} = \mathbf{E}_{\mathrm{index}}^l[0,:], \mathrm{c} = \mathbf{E}_{\mathrm{index}}^l[1,:], \\
\mathbf{E}_{\mathrm{rep}} = \mathrm{concat}(\widehat{\mathbf{X}}[\mathrm{r}], \widehat{\mathbf{X}}[\mathrm{c}]),\mathbf{S}_{\mathrm{edge}}= \mathrm{MLP}(\mathbf{E}_{\mathrm{rep}}).
\end{aligned}
\end{equation}


We concatenate the node features corresponding to the start and end node indices to obtain $\mathbf{E}_{\mathrm{rep}}$ and use $\mathrm{MLP}(\cdot)$ to compute edge scores $\mathbf{S}_{\mathrm{edge}}$. 
After obtaining the score for each edge, a higher score indicates a stronger relationship between the two nodes. Specifically, a higher score means that the edge is more important in the graph structure, and the correlation between the two nodes is stronger.
We partition the edges of the graph into strong correlation graph $\mathbf{G}_{\mathrm{strong}}$ and weak correlation graph $\mathbf{G}_{\mathrm{weak}}$ based on the edge scores and a ratio.

Then, we relabel the nodes of $\mathbf{G}_{\mathrm{strong}}$ and $\mathbf{G}_{\mathrm{weak}}$ to obtain new node features:
\begin{equation}
\begin{split}
\mathbf{G}_{\mathrm{strong}}^l, \mathbf{G}_{\mathrm{weak}}^l= \mathrm{Divide}(\mathbf{S}_{\mathrm{edge}}, \mathrm{ratio}), \\
\mathbf{A}_{\mathrm{strong}}^l, \mathbf{A}_{\mathrm{weak}}^l = \mathrm{Relabel}(\mathbf{G}_{\mathrm{strong}}^l, \mathbf{G}_{\mathrm{weak}}^l),
\end{split}
\end{equation}
where $\mathrm{Relabel}(\cdot)$ represents the function for relabeling node features. 
After that, we assign corresponding weights to the strong and weak adjacency matrices, perform a weighted sum, and fuse the two graphs to generate a new representation $\mathbf{A}_{f}^l$.

\begin{equation}
\begin{split}
\mathbf{A}_{f}^l = W \cdot \mathbf{A}_{\mathrm{strong}}^l + (1 - W) \cdot \mathbf{A}_{\mathrm{weak}}^l,
\end{split}
\end{equation}
where $W$ is a weighting parameter used to control the fusion ratio of $\mathbf{A}_{\mathrm{strong}}^l$ and $\mathbf{A}_{\mathrm{weak}}^l$. 
We assign a higher weight to the $\mathbf{A}_{\mathrm{strong}}^l$ to direct the model’s attention toward strong correlation information, thereby capturing key node relationships more effectively while avoiding noise interference caused by weak correlation information.
Finally, we use the Mixhop graph convolution method \citep{abu2019mixhop} to capture the dependencies in the Fusion graph between time series:

\begin{equation}
\begin{split}
\mathbf{X}_{\mathrm{out}}^i = \sigma \left(  \underset{j \in P}{\big\|} (\mathbf{A}_{f}^l)^j \mathbf{X}^i \right).
\end{split}
\end{equation}
where $\mathbf{X}_{\mathrm{out}}^i$ is the output after fusion at scale $i$. 
The hyperparameter $P$ is a set of integers representing the powers of the adjacency matrix. $(\mathbf{A}_{f}^l)^j$  denotes the $j$-th power of the learned Fusion adjacency matrix $\mathbf{A}_{f}^l$, and $\|$ concatenates the intermediate results generated in each iteration along the column direction. 
Finally, we use an MLP to project $\mathbf{X}_{\mathrm{out}}^i$ into a 3D tensor $\widehat{\mathbf{X}}_{\mathrm{out}}^i \in \mathbb{R}^{c_{\dim} \times p_i \times f_i}$.

\subsection{Extraction of Intra-series Correlations} 
A multi-head attention based module is proposed to capture the intra-series correlations within time series at different time scales. 
Specifically, we project the input series $\widehat{\mathbf{X}}^i_{\mathrm{out}}$ through a linear layer into different spaces to obtain queries ($\mathbf{Q}$), keys ($\mathbf{K}$), and values ($\mathbf{V}$). 
They are then projected onto multiple attention heads, where each head learns different temporal dependencies. 
Finally, we combine the outputs of different heads and extract local features through $\mathrm{Conv}_{1d}$ to generate a comprehensive representation $\widehat{\mathbf{H}}_{\mathrm{out}}^i \in \mathbb{R}^{Bf_i\times c_{\dim} \times p_i}$, where $B$ is the batch size:

\begin{equation}
\begin{split}
% \mathrm{Attention}({\bf{Q}}, {\bf{K}}, {\bf{V}}) = \mathrm{Softmax}\left(\frac{{\bf{Q}} {\bf{K}}^T}{\sqrt{d_k}}\right) {\bf{V}}, \\
\mathbf{O} = \mathrm{Linear}\left(\mathrm{Concat}\left(\mathrm{head}_1, \ldots, \mathrm{head}_H\right)\right), \\
\mathbf{X}_{\mathrm{attn}} = \mathrm{LayerNorm}\left(\widehat{\mathbf{X}}_{\mathrm{out}}^i + \mathrm{Dropout}(\mathbf{O})\right), \\
\mathbf{Y} = \mathrm{Dropout}\left(\mathrm{Conv}_{1d}\left(\mathrm{Conv}_{1d}(\mathbf{X}_{\mathrm{attn}}^T)^T\right)^T\right),\\
\widehat{\mathbf{H}}_{\mathrm{out}}^i = \mathrm{LayerNorm}\left(\mathbf{X}_{\mathrm{attn}} + \mathbf{Y}\right).
\end{split}
\end{equation}

\subsection{Multi-Scale Aggregator and Projection}
After handling $k$ scales, we obtain the representations $\widehat{\mathbf{H}}_{\mathrm{out}}^i$ for each scale.
To generate predictions through node regression, we need to aggregate the tensors from the $k$ different scales. 
Each tensor is first reshaped to obtain new $\widehat{\mathbf{H}}_{\mathrm{out}}^i \in \mathbb{R}^{c_{\dim} \times L}$,  and then $k$ scales based on their respective amplitudes are aggregated:
\begin{equation}
\begin{split}
\widehat{\mathbf{X}}_{\mathrm{out}} = \sum_{i=1}^k \mathrm{Softmax}(\mathbf{W}) \widehat{\mathbf{H}}_{\mathrm{out}}^i,
\end{split}
\end{equation}
where $\mathbf{W}\in \mathbb{R}^{B \times k}$ is the learnable scale weight matrix composed of amplitudes from each time scale, which represents the relative importance. 
Thus, we can adaptively integrate the information from different scales based on the learned weights.
The final prediction is completed by a regression process:

\begin{equation}
\begin{split}
\mathbf{Y}_{t} = \mathbf{W}_{l} \widehat{\mathbf{X}}_{\mathrm{out}} \mathbf{W}_{t} + \mathbf{b}.
\end{split}
\end{equation}
where $\mathbf{W}_{l}\in \mathbb{R}^{N \times c_{\dim}}$ and $\mathbf{W}_{t}\in \mathbb{R}^{L \times T}$ are learnable weights. 
$\mathbf{W}_{l}$ and $\mathbf{W}_{t}$ perform linear mapping on the variable dimension and time dimension, respectively. 


\section{Experiments}
We conduct comprehensive experiments with MSCGrapher on MTSF across multiple real-world datasets to validate its generalization ability in various scenarios. 
We also explore the potential of integrating the correlation learning block into other models to assess its transferability and performance. 

\subsection{Experiment Setup}

\subsubsection{Datasets and Baselines} 
12 real-world MTS datasets are employed, including Flight, ETT (h1, h2, m1, m2)~\citep{wu2022timesnet}, Weather, Electricity, Exchange-Rate~\citep{lai2018modeling}, and PEMS (03, 04, 07, 08)~\citep{liu2022scinet}. 
13 well-established forecasting models are selected as baselines, including (1) Transformer-based models: Informer \citep{zhou2021informer}, Autoformer \citep{Wu2021}, Pyraformer \citep{liu2021pyraformer}, FEDformer \citep{zhou2022fedformer}, and Stationary \citep{liu2022non}; (2) Linear methods: TiDE \citep{das2023long} and Dlinear \citep{zeng2023transformers}; (3) TCN-based methods: TimesNet \citep{wu2022timesnet} and MSGNet \citep{cai2024msgnet}; (4) GNN-based methods: MSHyper \citep{shang2024mshyper}, CrossGNN \citep{huang2023crossgnn}, StemGNN \citep{cao2020spectral} and FourierGNN \citep{yi2024fouriergnn}.

\subsubsection{Implementation details} 
All experiments are conducted on an NVIDIA GeForce RTX 4090 24GB GPU. 
We use the Adam optimizer with a learning rate set to $10^{-4}$ and a batch size of 32. 
The loss function is MSE. %, and we train for 10 epochs with early stopping applied when appropriate. 
We set the historical review window $L$ to 96 and the forecasting window $T$ to \{96, 192, 336, 720\} or \{12, 24, 48, 96\}. 


\subsection{Forecasting Results And Analysis}
We present the forecasting results in Table~\ref{table1}, which compares the MSE and MAE across all output lengths with 9 non-GNN-based baselines. 
The best results are highlighted in \textcolor{red}{red} \textbf{bold} and the second best results are \underline{underlined} in \textcolor{blue}{blue}.
Compared to other models, MSCGrapher wins 13 times across various frequencies, numbers of variables, and real-world scenarios, while the second-best baseline wins only 7 times. 
To assess the model's generalization ability, we also calculate the average rank, where MSCGrapher achieves 1.50 and consistently outperforms other models.
Compared to Transformer-based methods, MSCGrapher has a significant performance improvement, which demonstrates that inter-series relationships cannot be ignored for MTS.
Although linear methods are advantageous for long-term forecasting, MSCGrapher still achieves performance improvements on most datasets.
Compared to the latest SOTA model (MSGNet), which also leverages multi-scale information, MSCGrapher achieves superior performance across all datasets. For example, on the Flight dataset, MSCGrapher reduces the MSE by 3.4\%; on the ETTh datasets, the MSE drops by 4.8\% and 2.2\%, respectively; and on the PEMS dataset, the improvements are 1.5\% and 7.3\%. The Flight dataset contains highly volatile air traffic data with frequent short-term fluctuations and certain periodic patterns. By integrating both strong and weak correlations, MSCGrapher enhances forecasting accuracy. The ETTh dataset exhibits clear seasonal trends and periodic fluctuations—strong correlations capture the inertial behavior of power loads, while weak correlations reflect contextual factors such as ambient temperature and temporal cycles. MSCGrapher effectively distinguishes and adaptively fuses correlations of different strengths, resulting in lower forecasting errors. In the PEMS datasets, traffic flow is characterized by spatial heterogeneity and abrupt temporal changes, making inter-node relationships particularly complex. MSCGrapher demonstrates its advantage by modeling the correlations of dynamic variations.


\begin{table*}[t]
\caption{The forecasting results of our MSCGrapher and baselines. Complete results are provided in the supplementary material.}
\centering
\renewcommand{\arraystretch}{1.78} % Adjust the row height here
\begin{adjustbox}{width=\textwidth}
\begin{tabular}{c|cccccccccccccccccccc}
\hline
Models      & \multicolumn{2}{c}{MSCGrapher}           & \multicolumn{2}{c}{MSGNet(2024)} & \multicolumn{2}{c}{DLinear(2023)} & \multicolumn{2}{c}{TimesNet(2023)} & \multicolumn{2}{c}{TiDE(2023)} & \multicolumn{2}{c}{Stationary(2022)} & \multicolumn{2}{c}{FEDformer(2022)} & \multicolumn{2}{c}{Pyraformer(2022)} & \multicolumn{2}{c}{Autoformer(2021)} & \multicolumn{2}{c}{Informer(2021)} \\ \hline
Metric      & MSE              & MAE             & MSE             & MAE            & MSE             & MAE             & MSE              & MAE             & MSE             & MAE          & MSE                  & MAE           & MSE              & MAE              & MSE               & MAE              & MSE                           & MAE                          & MSE                          & MAE                         \\ \hline
Flight      & \textbf{\textcolor{red}{0.201}}   & \textbf{\textcolor{red}{0.315}}  & \underline{\textcolor{blue}{0.208}} &  \underline{\textcolor{blue}{0.321}} & 0.233           & 0.345           & 0.265            & 0.372           & 0.239           & 0.350        & 0.560                & 0.542         & 0.418            & 0.485            & 0.448             & 0.496            & 0.238                         & 0.344                        & 0.391                        & 0.439                       \\ \hline
ETTh1       & \textbf{\textcolor{red}{0.450}}   & \textbf{\textcolor{red}{0.452}}  & 0.472           & 0.477          & \underline{\textcolor{blue}{0.456}} & \underline{\textcolor{blue}{0.452}} & 0.458            & 0.452           & 0.518           & 0.517        & 0.570                & 0.537         & 0.498            & 0.484            & 0.827             & 0.703            & 0.496                         & 0.487                        & 1.040                        & 0.820                       \\ \hline
ETTh2       & \underline{\textcolor{blue}{0.393}} & \textbf{\textcolor{red}{0.415}}  & 0.402           & 0.431          & 0.559           & 0.515           & 0.414            & 0.427           & \textbf{\textcolor{red}{0.387}}  & \underline{\textcolor{blue}{0.419}}  & 0.526                & 0.516         & 0.437            & 0.449            & 0.826             & 0.703            & 0.450                         & 0.459                        & 4.431                        & 1.729                       \\ \hline
ETTm1       & \textbf{\textcolor{red}{0.400}}   & 0.412           & 0.400           & 0.412          & 0.403           & \underline{\textcolor{blue}{0.407}} & \underline{\textcolor{blue}{0.400}} & \textbf{\textcolor{red}{0.406}}  & 0.413           & 0.415        & 0.471                & 0.456         & 0.448            & 0.452            & 0.618             & 0.607            & 0.588                         & 0.517                        & 0.961                        & 0.734                       \\ \hline
ETTm2       & \textbf{\textcolor{red}{0.287}}   & \textbf{\textcolor{red}{0.329}}  & \underline{\textcolor{blue}{0.290}} & \underline{\textcolor{blue}{0.331}} & 0.350           & 0.401           & 0.291            & 0.333           & 0.293           & 0.338        & 0.306                & 0.347         & 0.305            & 0.349            & 1.498             & 0.869            & 0.327                         & 0.371                        & 1.410                        & 0.810                       \\ \hline
weather     & \textbf{\textcolor{red}{0.255}}   & \textbf{\textcolor{red}{0.283}}  & \underline{\textcolor{blue}{0.257}} & \underline{\textcolor{blue}{0.284}} & 0.265           & 0.317           & 0.259            & 0.287           & 0.271           & 0.320        & 0.288                & 0.314         & 0.309            & 0.360            & 0.815             & 0.717            & 0.338                         & 0.382                        & 0.634                        & 0.548                       \\ \hline
electricity & 0.196            & 0.302           & 0.196           & 0.303          & 0.212           & 0.300           & \textbf{\textcolor{red}{0.193}}   & \textbf{\textcolor{red}{0.295}}  & 0.209           & \underline{\textcolor{blue}{0.295}}  & \underline{\textcolor{blue}{0.193}}  & 0.296         & 0.214            & 0.327            & 0.382             & 0.445            & 0.227                         & 0.338                        & 0.336                        & 0.397                       \\ \hline
exchange    & \underline{\textcolor{blue}{0.396}} & \underline{\textcolor{blue}{0.429}} & 0.403           & 0.432          & \textbf{\textcolor{red}{0.354}}  & \textbf{\textcolor{red}{0.414}}  & 0.416            & 0.443           & 0.400           & 0.431        & 0.461                & 0.454         & 0.519            & 0.500            & 1.377             & 1.018            & 0.613                         & 0.539                        & 1.550                        & 0.998                       \\ \hline
PEMS03      & \textbf{\textcolor{red}{0.136}}   & \textbf{\textcolor{red}{0.241}}  & \underline{\textcolor{blue}{0.138}} & \underline{\textcolor{blue}{0.243}} & 0.278           & 0.375           & 0.147            & 0.248           & 0.326           & 0.420        & 0.147                & 0.249         & 0.213            & 0.327            & 0.360             & 0.414            & 0.667                         & 0.601                        & 0.201                        & 0.300                       \\ \hline
PEMS08      & \textbf{\textcolor{red}{0.192}}   & \underline{\textcolor{blue}{0.276}} & 0.206           & 0.323          & 0.379           & 0.416           & \underline{\textcolor{blue}{0.193}} & \textbf{\textcolor{red}{0.271}}  & 0.441           & 0.464        & 0.201                & 0.276         & 0.286            & 0.358            & 0.269             & 0.292            & 0.814                         & 0.659                        & 0.313                        & 0.325                       \\ \hline
1st Count   & \multicolumn{2}{c|}{\textbf{\textcolor{red}{13}}}   & \multicolumn{2}{c|}{0}           & \multicolumn{2}{c|}{2}            & \multicolumn{2}{c|}{\underline{\textcolor{blue}{4}}}       & \multicolumn{2}{c|}{1}         & \multicolumn{2}{c|}{0}               & \multicolumn{2}{c|}{0}              & \multicolumn{2}{c|}{0}               & \multicolumn{2}{c|}{0}                                       & \multicolumn{2}{c}{0}                                      \\ \hline
Avg Rank    & \multicolumn{2}{c|}{\textbf{\textcolor{red}{1.50}}} & \multicolumn{2}{c|}{\underline{\textcolor{blue}{2.60}}}  & \multicolumn{2}{c|}{4.95}         & \multicolumn{2}{c|}{2.90}          & \multicolumn{2}{c|}{5.30}      & \multicolumn{2}{c|}{5.60}           & \multicolumn{2}{c|}{6.25}           & \multicolumn{2}{c|}{8.85}            & \multicolumn{2}{c|}{7.35}                                    & \multicolumn{2}{c}{8.75}                                   \\ \hline 

\end{tabular}
\end{adjustbox}

\label{table1}
\end{table*}


\begin{table}[t]
\caption{Forecasting results compared with GNN methods.}
\centering
\renewcommand{\arraystretch}{1.8} % Adjust row height
\begin{adjustbox}{width=\columnwidth}
\begin{tabular}{c|cccccccccc}
\hline
Datasets   & \multicolumn{2}{c}{Electricity} & \multicolumn{2}{c}{Weather} & \multicolumn{2}{c}{PEMS03} & \multicolumn{2}{c}{PEMS04} & \multicolumn{2}{c}{PEMS08} \\ \hline
Metric     & MSE            & MAE            & MSE            & MAE            & MSE            & MAE            & MSE            & MAE            & MSE            & MAE            \\ \hline 
MSCGrapher     & \textbf{\textcolor{red}{0.196}} & 0.302          & 0.255 & \textbf{\textcolor{red}{0.283}} & \textbf{\textcolor{red}{0.136}} & \textbf{\textcolor{red}{0.241}} & \textbf{\textcolor{red}{0.137}} & \textbf{\textcolor{red}{0.255}} & \textbf{\textcolor{red}{0.192}} & \textbf{\textcolor{red}{0.276}} \\ \hline 
FourierGNN & 0.228          & 0.325          & \textbf{\textcolor{red}{0.249}}          & 0.302          & 0.151          & 0.267          & 0.180          & 0.295          & 0.216          & 0.313          \\ \hline 
StemGNN    & 0.197          & \textbf{\textcolor{red}{0.300}} & 0.268          & 0.321          & 0.187          & 0.302          & 0.217          & 0.333          & 0.303          & 0.351          \\ \hline
\end{tabular}
\end{adjustbox}
\label{table2}
\end{table}

\begin{table}[h]
\caption{GNN methods results on PEMS datasets.}
\centering
\renewcommand{\arraystretch}{1.3} % Adjust row height
\begin{adjustbox}{width=\columnwidth}
\begin{tabular}{cccccccccc}
\hline
\multicolumn{2}{c}{Dataset}  & \multicolumn{2}{c}{PEMS03}     & \multicolumn{2}{c}{PEMS04}      & \multicolumn{2}{c}{PEMS07}      & \multicolumn{2}{c}{PEMS08}      \\ \hline
\multicolumn{2}{c}{Metric}   & MSE   & MAE   & MSE    & MAE & MSE    & MAE   & MSE    & MAE            \\ \hline
\multirow{3}{*}{MSCGrapher}       & 12  & \textbf{\textcolor{red}{0.078}} & \textbf{\textcolor{red}{0.184}}        & \textbf{\textcolor{red}{0.092}} & 0.207 & \textbf{\textcolor{red}{0.070}} & \textbf{\textcolor{red}{0.175}} & \textbf{\textcolor{red}{0.116}} & \textbf{\textcolor{red}{0.223}}  \\
                            & 24 & \textbf{\textcolor{red}{0.103}}         & 0.214          & \textbf{\textcolor{red}{0.109}} & 0.228 & \textbf{\textcolor{red}{0.093}} & \textbf{\textcolor{red}{0.195}} & \textbf{\textcolor{red}{0.149}} & \textbf{\textcolor{red}{0.255}}  \\
                            & 48 & \textbf{\textcolor{red}{0.151}} & \textbf{\textcolor{red}{0.257}}         & \textbf{\textcolor{red}{0.144}}          & \textbf{\textcolor{red}{0.265}} & \textbf{\textcolor{red}{0.125}} & \textbf{\textcolor{red}{0.234}} & \textbf{\textcolor{red}{0.189}} & \textbf{\textcolor{red}{0.275}}  \\ \hline
\multirow{3}{*}{MSHyper} & 12  & 0.106          & 0.207          & 0.103          & \textbf{\textcolor{red}{0.197}}         & 0.137          & 0.256          & 0.113          & 0.209            \\
                            & 24 & 0.126          & \textbf{\textcolor{red}{0.207}}         & 0.148          & \textbf{\textcolor{red}{0.148}}          & 0.245          & 0.225          & 0.230          & 0.248                \\
                            & 48 & 0.138          & 0.265          & 0.191 & 0.308          & 0.137          & 0.221          & 0.317          & 0.324                           \\ \hline
\multirow{3}{*}{CrossGNN}    & 12  & 0.094          & 0.208 & 0.158          & 0.270          & 0.085          & 0.198          & 0.148          & 0.262                \\
                            & 24 & 0.131 & 0.248 & 0.231          & 0.322          & 0.185          & 0.293          & 0.277          & 0.363                \\
                            & 48 & 0.242          & 0.343 & 0.468          & 0.475          & 0.340          & 0.414          & 0.336          & 0.407           \\ \hline
\multirow{3}{*}{FourierGNN}    & 12  & 0.087          & 0.202 & 0.112          & 0.231          & 0.073          & 0.182          & 0.143          & 0.263                \\
                            & 24 & 0.120 & 0.240 & 0.153          & 0.272          & 0.100          & 0.215          & 0.210          & 0.320                \\
                            & 48 & 0.177          & 0.294 & 0.209          & 0.321          & 0.140          & 0.258          & 0.216          & 0.311           \\ \hline
\end{tabular}
\end{adjustbox}
\label{GNN}
\end{table}




Additionally, we compare MSCGrapher with GNN-based methods using datasets consistent with the baselines.
The detailed results are shown in Tables~\ref{table2} and~\ref{GNN}. 
As shown in Table \ref{table2}, MSCGrapher significantly outperforms GNN-based methods across five datasets from different domains, especially on three traffic-related datasets, where the average MSE decreases by 18.6\%, 30.38\%, and 23.87\%, respectively.
Table~\ref{GNN} further compares MSCGrapher with state-of-the-art GNN methods on the PEMS datasets, where MSCGrapher achieves superior performance on most metrics. Due to the pronounced spatial heterogeneity and abrupt temporal changes in traffic flow within the PEMS datasets, the relationships among nodes are highly complex and dynamic. 
This fully demonstrates that the inter-series correlations in multivariate time series evolve over time, and our multi-scale correlation learning method effectively captures and handles these dynamic features.



To show more clearly the capability of MSCGrapher in modeling the inter-series correlations of MTS, we illustrate the forecasting results for a single variable of the Flight dataset, together with 5 baselines, in Figure~\ref{fig4}. 
The significant prediction deviations are marked with circles and yellow shaded areas.
As observed, MSCGrapher fits nearly all key change regions well, whereas other baselines struggle in scenarios involving drastic changes. 
Specifically, MSCGrapher produces visibly better predictions than MSGNet in the low-value range of 20--100. Moreover, within the peak range of 20--40, MSCGrapher more accurately captures the dynamic trends of the true value curve. 
MSCGrapher also outperforms DLinear in predicting certain extreme points, producing results closer to the true values. In contrast, DLinear tends to exhibit lag or smoothing effects when handling sharp fluctuations, making it difficult to precisely capture sudden changes. 
Furthermore, MSCGrapher surpasses TiDE in predicting extreme points and fitting trends within the 30--40 and 50--60 intervals, demonstrating higher fitting accuracy and improved dynamic consistency. 
Pyraformer performs poorly during peak periods and in regions with significant fluctuations, failing to accurately track pronounced changes in the data. 
Autoformer underperforms in low-value regions, struggling to capture subtle variations and resulting in significant deviations between predicted and true values.



\begin{figure}[h]
    \centering
    \begin{subfigure}{0.32\columnwidth}  % use \columnwidth to fit the two-column layout
        \centering
        \includegraphics[width=\linewidth]{MSCGrapher_fore.pdf} % subfigure 1
        \caption{} % a subfigure caption can be added here
    \end{subfigure}\hfill
    \begin{subfigure}{0.32\columnwidth}  % use \columnwidth to fit the two-column layout
        \centering
        \includegraphics[width=\linewidth]{MSGNet.pdf} % subfigure 2
        \caption{} % a subfigure caption can be added here
    \end{subfigure}\hfill
    \begin{subfigure}{0.32\columnwidth}  % use \columnwidth to fit the two-column layout
        \centering
        \includegraphics[width=\linewidth]{DLinear.pdf} % subfigure 3
        \caption{} % a subfigure caption can be added here
    \end{subfigure}
    
    \begin{subfigure}{0.32\columnwidth}  % use \columnwidth to fit the two-column layout
        \centering
        \includegraphics[width=\linewidth]{TiDE.pdf} % subfigure 4
        \caption{} % a subfigure caption can be added here
    \end{subfigure}\hfill
    \begin{subfigure}{0.32\columnwidth}  % use \columnwidth to fit the two-column layout
        \centering
        \includegraphics[width=\linewidth]{Pyraformer.pdf} % subfigure 5
        \caption{} % a subfigure caption can be added here
    \end{subfigure}\hfill
    \begin{subfigure}{0.32\columnwidth}  % use \columnwidth to fit the two-column layout
        \centering
        \includegraphics[width=\linewidth]{Autoformer.pdf} % subfigure 6
        \caption{} % a subfigure caption can be added here
    \end{subfigure}
    
    \caption{Visualization of the prediction results on the flight dataset with an output length of 96.}
    \label{fig4}
\end{figure}

\subsection{Ablation Study}
To investigate the impact of different modules in MSCGrapher, we design the following variants:
\begin{enumerate}
    \item w/o-ACLayer: The adaptive correlation learning layer is replaced by convolutions.
    \item w/o-CorGraph: The process of correlation graph learning does not have a strong or weak degree division.  
    \item w/o-MSLayer: It removes the multi-scale modeling part.
    \item w/o-Attention: The Multi-Head self-Attention Layer is removed.
\end{enumerate}

We conduct the ablation study on 5 datasets from different domains.
Table \ref{ablation_study} shows the average results of these variants across output lengths.
From Table~\ref{ablation_study}, we can see that these variants all show an increase in MSE and MAE. 
When removing the adaptive correlation learning layer, the performance is most affected on all datasets. 
On Flight and PEMS08, MSE worsens by 35.33\% and 24.41\%, and MAE by 19.30\% and 22.76\%, which demonstrates that inter-series correlations are a key factor for the MTSF task. 
Furthermore, the results of  variant 2 also indicate that manipulating the strength of the inter-series correlations in different ways can more accurately capture the implicit information.
When using a single scale instead of multi-scale partitioning, MSCGrapher suffers a significant performance degradation, where MSE worsens by 12.02\% and MAE by 8.07\% on the Flight dataset. 
This shows that periodicity is a core characteristic and that multi-scale modeling helps to extract the complex periodic patterns hidden in MTS.
The results of variant 4 show that intra-series correlations are also a crucial factor.

\begin{table}[h]
\caption{Results of the ablation study.}
\centering
\renewcommand{\arraystretch}{1.8} % Adjust row height
\begin{adjustbox}{width=\columnwidth}
\begin{tabular}{lcccccccccc}
\hline
Datasets & \multicolumn{2}{c}{Flight} & \multicolumn{2}{c}{ETTh1} & \multicolumn{2}{c}{Exchange} & \multicolumn{2}{c}{Weather} & \multicolumn{2}{c}{PEMS08} \\ \hline
Metric & MSE & MAE & MSE & MAE & MSE & MAE & MSE & MAE & MSE & MAE \\ \hline
MSCGrapher & \textbf{0.201} & \textbf{0.315} & \textbf{0.450} & \textbf{0.452} & \textbf{0.393} & \textbf{0.429} & \textbf{0.255} & \textbf{0.283} & \textbf{0.192} & \textbf{0.276} \\
w/o-ACLayer & 0.317 & 0.413 & 0.500 & 0.478 & 0.430 & 0.448 & 0.265 & 0.292 & 0.254 & 0.342 \\
w/o-CorGraph & 0.208 & 0.320 & 0.465 & 0.460 & 0.417 & 0.442 & 0.257 & 0.284 & 0.204 & 0.277 \\
w/o-MSLayer & 0.233 & 0.347 & 0.467 & 0.461 & 0.399 & 0.429 & 0.256 & 0.284 & 0.198 & 0.283 \\
w/o-Attention & 0.206 & 0.320 & 0.455 & 0.455 & 0.412 & 0.436 & 0.257 & 0.284 & 0.198 & 0.281 \\ \hline
\end{tabular}
\end{adjustbox}
% The results show the average error across all prediction lengths, with the best performance highlighted in bold.}
\label{ablation_study}
\end{table}

\subsection{Sensitivity to Hyperparameters}
We evaluate the impact of the following hyperparameters on different datasets: scale $k$, embedding dimension $c_{\text{dim}}$, and ratio. 
In these experiments, the length of the historical look-back window and the prediction length are both set to 96.
The results are presented in Figure~\ref{fig7}. 
For $k$, we can find the MSE gradually decreases on all datasets as $k$ increases.
When $k$ increases to a certain extent, the performance begins to decline. 
For these datasets, the best choice of $k$ is 3 or 5, which indicates that MTS can be represented well by several periods in most cases.  
Similar to $k$, $c_{\text{dim}}$ also presents the same trend. 
Concerning the ratio, although increasing the ratio helps capture more potential strong correlations, it also raises the risk of misinterpreting weak correlations as strong correlations, which may affect the overall performance of the model. 

\begin{figure}[h]
    \centering
    \begin{subfigure}[b]{0.3\columnwidth}
        \centering
        \includegraphics[width=\textwidth]{k.pdf}
        \caption{$k$}
        \label{k}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.3\columnwidth}
        \centering
        \includegraphics[width=\textwidth]{c_dim.pdf}
        \caption{$c_{\text{dim}}$}
        \label{cdim}
    \end{subfigure}
    \hfill
    \begin{subfigure}[b]{0.3\columnwidth}
        \centering
        \includegraphics[width=\textwidth]{ratio.pdf}
        \caption{ratio}
        \label{ratio}
    \end{subfigure}
    \caption{Sensitivity tests for $k$, $c_{\text{dim}}$, and ratio.}% Results are recorded with an input length of 96 and an output length of 96.}
    \label{fig7}
\end{figure}




\begin{table*}[t]
\caption{Comparative results of integrating our correlation learning methods into MTGNN and TEGNN.}
\centering
\renewcommand{\arraystretch}{1.8} % Adjust row height
\begin{adjustbox}{width=\textwidth}
\begin{tabular}{c|c|cccc|cccc|cccc|cccc}
\hline
Datasets &  & \multicolumn{4}{c|}{Solar-Energy} & \multicolumn{4}{c|}{Traffic} & \multicolumn{4}{c|}{Electricity} & \multicolumn{4}{c}{Exchange-Rate} \\ \hline
Model & Metric        & 3(30min)     & 6(60min)      & 12(120min)     & 24(240min)     & 3(3h)     & 6(6h)     & 12(12h)   & 24(24h)    & 3(3h)     & 6(6h)     & 12(12h)    & 24(24h)    & 3(3d)     & 6(6d)     & 12(12d)    & 24(24d)    \\ \hline
\multirow{4}{*}{MTGNN} & RSE    & 0.1778 & 0.2348 & 0.3109 & 0.4270 & 0.4162 & 0.4754 & 0.4461 & 0.4535 & 0.0745 & 0.0878 & 0.0916 & 0.0953 & 0.0194 & 0.0259 & 0.0349 & 0.0456 \\
      & RSE(Cor) & 0.1782 & 0.2362 & \textbf{0.3102} & \textbf{0.4230} & 0.4237 & \textbf{0.4688} & 0.4531 & \textbf{0.4531} & \textbf{0.0744} & \textbf{0.0856} & \textbf{0.0911} & \textbf{0.0950} & \textbf{0.0192} & \textbf{0.0258} & \textbf{0.0344} & \textbf{0.0446} \\
      & CORR   & 0.9852 & 0.9726 & 0.9509 & 0.9031 & 0.8963 & 0.8667 & 0.8794 & 0.8810 & 0.9474 & 0.9316 & 0.9278 & 0.9234 & 0.9786 & 0.9708 & 0.9551 & 0.9372 \\
      & CORR(Cor) & 0.9851 & 0.9723 & \textbf{0.9512} & \textbf{0.9058} & 0.8934 & \textbf{0.8670} & 0.8764 & \textbf{0.8818} & \textbf{0.9475} & \textbf{0.9327} & 0.9273 & \textbf{0.9237} & \textbf{0.9789} & 0.9705 & \textbf{0.9533} & 0.9362 \\
\hline
\multirow{4}{*}{TEGNN} & RSE    & 0.1824 & 0.2612 & 0.3289 & 0.4733 & 0.4421 & 0.4433 & 0.4508 & 0.4692 & 0.0774 & 0.0862 & 0.0948 & 0.0985 & 0.0178 & 0.0245 & 0.0363 & 0.0449 \\
      & RSE(Cor) & \textbf{0.1739} & \textbf{0.2298} & \textbf{0.2943} & \textbf{0.3942} & \textbf{0.4178} & 0.4505 & \textbf{0.4414} & \textbf{0.4495} & \textbf{0.0748} & \textbf{0.0862} & \textbf{0.0938} & \textbf{0.0965} & \textbf{0.0177} & 0.0255 & \textbf{0.0348} & \textbf{0.0507} \\
      & CORR   & 0.9847 & 0.9676 & 0.9379 & 0.8854 & 0.8853 & 0.8820 & 0.8743 & 0.8671 & 0.9418 & 0.9310 & 0.9225 & 0.9182 & 0.9815 & 0.9732 & 0.9566 & 0.9352 \\
      & CORR(Cor) & \textbf{0.9856} & \textbf{0.9742} & \textbf{0.9572} & \textbf{0.9183} & \textbf{0.8978} & 0.8792 & \textbf{0.8830} & \textbf{0.8781} & \textbf{0.9460} & \textbf{0.9531} & \textbf{0.9250} & \textbf{0.9201} & \textbf{0.9817} & \textbf{0.9732} & \textbf{0.9588} & \textbf{0.9385} \\
\hline
\end{tabular}
\end{adjustbox}

\label{causal_learning_results}
\end{table*}


\subsection{Learned Correlation Graph Visualization}
To clearly exhibit the learned correlations, we visualize a subset of the correlation adjacency matrices from the Flight dataset in Figure~\ref{fig5}.

Specifically, we choose three different time scales: 24h, 12h, and 6h, and compare the different correlation adjacency matrices for each time scale. 
As shown in Figure~\ref{fig5}, the correlation adjacency matrices are sparse on all time scales, which indicates that our method can effectively find the intrinsic relationships hidden in MTS.
At different time scales, we observe that the strength of correlations changes. For example, the correlation between nodes 3 and 6 (marked in red boxes) is strong at 24h and 6h. However, at a 12h time scale, the correlation becomes weak. This suggests that the variation in correlations across different time scales reveals the complex dynamic evolution of the system. If we can focus more on the strong correlations and reduce the influence of weak correlations during the evolution process, we can more effectively capture the core dynamics and key changes of the system. By increasing the weight of strong correlations, we can direct the model’s attention to relationships that remain consistently stable across multiple time scales, rather than being distracted by weak correlations that only exist briefly at specific time scales.


\begin{figure}[h]
    \centering    
    \begin{subfigure}{0.3\columnwidth}
        \centering
        \includegraphics[width=\linewidth]{correlation_adjacency_matrix_1.pdf}
        \caption{24h} % subfigure caption
        \label{fig:24h_causal}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.3\columnwidth}
        \centering
        \includegraphics[width=\linewidth]{correlation_adjacency_matrix_2.pdf}
        \caption{12h} % subfigure caption
        \label{fig:12h_causal}
    \end{subfigure}
    \hfill
    \begin{subfigure}{0.3\columnwidth}
        \centering
        \includegraphics[width=\linewidth]{correlation_adjacency_matrix_3.pdf}
        \caption{6h} % subfigure caption
        \label{fig:6h_causal}
    \end{subfigure}
    
    \caption{Visualization of the first layer correlation adjacency matrix on Flight dataset for different time scales: (a) 24h, (b) 12h, and (c) 6h.}
    \label{fig5}
\end{figure}


\begin{table}[htbp]
\caption{Model efficiency comparison on Electricity with input length 96 and output length 96.}
\centering
\renewcommand{\arraystretch}{1.8} % Adjust row height
\begin{adjustbox}{width=\columnwidth}
\begin{tabular}{c|cccc}
\hline
Models & Pred Length & GPU Memory (GB) & Training Time (ms/iter) & MSE Rank \\ \hline
MSCGrapher       & 96                   & 6.48                      & 404 ms                         & 1                \\ \hline
MSGNet           & 96                   & 6.55                      & 281 ms                         & 4                \\ \hline
TimesNet         & 96                   & 5.81                      & 532 ms                         & 3                \\ \hline
DLinear          & 96                   & 1.38                      & 15 ms                          & 5                \\ \hline
FourierGNN       & 96                   & 21.57                  & 434 ms                         & 6                \\ \hline
StemGNN          & 96                   & 7.25                   & 186 ms                         & 2                \\ \hline
\end{tabular}
\end{adjustbox}
\label{efficiency}
\end{table}



\subsection{Transferability of Correlation Learning}

In this section, we integrate the correlation learning method into GNN-based models: MTGNN \citep{wu2020connecting} and TEGNN \citep{duan2022multivariate}, to validate its transferability. 
For ease of comparison, experiments are conducted on the same MTS datasets: Solar-Energy, Traffic, Electricity, and Exchange-Rate. 
Relative Squared Error (RSE) and Empirical Correlation Coefficient (CORR) are used for evaluation. 

Table \ref{causal_learning_results} shows the comparative results before and after integrating our correlation learning method into MTGNN and TEGNN. (Cor) indicates the incorporation of our proposed correlation learning method into the respective models.
From Table \ref{causal_learning_results}, it is evident that both models gain performance improvements in the majority of scenarios, especially for TEGNN on Solar-Energy and Electricity datasets. 
After replacing the original correlation learning, MTGNN shows slight improvements across all horizons on the smaller datasets, Electricity and Exchange. 
However, in the other two larger datasets, the performance improvement is more noticeable for long-series predictions. 
TEGNN exhibits significant improvements in the first three datasets, which is likely because these datasets are collected on an hourly or minute basis, containing more time series information. 
With the replacement of its correlation learning module, TEGNN is better able to capture the complex inter-series relationships.
This indicates that our correlation learning method can effectively extract inter-series dynamic correlations from MTS.

\subsection{Computational Efficiency}
For efficiency evaluation, we select the more complex Electricity dataset to conduct a comprehensive comparison of GPU memory usage, running speed, and MSE ranking across different models under the prediction length of 96. This approach allows us to systematically assess the trade-off between accuracy and computational efficiency. To ensure fairness, all models were tested under the same conditions. The detailed results are presented in Table \ref{efficiency}.

In Table \ref{efficiency}, although MSCGrapher is not the best among all models in terms of training speed and GPU memory usage, it strikes a good balance between overall efficiency and performance. Specifically, MSCGrapher maintains a controllable level of resource consumption while achieving significantly better prediction accuracy than models such as FourierGNN, demonstrating strong modeling capability. Although TimesNet shows advantages in memory usage, its noticeably slower training speed hampers overall training efficiency. Considering both accuracy and computational cost, MSCGrapher exhibits stable and superior performance.



\section{Conclusion}
In this study, we propose a novel MTSF model, MSCGrapher, which starts from the premise that different relationships exist in MTS at various scales. 
MSCGrapher effectively captures both inter-series correlations with varying strengths and intra-series temporal correlations at different time scales by combining a multi-scale correlation learning block with multi-head self-attention. 
Extensive experiments on real-world datasets show that MSCGrapher outperforms existing models. 
When our adaptive correlation learning method is introduced, two GNN-based methods also gain better performance, which shows that modeling dynamically varying correlations is helpful for the MTSF task.


\section*{Acknowledgement}
This work is supported by the National Natural Science Foundation of China [grant number 62162062] and the Science and Technology Development Plan Project of Jilin Province [20220203127SF].
% References
\bibliography{uai2025-template}

\newpage

\onecolumn

\title{MSCGrapher: Learning Multi-Scale Dynamic Correlations for Multivariate Time Series Forecasting\\(Supplementary Material)}
\maketitle

\appendix

\section{Descriptions of Notations}
To facilitate understanding of the symbols used in this paper, a detailed summary of the key notations is provided in Table \ref{notations}.

\begin{table}[h]
\centering
\caption{Description of the key notations.}
\begin{tabular}{c|c}
\toprule
\textbf{Notation} & \textbf{Descriptions} \\
\midrule
$\mathbf{X}_t$ & Original input series \\
$\mathbf{Y}_t$ & Target output series \\
$N$ & Number of variables in the series \\
$L$ & Length of the historical window \\
$T$ & Length of the prediction window \\
$\mathbf{X}_{t,:}$ &  Time series collected at time step $t$ \\
$\mathbf{X}_{:,n}$ & Entire time series of each variable indexed by $n$ \\
$G=\mathsf{(}\mathbf{V},\mathbf{E}\mathsf{)}$ & Graph with node set $\mathbf{V}$ and edge set $\mathbf{E}$ \\
$G_{\mathrm{strong}}$ & Strong correlation graph \\
$G_{\mathrm{weak}}$ & Weak correlation graph \\
$f = \{f_1, \dots, f_k\}$ & Multi-scale representations in MTS \\
$\mathbf{A}^k_{\mathrm{strong}}$ & Adjacency matrix of the strong correlation graph at scale $k$ \\
 $\mathbf{A}^k_{\mathrm{weak}}$ & Adjacency matrix of the weak correlation graph at scale $k$ \\
$\mathbf{E}_{i \in \{m, d, w, h, t\}}$ & Embeddings for month, day, week, hour, and minute \\
$\mathbf{H}_{\mathrm{emb}}$ & Embedding of the original input series\\
$\mathbf{X}_f$ & Fast Fourier Transform of $\mathbf{H}_{\mathrm{emb}}$ \\
$\mathbf{F}$ & Overall amplitude measure \\
$p_i$ & Period corresponding to different scales \\
$\mathbf{X}^i$ & The $i$-th reshaped time series for time scale $i$ \\
$\mathbf{E}^l_1, \mathbf{E}^l_2$ & Learnable parameters at layer $l$ for source and target node embeddings \\
$\mathbf{A}^l$  & Adjacency matrices at layer $l$ \\
$\mathbf{A}^l_{\mathrm{strong}}, \mathbf{A}^l_{\mathrm{weak}}$ & Strong and weak correlation matrices at layer $l$ \\
$\mathbf{E}^l_{\mathrm{index}}$ & Edge index matrix at layer $l$ indicating connections \\
$\mathbf{A}_{\mathrm{edge}}^l$ & Edge attribute matrix at layer $l$ describing edge features \\
$\mathbf{G}_{\mathrm{data}}$ & Contains node and edge features \\
$\mathbf{B}^l$ & All-zero batch tensor \\
$\mathrm{r}$ & Source node of an edge \\
$\mathrm{c}$ & Target node of an edge \\
$\mathbf{E}_{\mathrm{rep}}$ & Node features from source and target node indices \\
$\mathbf{S}_{\mathrm{edge}}$ & Computed edge scores \\
$\mathbf{A}_{f}^l$ & Fused adjacency matrix at layer $l$ \\
$\mathbf{X}_{\mathrm{out}}^i$ & The output after fusion at scale $i$ \\
\bottomrule
\end{tabular}
\label{notations}
\end{table}




\section{Details of Implementing Multivariate Time Series Forecasting}
In this section, we summarize the detailed information on datasets, baselines, evaluation metrics, and hyperparameter settings. The code is available at \url{https://github.com/sopphia2001/MSCGrapher}.

\subsection{Datasets}
We use a total of 12 real-world datasets to evaluate MSCGrapher, covering various domains such as weather, electricity, and traffic. The specific information of the datasets is as follows:

\begin{itemize}
    \item Flight\citep{cai2024msgnet}: Contains flight data variations for seven major airports in Europe from January 2019 to December 2021, including flight data particularly related to COVID-19 (post-2020), with important information such as flight numbers, departure and destination airports, departure times, and landing times.
    \item ETT\citep{zhou2021informer}: Includes seven factors of electric transformers from July 2016 to July 2018. There are four subsets: ETTh1 and ETTh2 are recorded hourly, while ETTm1 and ETTm2 are recorded every 15 minutes.
    \item Weather\citep{Wu2021}: Includes 21 meteorological factors collected every 10 minutes throughout 2020.
    \item Electricity\citep{Wu2021}: This dataset contains electricity consumption data from the UCI Machine Learning Repository, which summarizes hourly electricity consumption of 321 customers from 2012 to 2014.
    \item Exchange-Rate\citep{Wu2021}: Collects panel data of daily exchange rates for eight countries from 1990 to 2016.
    \item PEMS: Contains data from the California public transportation network collected in 5-minute windows. We use the same four public subsets (PEMS03, PEMS04, PEMS07, and PEMS08) adopted in SCINet~\citep{liu2022scinet}.
\end{itemize}

For the multivariate time series forecasting, we set the input length to 96. The output length for the PEMS datasets is set to \{12, 24, 48, 96\}, while for other datasets, the output length is set to \{96, 192, 336, 720\}. Table~\ref{tab:dataset-details} lists the detailed information of the datasets, which is crucial for understanding the characteristics of the datasets.

\begin{table*}[h]
\caption{Detailed Information of Datasets. The frequency indicates the sampling interval of the time points.}
\centering

\begin{tabular}{c|ccccc}
\hline
Datasets      & Nodes & Prediction Length       & Train/Valid/Test Size  & Split Ratio & Frequency \\ \hline
Flight        & 7     & \{96, 192, 336, 720\}   & (18221, 2537, 5165)    & 7:1:2      & Hourly    \\ \hline
ETTh1         & 7     & \{96, 192, 336, 720\}   & (8545, 2881, 2881)     & 6:2:2      & Hourly    \\ \hline
ETTh2         & 7     & \{96, 192, 336, 720\}   & (8545, 2881, 2881)     & 6:2:2      & Hourly    \\ \hline
ETTm1         & 7     & \{96, 192, 336, 720\}   & (34465, 11521, 11521)  & 6:2:2      & 15 min    \\ \hline
ETTm2         & 7     & \{96, 192, 336, 720\}   & (34465, 11521, 11521)  & 6:2:2      & 15 min    \\ \hline
Weather       & 21    & \{96, 192, 336, 720\}   & (36792, 5271, 10540)   & 7:1:2      & 10 min    \\ \hline
Electricity   & 321   & \{96, 192, 336, 720\}   & (18317, 2633, 5261)    & 7:1:2      & Hourly    \\ \hline
Exchange-Rate & 8     & \{96, 192, 336, 720\}   & (5120, 665, 1422)      & 7:1:2      & Daily     \\ \hline
PEMS03        & 358   & \{12, 24, 48, 96\}      & (15629, 5147, 5147)    & 6:2:2      & 5 min     \\ \hline
PEMS04        & 307   & \{12, 24, 48, 96\}      & (10100, 3303, 3304)    & 6:2:2      & 5 min     \\ \hline
PEMS07        & 883   & \{12, 24, 48, 96\}      & (16839, 5550, 5550)    & 6:2:2      & 5 min     \\ \hline
PEMS08        & 170   & \{12, 24, 48, 96\}      & (10618, 3476, 3477)    & 6:2:2      & 5 min     \\ \hline
\end{tabular}

\label{tab:dataset-details}
\end{table*}

\subsection{Baselines}

We compare MSCGrapher with 13 baselines to validate its forecasting performance. We select outstanding time series forecasting models from 2021 to 2024. The specific model codes are as follows:

\begin{itemize}
    \item Informer: {\url{https://github.com/zhouhaoyi/Informer2020}}
    \item Autoformer: {\url{https://github.com/thuml/Autoformer}}
    \item Pyraformer: {\url{https://github.com/ant-research/Pyraformer}}
    \item  FEDformer: {\url{https://github.com/MAZiqing/FEDformer}}
    \item Stationary: {\url{https://github.com/thuml/Nonstationary_Transformers}}
    \item TiDE: {\url{https://github.com/google-research/google-research/tree/master/tide}}
    \item  TimesNet: {\url{https://github.com/thuml/Time-Series-Library}}
    \item  DLinear: {\url{https://github.com/honeywell21/DLinear}}
    \item MSGNet: {\url{https://github.com/YoZhibo/MSGNet}}
    \item MSHyper: {\url{https://github.com/shangzongjiang/Ada-MSHyper}}
    \item CrossGNN: {\url{https://github.com/hqh0728/CrossGNN}}
    \item StemGNN: {\url{https://github.com/microsoft/StemGNN}}
    \item FourierGNN: {\url{https://github.com/aikunyi/FourierGNN}}
\end{itemize}



\subsection{Evaluation Metrics}

In the experiments, we use Mean Squared Error (MSE) and Mean Absolute Error (MAE) as evaluation metrics. For multivariate time series, given the true values $\mathbf{Y}_t = \{x_{t}, \ldots, x_{t+T-1}\} \in \mathbb{R}^{N \times T}$ at time step $t$ and the predicted values $\widehat{\mathbf{Y}}_t = \{\widehat{x}_{t}, \ldots, \widehat{x}_{t+T-1}\} \in \mathbb{R}^{N \times T}$ for $N$ variables over the next $T$ time steps, the definitions of the metrics are as follows, where $x_{ij} \in \mathbf{Y}_t$, ${\widehat{x}_{ij}} \in \widehat{\mathbf{Y}}_t$:

\begin{equation}
\begin{split}
\mathrm{MSE} = \frac{1}{N \times T} \sum_{i=1}^{N} \sum_{j=1}^{T} \left( x_{ij} - \widehat{x}_{ij} \right)^2
\end{split}
\end{equation}

\begin{equation}
\begin{split}
\mathrm{MAE} = \frac{1}{N \times T} \sum_{i=1}^{N} \sum_{j=1}^{T} \left| x_{ij} - \widehat{x}_{ij} \right|
\end{split}
\end{equation}





\subsection{Setup and Hyperparameters}

All experiments are conducted on an RTX 4090 24GB GPU using the PyTorch framework. We use the Adam optimizer with a learning rate of $10^{-4}$ and a batch size of 32. The default loss function is MSE, with the number of training epochs set to 10 and early stopping applied where appropriate. The embedding dimension $c_\mathrm{dim}$ is set within the range \{16, 32, 64, 128, 512, 1024\}, $k$ is set within the range \{3, 5\}, and the number of graph convolution layers is set to 2. All comparison baseline models are implemented based on the benchmarks from the TimesNet\citep{wu2022timesnet} repository, which builds upon the configurations provided in the original papers or official code of each model. Specific hyperparameters for different datasets are provided in Table \ref{Hyperparameter1}.


\begin{table*}[h]
\caption{Hyperparameter settings for different datasets.}
\centering
\renewcommand{\arraystretch}{1.3} 
\begin{tabular}{c|cccccccccc}
\hline
Datasets      & Flight                    & ETTh1                    & ETTh2                  & ETTm1 & ETTm2                    & Weather                 & Electricity              & Exchange                 & PEMS03      & PEMS08     \\ \hline
Epochs        & \multicolumn{10}{c}{10}        \\ \hline
Batch size    & \multicolumn{10}{c}{32}                  \\ \hline
Loss          & \multicolumn{10}{c}{MSE}                 \\ \hline
Optimizer     & \multicolumn{10}{c}{Adam}                \\ \hline
Learning rate & \multicolumn{10}{c}{1e-4}                \\ \hline
k             & \multicolumn{1}{c|}{5}    & \multicolumn{1}{c|}{3}   & \multicolumn{1}{c|}{5} & \multicolumn{2}{c|}{3}           & \multicolumn{5}{c}{5} \\ \hline
$c_{\mathrm{dim}}$ & \multicolumn{5}{c|}{32} & \multicolumn{1}{c|}{64} & \multicolumn{1}{c|}{512} & \multicolumn{1}{c|}{64}  & \multicolumn{2}{c}{512}  \\ \hline
Ratio  & \multicolumn{10}{c}{0.25}                \\ \hline
Dropout       & \multicolumn{1}{c|}{0.05} & \multicolumn{1}{c|}{0.1} & \multicolumn{2}{c|}{0.05}      & \multicolumn{1}{c|}{0.3} & \multicolumn{2}{c|}{0.05}                          & \multicolumn{1}{c|}{0.2} & \multicolumn{2}{c}{0.05} \\ \hline
Dim of E      & \multicolumn{1}{c|}{100}  & \multicolumn{5}{c|}{10}                                                                                        & \multicolumn{1}{c|}{100} & \multicolumn{3}{c}{10}                              \\ \hline
Heads         & \multicolumn{10}{c}{8}                   \\ \hline       
\end{tabular}

\label{Hyperparameter1}
\end{table*}

\section{Implementation Details for Validating the Transferability of Correlation Learning Methods}

In this section, we summarize the detailed information on datasets, baselines, evaluation metrics, and hyperparameter settings.

\subsection{Datasets}

We use four datasets for validation. In addition to Electricity and Exchange-Rate, we also use the Solar-Energy and Traffic datasets. The specific information is as follows:

\begin{itemize}
    \item Solar-Energy: This dataset contains solar energy data collected by the National Renewable Energy Laboratory in 2007, sampled every 10 minutes from 137 photovoltaic stations in Alabama.
    \item Traffic: This dataset includes road occupancy data (ranging between 0 and 1) from the California Department of Transportation. The data is aggregated hourly from 862 sensors in the San Francisco Bay Area from 2015 to 2016.
\end{itemize}

According to the original paper, the four datasets are split chronologically into training (60\%), validation (20\%), and test sets (20\%). For validating the transferability of correlation learning methods, we set the input window to 168, and the output horizons to \{3, 6, 12, 24\}. Specifically, the prediction horizons for the Solar-Energy dataset range from 30 to 240 minutes, for Traffic and Electricity datasets range from 3 to 24 hours, and for the Exchange-Rate dataset range from 3 to 24 days. Table~\ref{tab:dataset-details2} lists the detailed information of the datasets, which is crucial for understanding their characteristics.

\begin{table*}[ht]
\caption{Detailed Information of Datasets. The frequency indicates the sampling interval of the time points.}
\renewcommand{\arraystretch}{1} 
\centering
\begin{tabular}{c|ccccc}
\hline
Datasets      & Nodes & Horizon       & Train/Valid/Test Size & Split Ratio & Frequency  \\ \hline
Solar-Energy  & 137   & \{3,6,12,24\} & (31536,10512,10512)   & 6:2:2      & 10 minutes \\ \hline
Traffic       & 862   & \{3,6,12,24\} & (10526,3509,3509)     & 6:2:2      & Hourly     \\ \hline
Electricity   & 321   & \{3,6,12,24\} & (15782,5261,5261)     & 6:2:2      & Hourly     \\ \hline
Exchange-Rate & 8     & \{3,6,12,24\} & (4553,1518,1517)      & 6:2:2      & Daily      \\ \hline
\end{tabular}

\label{tab:dataset-details2}
\end{table*}

\subsection{Baselines}

We integrate the correlation method into two different baseline models to validate the effectiveness and transferability of the proposed correlation learning methods through comparisons of accuracy before and after integration. The specific baseline model codes are as follows:

\begin{itemize}
    \item MTGNN: {\url{https://github.com/nnzhan/MTGNN}}
    \item TEGNN: {\url{https://github.com/RRRussell/MTHetGNN}}
\end{itemize}

\subsection{Evaluation Metrics}

In the experiments, we use Relative Squared Error (RSE) and Empirical Correlation Coefficient (CORR) as evaluation metrics. For multivariate time series, given the true values $\mathbf{Y}_t = \{x_{t}, \ldots, x_{t+T-1}\} \in \mathbb{R}^{N \times T}$ at time step $t$ and the predicted values $\widehat{\mathbf{Y}}_t = \{\widehat{x}_{t}, \ldots, \widehat{x}_{t+T-1}\} \in \mathbb{R}^{N \times T}$ for $N$ variables over the next $T$ time steps, the definitions of the metrics are as follows, where $x_{ij} \in \mathbf{Y}_t$, ${\widehat{x}_{ij}} \in \widehat{\mathbf{Y}}_t$:

\begin{equation}
\begin{split}
\mathrm{RSE} = \frac{\sqrt{\sum_{i=1}^T \sum_{j=1}^N \left( x_{ij} - \widehat{x}_{ij} \right)^2}}{\sqrt{\sum_{i=1}^T \sum_{j=1}^N \left( x_{ij} - \mathrm{mean}(x) \right)^2}}
\end{split}
\end{equation}

\begin{equation}
\begin{split}
\mathrm{CORR} = \frac{1}{N} \sum_{j=1}^N \frac{\sum_{i=1}^T \left( x_{ij} - \mathrm{mean}(x_{*j}) \right) \left( \widehat{x}_{ij} - \mathrm{mean}(\widehat{x}_{*j}) \right)}{\sqrt{\sum_{i=1}^T \left( x_{ij} - \mathrm{mean}(x_{*j}) \right)^2 \sum_{i=1}^T \left( \widehat{x}_{ij} - \mathrm{mean}(\widehat{x}_{*j}) \right)^2}}
\end{split}
\end{equation}





\subsection{Setup and Hyperparameters}

All experiments are conducted on an RTX 4090 24GB GPU using the PyTorch framework. We use the Adam optimizer for fine-tuning and optimize all trainable parameters through backpropagation. The learning rate is set to $10^{-3}$, and we choose L1Loss as the loss function with 30 training epochs. We integrate correlation learning methods into the baseline models with a ratio of 0.25, using the same hyperparameters as in the original papers. For detailed parameters of each baseline, please refer to \citep{wu2020connecting,duan2022multivariate}.






\section{Review Window Experiments }
To better highlight our model's performance on long series, we extend the input length to increase the historical information available to the model, evaluating its performance in handling longer temporal dependencies. We conduct experiments on five datasets with input lengths of \{48, 96, 192, 336, 512, 720\} and an output length of 96, using MSE as the evaluation metric. The results are shown in Figure~\ref{fig8}.

The figure shows that as the input length increases, MSCGrapher's overall prediction error (MSE) declines, highlighting its strength in capturing long-term trends and complex dependencies. We believe that multi-scale operations in MSCGrapher are crucial to this process. These operations divide long time series into sub-series of different scales, shortening series length, improving processing efficiency, and capturing dependencies across various time scales. This method effectively overcomes the performance fluctuations and instability issues that traditional Transformer models face with long series. Additionally, multi-scale operations enable MSCGrapher to flexibly model across different time scales, providing a comprehensive understanding of complex patterns and trends. By applying the Transformer mechanism to each sub-series, MSCGrapher can make fine-grained and coarse-grained predictions, improving overall accuracy.



In general, the size of the historical review window influences the dependencies that the model learns from historical information. 
%A robust model should show performance improvements when extending the review window, rather than  exhibiting significant fluctuations. 
To assess MSCGrapher’s effectiveness with extended historical windows, we compare it with well-performing models on the ETTm1 dataset. 
%In the experiments, We set various historical review window sizes: {48, 96, 192, 336, 512, 720} to predict the next 96 time steps.% and MSE is the evaluation metric. 
The results are presented in Figure \ref{fig6}.
We observe that as the historical review window increases,
the MSE of MSCGrapher decreases and eventually stabilizes, which indicates that it can effectively handle large amounts of historical data and extract helpful information.
This is because MSCGrapher’s capability to identify different time scales and learn the corresponding relationship graphs allows it to capture long-series time dependencies effectively.
TimesNet shows a similar overall trend to MSCGrapher but performs poorly; DLinear performs relatively well in long-term forecasting but is unsatisfactory for short-term predictions; other methods exhibit significant fluctuations as the historical review window lengthens.

\begin{figure*}[htbp]
\centering
\begin{subfigure}{0.49\textwidth}
    \centering
    \includegraphics[width=\textwidth]{review_windows.pdf}
    \caption{Forecasting results for different datasets with output length 96 and input lengths in \{48, 96, 192, 336, 512\}.}
    \label{fig8}
\end{subfigure}
\hfill
\begin{subfigure}{0.49\textwidth}
    \centering
    \includegraphics[width=\textwidth]{extend_windows.pdf}
    \caption{Forecasting results for different models on ETTm1 with output length 96 and input lengths in \{48, 96, 192, 336, 512\}.}
    \label{fig6}
\end{subfigure}
\caption{Review window experiments with different datasets (a) and models (b).}
\end{figure*}



\begin{figure*}[htbp]
    \centering
    % First row of images (1 row, 4 columns)
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS08_Static_Adjacency_Matrix.pdf}
        \caption{Static}
        \label{fig:PEMS08_Static_Adjacency}
    \end{subfigure}%
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS08_6.pdf}
        \caption{6h}
        \label{fig:PEMS08_6}
    \end{subfigure}%
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS08_12.pdf}
        \caption{12h}
        \label{fig:PEMS08_12}
    \end{subfigure}%
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS08_Correlation_24.pdf}
        \caption{24h}
        \label{fig:PEMS08_24}
    \end{subfigure}%
    \caption{Visualization of the adjacency matrix for the top 50 nodes in the PEMS08 dataset, showcasing the learnable adjacency matrices at different scales in the first layer and the preset static adjacency matrix. The preset static adjacency matrix fails to capture the correlations between time series with strong similarities.}
    \label{PEMS08}
\end{figure*}


\begin{figure*}[htbp]
    \centering
    % First row of images (1 row, 4 columns)
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS03_Static_Adjacency_Matrix.pdf}
        \caption{Static}
        \label{fig:PEMS03_Static_Adjacency}
    \end{subfigure}%
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS03_1.pdf}
        \caption{6h}
        \label{fig:PEMS03_6}
    \end{subfigure}%
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS03_2.pdf}
        \caption{12h}
        \label{fig:PEMS03_12}
    \end{subfigure}%
    \begin{subfigure}[b]{0.25\textwidth}
        \includegraphics[width=\textwidth]{PEMS03_3.pdf}
        \caption{24h}
        \label{fig:PEMS03_24}
    \end{subfigure}%

    \caption{Visualization of the adjacency matrix for the top 50 nodes in the PEMS03 dataset, showcasing the learnable adjacency matrices at different scales in the first layer and the preset static adjacency matrix. The preset static adjacency matrix fails to capture the correlations between time series with strong similarities.}
    \label{PEMS03}
\end{figure*}


\section{More Learned Graph Visualization}
To illustrate the specific role of the information obtained from the correlation graph, we provide additional visualization examples to demonstrate its advantages, as shown in Figures \ref{PEMS08} and \ref{PEMS03}. Taking the PEMS dataset as an example, which includes an adjacency matrix based on predefined distances, we compare it with the correlation matrix learned by our model to further validate the effectiveness of the correlation information.

Specifically, as seen in Figures \ref{PEMS08} and \ref{PEMS03}, the learned correlation matrix is much sparser, indicating that MSCGrapher relies on a more concise set of inter-series relationships during prediction. At the same time, the learned adjacency matrix captures connections between nodes that are physically distant but exhibit high similarity in their time series behavior, as highlighted by the red box in the figures. To deepen the understanding of the model’s learning mechanism, we also visualize the time series corresponding to node pairs with high weights in the adjacency matrix, as shown in Figure \ref{variates}.





\begin{figure*}[htbp]
\label{series}
    \centering
    % First row of images (1 column)
    \begin{subfigure}[b]{\textwidth}
        \centering
        \includegraphics[width=0.5\textwidth]{seq2_18.pdf}
        \label{fig:seq2_18}
    \end{subfigure}%

    % Second row of images (1 column)
    \begin{subfigure}[b]{\textwidth}
        \centering
        \includegraphics[width=0.5\textwidth]{seq20_40.pdf}
        \label{fig:seq20_40}
    \end{subfigure}%

    % Third row of images (1 column)
    \begin{subfigure}[b]{\textwidth}
        \centering
        \includegraphics[width=0.5\textwidth]{seq3_33.pdf}
        \label{fig:seq3_33}
    \end{subfigure}%

    \caption{Visualization of time series for node pairs with higher values in the learnable adjacency matrix of the PEMS03.}
    \label{variates}
\end{figure*}







\section{More Forecasting Results Visualization}

To more intuitively demonstrate MSCGrapher's outstanding performance, we present additional visual results in the figures. Figure \ref{fig:results_sensitivity_tests} shows the forecasting results for the Flight dataset with an input length of 96 and output lengths of \{96, 192, 336, 720\}. Figures \ref{fig:exchange} and \ref{fig:weather} display the forecasting results for different models on the Exchange and Weather datasets, with an input length of 96 and output length of 96. It can be observed that, across these two datasets, the overall prediction accuracy of all models does not reach an ideal level. However, MSCGrapher maintains relatively high accuracy within the 0–100 prediction interval and demonstrates a more stable forecasting trend than other models in subsequent time periods, indicating its strong capability to model sequence dependencies even when faced with highly volatile data.



\begin{figure*}[htbp]
    \centering
    % First row of images
    \begin{subfigure}[b]{0.5\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSCGrapher_96.pdf}
        \caption{Prediction Length $T=96$}
        \label{fig:Prediction_Length_T_96}
    \end{subfigure}%
    \begin{subfigure}[b]{0.5\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSCGrapher_192.pdf}
        \caption{Prediction Length $T=192$}
        \label{fig:Prediction_Length_T_192}
    \end{subfigure}\\[6ex] % Add space between rows
    % Second row of images
    \begin{subfigure}[b]{0.5\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSCGrapher_336.pdf}
        \caption{Prediction Length $T=336$}
        \label{fig:Prediction_Length_T_336}
    \end{subfigure}%
    \begin{subfigure}[b]{0.5\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSCGrapher_720.pdf}
        \caption{Prediction Length $T=720$}
        \label{fig:Prediction_Length_T_720}
    \end{subfigure}
    \caption{Visualization of the prediction results for the Flight dataset. The input length is 96.}
    \label{fig:results_sensitivity_tests}
\end{figure*}


\section{All Forecasting Results}

In this section, we present the complete results of multivariate time series forecasting. We use 10 datasets and compare MSCGrapher with 12 deep learning models. The best results are highlighted in \textcolor{red}{red} \textbf{bold} and the second best results are \underline{underlined} in \textcolor{blue}{blue}.
From Table~\ref{full results 1}, MSCGrapher demonstrates outstanding performance in the MTSF task. Specifically, MSCGrapher achieves the best performance 46 times throughout the forecasting results, far surpassing other models. Additionally, compared to GNN models in Table~\ref{GNN results}, MSCGrapher also achieves more advanced performance.








\begin{figure*}[h]
    \centering
    % First row of images
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSCGrapher_exchange.pdf}
        \caption{MSCGrapher}
        \label{fig:MSCGrapher}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSGNet_exchange.pdf}
        \caption{MSGNet}
        \label{fig:MSGNet}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{DLinear_exchange.pdf}
        \caption{DLinear}
        \label{fig:DLinear}
    \end{subfigure}\\[6ex] % Add space if needed
    % Second row of images
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{TiDE_exchange.pdf}
        \caption{TiDE}
        \label{fig:TiDE}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{Pyraformer_exchange.pdf}
        \caption{Pyraformer}
        \label{fig:Pyraformer}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{Autoformer_exchange.pdf}
        \caption{Autoformer}
        \label{fig:Autoformer}
    \end{subfigure}%

    \caption{Visualization of forecasting results for exchange dataset with an input length of 96 and output length of 96.}
    \label{fig:exchange}
\end{figure*}

\begin{figure*}[htbp]
    \centering
    % First row of images
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSCGrapher_weather.pdf}
        \caption{MSCGrapher}
        \label{fig:MSCGrapher_weather}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{MSGNet_weather.pdf}
        \caption{MSGNet}
        \label{fig:MSGNet_weather}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{DLinear_weather.pdf}
        \caption{DLinear}
        \label{fig:DLinear_weather}
    \end{subfigure}\\[6ex] % Add space if needed
    % Second row of images
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{TiDE_weather.pdf}
        \caption{TiDE}
        \label{fig:TiDE_weather}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{Pyraformer_weather.pdf}
        \caption{Pyraformer}
        \label{fig:Pyraformer_weather}
    \end{subfigure}%
    \begin{subfigure}[b]{0.3\textwidth}
        \centering
        \includegraphics[width=\textwidth]{Autoformer_weather.pdf}
        \caption{Autoformer}
        \label{fig:Autoformer_weather}
    \end{subfigure}%
    
    \caption{Visualization of forecasting results for weather dataset with an input length of 96 and output length of 96.}
    \label{fig:weather}
\end{figure*}



\begin{table*}[htbp]
\caption{The best results for MSCGrapher compared to GNN models are highlighted in red bold.}
\centering
\renewcommand{\arraystretch}{1} 

\begin{tabular}{cccccccccccc}
\hline
\multicolumn{2}{c}{Dataset}       & \multicolumn{2}{c}{Electricity} & \multicolumn{2}{c}{Weather}     & \multicolumn{2}{c}{PEMS03}      & \multicolumn{2}{c}{PEMS04}      & \multicolumn{2}{c}{PEMS08}      \\ \hline
\multicolumn{2}{c}{Metric}        & MSE            & MAE            & MSE            & MAE            & MSE            & MAE            & MSE            & MAE            & MSE            & MAE            \\ \hline
\multirow{4}{*}{ours}       & 96  & \textbf{\textcolor{red}{0.165}} & 0.274          & \textbf{\textcolor{red}{0.165}} & \textbf{\textcolor{red}{0.214}} & \textbf{\textcolor{red}{0.078}} & \textbf{\textcolor{red}{0.184}} & \textbf{\textcolor{red}{0.092}} & \textbf{\textcolor{red}{0.207}} & \textbf{\textcolor{red}{0.116}} & \textbf{\textcolor{red}{0.223}} \\
                            & 192 & 0.187          & 0.294          & \textbf{\textcolor{red}{0.215}} & \textbf{\textcolor{red}{0.257}} & \textbf{\textcolor{red}{0.103}} & \textbf{\textcolor{red}{0.214}} & \textbf{\textcolor{red}{0.109}} & \textbf{\textcolor{red}{0.228}} & \textbf{\textcolor{red}{0.149}} & \textbf{\textcolor{red}{0.255}} \\
                            & 336 & \textbf{\textcolor{red}{0.198}} & 0.307          & 0.275          & \textbf{\textcolor{red}{0.301}} & \textbf{\textcolor{red}{0.151}} & \textbf{\textcolor{red}{0.257}} & \textbf{\textcolor{red}{0.144}} & \textbf{\textcolor{red}{0.265}} & \textbf{\textcolor{red}{0.189}} & \textbf{\textcolor{red}{0.275}} \\
                            & 720 & \textbf{\textcolor{red}{0.232}} & \textbf{\textcolor{red}{0.333}} & 0.363          & \textbf{\textcolor{red}{0.359}} & \textbf{\textcolor{red}{0.213}} & \textbf{\textcolor{red}{0.309}} & \textbf{\textcolor{red}{0.201}} & \textbf{\textcolor{red}{0.319}} & \textbf{\textcolor{red}{0.313}} & \textbf{\textcolor{red}{0.350}} \\ \hline
\multirow{4}{*}{FourierGNN} & 96  & 0.211          & 0.307          & 0.177          & 0.240          & 0.087          & 0.202          & 0.112          & 0.231          & 0.143          & 0.263          \\
                            & 192 & 0.214          & 0.312          & 0.218          & 0.279          & 0.120          & 0.240          & 0.153          & 0.272          & 0.210          & 0.320          \\
                            & 336 & 0.227          & 0.325          & \textbf{\textcolor{red}{0.265}} & 0.318          & 0.177          & 0.294          & 0.209          & 0.321          & 0.216          & 0.311          \\
                            & 720 & 0.260          & 0.354          & \textbf{\textcolor{red}{0.336}} & 0.370          & 0.218          & 0.333          & 0.247          & 0.354          & 0.294          & 0.356          \\ \hline
\multirow{4}{*}{StemGNN}    & 96  & 0.165          & \textbf{\textcolor{red}{0.267}} & 0.181          & 0.250          & 0.119          & 0.244          & 0.144          & 0.276          & 0.246          & 0.319          \\
                            & 192 & \textbf{\textcolor{red}{0.180}} & \textbf{\textcolor{red}{0.283}} & 0.226          & 0.289          & 0.179          & 0.305          & 0.188          & 0.317          & 0.281          & 0.337          \\
                            & 336 & 0.200          & \textbf{\textcolor{red}{0.306}} & 0.287          & 0.338          & 0.191          & 0.303          & 0.234          & 0.342          & 0.305          & 0.356          \\
                            & 720 & 0.243          & 0.345          & 0.379          & 0.406          & 0.258          & 0.355          & 0.303          & 0.396          & 0.380          & 0.393          \\ \hline
\end{tabular}


\label{GNN results}
\end{table*}




\begin{table*}[h]
\caption{The complete results of multivariate time series forecasting. For the PEMS dataset, the output lengths are \{12, 24, 36, 48\}, and for other datasets, the output lengths are \{96, 192, 336, 720\}.}\label{full results 1}
\centering
\renewcommand{\arraystretch}{1.8} 
\resizebox{\textwidth}{!}{%
\begin{tabular}{cccccccccccccccccccccc}
\hline
\multicolumn{2}{c}{Models}             & \multicolumn{2}{c}{OURS}                                                      & \multicolumn{2}{c}{MSGNet(2024)}                                        & \multicolumn{2}{c}{Dlinear(2023)}                                             & \multicolumn{2}{c}{TimesNet(2023)}                                            & \multicolumn{2}{c}{TiDE(2023)}                                                & \multicolumn{2}{c}{Stationary(2022)}                                          & \multicolumn{2}{c}{FEDformer(2022)} & \multicolumn{2}{c}{Pyraformer(2022)} & \multicolumn{2}{c}{Autoformer(2021)}                                    & \multicolumn{2}{c}{Informer(2021)} \\ \hline
Datasets                      & Metric & MSE                                   & MAE                                   & MSE                                & MAE                                & MSE                                   & MAE                                   & MSE                                   & MAE                                   & MSE                                   & MAE                                   & MSE                                   & MAE                                   & MSE              & MAE              & MSE               & MAE              & MSE                                & MAE                                & MSE              & MAE             \\ \hline
                              & 96     & \textbf{\textcolor{red}{0.178}} & \textbf{\textcolor{red}{0.294}} & \underline{\textcolor{blue}{0.183}} & \underline{\textcolor{blue}{0.300}} & 0.221                                 & 0.337                                 & 0.237                                 & 0.350                                 & 0.224                                 & 0.340                                 & 0.386                                 & 0.461                                 & 0.360            & 0.452            & 0.436             & 0.496            & 0.204                              & 0.319                              & 0.333            & 0.405           \\
                              & 192    & \textbf{\textcolor{red}{0.183}} & \textbf{\textcolor{red}{0.301}} & \underline{\textcolor{blue}{0.189}} & \underline{\textcolor{blue}{0.306}} & 0.220                                 & 0.336                                 & 0.224                                 & 0.337                                 & 0.227                                 & 0.342                                 & 0.422                                 & 0.478                                 & 0.397            & 0.474            & 0.437             & 0.492            & 0.200                              & 0.314                              & 0.358            & 0.421           \\
                              & 336    & \textbf{\textcolor{red}{0.201}} & \textbf{\textcolor{red}{0.315}} & 0.207                              & 0.320                              & 0.229                                 & 0.342                                 & 0.289                                 & 0.394                                 & 0.234                                 & 0.346                                 & 0.544                                 & 0.533                                 & 0.492            & 0.527            & 0.444             & 0.494            & \underline{\textcolor{blue}{0.201}} & \underline{\textcolor{blue}{0.318}} & 0.398            & 0.446           \\
\multirow{-4}{*}{Flight}      & 720    & \textbf{\textcolor{red}{0.243}} & \textbf{\textcolor{red}{0.351}} & \underline{\textcolor{blue}{0.252}} & \underline{\textcolor{blue}{0.358}} & 0.263                                 & 0.366                                 & 0.310                                 & 0.408                                 & 0.270                                 & 0.371                                 & 0.890                                 & 0.697                                 & 0.424            & 0.488            & 0.476             & 0.504            & 0.345                              & 0.426                              & 0.476            & 0.484           \\ \hline
                              & 96     & 0.393                                 & 0.410                                 & 0.423                              & 0.440                              & \underline{\textcolor{blue}{0.386}}    & \textbf{\textcolor{red}{0.400}} & \textbf{\textcolor{red}{0.384}} & \underline{\textcolor{blue}{0.402}}    & 0.427                                 & 0.450                                 & 0.513                                 & 0.491                                 & 0.395            & 0.424            & 0.664             & 0.612            & 0.449                              & 0.459                              & 0.865            & 0.713           \\
                              & 192    & 0.439                                 & 0.443                                 & 0.445                              & 0.469                              & \underline{\textcolor{blue}{0.437}}    & \underline{\textcolor{blue}{0.432}}    & \textbf{\textcolor{red}{0.436}} & \textbf{\textcolor{red}{0.429}} & 0.472                                 & 0.486                                 & 0.534                                 & 0.504                                 & 0.469            & 0.470            & 0.790             & 0.681            & 0.500                              & 0.482                              & 1.008            & 0.792           \\
                              & 336    & \textbf{\textcolor{red}{0.480}} & \underline{\textcolor{blue}{0.468}}    & \underline{\textcolor{blue}{0.481}} & 0.473                              & 0.481                                 & \textbf{\textcolor{red}{0.459}} & 0.491                                 & 0.475                                 & 0.527                                 & 0.527                                 & 0.588                                 & 0.535                                 & 0.530            & 0.499            & 0.891             & 0.738            & 0.521                              & 0.496                              & 1.107            & 0.809           \\
\multirow{-4}{*}{ETTh1}       & 720    & \textbf{\textcolor{red}{0.488}} & \textbf{\textcolor{red}{0.485}} & 0.540                              & 0.524                              & 0.519                                 & 0.516                                 & 0.521                                 & \underline{\textcolor{blue}{0.500}}    & 0.644                                 & 0.605                                 & 0.643                                 & 0.616                                 & 0.598            & 0.544            & 0.963             & 0.782            & \underline{\textcolor{blue}{0.514}} & 0.512                              & 1.181            & 0.965           \\ \hline
                              & 96     & \underline{\textcolor{blue}{0.326}}    & \underline{\textcolor{blue}{0.370}}    & 0.348                              & 0.399                              & 0.333                                 & 0.387                                 & 0.340                                 & 0.374                                 & \textbf{\textcolor{red}{0.304}} & \textbf{\textcolor{red}{0.359}} & 0.476                                 & 0.458                                 & 0.358            & 0.397            & 0.645             & 0.597            & 0.346                              & 0.388                              & 3.755            & 1.525           \\
                              & 192    & 0.407                                 & \textbf{\textcolor{red}{0.413}} & 0.404                              & 0.431                              & 0.477                                 & 0.476                                 & \underline{\textcolor{blue}{0.402}}    & \underline{\textcolor{blue}{0.414}}    & \textbf{\textcolor{red}{0.394}} & 0.422                                 & 0.512                                 & 0.493                                 & 0.429            & 0.439            & 0.788             & 0.683            & 0.456                              & 0.452                              & 5.602            & 1.931           \\
                              & 336    & \underline{\textcolor{blue}{0.422}}    & \underline{\textcolor{blue}{0.434}}    & 0.435                              & 0.443                              & 0.594                                 & 0.541                                 & 0.452                                 & 0.452                                 & \textbf{\textcolor{red}{0.385}} & \textbf{\textcolor{red}{0.421}} & 0.552                                 & 0.551                                 & 0.496            & 0.487            & 0.907             & 0.747            & 0.482                              & 0.486                              & 4.721            & 1.835           \\
\multirow{-4}{*}{ETTh2}       & 720    & \textbf{\textcolor{red}{0.416}} & \textbf{\textcolor{red}{0.441}} & \underline{\textcolor{blue}{0.421}} & \underline{\textcolor{blue}{0.451}} & 0.831                                 & 0.657                                 & 0.462                                 & 0.468                                 & 0.463                                 & 0.475                                 & 0.562                                 & 0.560                                 & 0.463            & 0.474            & 0.963             & 0.783            & 0.515                              & 0.511                              & 3.647            & 1.625           \\ \hline
                              & 96     & \textbf{\textcolor{red}{0.323}} & \textbf{\textcolor{red}{0.368}} & \underline{\textcolor{blue}{0.326}}                              & \underline{\textcolor{blue}{0.371}}                              & 0.345                                 & 0.372                                 & 0.338                                 & 0.375                                 & 0.356    & 0.381    & 0.386                                 & 0.398                                 & 0.379            & 0.419            & 0.543             & 0.510            & 0.505                              & 0.475                              & 0.672            & 0.571           \\
                              & 192    & \textbf{\textcolor{red}{0.374}} & 0.396 & 0.376 & 0.397 & 0.380                                 & \underline{\textcolor{blue}{0.389}}                                 & \underline{\textcolor{blue}{0.374}}                               & \textbf{\textcolor{red}{0.387}}                                 & 0.391                                 & 0.399                                 & 0.459                                 & 0.444                                 & 0.426            & 0.441            & 0.557             & 0.537            & 0.553                              & 0.496                              & 0.795            & 0.669           \\
                              & 336    & 0.421 & 0.426 & 0.417 & 0.421 & \underline{\textcolor{blue}{0.413}}                                 & \underline{\textcolor{blue}{0.413}}                                 & \textbf{\textcolor{red}{0.410}}                                 & \textbf{\textcolor{red}{0.411}}                                 & 0.424                                 & 0.423                                 & 0.495                                 & 0.464                                 & 0.445            & 0.459            & 0.745             & 0.655            & 0.621                              & 0.537                              & 1.212            & 0.871           \\
\multirow{-4}{*}{ETTm1}       & 720    & 0.483    & 0.461 & 0.482                              & 0.459                              & \textbf{\textcolor{red}{0.474}}                                 & \underline{\textcolor{blue}{0.453}}        & \underline{\textcolor{blue}{0.478}}                         & \textbf{\textcolor{red}{0.450}} & 0.480    & 0.456                                 & 0.543                                 & 0.516                                 & 0.543                                 & 0.490            & 0.908            & 0.724             & 0.671            & 0.561                              & 1.166                              & 0.823                  \\ \hline
                              & 96     & \textbf{\textcolor{red}{0.178}} & \textbf{\textcolor{red}{0.260}} & 0.184 & 0.267 & 0.193                                 & 0.292                                 & 0.187                                 & 0.267                                 & \underline{\textcolor{blue}{0.182}}                                 & \underline{\textcolor{blue}{0.264}}                                 & 0.192                                 & 0.274                                 & 0.203            & 0.287            & 0.435             & 0.507            & 0.255                              & 0.339                              & 0.365            & 0.453           \\
                              & 192    & \textbf{\textcolor{red}{0.248}} & \textbf{\textcolor{red}{0.307}} & \underline{\textcolor{blue}{0.248}} & \underline{\textcolor{blue}{0.307}} & 0.284                                 & 0.362                                 & 0.249                                 & 0.309                                 & 0.256                                 & 0.323                                 & 0.280                                 & 0.339                                 & 0.269            & 0.328            & 0.730             & 0.673            & 0.281                              & 0.340                              & 0.533            & 0.563           \\
                              & 336    & \textbf{\textcolor{red}{0.311}} & \textbf{\textcolor{red}{0.345}} & \underline{\textcolor{blue}{0.312}} & \underline{\textcolor{blue}{0.346}} & 0.369                                 & 0.427                                 & 0.321                                 & 0.351                                 & 0.313                                 & 0.354                                 & 0.334                                 & 0.361                                 & 0.325            & 0.366            & 1.201             & 0.845            & 0.339                              & 0.372                              & 1.363            & 0.887           \\
\multirow{-4}{*}{ETTm2}       & 720    & \underline{\textcolor{blue}{0.410}}    & \textbf{\textcolor{red}{0.402}} & 0.414                              & 0.404                              & 0.554                                 & 0.522                                 & \textbf{\textcolor{red}{0.408}} & \underline{\textcolor{blue}{0.403}}    & 0.419                                 & 0.410                                 & 0.417                                 & 0.413                                 & 0.421            & 0.415            & 3.625             & 1.451            & 0.433                              & 0.432                              & 3.379            & 1.338           \\ \hline
                              & 96     & \textbf{\textcolor{red}{0.165}} & \textbf{\textcolor{red}{0.214}} & \underline{\textcolor{blue}{0.165}} & \underline{\textcolor{blue}{0.214}} & 0.196                                 & 0.255                                 & 0.172                                 & 0.220                                 & 0.202                                 & 0.261                                 & 0.173                                 & 0.223                                 & 0.217            & 0.296            & 0.896             & 0.556            & 0.266                              & 0.336                              & 0.300            & 0.384           \\
                              & 192    & \textbf{\textcolor{red}{0.215}} & \textbf{\textcolor{red}{0.257}} & \underline{\textcolor{blue}{0.215}} & \underline{\textcolor{blue}{0.258}} & 0.237                                 & 0.296                                 & 0.219                                 & 0.261                                 & 0.242                                 & 0.298                                 & 0.245                                 & 0.285                                 & 0.276            & 0.336            & 0.622             & 0.624            & 0.307                              & 0.367                              & 0.598            & 0.544           \\
                              & 336    & \textbf{\textcolor{red}{0.275}} & \textbf{\textcolor{red}{0.301}} & \underline{\textcolor{blue}{0.276}} & \underline{\textcolor{blue}{0.301}} & 0.283                                 & 0.335                                 & 0.280                                 & 0.306                                 & 0.287                                 & 0.335                                 & 0.321                                 & 0.338                                 & 0.339            & 0.380            & 0.739             & 0.753            & 0.359                              & 0.395                              & 0.578            & 0.523           \\
\multirow{-4}{*}{weather}     & 720    & 0.363                                 & \textbf{\textcolor{red}{0.359}} & 0.371                              & 0.362                              & \textbf{\textcolor{red}{0.345}} & 0.381                                 & 0.365                                 & \underline{\textcolor{blue}{0.359}}    & \underline{\textcolor{blue}{0.351}}    & 0.386                                 & 0.414                                 & 0.410                                 & 0.403            & 0.428            & 1.004             & 0.934            & 0.419                              & 0.428                              & 1.059            & 0.741           \\ \hline
                              & 96     & \textbf{\textcolor{red}{0.165}} & \underline{\textcolor{blue}{0.274}}    & 0.169                              & 0.279                              & 0.197                                 & 0.282                                 & \underline{\textcolor{blue}{0.168}}    & \textbf{\textcolor{red}{0.272}} & 0.194                                 & 0.277                                 & 0.169                                 & 0.274                                 & 0.193            & 0.308            & 0.386             & 0.449            & 0.201                              & 0.317                              & 0.274            & 0.368           \\
                              & 192    & 0.187                                 & 0.294                                 & 0.188                              & 0.296                              & 0.196                                 & \underline{\textcolor{blue}{0.285}}    & \underline{\textcolor{blue}{0.184}}    & 0.289                                 & 0.193                                 & \textbf{\textcolor{red}{0.280}} & \textbf{\textcolor{red}{0.182}} & 0.286                                 & 0.201            & 0.315            & 0.386             & 0.443            & 0.222                              & 0.334                              & 0.396            & 0.386           \\
                              & 336    & \textbf{\textcolor{red}{0.198}} & 0.307                                 & 0.199                              & 0.307                              & 0.209                                 & 0.301                                 & \underline{\textcolor{blue}{0.198}}    & \underline{\textcolor{blue}{0.300}}    & 0.206                                 & \textbf{\textcolor{red}{0.296}} & 0.200                                 & 0.304                                 & 0.214            & 0.329            & 0.378             & 0.443            & 0.231                              & 0.338                              & 0.300            & 0.394           \\
\multirow{-4}{*}{electricity} & 720    & 0.232                                 & 0.333                                 & 0.227                              & 0.330                              & 0.245                                 & 0.333                                 & \textbf{\textcolor{red}{0.220}} & \textbf{\textcolor{red}{0.320}} & 0.242                                 & 0.328                                 & \underline{\textcolor{blue}{0.222}}    & \underline{\textcolor{blue}{0.321}}    & 0.246            & 0.355            & 0.376             & 0.445            & 0.254                              & 0.361                              & 0.373            & 0.439           \\ \hline
                              & 96     & \underline{\textcolor{blue}{0.099}}    & \underline{\textcolor{blue}{0.227}}    & 0.105                              & 0.231                              & \textbf{\textcolor{red}{0.088}} & \textbf{\textcolor{red}{0.218}} & 0.107                                 & 0.234                                 & 0.107                                 & 0.233                                 & 0.111                                 & 0.237                                 & 0.148            & 0.278            & 1.093             & 0.884            & 0.197                              & 0.323                              & 0.847            & 0.752           \\
                              & 192    & \underline{\textcolor{blue}{0.193}}    & \textbf{\textcolor{red}{0.315}} & 0.196                              & 0.318                              & \textbf{\textcolor{red}{0.176}} & \underline{\textcolor{blue}{0.315}}    & 0.226                                 & 0.344                                 & 0.201                                 & 0.323                                 & 0.219                                 & 0.335                                 & 0.271            & 0.380            & 1.085             & 0.976            & 0.300                              & 0.369                              & 1.204            & 0.895           \\
                              & 336    & 0.369                                 & 0.442                                 & 0.370                              & 0.442                              & \textbf{\textcolor{red}{0.313}} & \textbf{\textcolor{red}{0.427}} & 0.367                                 & 0.448                                 & \underline{\textcolor{blue}{0.351}}    & \underline{\textcolor{blue}{0.432}}    & 0.421                                 & 0.476                                 & 0.460            & 0.500            & 1.597             & 1.090            & 0.509                              & 0.524                              & 1.672            & 1.036           \\
\multirow{-4}{*}{exchange}    & 720    & \underline{\textcolor{blue}{0.923}}    & \underline{\textcolor{blue}{0.730}}    & 0.940                              & 0.738                              & \textbf{\textcolor{red}{0.839}} & \textbf{\textcolor{red}{0.695}} & 0.964                                 & 0.746                                 & 0.940                                 & 0.735                                 & 1.092                                 & 0.769                                 & 1.195            & 0.841            & 1.735             & 1.124            & 1.447                              & 0.941                              & 2.478            & 1.310           \\ \hline
                              & 12     & \textbf{\textcolor{red}{0.078}} & \textbf{\textcolor{red}{0.184}} & \underline{\textcolor{blue}{0.079}} & \underline{\textcolor{blue}{0.186}} & 0.122                                 & 0.243                                 & 0.085                                 & 0.192                                 & 0.178                                 & 0.305                                 & 0.081                                 & 0.188                                 & 0.126            & 0.251            & 0.152             & 0.253            & 0.272                              & 0.385                              & 0.183            & 0.284           \\
                              & 24     & \textbf{\textcolor{red}{0.103}} & \textbf{\textcolor{red}{0.214}} & \underline{\textcolor{blue}{0.104}} & 0.215                              & 0.201                                 & 0.317                                 & 0.118                                 & 0.223                                 & 0.257                                 & 0.371                                 & 0.105                                 & \underline{\textcolor{blue}{0.214}}    & 0.149            & 0.275            & 0.186             & 0.290            & 0.334                              & 0.440                              & 0.193            & 0.293           \\
                              & 36     & \textbf{\textcolor{red}{0.151}} & \textbf{\textcolor{red}{0.257}} & \underline{\textcolor{blue}{0.151}} & \underline{\textcolor{blue}{0.257}} & 0.333                                 & 0.425                                 & 0.155                                 & 0.260                                 & 0.379                                 & 0.463                                 & 0.154                                 & 0.257                                 & 0.227            & 0.348            & 0.520             & 0.526            & 1.032                              & 0.782                              & 0.202            & 0.304           \\
\multirow{-4}{*}{PEMS03}      & 48     & \textbf{\textcolor{red}{0.213}} & \textbf{\textcolor{red}{0.309}} & \underline{\textcolor{blue}{0.218}} & \underline{\textcolor{blue}{0.313}} & 0.457                                 & 0.515                                 & 0.228                                 & 0.317                                 & 0.490                                 & 0.539                                 & 0.247                                 & 0.336                                 & 0.348            & 0.434            & 0.584             & 0.590            & 1.031                              & 0.796                              & 0.225            & 0.319           \\ \hline
                              & 12     & 0.116                                 & 0.223                                 & 0.116                              & 0.224                              & 0.154                                 & 0.276                                 & \underline{\textcolor{blue}{0.112}}    & \underline{\textcolor{blue}{0.212}}    & 0.227                                 & 0.343                                 & \textbf{\textcolor{red}{0.109}} & \textbf{\textcolor{red}{0.207}} & 0.173            & 0.273            & 0.216             & 0.246            & 0.436                              & 0.485                              & 0.297            & 0.313           \\
                              & 24     & 0.149                                 & 0.255                                 & 0.149                              & 0.255                              & 0.248                                 & 0.353                                 & \underline{\textcolor{blue}{0.141}}    & \underline{\textcolor{blue}{0.238}}    & 0.318                                 & 0.409                                 & \textbf{\textcolor{red}{0.140}} & \textbf{\textcolor{red}{0.236}} & 0.210            & 0.301            & 0.249             & 0.267            & 0.467                              & 0.502                              & 0.321            & 0.317           \\
                              & 36     & \textbf{\textcolor{red}{0.189}} & \textbf{\textcolor{red}{0.275}} & \underline{\textcolor{blue}{0.196}} & 0.285                              & 0.440                                 & 0.470                                 & 0.198                                 & \underline{\textcolor{blue}{0.283}}    & 0.497                                 & 0.510                                 & 0.211                                 & 0.294                                 & 0.320            & 0.394            & 0.288             & 0.297            & 0.966                              & 0.733                              & 0.308            & 0.311           \\
\multirow{-4}{*}{PEMS08}      & 48     & \textbf{\textcolor{red}{0.313}} & \textbf{\textcolor{red}{0.350}} & 0.361                              & 0.527                              & 0.674                                 & 0.565                                 & \underline{\textcolor{blue}{0.320}}    & \underline{\textcolor{blue}{0.351}}    & 0.721                                 & 0.592                                 & 0.345                                 & 0.367                                 & 0.442            & 0.465            & 0.324             & 0.359            & 1.385                              & 0.915                              & 0.327            & 0.361           \\ \hline
\multicolumn{2}{c}{1st Count}          & \multicolumn{2}{c}{\textbf{\textcolor{red}{46}}}                        & \multicolumn{2}{c}{0}                                                   & \multicolumn{2}{c}{11}                                                        & \multicolumn{2}{c}{\underline{\textcolor{blue}{11}}}                           & \multicolumn{2}{c}{7}                                                         & \multicolumn{2}{c}{5}                                                         & \multicolumn{2}{c}{0}               & \multicolumn{2}{c}{0}                & \multicolumn{2}{c}{0}                                                   & \multicolumn{2}{c}{0}              \\ \hline
\end{tabular}
}


\end{table*}







\end{document}
