\section{Proposed Method}
\label{sec:method}

Grounded in a probabilistic view, 
% we propose {\name} to learn \textit{hard-to-remove} visible watermarks for copyrighted  images protections, against both direct and unauthorized AI-assisted misuse. 
we propose {\name} to learn \textit{hard-to-remove} visible watermarks to protect copyrighted images from direct and AI-assisted misuse.


\subsection{Preliminary}

\textbf{Notations.}
As in previous works~\citep{ongie2020deep, whang21solve, liu2023aipo}, we represent (flattened) images as vectors denoted by lowercase boldface letters. Uppercase boldface letters mark matrices. 

\textbf{Inverse problems.} 
Given a corrupted observation $\yv \in \R^m$ of an unknown image $\xv_T \in \R^n$ ($m \leq n$), 
inverse problems aim to reconstruct clean $\xv_T$ assuming that $\yv$ is generated by
% seek to reconstruct some unknown clean image $\xv_T \in \R^n$ from a $m$-dimensional $(m \leq n)$ corrupted observation
\begin{align}\label{eq:inv-prob}
   \yv = f(\xv_T) + \ev,
\end{align}
where $f(\cdot)$ is a known forward operator that corrupts $\xv_T$, and $\ev$ is a noise vector with independent and identically distributed elements~\citep{bora2017compressed,ongie2020deep}.
% Notably, inverse problems are typically under-determined, in the sense that without proper prior information, the problem admit infinitely many {optimal} solutions. 
Inverse problems, like compressed sensing and inpainting, are associated with a specific operator $f$.
For more background, see~\cite{ongie2020deep}. 
Our work focuses on image inpainting.


\textbf{Image inpainting.} 
% Given an image with masked contents, this task is to recover the original image. 
This task aims to recover an image with masked content.
Formally, inpainting assumes that $\yv = \Amat \xv_T + \ev$, where $\Amat \in \R^{n \times n}$ is a diagonal matrix with binary entries indicating whether a pixel is observed or missing, 
and $\ev \sim \mathcal G(\zeros; \sigma^2 \Imat)$ is an isotropic Gaussian noise with known variance $\sigma^2$.



\textbf{Deep Generative Prior.} 
Inverse problems are generally under-determined, in the sense that
Eq \eqref{eq:inv-prob} admits infinitely many possible solutions.
To address this,  deep generative models (DGMs) pre-trained on large datasets can be used as \textit{priors} to assess the plausibility of reconstructions and help find the optimal one~\citep{ongie2020deep}. 
From a Bayesian perspective, this entails a \textit{maximum a posteriori} (MAP) problem.
Let $G$ be a DGM prior.
We solve the inverse problem by finding
\begin{align}
    \xv^*
    &=
    \argmax
    \nolimits_{\xv} \log p_G(\xv \mid \yv; \lambda) \\
    &= 
    \argmax
    \nolimits_{\xv} \log p_{e}(\yv - f(\xv)) + \lambda \log p_G(\xv), \notag
\end{align}
where $\log p_e (\cdot)$ and $\log p_G (\cdot)$ represent the log-likelihoods of the noise $\ev$ and the image $\xv$, respectively. 
The hyperparameter $\lambda > 0$ controls the weight of the prior $G$, acting as a regularizer~\citep{whang21solve}.



\textbf{Copyrighted Image Protection.} 
The advance of DGMs also enables unauthorized use of copyrighted content. 
For instance, DreamBooth~\citep{ruiz2023dreambooth} allows text-to-image diffusion models~\citep{rombach2022high} to generate personalized images. 
However, by fine-tuning on a few of an artist's works, it can mimic and plagiarize their style~\citep{van2023anti}.
This has raised significant concerns about copyright protection~\citep{shan2023glaze}. 
To counter this, 
recent works~\citep{liang2023mist,van2023anti} proposed \textit{targeted} attacks on DGMs that are prone to misuse, such as DreamBooth. 
Conceptually, 
given a misused DGM $G$ with training loss $\ell(G; \xv)$ for any $\xv$, these works protect a copyrighted image $\xv_T$ by learning an invisible perturbation $\deltav$ via $\max\nolimits_{\deltav: \| \deltav \|_\infty < \varepsilon} \ell(G; \xv_T + \deltav)$ to degrade $G$'s performance on $\xv_T+\deltav$, where $\varepsilon$ limits the pixel-level perturbation. 
This defines an adversarial attack on $G$. 
When $G$ is inaccessible, $\deltav$ is learned by attacking (an ensemble of) open-source surrogate models~\citep{liu2024metacloak}.










\subsection{{\name}: Towards a Universal Protection by Visible Watermarking}

% While existing attack-based safeguards effectively defend against targeted misuse cases, they still have two inherent limitations.
% First, they can only offer \textit{short-term} protection: when new techniques (e.g., for personalization) are developed, existing safeguards (e.g., those specialized for DreamBooth) may fall short to apply~\citep{}. 
% Second, these safeguards do not provide any protection against direct misuse.
% For example, unauthorized users may scrape copyrighted artistic images for commercial uses (e.g., online content) without purchasing rights, undermining the original creator's profits.
% This misuse does not rely on AI tools and cannot be prevented by attack-based safeguards. 
% Therefore, a more general formulation for protection is needed. 

% \tcu{
As outlined before, 
although attack-based safeguards can effectively address targeted misuse, they have key weaknesses. 
First, the attack-based formulation limits their applicability in \textit{untargeted} scenarios.
Specifically, 
their performance on black-box AI is largely unpredictable due to the nature of the attack~\citep{demontis2019adversarial,liu2024metacloak},
and in white-box settings, they provide only \textit{short-term} protection, in the sense that 
new personalization techniques may render current safeguards (e.g., those against DreamBooth) ineffective~\citep{liu2024metacloak,xue2024rethinking}. 
In addition, the \textit{invisible} nature of existing protections also poses two inherent limitations.
First, these protections are prone to distortion or purification attacks~\citep{athalye2018synthesizing,liu2024metacloak,zhao2024can}. 
Second, the \textit{invisibility} offers no protection against \textit{direct misuse}.
% We refer to a misuse direct if the misuse does not involve AI, but just for pirate unauthorized use. 
% For instance, users may scrape copyrighted images for commercial purposes without permission, undermining creators' profits. Such misuse doesn't involve AI tools and can't be defended by existing attack-based methods. 
We refer to a misuse as direct if it does not involve AI, but rather unauthorized use such as piracy. 
For instance, users may scrape copyrighted images for commercial purposes without purchasing rights, undermining creators' profits. Such misuse doesn't involve AI tools and cannot be addressed by existing attack-based methods.
Consequently, existing safeguards often provide unsatisfactory protection in execution~\citep{liu2024metacloak,zhao2024can}. 
Hence, a more general formulation for protection is needed. 
% }
% {\mm This paragraph looks redundant as it is already written in Intro. I suggest to  streamline:}

% {\mm Considering the weakness of attack-based watermarking like its unpredictability and requirement of frequent updates and retraining and a visible watermark's lack of protection against direct misuse (see Section 1), we resort to visible watermarking to ...( + next next paragraph) }

% \textbf{New solution.}
In light of these limitations, 
we resort to visible watermarking for stronger protection. 
First, visible watermarks render protected images largely unusable in {direct} use. In AI-involved misuse scenarios, when a prominent watermark is present, AI-based uses such as personalization with DreamBooth are also affected due to the backdoor mechanism~\citep{rawat2022devil,pan2023trojan,chou2023backdoor}. 
As shown in Fig \ref{fig:dbooth}, DreamBooth learns watermark patterns from watermarked training images, leading to unusable outputs. 
Notably, adding visible watermarks requires no prior domain knowledge of misuse scenarios or mechanisms. Thus, it provides broader protection. 
In addition, 
visible watermarks are much more robust to distortion attacks. 
In Fig \ref{fig:harvim-distort}, we apply JPEG compression~\citep{dziugaite2016study,aydemir2018effects} and Gaussian blur~\citep{zhao2020blurring} at varied intensities to distort watermarked images, and observe that the watermarks remain readable even when the images are severely degraded.
% These results demonstrate the validity and generality of visible watermarking for copyright protection in the era of AI. 

% \textbf{Challenge.}
Our findings indicate that visible watermarks offer excellent robustness against standard transformation attacks and pave the way for more reliable copyright protection than existing attempts. 
Nonetheless, conventional watermarks are typically added to images in a consistent manner, which offers limited resistance against more targeted watermark removal attacks~\citep{dekel2017effectiveness,liang2021visible,sun2023denet}. 
% While creator involvement in manual or \tcu{rule-based watermark placement can help}~\citep{kankanhalli1999adaptive}, it demands significant human effort
% and is not scalable. 
While manual or rule-based watermark placement can provide some protection~\citep{kankanhalli1999adaptive}, it requires significant human effort and lacks scalability.
To address this, 
we propose an \textit{automated} solution by \textit{learning} a visible watermark that is resistant to removal. 
We refer to our approach as \textit{\underline{ha}rd-to-\underline{r}emove \underline{vi}sible water\underline{m}ark} ({\name}) and provide details below. 





\ExplSyntaxOn
% \loadimage{<prefix>}{<options>}{<start>}{<count>}
% Emits <count> \includegraphics calls separated by `&', so that a single call
% fills <count> consecutive cells of the current tabular row.
% #1: prefix (directory of the images; files are read as #1/<index>.png)
% #2: args of \includegraphics
% #3: start index
% #4: num of images
\cs_set:Npn \loadimage #1#2#3#4 {
    % Start from an empty scratch sequence on every call.
    \seq_clear:N \l_tmpa_seq
    % The loop index (##1) runs from #3 to #3 + #4 - 1.
    \int_step_inline:nnn {#3} {#3 + #4 - 1} {
        % x-type expansion inserts the current index into the file name, while
        % \includegraphics itself and its options (#2) are shielded by \exp_not.
        \seq_put_right:Nx \l_tmpa_seq {
            \exp_not:N \includegraphics
                [\exp_not:n {#2}]
                {#1/##1.png}
        }
    }
    % Join the stored items with `&' (the tabular cell separator).
    \seq_use:Nn \l_tmpa_seq {&}
}

% \calctotalwidth{<image width>}{<num of col>}
% Expands to the value of (#2)*(#1) + (#2 - 1)*\tabcolsep followed by `pt'.
% #1: image width
% #2: num of col
% NOTE(review): tabular presumably pads every column with \tabcolsep on both
% sides, which would make the gap between adjacent columns 2\tabcolsep ---
% confirm that a single \tabcolsep per gap is intended here.
\cs_set:Npn \calctotalwidth #1#2 {
    \fp_eval:n {
        (#2) * (#1) + (#2 - 1) * (\tabcolsep)
    } pt
}
\ExplSyntaxOff



% Figure: DreamBooth training samples (left) and generated samples (right),
% for clean inputs and for visibly watermarked ("+wm") inputs.
% Each \loadimage call fills five consecutive cells of the row, so a row has
% 1 label cell + 5 + 5 image cells = 11 columns.
\begin{figure*}[htb!]
\centering
\resizebox{0.9\textwidth}{!}{%
% \tabcolsep is a length register: set it with \setlength instead of
% redefining it as a macro. The change stays local to this \resizebox argument.
\setlength{\tabcolsep}{2pt}%
\def\figwidth{0.09\linewidth}%
% Row label: rotated by 90 degrees, typeset in a box as wide as one image.
% The trailing % signs keep stray spaces out of the table cells.
\newcommand{\authornote}[1]{%
\adjustbox{rotate=90}{\parbox{\figwidth}{\footnotesize \bfseries \centering #1}}%
}%
\begin{tabular}{*{11}{c}}
\toprule[0.4ex]
& \multicolumn{5}{c}{\textbf{Training Samples}}  \vline
& \multicolumn{5}{c}{\textbf{Generated Samples}} \\
% \cmidrule[0.2ex](lr){2-6} \cmidrule[0.2ex](lr){7-12}
% & \multicolumn{5}{c}{\fbox{\parbox{\calctotalwidth{\figwidth}{5}}{\bf \centering {Generated Samples}}}} \\
% & \multicolumn{5}{c}{\fbox{\parbox{\calctotalwidth{\figwidth}{5}}{\bf \centering {Training Samples}}}}
% & \multicolumn{5}{c}{\fbox{\parbox{\calctotalwidth{\figwidth}{5}}{\bf \centering {Generated Samples}}}} \\
\noalign{\vskip 0.5ex}
\authornote{clean}     &
\loadimage{figures/dbooth/train}{width=\figwidth}{0}{5}
&
\loadimage{figures/dbooth/gen}{width=\figwidth}{0}{5}\\
\authornote{+wm}     &
\loadimage{figures/dbooth/train_wm}{width=\figwidth}{0}{5}
&
\loadimage{figures/dbooth/gen_wm}{width=\figwidth}{0}{5}\\
\bottomrule[0.4ex]
\end{tabular}%
}

\caption{
Visible watermarking can provide strong protection: 
DreamBooth trained on watermarked (``+wm'') images learns the watermark patterns as well.
Examples and implementations are from \cite{von2022diffusers}.
}
\label{fig:dbooth}
\end{figure*}



% \loadimage and \calctotalwidth are already defined earlier in this file,
% right before the first figure (fig:dbooth). The byte-identical second copy
% that used to live here was dropped so the two definitions cannot drift apart.



% Figure: watermarked observations (row "Obs") and the same images after JPEG
% compression and Gaussian blur at two intensity levels.
% NOTE(review): rows labelled (l) load the `*_high' folders and rows labelled
% (h) load the `*_low' folders; presumably the folder names refer to the
% resulting image quality rather than to the distortion intensity --- confirm.
\begin{figure}[htb!]
\centering
% \tabcolsep is a length register: set it with \setlength instead of
% redefining it as a macro. The change stays local to this figure.
\setlength{\tabcolsep}{2pt}%
\def\figwidth{0.13\linewidth}%
% Row label: rotated by 90 degrees, typeset in a box as wide as one image.
% The trailing % signs keep stray spaces out of the table cells.
\newcommand{\authornote}[1]{%
\adjustbox{rotate=90}{\parbox{\figwidth}{\bfseries \small \centering #1}}%
}%

\begin{tabular}{cccccc}

\toprule[0.4ex]

% & \multicolumn{5}{c}{\fbox{\parbox{\calctotalwidth{\figwidth}{5}}{\centering {Training Samples}}}}
% & \multicolumn{5}{c}{\fbox{\parbox{\calctotalwidth{\figwidth}{5}}{\centering {Generated Samples}}}} \\
% \noalign{\vskip 0.5ex}

\authornote{Obs}     &  
\loadimage{figures/distortion/dog_wm}{width=\figwidth}{0}{5} \\


\cmidrule[0.2ex]{2-6}

\authornote{Jpeg(l)}     &  
\loadimage{figures/distortion/jpeg_high}{width=\figwidth}{0}{5} \\

\authornote{Gaus(l)}     &  
\loadimage{figures/distortion/gaus_high}{width=\figwidth}{0}{5} \\

\cmidrule[0.2ex]{2-6}

\authornote{Jpeg(h)}     &  
\loadimage{figures/distortion/jpeg_low}{width=\figwidth}{0}{5} \\

\authornote{Gaus(h)}     &  
\loadimage{figures/distortion/gaus_low}{width=\figwidth}{0}{5} \\

\bottomrule[0.4ex]

\end{tabular}
\caption{
Visible watermarks remain resilient to strong distortion attacks (JPEG compression and Gaussian blur) at low (top) and high (bottom) intensity levels. 
}
\label{fig:harvim-distort}
\end{figure}






% \tcu{
% Notably, 
% Harvim is agnostic to misuse cases; however, it still provides strong protection, meaning that downstream applications, such as personalization with DreamBooth, cannot be performed with the watermark present.
% For example, Figure 1 illustrates that DreamBooth learns watermark patterns from the training data (i.e., the watermarked images), resulting in largely unusable generations.
% Therefore, although the watermark was not designed to defend against DreamBooth specifically, a watermark removal process is still necessary to train it on the protected images.
% }




\subsection{Formal Formulation of {\name}}

% \tcc{Help check if this paragraph is unclear.}
% We represent a \textit{watermark} $\mv \in \R^n$ as an image, which has the same shape as copyrighted image $\xv_T$ that needs protection. 
% Then, watermark removing can be formulated as an inverse problem~\citep{}, where the watermarked observation is given by%
% %
% \footnote{We will express an observation $\yv$ (or a reconstruction $\xv^*$) as a function of $\mv$ (and hyparameter $\lambda$) to highlight that it depends on the particular $\mv$ (and $\lambda$).}
% %
% $\yv(\mv) = \Amat_m \xv_T + \ev$. 
% Same as in inpainting, $\Amat_m$ is a diagonal square matrix with entries denoting whether a pixel is watermarked or not. 
% By treating \textit{watermark} area as missing, inpainting can serve as a surrogate task for visible watermark removal~\citep{}, and we follow this convention.

We formulate the proposed {\name} as an optimization problem. 
To this end, we define a {watermark} $\mv \in \R^n$ as an image with the same dimensions\footnote{The background is also part of the image.} as the copyrighted image $\xv_T$. 
Then, watermark removal can be formulated as an inverse problem~\citep{ongie2020deep}, 
where the watermarked observation is%
\footnote{We write the observation $\yv$ (or reconstruction $\xv^*$) as a function of $\mv$ (and hyperparameter $\lambda$) to highlight the dependence. 
% on $\mv$ (and $\lambda$).
} 
$\yv(\mv) = \Amat_m \xv_T + \ev$. 
Similar to inpainting, $\Amat_m$ is a diagonal matrix where entries indicate if a pixel is watermarked. Treating the {watermarked} area as missing, inpainting serves as a surrogate for visible watermark removal~\citep{huang2004attacking}.


Built upon this formulation, {\name} seeks an $\mv$ that makes $\xv_T$ \textit{hard to reconstruct} from observation $\yv(\mv)$.
The \textit{reconstruction hardness} is measured by a {similarity score} $s(\xv^*(\mv), \xv_T)$ between the optimal reconstruction $\xv^*(\mv)$ from $\yv (\mv)$ and the ground truth $\xv_T$. 


% \tcu{
\textbf{Watermarking constraints.}
When learning $\mv$ for copyrighted image protection, two standard \textit{readability} constraints must be met
\citep{mohanty1999dual,kankanhalli1999adaptive}.
First, 
\textbf{image readability}
requires that the watermarked observation remain readable. 
Otherwise, while an excessive watermark occupying the entire image can make it unrecoverable, 
% is able to provide perfect protection as it makes $\xv_T$ impossible to be recovered, 
the audience will also fail to recognize the image content, which could harm the creator's financial gains and public visibility.
This constraint is enforced by adding a regularization term $\mathcal R (\mv)$ that penalizes the size of the watermark.
Second, \textbf{watermark readability}
requires that the watermark itself should convey clear copyright information, such as the creator's logo or name.
To satisfy this constraint,
we use a small pre-trained generative model to control $\mv$, as detailed in Appendix \ref{app:wm-details} due to the page limit.

Put together, 
{\name} learns $\mv$ to watermark image $\xv_T$ by solving a bi-level optimization problem
\begin{align}\label{eq:opt-wm}
    &\mv^* 
    = 
    \operatorname*{arg\,min}\nolimits_{\mv} 
    s(\xv^*(\mv), \xv_T) + \mathcal{R}(\mv), \\
    &\text{s.t.}\ 
    \xv^*(\mv) 
    = 
    \argmax\nolimits_{\xv} \log p_G (\xv \mid \yv(\mv); \lambda). \notag
    % (&\triangleq l(\mv)).\notag
\end{align}
% where we follow previous works~\citep{whang21solve, liu2023aipo} and replace maximizing the log-likelihood with minimizing the negative log-likelihood in the lower-level optimization. 





% \textbf{Reconstruction is necessary}

\begin{remark}
We want to emphasize that Eq \eqref{eq:opt-wm} provides a general framework for image protection for two key reasons. 
First, the concept of \textit{hard-to-reconstruct region} underlying {\name} reflects an intrinsic characteristic of an image, rather than a property specific to any particular prior $G$. 
Second, Eq \eqref{eq:opt-wm} is not limited to any specific choice of $G$. 
The next section presents an implementation, 
but {\name} by definition can incorporate any generative prior $G$ capable of modeling the real image distribution.
\end{remark}



\subsection{An Approximate Solution for {\name}}

The bi-level optimization Eq \eqref{eq:opt-wm} is non-trivial to solve, and the difficulty is twofold. 
First, inpainting requires the matrix $\Amat_m$ to contain binary entries, which cannot be optimized by gradient-based methods. 
Second, its feasible set, as specified by the lower-level optimization that involves some deep neural network $G$, is NP-hard to identify~\citep{sinha2017review}.
Therefore, further approximations are needed. 


% To solve the firschallenge, we leverage a soft 
Mathematically, the first challenge arises because the gradient with respect to $\mv$ is undefined due to the discrete nature of $\Amat_m$.
To address this issue, we construct a differentiable approximation of $\Amat_m$ based on a continuous-valued learnable watermark $\mv$.  
Specifically, given $\mv = (m_1, \dots, m_n) \in \R^n$ and denoting the sigmoid function by $\text{sig}: \R \rightarrow \R$, we define 
\begin{align}\label{eq:inpaint-mask}
    \Amat_m = \text{diag}\left(\text{sig}\left(\frac{m_1 - \alpha}{\beta}\right), \dots, \text{sig}\left(\frac{m_n - \alpha}{\beta}\right)  \right),
\end{align}
where $\alpha, \beta$ are hyperparameters such that
$\text{sig}((m_i - \alpha) / \beta) \approx 1$ when $m_i$ lies within the watermark area, and $0$ otherwise. 
Additional implementation details are provided in Appendix \ref{app:wm-details}.


The differentiable $\mv$ can be optimized with gradient
\begin{align}\label{eq:bilevel-grad}
    &\nabla_{\mv} \left(s(\xv^*(\mv), \xv_T) + \mathcal{R}(\mv) \right) \\
    =& 
    \nabla_{\xv^*} s(\xv^*(\mv), \xv_T)^\top \Jmat_{\mv}(\xv^*(\mv)) + \nabla_\mv \mathcal R(\mv), \notag
\end{align}
where the second line holds from the chain rule, and $\Jmat_{\mv}(\xv^*(\mv))$ denotes the Jacobian of $\xv^*$ with respect to $\mv$. 
Unfortunately, this Jacobian is intractable due to the unknown form of $\xv^*(\mv)$, so the problem remains unsolved. 
We resort to meta-learning for an approximate solution~\citep{huang2020metapoison} by replacing the exact $\xv^*(\mv)$ with an approximate solution $\Tilde{\xv}(\mv)$ that is computed from $K$-step gradient descent~\citep{finn2017model}. 
By treating $\nabla_{\xv} \log p_G( \xv \mid \yv(\mv); \lambda)$
as a function of $\mv$, $\Tilde{\xv}(\mv)$ can be expressed as an explicit function of $\mv$, which makes approximating Eq \eqref{eq:bilevel-grad} viable. 

% \tcc{@Qi Lei: Please help revise the following discussions. can you blend this to reflect the importance we choose a inner problem that is guaranteed to solve? 

% In bilevel optimization, progress in the outer problem depends heavily on accurately solving the inner (second-level) problem. Unlike single-level optimization, where the objective directly reflects progress, errors in solving the inner problem can mislead the outer objective~\cite{ghadimi2018approximation,franceschi2018bilevel}. Thus, the ability to solve the second-level problem accurately is critical, as it determines whether the outer optimization can faithfully reflect true progress.
% }

In practice, however, meta-learning requires a small $K$ to keep the computational cost affordable, and $K = 1$ or $2$ is often used~\citep{huang2020metapoison}. Such a small value often results in a highly inaccurate approximation~\citep{geiping2020witches}. 
{Critically, when the approximation fails to faithfully reflect the progress made by the current $\mv$, 
the upper-level optimization will be misled as well, resulting in poor or failed solutions~\citep{ghadimi2018approximation,franceschi2018bilevel}.
}

\textbf{Idea.}
While this difficulty cannot be resolved in general, 
for inverse problem solvers that use normalizing flows~\citep{papamakarios2021normalize} as generative priors, it can be largely alleviated.
Specifically, denoting $\xv^*(\yv; \lambda) = \argmax\nolimits_{\xv} \log p_G(\xv \mid \yv; \lambda)$, \cite{liu2023aipo} showed that, under regularity conditions, $\log p_G(\xv \mid \yv; \lambda')$ is locally convex at $\xv^*(\yv; \lambda)$ when $\lambda'$ is close enough to $\lambda$. Therefore, using $\xv^*(\yv; \lambda)$ as the initial value, $\xv^*(\yv; \lambda')$ can be obtained within \textit{a few} gradient descent steps.
Motivated by this, 
we expect $\log p_G(\xv \mid \yv; \lambda)$ to remain locally convex around $\xv^*(\yv'; \lambda')$ if $\yv'$ is close to $\yv$ and $\lambda'$ is close to $\lambda$.
Built upon this, 
we optimize $\mv$ jointly with $\Tilde{\xv}(\mv)$ and $\lambda$ in an iterative manner, as in~\cite{liu2023aipo}. 

% instead of optimizing $\Tilde{\xv}^*$ and $\mv$ iteratively with fixed $\lambda$ as in existing framework, 
% we take $\lambda$ into optimization as well.

\textbf{Solution.}
Our solution starts with a randomly initialized watermark $\mv_0$, hyperparameter $\lambda_0 = 0$, and an approximate solution $\Tilde{\xv}(\mv_0; \lambda_0)$ solved by gradient descent.
Here the approximate solution $\Tilde{\xv}$ is expressed as a function of both watermark $\mv$ and $\lambda$.
In each round $t$, 
we first update hyperparameter $\lambda_t$ by taking a small step towards the final $\lambda$.
Next, given current $\mv_{t-1}$ and $\lambda_t$, we solve $\Tilde{\xv}_t(\mv_{t-1}; \lambda_{t})$ by taking $K$ gradient descent steps from the last round solution $\Tilde{\xv}_{t-1}$.
Finally, we update $\mv_t$ by {unrolling} updates on $\Tilde{\xv}_t(\mv_{t-1}; \lambda_{t})$ as a function of $\mv_{t-1}$ and take
\begin{align}\label{eq:wm-update}
\mv_t = \mv_{t-1} - \nabla_\mv \left( s(\underbrace{\Tilde{\xv}_t(\mv_{t-1}; \lambda_{t})}_{\text{func. of $\mv_{t-1}$}}, \xv_T) + \mathcal R(\mv_{t-1}) \right).
\end{align}
We repeat the above steps until $\lambda_t$ reaches the pre-specified value $\lambda$. 
The solution is outlined in Algo \ref{alg:main}.
% \tcu{Note that without updating $\mv_t$ in step \ref{step:update_m}, our algorithm reduces to AIPO~\citep{liu2023aipo}. }




% Algorithm: alternating scheme that (i) anneals the prior weight \lambda_t
% from 0 to \lambda in T rounds, (ii) refines the reconstruction \Tilde{\xv}_t
% with K gradient steps on the log-posterior, and (iii) updates the watermark
% \mv_t through the unrolled reconstruction (Eq. eq:wm-update).
\begin{algorithm}[!t]
\caption{{\name} algorithm}
\label{alg:main}

\begin{algorithmic}[1] 
    \STATE \textbf{Input:}  
    copyrighted image $\xv_T$,
    target prior weight $\lambda>0$ and its number of update steps $T > 0$,
    random noise variance $\sigma^2 > 0$,
    generative prior $G:\R^n\to \R^n$,
    inpainting mask hyperparameters $\alpha, \beta$ (Eq \eqref{eq:inpaint-mask}),
    number of unrolled steps $K$
    
    \STATE \textbf{Initialize:}
    $\lambda_0=0$, 
    randomly initialize $\mv_0$ and compute the inpainting mask $\Amat_{m, 0}$ by Eq \eqref{eq:inpaint-mask},
    watermarked image $\yv_0 = \Amat_{m, 0} \xv_T + \ev$ where $\ev \sim \mathcal G(\zeros, \sigma^2 \Imat)$
    % With \lambda_0 = 0 the prior term of the MAP objective vanishes, hence "MLE".
    \STATE 
    Ignoring dependency on $\mv_0$, 
    find the MLE solution $\Tilde{\xv}_0 = \Tilde{\xv}(\mv_0, \lambda_0)$ for $\yv_0$~\citep{liu2023aipo}
    \FOR{$t=1, \dots, T$} 
        \STATE
        Treat $\yv_{t-1}(\mv_{t-1}) = \Amat_{m, t-1} \xv_T + \ev, \ev \sim \mathcal G(\zeros, \sigma^2 \Imat)$ as a function of $\mv_{t-1}$
        % Move the prior weight one step closer to its final value \lambda.
        \STATE 
        $\lambda_t = \lambda_{t-1} + \frac{\lambda}{T}$
        % Warm-start from the reconstruction of the previous round.
        \STATE 
        $\Tilde{\xv}_t = \Tilde{\xv}_{t-1}$
        \FOR{$k=1, \dots, K$}
            % Gradient step on the log-posterior under the prior G (written p_G in the text).
            \STATE
            $\Tilde{\xv}_t = \Tilde{\xv}_t + \nabla_{\xv} \log p_G(\xv \mid \yv_{t-1}(\mv_{t-1}); \lambda_t)$
        \ENDFOR
        % 
        \STATE
        Denote current solution as $\Tilde{\xv}_t(\mv_{t-1}, \lambda_t)$
        % 
        \STATE\label{step:update_m}
        Update $\mv_t$ based on Eq \eqref{eq:wm-update}
    \ENDFOR 
    % The loop ends at t = T, so the final iterate is \mv_T.
    \RETURN Learned $\mv_T$
\end{algorithmic}
\end{algorithm}
