\documentclass[accepted]{uai2025} % for initial submission
%\documentclass[accepted]{uai2025} % after acceptance, for a revised version; 
% also before submission to see how the non-anonymous paper would look like 
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    %\bibliographystyle{plainnat}
% \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{xspace}

\usepackage{titlesec}
\usepackage{titletoc}

\usepackage[utf8]{inputenc} % allow utf-8 input
\usepackage[T1]{fontenc}    % use 8-bit T1 fonts
\usepackage{hyperref}       % hyperlinks
\usepackage{url}            % simple URL typesetting
\usepackage{booktabs}       % professional-quality tables
\usepackage{amsfonts}       % blackboard math symbols
\usepackage{nicefrac}       % compact symbols for 1/2, etc.
\usepackage{microtype}      % microtypography
\usepackage{xcolor}         % colors
\usepackage{amsmath}
\usepackage{graphicx}
\usepackage{cleveref}
\usepackage{algorithm}
%\usepackage{algorithmic}
\usepackage{amsthm}
\newtheorem{theorem}{Theorem}[section]  
\newtheorem{assumption}{Assumption}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\bibliographystyle{plainnat}
\usepackage{thmtools}
\usepackage{thm-restate}
\usepackage[T1]{fontenc}

\usepackage{compact}
\newtheorem{definition}{Definition}[section] % 
\newtheorem{proposition}{Proposition}[section] 
\usepackage{amsthm}



\usepackage{amsmath,amssymb}
\usepackage{algorithm}
\usepackage{algpseudocode}  % or {algorithmic} if you prefer


\newcommand{\lk}[1]{\textcolor{red}{[\textbf{Lingkai}: #1]}}
\newcommand{\yuqi}[1]{\textcolor{blue}{[\textbf{Yuqi }: #1]}}
\newcommand{\haichuan}[1]{\textcolor{green}{[\textbf{Haichuan }: #1]}}
\newcommand{\ak}[1]{\textcolor{teal}{[\textbf{Adam }: #1]}}


\newcommand{\argmax}{\mathop{\mathrm{arg\,max}}}
\newcommand{\argmin}{\mathop{\mathrm{arg\,min}}}
\newcommand{\ours}{\textsc{DiffOracle}\xspace}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Robust Optimization with Diffusion Models for Green Security}

% The standard author block has changed for UAI 2025 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:lingkaikong@g.harvard.edu}{Lingkai Kong}{}}
\author[1]{Haichuan Wang}
\author[1]{Yuqi Pan}
\author[1]{Cheol Woo Kim}
\author[1]{Mingxiao Song}
\author[1]{Alayna Nguyen}
\author[1]{\\Tonghan Wang}
\author[2]{Haifeng Xu}
\author[1]{Milind Tambe}
% Add affiliations after the authors
\affil[1]{%
    Harvard University
}
\affil[2]{%
    University of Chicago

}

  
  \begin{document}
\maketitle

\begin{abstract}
% \textit{We could cast this more generally as green security games, which includes: protecting against illegal logging, illegal fishing, poaching, illegal environmental pollution/dumping and other topics, depending on whether this goes in a very applied direction.}


% We aim to use diffusion models to predict poacher behavior, with the resulting predictions informing patrol planning. Given the potential noisiness of the poacher dataset, we propose addressing this challenge through a distributionally robust optimization approach. Specifically, we will employ a game-theoretic framework to solve the resulting minimax optimization problem. Our key innovation is to develop a guided diffusion technique capable of directly sampling from the worst-case distribution.

% In green security, defenders patrol by forecasting the adversarial behavior of poachers, illegal loggers, and illegal fishers. However, this behavior is often highly uncertain and multimodal. To address this challenge, we propose leveraging a conditional diffusion model for adversarial behavior prediction and focus on robust patrol optimization. However, the learned diffusion model can be imperfect due to noise and limited size in data. we formulate the problem as a two-player game between the defender and nature, where nature player selects the adversarial behavior distribution within a constrained space. This formulation enhances the robustness of the patrol strategy to imperfections in the learned diffusion model. To accurately estimate the expected utility function, we combine twisted Sequential Monte Carlo sampling and diffusion model, ensuring asymptotic exactness. We evaluate our approach on both synthetic and real-world poaching datasets, demonstrating its effectiveness.

% In green security, defenders must forecast the behavior of adversaries such as poachers, illegal loggers, and illegal fishers to plan effective patrols. However, these behaviors are often highly uncertain and complicated. To address this challenge, we propose using a conditional diffusion model for adversarial behavior prediction. Because the learned diffusion model can be imperfect due to noise and limited data, we formulate the patrol optimization problem as a two-player game between the defender and nature, where nature selects an adversarial behavior distribution from a constrained neighborhood around the model’s predicted distribution. This game-theoretic framework enhances the robustness of patrol strategies against model imperfections. We solve the resulting game via a double oracle algorithm. To accurately estimate expected utilities, we integrate twisted Sequential Monte Carlo sampling with the diffusion model, ensuring asymptotic exactness. Theoretically, our algorithm can converge to an epsilon-equilibrium with arbitrarily high probability using finite iterations and finite samples from the diffusion model. We evaluate our method on both synthetic and real-world poaching datasets, demonstrating its effectiveness.


% In green security, defenders must predict the behavior of adversaries such as poachers, illegal loggers, and illegal fishers to plan effective patrols. However, these behaviors are often highly uncertain and complex. Previous approaches typically rely on Gaussian processes or linear models, which have limited expressiveness.
% % To address this challenge, we propose using a conditional diffusion model for adversarial behavior prediction. Since the learned diffusion model may be imperfect due to noise and limited data, we formulate the patrol optimization problem as a two-player game between the defender and nature. In this game, nature selects an adversarial behavior distribution from a constrained neighborhood around the model’s predicted distribution. 
% The use of a diffusion model introduces new challenges in applying game theory to solve the following robust optimization problem, such as a constrained mixed strategy space and the need to sample from an unnormalized distribution to estimate the utility. To tackle these issues, we introduce mixed strategy of mixed strategies and leverage a twisted sequential Monte Carlo sampler for efficient sampling from the diffusion model. Theoretically, our algorithm is guaranteed to converge to an $\epsilon$-equilibrium with high probability using a finite number of iterations and finite number of samples from the diffusion model. We evaluate our method on both synthetic and real-world poaching datasets, demonstrating its effectiveness.

In green security, defenders must forecast adversarial behavior—such as poaching, illegal logging, and illegal fishing—to plan effective patrols. These behaviors are often highly uncertain and complex. Prior work has leveraged game theory to design robust patrol strategies to handle uncertainty, but existing adversarial behavior models primarily rely on Gaussian processes or linear models, which lack the expressiveness needed to capture intricate behavioral patterns.  To address this limitation, we propose a conditional diffusion model for adversary behavior modeling, leveraging its strong distribution-fitting capabilities. To the best of our knowledge, this is the first application of diffusion models in the green security domain.  
Integrating diffusion models into game-theoretic optimization, however, presents new challenges, including a constrained mixed strategy space and the need to sample from an unnormalized distribution to estimate utilities. To tackle these challenges, we introduce a mixed strategy of mixed strategies and employ a twisted Sequential Monte Carlo (SMC) sampler for accurate sampling.  Theoretically, our algorithm is guaranteed to converge to an \(\epsilon\)-equilibrium with high probability using a finite number of iterations and samples. Empirically, we evaluate our approach on both synthetic and real-world poaching datasets, demonstrating its effectiveness.  



\end{abstract}


\input{intro}
\input{related_works}
\input{background}
\input{method}
\input{experiments}
\input{conclusion}

\section*{Acknowledgements}
We are thankful to the Uganda Wildlife Authority for granting us access to incident data from Murchison Falls National Park.
We also thank Charles Emogor for the insightful discussions and the anonymous reviewers for their valuable feedback. This work was supported by ONR MURI N00014-24-1-2742.

%\bibliographystyle{abbrv}
\bibliography{references}




%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\newpage
\appendix
\onecolumn


\begin{center}
	{\Large \textbf{Appendix for Robust Optimization with Diffusion Models for Green Security}}
\end{center}

\startcontents[sections]
\printcontents[sections]{l}{1}{\setcounter{tocdepth}{2}}


\section{More Details on Diffusion Models}


A diffusion model \citep{sohl2015deep} is a generative framework composed of two stochastic processes: a \emph{forward} process that progressively adds Gaussian noise to real data, and a \emph{reverse} (or denoising) process that learns to remove this noise step by step. Formally, let \(\mathbf{z}^0 \sim \mathcal{D}\) be a sample from the training dataset.\footnote{We use \(\mathbf{z}^0\) and \(\mathbf{z}\) interchangeably when there is no ambiguity.} The forward diffusion process can be written as
$q(\mathbf{z}^t \mid \mathbf{z}^{t-1}) 
= \mathcal{N}\!\bigl(\mathbf{z}^t;\,\mathbf{z}^{t-1},\,\beta^2 \mathbf{I}\bigr),$
where \(\beta^2\) is the noise variance at each step \(t=1,\dots,T\). As \(T\) becomes large, repeated noising transforms the data distribution into (approximately) pure Gaussian noise:
$q(\mathbf{z}^T) \approx \mathcal{N}(\mathbf{0},\,T \beta^2 \mathbf{I}).$

\textbf{Score-based Approximation.} To invert this process (i.e., to denoise and recover samples from the original data distribution), one can approximate the reverse transition 
\(
q(\mathbf{z}^{t-1} \mid \mathbf{z}^t) 
\)
via the \emph{score function}, \(\nabla_{\mathbf{z}^t} \log q(\mathbf{z}^t)\) when $\beta$ is small. Specifically,
\[
q(\mathbf{z}^{t-1} \mid \mathbf{z}^t) 
\,\approx\, 
\mathcal{N}\!\Bigl(\mathbf{z}^{t-1};\,
\mathbf{z}^t 
+ \beta^2 \,\nabla_{\mathbf{z}^t}\! \log q(\mathbf{z}^t),\,
\beta^2 \mathbf{I}\Bigr).
\]
Here, \(q(\mathbf{z}^t) = \int q(\mathbf{z}^0)\,q(\mathbf{z}^t \mid \mathbf{z}^0)\, d\mathbf{z}^0\), and the gradient \(\nabla_{\mathbf{z}^t}\! \log q(\mathbf{z}^t)\) points toward regions of higher data density. In practice, we do not know \(q(\mathbf{z}^t)\) in closed form, so a neural \emph{score network} \(s_{\theta}(\mathbf{z}^t, t)\) is trained to approximate this gradient via \emph{denoising score matching} \citep{vincent2011connection, ho2020denoising}. Consequently, the learned reverse (denoising) transition becomes
\[
p_{\theta}(\mathbf{z}^{t-1} \mid \mathbf{z}^t) 
= \mathcal{N}\!\Bigl(\mathbf{z}^{t-1};\,
\mathbf{z}^t 
+ \beta^2\, s_{\theta}(\mathbf{z}^t, t),\,
\beta^2 \mathbf{I}\Bigr).
\]
Starting from an initial Gaussian sample \(\mathbf{z}^T \sim \mathcal{N}(\mathbf{0},\, T \beta^2 \mathbf{I})\), iterating this reverse process ultimately recovers samples that approximate the original data distribution.

\textbf{Conditional Extension.} This diffusion framework can be naturally extended to include additional context \(\mathbf{c}\). In a \emph{conditional} diffusion model~\citep{ho2021classifier}, the score network becomes \(s_{\theta}(\mathbf{z}^t, t, \mathbf{c})\), so that at each step the denoising is informed by side information such as class labels, textual descriptions, or other relevant features. This conditional approach enables the generation of samples that match not only the learned data distribution but also the specific context \(\mathbf{c}\), making it particularly useful for tasks in which external conditions strongly influence the underlying data generation process.


Rather than directly estimating the score function \( s_{\theta}(\mathbf{z}^t, t) \), Denoising Diffusion Probabilistic Models (DDPM)~\citep{ho2020denoising} reformulate the learning objective as a \textit{noise prediction} task. This reparameterization leverages the closed-form expression of the forward process:
\[
\mathbf{z}^t = \sqrt{\bar{\alpha}_t} \mathbf{z}^0 + \sqrt{1 - \bar{\alpha}_t} \boldsymbol{\epsilon}, \quad \boldsymbol{\epsilon} \sim \mathcal{N}(0, \mathbf{I}),
\]
where \(\bar{\alpha}_t\) denotes the cumulative product of noise schedules. The training objective becomes recovering the noise \(\boldsymbol{\epsilon}\) that perturbed \(\mathbf{z}^0\) to form \(\mathbf{z}^t\). A neural network \(\boldsymbol{\epsilon}_\theta(\mathbf{z}^t, t)\) is trained to approximate this noise, which corresponds to learning the score function up to a time-dependent scaling:
\[
s_{\theta}(\mathbf{z}^t, t) = - \frac{\boldsymbol{\epsilon}_\theta(\mathbf{z}^t, t)}{\sqrt{1 - \bar{\alpha}_t}}.
\]

Training then reduces to minimizing a simple mean squared error (MSE) loss between the true and predicted noise:
\[
\mathcal{L}_{\text{simple}} = \mathbb{E}_{\mathbf{z}^0,\, \boldsymbol{\epsilon},\, t}\!
  \bigl[\|\boldsymbol{\epsilon} 
    - \boldsymbol{\epsilon}_\theta(\mathbf{z}^t, t, \mathbf{c})\|^2\bigr].
\]

By training this conditional diffusion model on historical poaching data---augmented with contextual features \(\mathbf{c}\)---we learn \( p_\theta(\mathbf{z} \mid \mathbf{c}) \), a powerful and expressive model of poacher behavior. This enables us to capture complex, multimodal patterns of attacker responses, thereby supporting the development of robust patrol strategies discussed earlier.



% Instead of directly estimating the score function \( s_{\theta}(\mathbf{z}^t, t) \), Denoising Diffusion Probabilistic Models (DDPM) reparameterize the learning objective in terms of \textit{noise prediction}. This is motivated by the fact that, given the closed-form expression for \( q(\mathbf{z}^t \mid \mathbf{z}^0) \), we can rewrite: $\mathbf{z}^t = \sqrt{\bar{\alpha}_t} \mathbf{z}^0 + \sqrt{1 - \bar{\alpha}_t} \boldsymbol{\epsilon}, \quad \boldsymbol{\epsilon} \sim \mathcal{N}(0, \mathbf{I}).$ Thus, the goal of DDPM training is to predict the noise \(\boldsymbol{\epsilon}\) that was added to transform \(\mathbf{z}^0\) into \(\mathbf{z}^t\). A neural network \(\boldsymbol{\epsilon}_\theta(\mathbf{z}^t, t)\) is trained to approximate \(\boldsymbol{\epsilon}\), which is equivalent to learning the score function up to a scaling factor: $s_{\theta}(\mathbf{z}^t, t) = - \frac{\boldsymbol{\epsilon}_\theta(\mathbf{z}^t, t)}{\sqrt{1 - \bar{\alpha}_t}}.$


% Training the diffusion model then reduces to minimizing the mean squared error (MSE) loss between the true noise \(\boldsymbol{\epsilon}\) and the predicted noise \(\boldsymbol{\epsilon}_\theta\):
% \[
% \mathcal{L}_{\text{simple}} = \mathbb{E}_{\mathbf{z}_0,\, \boldsymbol{\epsilon},\, t}\!
%   \bigl[\|\boldsymbol{\epsilon} 
%     - \boldsymbol{\epsilon}_\theta(\mathbf{z}_t, t, \mathbf{c})\|^2\bigr].
% \]

% By training this conditional diffusion model on historical poaching data—augmented with contextual variables \(\mathbf{c}\)—we learn \( p_\theta(\mathbf{z} \mid \mathbf{c}) \), a flexible and expressive representation of poacher behavior. This allows us to capture complex, multimodal patterns of attacker responses, which in turn informs the robust patrol strategies described in the preceding sections.

\section{Examples of mixed 
strategy over mixed strategies}\label{appdx:example}
Let us consider a national park with $3$ target regions to protect, and poachers' pure strategies specify how many snares to put in each target region. Two examples of poacher pure strategies could be $\mathbf{z}_1 = (3, 4, 3)$ and $\mathbf{z}_2 = (0, 0, 10)$. Each entry in the pure strategy determines the number of snares a poacher will place in the corresponding target region. Let us denote poachers' pure strategy space as $\mathcal{Z} = \{\mathbf{z}_1, \mathbf{z}_2\}$. 

A mixed strategy $\tau$ is a distribution on the pure strategy space, i.e., $\tau \in \Delta(\mathcal{Z})$. Denote the subset of mixed strategies which satisfy the constraint 
$D_{\mathrm{KL}}(\tau(\mathbf{z}) \,\|\, p_{\theta}(\mathbf{z} | \mathbf{c})) \leq \rho$ as $\mathcal{T}$. One such example $\tau_1$ could be $P(\mathbf{z}_1) = 0.1$ and $P(\mathbf{z}_2) = 0.9$. Another, degenerate, example of a mixed strategy $\tau_2$ could be $P(\mathbf{z}_1) = 0$ and $P(\mathbf{z}_2) = 1$.

A mixed strategy over mixed strategies $\sigma$ is a distribution on the constrained mixed strategy space, i.e., $\sigma \in \Delta(\mathcal{T})$. One example of mixed strategy over mixed strategies $\sigma_1$ could be $P(\tau_1) = 0.1$ and $P(\tau_2) = 0.9$. Another degenerate example $\sigma_2$ could be $P(\tau_1) = 0$ and $P(\tau_2) = 1$.

A mixed strategy over mixed strategies is still a distribution on the original pure strategy space, i.e., $\sigma \in \Delta(\mathcal{Z})$. For example, an alternative way to view $\sigma_1$ could be
\[
P(\mathbf{z}_1) = P(\sigma_1(\tau_1)) \cdot P(\tau_1(\mathbf{z}_1)) + P(\sigma_1(\tau_2)) \cdot P(\tau_2(\mathbf{z}_1)) = 0.01
\]
and
\[
P(\mathbf{z}_2) = P(\sigma_1(\tau_1)) \cdot P(\tau_1(\mathbf{z}_2)) + P(\sigma_1(\tau_2)) \cdot P(\tau_2(\mathbf{z}_2)) = 0.99.
\]
However, Proposition~\ref{thm:mixed_over_mixed} proves that every mixed strategy over mixed strategies $\sigma$ satisfies $D_{\mathrm{KL}}(\sigma \,\|\, p_{\theta}(\mathbf{z} | \mathbf{c})) \leq \rho$, which is not generally true for elements of $\Delta(\mathcal{Z})$.

From Section~\ref{sec:double_oracle} onward, readers can interpret $\mathcal{T}$ as the pure strategy space and $\sigma$ as a standard mixed strategy. Despite each pure strategy $\tau \in \mathcal{T}$ being a distribution, 
all standard terminologies of game theory remain applicable.



% The pure strategy $z = (3, 4, 3)$ means the poacher will put $3$, $4$, and $5$ snares in each target region, respectively. 



\section{Proof of Proposition \ref{thm:mixed_over_mixed}}\label{appdx:mixed_over_mixed}
% $$D_{\mathrm{KL}}(\tau(\mathbf{z}) \,\|\, p_{\theta}(\mathbf{z}\mid \mathbf{c})) \leq \rho \}$$

We now show that for any $\pi(\mathbf{x})\in \Delta(\mathcal{X})$, 

\begin{align*}
\min_{\tau(\mathbf{z})} \left\{ \mathbb{E}_{\pi(\mathbf{x})}\mathbb{E}_{\tau(\mathbf{z})} \left[ u(\mathbf{x}, \mathbf{z}) \right] : D_{\mathrm{KL}}(\tau(\mathbf{z}) \,\|\, p_{\theta}(\mathbf{z} | \mathbf{c})) \leq \rho \right\}=\\
\min_{\sigma(\tau)} \left\{ \mathbb{E}_{\pi(\mathbf{x})}\mathbb{E}_{\sigma(\tau)} \left(\mathbb{E}_{\tau(\mathbf{z})}\left[ u(\mathbf{x}, \mathbf{z}) \right] \right): D_{\mathrm{KL}}(\tau(\mathbf{z}) \,\|\,  p_{\theta}(\mathbf{z} | \mathbf{c})) \leq \rho \right\}.
\end{align*}
From this, the proposition follows.

Consider any solution $\tau'(\mathbf{z})$ that attains the minimum on the left-hand side. Define a degenerate distribution over strategies $\sigma'(\tau) = \delta[\tau = \tau']$, i.e., it places all its mass on $\tau'$. Note that $\tau'$ satisfies the divergence constraint on the left-hand side, so $\sigma'(\tau)$ also satisfies the corresponding constraint on the right-hand side. Since the expected value under $\sigma'(\tau)$ matches the value attained by $\tau'$, the left-hand side is not smaller than the right-hand side.

Now take any solution $\sigma'(\tau)$ that attains the minimum on the right side. Define
$\tau'(\mathbf{z}) = \mathbb{E}_{\sigma'(\tau)}[\tau(\mathbf{z})]$. 
Because a mixture over mixed strategies is itself a valid mixed strategy in $\Delta(\mathcal{Z})$, $\tau'(\mathbf{z})$ is admissible on the left side.

By the convexity of the KL divergence in its first argument (Jensen's inequality), we have:
\[
D_{\mathrm{KL}}(\tau'(\mathbf{z})\,\|\, p_\theta(\mathbf{z}| \mathbf{c})) 
= D_{\mathrm{KL}}\bigl(\mathbb{E}_{\sigma'(\tau)}\tau(\mathbf{z}) \,\|\,  p_\theta(\mathbf{z}| \mathbf{c})\bigr)
\leq \mathbb{E}_{\sigma'(\tau)}\bigl[D_{\mathrm{KL}}(\tau(\mathbf{z}) \,\|\, p_\theta(\mathbf{z}|\mathbf{c}))\bigr]
\leq \rho.
\]
Here, the first inequality follows from the convexity of $D_{\mathrm{KL}}$ in its first argument, and the second inequality holds by the construction of $\sigma'(\tau)$, which satisfies the original constraint on the right side.

Thus, $\tau'(\mathbf{z})$ satisfies the left side constraint and attains the same expected value as $\sigma'(\tau)$. We then obtain that the left side is not larger than the right side.

Combining both parts, we conclude the proof. 

% \section{Appendix: Double Oracle}

% \paragraph{Conventional Notations}
% Let $w$ denote the realization of an infinite sequence of payoff matrix outcome. We denote $(p_i^*(w), q_i^*(w))$ as the equilibrium to the subgame $(X_i, Y_i, \hat{u}_i)$, where $\hat{u}_i$ is the sample approximation of the utility matrix at $i$-th iteration of Algorithm \ref{alg:double-oracle}. Let $\Delta_i$ denote the largest difference among the cell of the true payoff table and sample payoff table at the $i$-th iteration of the algorithm. We assume the utility is bounded between $[0, M]$.

% Lemma \ref{lem:utility_bound} shows that for any strategy pair, the sample estimation error is bounded by the largest sample estimation error in the payoff matrix.
% \begin{lemma}\label{lem:utility_bound}
%     For any $p_i \in \Delta X_i$ and $q_i \in \Delta Y_i$, we have $|\hat{U_i}(p_i, q_i) - U(p_i, q_i)| \leq \Delta_i$.
% \end{lemma}
% \begin{proof}
%     We write 
%     \begin{equation}\label{eq:utility_bound_pf}
%         |\hat{U_i}(p_i, q_i) - U(p_i, q_i)| = \sum_{x}\sum_{y} p_i(x) \cdot q_i (y) \cdot |\hat{U_i}(x, y) - U(x, y)|
%     \end{equation}
%    $|\hat{U_i}(x, y) - U(x, y)|$ denotes the sample estimation error for pure strategy pair $(x,y)$ in the payoff matrix. The maximum on the right-hand side of \ref{eq:utility_bound_pf} is obtained when putting all the probability mass on the strategy pair with the largest sample estimation error, which is $\Delta_i$.
% \end{proof}

% Lemma \ref{lem:order_stats_bound} proposes a sampling scheme that bounds the largest sample estimation error in the payoff matrix at every iteration of the algorithm. Because of lemma \ref{lem:utility_bound}, the proposed sampling scheme bounds the sample estimation error for the utility of any strategy pair as well. 
% \begin{lemma}\label{lem:order_stats_bound}
%     Given any $(\delta, \mathsf{p})$, if we sample
%     $$N_i = \left\lceil \frac{M(i+1)^2 \cdot \mathsf{p}}{\delta^2} \right\rceil$$
%     particles for each cell at the $i$-th iteration of the algorithm, then $\|\Delta\|_\infty$ with probability at least $1-\mathsf{prob}$. (We omit the time subscript of $\|\Delta\|_\infty$ because this bound holds for every iteration). 
% \end{lemma}

% \begin{proof}
%     For any cell $(j,k)$ in the matrix, we apply the Chebyshev bound of twisted sampling:
%     $$P(|\Delta_{j,k}| > \delta) \leq \frac{M}{N \cdot \delta^2}$$
%     Since at $i$-th iteration, there are $(i+1)^2$ cells in the payoff matrix, we apply the union bound and obtain:
%      $$P(\|\Delta\|_\infty > \delta) \leq \frac{M (i+1)^2}{N\cdot \delta^2}$$
%     By setting $N_i = \frac{M(i+1)^2 \cdot \mathsf{p}}{\delta^2}$, we have 
%     $$P(\|\Delta\|_\infty > \delta) \leq \mathsf{p}$$
% \end{proof}

% % \begin{lemma}
% %     Given a pure strategy $x$, if for every $i$,
% %     $$U(p_i^*(w)) + 2\delta \geq U(x, q_i^*(w)) with probability 1-p$$
% %     and $U(p_i^*(w), q_i^*(w)) \to U(p^*, q^*)$ and $U(x, q_i^*(w)) \to U(x, q^*)$, then 
% %     $$U(p^*, q^*) + 2\delta \geq U(x, q^*) with probability 1-p$$
% % \end{lemma}
% Lemma \ref{lem:prob-preserve} establishes that the probabilistic guarantee on utility bounds is preserved in the limit as the strategy pair converges.
% \begin{lemma}\label{lem:prob-preserve}
%     Given a pure strategy \( x \), if for every \( i \),
%     \[
%     U(p_i^*, q_i^*) + 2\delta \geq U(x, q_i^*) \quad \text{with probability at least } 1 - p,
%     \]
%     and for any $w$
%     \[
%     U(p_i^*(w), q_i^*(w)) \to U(p^*(w), q^*(w)) \quad \text{and} \quad U(x, q_i^*(w)) \to U(x, q^*(w)),
%     \]
%     then
%     \[
%     U(p^*, q^*) + 2\delta \geq U(x, q^*) \quad \text{with probability at least } 1 - p.
%     \]
% \end{lemma}
% \begin{proof}
% Let's consider the event $E_i = \left\{ 
% w \mid U(p_i^*(w), q_i^*(w)) + 2\delta < U(x, q_i^*(w))
% \right\}$, which means the collection of elementary outcomes that makes the $i$-th item in the weak convergent subsequence satisfies the inequality and $E = \left\{ 
% w \mid U(p^*(w), q^*(w)) + 2\delta < U(x, q^*(w))
% \right\}$. Let $\liminf\limits_{i \to \infty} E_i = 
%     \bigcup\limits_{n=1}^\infty 
% \bigcap\limits_{i=n}^\infty E_i$, where $\liminf\limits_{i \to \infty} E_i$ is the event that $w$ belongs to $E_i$ for all sufficiently large $i$. 

% Fix a $w$. Since  \[
%     U(p_i^*(w), q_i^*(w)) \to U(p^*(w), q^*(w)) \quad \text{and} \quad U(x, q_i^*(w)) \to U(x, q^*(w)),
%     \]
% if $U(p^*(w), q^*(w)) + 2\delta < U(x, q^*(w))$, then there exists $i_0$ such that when $i > i_0$,
% \[
%     U(p_i^*(w), q_i^*(w)) + 2\delta < U(x, q_i^*(w)) 
% \]
% Hence, $E \subseteq \liminf\limits_{i \to \infty} E_i$.
% By monotonicity of probability measure,
% \begin{align}\label{eq:monotonicity-of-measure}
%     P(E) \leq P(\liminf\limits_{i \to \infty} E_i)
% \end{align}
% % By applying Fatou's lemma to the indicator function $\mathbf{1}_{E_i}(w)$, we have
% % \begin{align*}
% %     P(\liminf\limits_{i \to \infty} E_i) \leq \liminf\limits_{i \to \infty}P(E_i)
% % \end{align*}
% Also, we have 
% \begin{align}\label{eq:fatou-lemma}
%     P(\liminf\limits_{i \to \infty} E_i) \leq \liminf\limits_{i \to \infty}P(E_i) \leq \mathsf{p}
% \end{align}
% where the first inequality follows from applying Fatou's lemma to the indicator function $\mathbf{1}_{E_i}(w)$, and the second inequality follows because $\forall i, P(E_i) \leq p $. Combining \ref{eq:monotonicity-of-measure} and \ref{eq:fatou-lemma}, we obtain 
% \[
%     U(p^*, q^*) + 2\delta \geq U(x, q^*) \quad \text{with probability at least } 1 - \mathsf{p}.
% \]
% \end{proof}

% \begin{theorem}
%     Let $G = (X, Y, u)$ be a continuous game. If $G$ is an infinite game and $\epsilon > 0$, given any $(\delta, \mathsf{p})$, by sampling
%      $$N_i =\left\lceil  \frac{M(i+1)^2 \cdot \mathsf{p}}{\delta^2} \right\rceil$$
%      particles for each cell in the payoff matrix at the $i$-th iteration of the algorithm, the algorithm will stop in a finite number of iterations, and when it stops, the algorithm will converge to a  $10\delta + \epsilon$ equilibrium with probability at least $1-\mathsf{p}$.
% \end{theorem}

% \begin{proof}
%     % (Technically, every $\hat{U}$ should have a time subscript, but for notation simplicity let's omit it for now. It won't impact the proof because every time we use convergence or weak convergence, we apply the convergence result to the original utility function).

%     % Let $w$ denote the realization of the infinite sequence of payoff matrix outcome. We denote $(p_i^*(w), q_i^*(w))$ as the equilibrium to the subgame $(X_i, Y_i, \hat{u}_i)$, where $\hat{u}_i$ is the sample approximation of the utility matrix at $i$-th iteration. 
    
%     We list here several results that are already proven in \cite{adam2021double}.
%     \begin{enumerate}
%         \item There exists a weakly convergent subsequence, which for simplicity, will be denoted by the same indices. Therefore, $p_i^*(w) \Rightarrow p^*(w)$ for some $p^*(w)$ and $q_i^*(w) \Rightarrow q^*(w)$ for some $q^*(w)$, where $\Rightarrow$ denotes weak convergence.
%         \item If $p_i \Rightarrow p$ in $\Delta_X$ and $q_i \Rightarrow q$ in $\Delta_Y$, then $U(p_i, q_i) \to U(p,q)$. If $p_i \Rightarrow p$ in $\Delta_X$ and $y_i \to y$ in $Y$, then $U(p_i, y_i) \to U(p,y)$.
%         \item For any $p \in \Delta_X$ we have
%         $$\min_{y \in Y} U(p,y) = \min_{q \in \Delta_Y} U(p,q)$$
%     \end{enumerate}
%     % From \cite{adam2021double}, we know there exists a weakly convergent subsequence, which for simplicity, will be denoted by the same indices. Therefore, $p_i^*(w) \Rightarrow p^*$ for some $p^*$ and $q_i^*(w) \Rightarrow q^*$ for some $q^*$, where $\Rightarrow$ denotes weak convergence.
%     By lemma \ref{lem:utility_bound} and \ref{lem:order_stats_bound}, our sampling scheme ensures that for any strategy pair $(p,q)$ and iteration $i$, we have $|U(p,q) - \hat{U}_i(p,q)| \leq \delta$ with probability at least $1-\mathsf{p}$.

%     Consider any $x$ such that $x \in X_{i_0}$ for some $i_0$. Take an arbitrary $i \geq i_0$, which implies $x \in X_i$. Since $(p_i^*, q_i^*)$ is an equilibrium of the subgame $(X_i, Y_i, \hat{u}_i)$, we  get
%     $$\hat{U}_i(p_i^*, q_i^*) \geq \hat{U}_i(x, q_i^*)$$
%     Since $U(p_i^*, q_i^*)$ and $\hat{U}_i(p_i^*, q_i^*)$ differ by at most $\delta$ with probability at least $1-\mathsf{p}$, we have
%     \begin{align*}
%         U(p_i^*, q_i^*) + 2\delta \geq U(x, q_i^*) \to U(x, q^*) \text{ with probability at least } 1 - \mathsf{p}.
%     \end{align*}
%     % $$U(p_i^*, q_i^*) + 2\delta \geq U(x, q_i^*) \to U(x, q^*)$$
%     Since $U(p_i^*, q_i^*) \to U(p^*, q^*)$, by lemma $\ref{lem:prob-preserve}$ we have
%     \begin{align}\label{eq:for-closed-x}
%         U(p^*, q^*) + 2\delta \geq U(x, q^*) \text{ with probability at least } 1 - \mathsf{p}
%     \end{align}
%     for all $x \in \cup X_i$. Since $U$ is continuous, the previous inequality holds for all $x \in cl(\cup X_i)$.
    
%  Fix now an arbitrary $x \in X$. Note $x_{i+1}$ is best response to $\hat{U}_i$ (since ranger oracle uses finite sample estimation of payoff matrix), and we have
%     $$\hat{U}_i(x_{i+1}, q_i^*) \geq \hat{U}_i(x, q_i^*)$$

%      Because $U$ and $\hat{U}_i$ differ by at most $\delta$ with probability at least $1-\mathsf{p}$, we have
%     \begin{align}\label{eq:original-eq-6}
%         U(x_{i+1}, q_i^*) + 2\delta \geq U(x, q_i^*) \to U(x, q^*) \text{ with probability at least } 1 - \mathsf{p}
%     \end{align}
%      Since $x_{i+1} \in X_{i+1}$ and by compactness of $X$, we can select a convergence subsequence $x_i \to \tilde{x}$, where $\tilde{x} \in cl(\cup X_i)$. This allows us to use \ref{eq:for-closed-x} to obtain 
%     \begin{align}\label{eq:original-eq-7}
%         U(x_{i+1}, q_i^*) \to U(\tilde{x}, q^*) \leq U(p^*, q^*) + 2 \delta \text{ with probability at least } 1 - \mathsf{p}
%     \end{align}

%     Now we first show that our algorithm will hit the terminating condition with finite iteration. Combining \ref{eq:original-eq-6} and \ref{eq:original-eq-7}, there exists a sufficiently large $L_0$ such that if $i > L_0$, we have \begin{align}\label{eq:ranger_util_range}
%         U(x_{i+1}, q_i^*) \in (U(p^*, q^*)-2\delta-\frac{\epsilon}{2},U(p^*, q^*) + 2\delta + \frac{\epsilon}{2}) \text{ with probability at least } 1 - \mathsf{p}
%     \end{align}
%     Repeat the analogous argument in the other variable: there exists a sufficiently large $L_1$ such that for $i > L_1$, we have \begin{align}\label{eq:poacher_util_range}
%    U(p_i^*, y_{i+1}) \in (U(p^*, q^*)-2\delta-\frac{\epsilon}{2},U(p^*, q^*) + \frac{\epsilon}{2}) \text{ with probability at least } 1 - \mathsf{p}
%     \end{align}
%      The two sides are not symmetrical because the best response for the poacher doesn't use the finite sample approximation of payoff matrix, thus having a smaller error.
%      Take $L = \max\{L_0, L_1\}$. For every $i > L$, combining \eqref{eq:ranger_util_range} and \eqref{eq:poacher_util_range}, we have
%      \begin{align*}
%          U(x_{i+1}, q_i^*)- U(p_i^*, y_{i+1}) \in (- 2\delta - \epsilon, 4\delta + \epsilon) \text{ with probability at least } 1 - \mathsf{p}
%      \end{align*}
%     This implies
%     \begin{align*}
%         \hat{U}_i(x_{i+1}, q_i^*)- \hat{U}_i(p_i^*, y_{i+1}) \in (- 4\delta - \epsilon, 6\delta + \epsilon) \text{ with probability at least } 1 - \mathsf{p}
%     \end{align*}
%     The chance that the algorithm has not stopped after $L+t$ iterations is bounded above by $\mathsf{p}^t$. Since $0 <\mathsf{p} < 1$, this implies the algorithm will terminate in finite iterations. 

%      When the algorithm terminates, we have the following:
%      \begin{equation}\label{eq:direction1}
%     \begin{aligned}
%         U(p_i^*, q_i^*) & \leq U(x_{i+1}, q_i^*) + 2\delta \\
%         & \leq \hat{U}_i(x_{i+1},q_i^*) + \delta + 2\delta \\
%         & \leq \hat{U}_i(p_i^*, y_{i+1}) + 4\delta + 3\delta + \epsilon \\
%         & \leq \hat{U}_i(p_i^*, y') + 7\delta + \epsilon \text{ where } y'=\arg\min_{y \in \Delta_Y} U(p_i^*, y)\\
%         & \leq \min_{y' \in Y}U(p_i^*, y') + \delta + 7\delta + \epsilon \\
%         & = \min_{q \in \Delta Y}U(p_i^*, q) + 8\delta + \epsilon
%     \end{aligned}
%     \end{equation}
%     with probability at least $1-\mathsf{p}$. The first relation follows from the definition of best response (with a relaxation $2\delta$ introduced as the best response is to $\hat{U}_i$), the second and fifth relations come from the error bound on utility deviation, the third relation comes from the terminating condition, and the fourth relation comes from best response. Similarly, we have 
%     \begin{equation}\label{eq:direction2}
%         \begin{aligned}
%         U(p_i^*, q_i^*) & \geq U(p_i^*, y_{i+1}) - 2\delta \\
%         & \geq \hat{U}_i(p_i^*,y_{i+1}) - \delta - 2\delta \\
%         & \geq \hat{U}_i(x_{i+1},q_i^*) - 6\delta - 3\delta - \epsilon \\
%         & \geq \hat{U}_i(x', q_i^{*}) - 9\delta - \epsilon \text{ where } x'=\arg\max_{x \in \Delta_X} U(x, q_i^*)\\
%         & \geq \max_{x' \in X}U(x', q_i^*) - \delta - 9\delta - \epsilon \\
%         & = \max_{p \in \Delta X}U(p, q_i^*) - 10\delta - \epsilon
%         \end{aligned}
%     \end{equation}
%     % \begin{align}\label{eq:direction2}
%     %     U(p_i^*, q_i^*) & \geq U(p_i^*, y_{i+1}) - 2\delta \\
%     %     & \geq \hat{U}_i(p_i^*,y_{i+1}) - \delta - 2\delta \\
%     %     & \geq \hat{U}_i(x_{i+1},q_i^*) - 6\delta - 3\delta - \epsilon \\
%     %     & \geq \hat{U}_i(x', q_i^{*}) - 9\delta + \epsilon \text{ where } x'=\arg\min_{x \in \Delta_X} U(x, q_i^*)\\
%     %     & \geq \min_{x' \in X}U(x', q_i^*) - \delta - 9\delta + \epsilon \\
%     %     & = \min_{q \in \Delta Y}U(p_i^*, q) - 10\delta - \epsilon
%     % \end{align}
    
%      with probability at least $1-\mathsf{p}$. Combining \ref{eq:direction1} and \ref{eq:direction2}, we show that $(p_i^*, q_i^*)$ is a $10\delta+\epsilon$ equilibrium with probability at least $1-\mathsf{p}$.
% \end{proof}

    




     
    % % $$U(x_{i+1}, q_i^*)- U(p_i^*, y_{i+1}) \in (- 2\delta - \epsilon, 4\delta + \epsilon)$$
    % with probability at least $1-\mathsf{prob}$.
   
    
    %  Combine \ref{eq:original-eq-6} and \ref{eq:original-eq-7} we have 
    % $$U(p^*, q^*) + 4 \delta \geq U(x, q^*) \text{ with probability at least } 1 - \mathsf{p}$$
    %  Repeating the analogous argument in the other variable yields 
    % $$U(p^*, q^*) - 2 \delta \leq U(p^*, y) \text{ with probability at least } 1 - \mathsf{p}$$
    % Note the two sides are not symmetrical because the best response for the poacher does not use finite sample estimation, which reduces the error. 

    


    
% \newpage
    
%     We know that $p_i^* \Rightarrow p^*$ for some $p^*$ and $q_i^* \Rightarrow q^*$ for some $q^*$, where $\Rightarrow$ denotes weak convergence.  

%     Consider any $x$ such that $x \in X_{i_0}$ for some $i_0$. Take an arbitrary $i \geq i_0$, which implies $x \in X_i$. Since $(p_i^*, q_i^*)$ is an equilibrium of the subgame $(X_i, Y_i, \hat{u})$, we  get
%     $$\hat{U}_i(p_i^*, q_i^*) \geq \hat{U}_i(x, q_i^*)$$
%     Since $U(p_i^*, q_i^*)$ and $\hat{U}_i(p_i^*, q_i^*)$ differ by at most $\delta$ with probability at least $1-\mathsf{prob}$, we have 
%     $$U(p_i^*, q_i^*) + 2\delta \geq U(x, q_i^*) \to U(x, q^*)$$
%     Since $U(p_i^*, q_i^*) \to U(p^*, q^*)$, this implies
%     \begin{align}\label{eq-for-closedx}
%         U(p^*, q^*) + 2\delta \geq U(x, q^*)
%     \end{align}
%     for all $x \in \cup X_i$. Since $U$ is continuous, the previous inequality holds for all $x \in cl(\cup X_i)$.

%     Fix now an arbitrary $x \in X$. Note $x_{i+1}$ is best response to $\hat{U}_i$ (ranger oracle uses finite sample estimation), so we have
%     $$\hat{U}_i(x_{i+1}, q_i^*) \geq \hat{U}_i(x, q_i^*)$$
%     Because $U$ and $\hat{U}_i$ differ by at most $\delta$ with probability at least $1-\mathsf{prob}$, we have
%     \begin{align}\label{eq6}
%         U(x_{i+1}, q_i^*) + 2\delta \geq U(x, q_i^*) \to U(x, q^*)
%     \end{align}
%     Since $x_{i+1} \in X_{i+1}$ and by compactness of $X$, we can select a convergent subsequence $x_i \to \tilde{x}$, where $\tilde{x} \in cl(\cup X_i)$. This allows us to use \eqref{eq-for-closedx} to obtain 
%     \begin{align}\label{eq7}
%         U(x_{i+1}, q_i^*) \to U(\tilde{x}, q^*) \leq U(p^*, q^*) + 2 \delta
%     \end{align}
%     Combine \ref{eq6} and \ref{eq7} we have 
%     $$U(p^*, q^*) + 4 \delta \geq U(x, q^*)$$
%     Repeating the analogous argument in the other variable yields 
%     $$U(p^*, q^*) - 2 \delta \leq U(p^*, y)$$
%     (The two sides are not symmetrical because the best response for the poacher is perfect as it does not use finite sample estimation). 
%     % All the derivation above holds with $$\|\Delta\|_\infty < \delta$$, which happens with probability at least $1-\mathsf{prob}$.  
%     % Hence, $(p^*, q^*)$ is a $4\delta - equilibrium$ with probability at least $1-\mathsf{prob}$. 

   

%     Note by \ref{eq7} we have 
%     $$U(p^*, q^*) +2 \delta \geq U(\tilde{x}, q^*) \leftarrow U(x_{i+1}, q_i^*)$$
%     By best response,
%     $$U(x_{i+1}, q_i^*)  + 2\delta \geq U(p_i^*, q_i^*) \rightarrow U(p^*, q^*)$$
%     Hence, there exists a sufficiently large $L_0$ such that if $i > L_0$, we have 
%     $$U(x_{i+1}, q_i^*) \in (U(p^*, q^*)-2\delta-\frac{\epsilon}{2},U(p^*, q^*)+2\delta + \frac{\epsilon}{2})$$
%     with probability at least $1-\mathsf{prob}$.
%     Similarly, there exists a sufficiently large $L_1$ such that for $i > L_1$, we have 
%      $$U(p_i^*, y_{i+1}) \in (U(p^*, q^*)-2\delta-\frac{\epsilon}{2},U(p^*, q^*) + \frac{\epsilon}{2})$$
%     with probability at least $1-\mathsf{prob}$. Again, the two sides are not symmetrical because the diffusion (poacher) oracle does not involve finite sampling error. Take $L = \max\{L_0, L_1\}$. For every $i > L$, we have
%     $$U(x_{i+1}, q_i^*)- U(p_i^*, y_{i+1}) \in (- 2\delta - \epsilon, 4\delta + \epsilon)$$
%     with probability at least $1-\mathsf{prob}$.
%     This implies
%      $$\hat{U}_i(x_{i+1}, q_i^*)- \hat{U}_i(p_i^*, y_{i+1}) \in (- 6\delta - \epsilon, 6\delta + \epsilon)$$
%     with probability at least $1-\mathsf{prob}$.
%     Let $i = L+1+k$, the chance that the algorithm has not stopped is less than $\mathsf{prob}^k$. Since $0 < \mathsf{prob} < 1$, this shows the terminating condition will happen in finite iterations with probability $1$.

%     When the algorithm stops, we have the following:
%     \begin{align*}
%         U(p_i^*, q_i^*) & \leq U(x_{i+1}, q_i^*) + 2\delta \\
%         & \leq U(p_i^*, y_{i+1}) + 8\delta + \epsilon \\
%         &  = \min_{y' \in Y}U(p_i^*, y') + 2\delta + 8\delta + \epsilon \\
%         & = \min_{q \in \Delta Y}U(p_i^*, q) + 10\delta + \epsilon
%     \end{align*}
%     with probability at least $1-\mathsf{prob}$. The first and the third relations above follow from the definition of best response (with a relaxation $2\delta$ introduced as the best response is to $\hat{U}_i$), and the second from the terminating condition. We repeat the same procedure for the other variable, and we obtain that $(p_i^*, q_i^*)$ is a $(10\delta+\epsilon)$-equilibrium with probability at least $1-\mathsf{prob}$.


    
% \end{proof}






% \section{Appendix: Approximation Error of the Ranger Oracle}

% \subsection{Problem Setup}

% Suppose the goal is to solve a stochastic optimization problem:
% \[
% x^* = \arg\min_{x \in \mathcal{X}} \mathbb{E}_q[F(x, \xi)],
% \]
% where \( \xi \sim q \) is a random variable, and \( F(x, \xi) \) is the objective function.

% In the SMC context, we approximate the expectation \( \mathbb{E}_q[F(x, \xi)] \) using particle estimates:
% \[
% \hat{\mathbb{E}}[F(x, \xi)] = \frac{1}{N} \sum_{n=1}^N F(x, X_t^n),
% \]
% where \( \{X_t^n\}_{n=1}^N \) are the particles sampled according to the SMC algorithm.

% The \textbf{particle-based stochastic optimization problem} becomes:
% \[
% \hat{x}_N = \arg\min_{x \in \mathcal{X}} \hat{\mathbb{E}}[F(x, \xi)].
% \]

% We are interested in bounding the \textbf{objective gap}:
% \[
% \mathbb{E}_q[F(\hat{x}_N, \xi)] - \mathbb{E}_q[F(x^*, \xi)].
% \]


% \subsection{Derivation Steps}

% \subsubsection{Step 1: Decompose the Objective Gap}

% Decompose the gap into two components:
% \[
% \mathbb{E}_q[F(\hat{x}_N, \xi)] - \mathbb{E}_q[F(x^*, \xi)] = \underbrace{\mathbb{E}_q[F(\hat{x}_N, \xi)] - \hat{\mathbb{E}}[F(\hat{x}_N, \xi)]}_{\text{(I)}} + \underbrace{\hat{\mathbb{E}}[F(\hat{x}_N, \xi)] - \hat{\mathbb{E}}[F(x^*, \xi)]}_{\text{(II)}} + \underbrace{\hat{\mathbb{E}}[F(x^*, \xi)] - \mathbb{E}_q[F(x^*, \xi)]}_{\text{(III)}}.
% \]

% - (I) and (III): Error from approximating the true expectation with the particle estimate.

% - (II): Suboptimality of the particle-based solution \( \hat{x}_N \) in the approximate problem.


% \subsubsection{Step 2: Bound the Approximation Errors (I) and (III)}

% From the \textbf{MSE bounds in Proposition 11.3} in \cite{chopin2020introduction}, the particle estimate satisfies:
% \[
% \mathbb{E}\left[\left(\hat{\mathbb{E}}[F(x, \xi)] - \mathbb{E}_q[F(x, \xi)]\right)^2\right] \leq \frac{c_t \|F(x, \cdot)\|_\infty^2}{N},
% \]
% where $c_t$ is a constant.

% Using \textbf{Chebyshev's inequality}, the probability that the particle estimate deviates significantly from the true expectation is bounded as:
% \[
% \mathbb{P}\left(\left|\hat{\mathbb{E}}[F(x, \xi)] - \mathbb{E}_q[F(x, \xi)]\right| > \epsilon\right) \leq \frac{c_t \|F(x, \cdot)\|_\infty^2}{N \epsilon^2}.
% \]

% Thus, for any \( x \in \mathcal{X} \), the particle estimate converges in probability to the true expectation.

% Applying this bound to both \( \hat{x}_N \) and \( x^* \), we have:
% \[
% \mathbb{P}\left(|\mathbb{E}_q[F(\hat{x}_N, \xi)] - \hat{\mathbb{E}}[F(\hat{x}_N, \xi)]| > \epsilon\right) \leq \frac{c_t \|F(\hat{x}_N, \cdot)\|_\infty^2}{N \epsilon^2},
% \]
% and
% \[
% \mathbb{P}\left(|\hat{\mathbb{E}}[F(x^*, \xi)] - \mathbb{E}_q[F(x^*, \xi)]| > \epsilon\right) \leq \frac{c_t \|F(x^*, \cdot)\|_\infty^2}{N \epsilon^2}.
% \]

% \subsubsection{Step 3: Bound the Suboptimality Term (II)}

% By definition of \( \hat{x}_N \), it minimizes the particle-based objective:
% \[
% \hat{x}_N = \arg\min_{x \in \mathcal{X}} \hat{\mathbb{E}}[F(x, \xi)].
% \]

% Thus:
% \[
% \hat{\mathbb{E}}[F(\hat{x}_N, \xi)] \leq \hat{\mathbb{E}}[F(x^*, \xi)].
% \]

% Rearranging, we find that:
% \[
% \hat{\mathbb{E}}[F(\hat{x}_N, \xi)] - \hat{\mathbb{E}}[F(x^*, \xi)] \leq 0.
% \]

% This means the suboptimality term is non-positive.

% \subsubsection{Step 4: Combine the Bounds}

% Combining the results:
% \[
% \mathbb{E}_q[F(\hat{x}_N, \xi)] - \mathbb{E}_q[F(x^*, \xi)] \leq 2 \sup_{x \in \mathcal{X}} \left|\hat{\mathbb{E}}[F(x, \xi)] - \mathbb{E}_q[F(x, \xi)]\right|.
% \]

% Using the uniform bound from Step 2:
% \[
% \sup_{x \in \mathcal{X}} \left|\hat{\mathbb{E}}[F(x, \xi)] - \mathbb{E}_q[F(x, \xi)]\right| = \mathcal{O}_p\left(\sqrt{\frac{1}{N}}\right).
% \]

% Thus, the \textbf{objective gap} satisfies:
% \[
% \mathbb{E}_q[F(\hat{x}_N, \xi)] - \mathbb{E}_q[F(x^*, \xi)] = \mathcal{O}_p\left(\sqrt{\frac{1}{N}}\right).
% \]


% \subsubsection{Final Probabilistic Bound}

% Using the probabilistic result from Chebyshev's inequality, for any \( \epsilon > 0 \), we can state that:
% \[
% \mathbb{P}\left(\mathbb{E}_q[F(\hat{x}_N, \xi)] - \mathbb{E}_q[F(x^*, \xi)] > \epsilon\right) \leq \frac{c_t \|F(x, \cdot)\|_\infty^2}{N \epsilon^2}.
% \]

% This shows that the \textbf{probability of the objective gap exceeding any fixed \( \epsilon \) decays as \( \mathcal{O}(1/N) \)}, consistent with the \( \mathcal{O}_p(N^{-1/2}) \) rate of the gap itself, and for sufficiently large \( N \), the particle-based stochastic optimization converges to the true optimal solution with high probability.

% \subsubsection{Concavity of the utility function in ranger effort}
% \begin{assumption}[Concavity of Utility Function]
%     We assume that the utility function is twice differentiable with respect to $a$, and we assume that $\frac{\partial^2 V(a,q(z))}{\partial a^2} \leq 0 \ \forall a$, i.e., the utility function is concave in ranger effort.
% \end{assumption}

% The concavity assumption implies that $\frac{\partial V(a,q(z))}{\partial a}$ is decreasing in $a$, which is equivalent to saying there are diminishing marginal returns to ranger effort. The intuition is that within the first unit of effort, the most obvious snares in the field are removed first. Hence, the same effort will lead to fewer snares being found later. %This phenomenon is also documented in [ideally we can find some conservation literature here]. 

% \begin{assumption}
%     The distribution learned by diffusion model, i.e., $\pi(\mathbf{z})$ has full support.
% \end{assumption}
\input{proof}
\input{exp-details}
\end{document}