\onecolumn

\title{Privacy-Aware Randomized Quantization via Linear Programming \\ (Supplementary Material)}
\maketitle

\appendix



\section{Proofs}\label{proofs}

\begin{proof}

(of Theorem~\ref{thm:erm_privacy}):

Assume that the bins are uniformly distributed, i.e., $B_{i} = -\Delta-c + (i-1)\frac{2c+2\Delta}{m-1}$ for $i\in [m]$, with $m \geq 4$. Then the selection probability of \textsf{ERM} can be calculated as:

\begin{align*}
    q_j(i) & = 
        \frac{\exp\{\frac{\gamma(B_{i}-B_{j})}{2(B_{j}-B_{1})}\}}{\sum_{k=1}^{j}\exp\{\frac{\gamma(B_{k}-B_{j})}{2(B_{j}-B_{1})}\}} 
         = \frac{\exp\{\frac{\gamma(i-j)}{2(j-1)}\}}{\sum_{k=1}^{j}\exp\{\frac{\gamma(k-j)}{2(j-1)}\}}.
\end{align*}

Taking the reciprocal of the probability, we have:

\begin{equation*}
    \frac{1}{q_j(i)} = \sum_{k=1}^{j} \exp\{\frac{\gamma(k-i)}{2(j-1)}\} 
      \leq j \exp \{ \frac{\gamma (j-1)}{2(j-1)} \} = j \exp\{\frac{\gamma}{2}\}. 
\end{equation*}

Therefore we can find the lower bound of $q_j(i)$:

\begin{equation*}
    q_j(i) \geq \frac{1}{j} \exp\{-\frac{\gamma}{2}\}.
\end{equation*}

Combining the lower bound with \eqref{equ:output}, we obtain a lower bound of $p(x,i)$:

\begin{align*}
    p(x,i) & = q_j(i)  {\sum}_{m\geq r \geq j+1} \left( q_{m-j}(m-r+1) \frac{B_{r}-x}{B_{r}-B_{i}}  \right) 
     ~~~\geq~~~ \frac{\exp\{-\frac{\gamma}{2}\}}{j} \cdot \frac{\exp\{-\frac{\gamma}{2}\}}{m-j} 
    \cdot {\sum}_{m\geq r \geq j+1} \left( \frac{B_{r}-x}{B_{r}-B_{i}}  \right) \\
    & \geq \frac{\exp\{-\gamma\}}{m^2/4} \cdot {\sum}_{m\geq r \geq j+1} \left( \frac{B_{r}-x}{B_{r}-B_{i}}  \right) 
     ~~~\geq~~~ \frac{4\exp\{-\gamma\}}{m^2 (B_{m}-B_{1})} \cdot {\sum}_{m\geq r \geq j+1} (B_{r}-B_{j+1}) \\
    & \geq \frac{4\exp\{-\gamma\}}{m^2 (2c+2\Delta)} \cdot \frac{2c+2\Delta}{m-1} \cdot {\sum}_{m\geq r \geq j+1} (r-(j+1)) 
     ~~~=~~~ \frac{4\exp\{-\gamma\}}{m^2 (m-1)} \cdot \frac{(m-j-1)(m-j)}{2}. 
\end{align*}

Since $x \in [-c, c]$ and $x \in [B_j, B_{j+1})$, we have:

\begin{equation*}
    (j-1) \cdot \frac{2c+2\Delta}{m-1} < \Delta \leq j \cdot \frac{2c+2\Delta}{m-1},
\end{equation*}

which implies:

\begin{equation*}
    \frac{(m-1)\Delta}{2c+2\Delta} \leq j < \frac{(m-1)\Delta}{2c+2\Delta} + 1.
\end{equation*}

Hence we have:

\begin{align*}
     p(x,i) & \geq \frac{4\exp\{-\gamma\}}{m^2 (m-1)} \cdot \frac{(m-j-1)(m-j)}{2} 
     ~~~>~~~  \frac{4\exp\{-\gamma\}}{m^2 (m-1)} \cdot \frac{(m-\frac{(m-1)\Delta}{2c+2\Delta}-2)(m-\frac{(m-1)\Delta}{2c+2\Delta} - 1)}{2} \\
    & =  \frac{(2mc+m\Delta-4c-4\Delta)(2mc+m\Delta-2c-2\Delta)\exp\{-\gamma\}}{2m^2 (m-1)(c+\Delta)}  
     ~~~>~~~  \frac{c\exp\{-\gamma\}}{2m(c+\Delta)} \quad (m \geq 4).
\end{align*}

Let the privacy loss be $e^\epsilon = \max_{x, x^{\prime}}\frac{p(x,i)}{p(x^{\prime},i)}$ for $i \in [m]$. Then we have:

\begin{align*}
    e^\epsilon &
     \leq \frac{\max_{x,i} p(x,i)}{\min_{x,i} p(x,i)} \quad
     \leq \frac{1}{\frac{c\exp\{-\gamma\}}{2m(c+\Delta)}} 
= \frac{2m(c+\Delta)\exp\{\gamma\}}{c}. 
\end{align*}

Hence we have an upper bound on $\epsilon$:

\begin{equation*}
    \epsilon \leq \gamma + \log \frac{2m(c+\Delta)}{c}.
\end{equation*}

\end{proof}

\begin{proof}
(of Theorem~\ref{thm:erm_error}):

    For the exponential mechanism with a $\delta$-sensitive score function $f$, privacy parameter $\gamma$, and output set $\mathcal{Y}$, we have the following inequality on the quality $f(y)$ of the output $y$~\citep{adaptive}:

    \begin{align*}
        \mathbb{E}(f(y)) \geq {\max}_{y \in \mathcal{Y}} ~f(y) - \frac{2\delta\log |\mathcal{Y}|}{\gamma}.
    \end{align*}

    Assume $x \in [B_{j}, B_{j+1})$, $B_{i}=-c-\Delta+(i-1)\frac{2c+2\Delta}{m-1}$. When selecting the left bin $B_{l}$ with the exponential mechanism (denoted as event $L_j = l$), we have $f(l)=B_{l}-B_{j}$, $\max f(l) = 0$, sensitivity of the score function $\delta=B_{j}-B_{1}$, and $|\mathcal{Y}|=j$; hence we have:

    \begin{align*}
        \mathbb{E}(B_{j}-B_{l}) = - \mathbb{E}(f(l)) \leq 
        \frac{2 (B_{j}-B_{1}) \log j}{\gamma}.
    \end{align*}

    Similarly, when selecting the right bin $B_{r}$, we have:

    \begin{align*}
        \mathbb{E}(B_{r}-B_{j+1}) \leq 
        \frac{2 (B_{m}-B_{j+1}) \log (m-j)}{\gamma}.
    \end{align*}

    Since $\mathcal{M}(x) \in \{B_{l}, B_{r}\}$ and $x \in [B_{l}, B_{r}]$, we have $|\mathcal{M}(x)-x| \leq B_{r}-B_{l}$, which gives an upper bound on the expected absolute error:

    \begin{align*}
         \mathbb{E}(|\mathcal{M}(x)-x|) & \leq \mathbb{E}(B_{r}-B_{j+1}) + (B_{j+1}-B_{j}) + \mathbb{E}(B_{j}-B_{l}) \\
        & \leq \frac{2 (B_{j}-B_{1}) \log j}{\gamma} + \frac{2c+2\Delta}{m-1} + \frac{2 (B_{m}-B_{j+1}) \log (m-j)}{\gamma} \\
        & \leq \frac{2 \log(m) (2c+2\Delta)}{\gamma} + \frac{2c+2\Delta}{m-1}.
    \end{align*}
    
\end{proof}

\begin{proof}
(of Lemma~\ref{lemma:x_error}):

For each given $x \in [B_j, B_{j+1})$, the upper bound of its Mean Absolute Error can be derived as follows:

{
\begin{eqnarray*}
    \mathbb{E}(|\mathcal{M}(x)-x|) &=&
    \sum_{\substack{i \leq j \\ k > j}} \Pr(L_j=i) \Pr(R_j=k) \big((\frac{B_{k}-x}{B_{k}-B_{i}}) (x-B_{i}) + (\frac{x-B_{i}}{B_{k}-B_{i}}) (B_{k}-x)\big)\\
     &=& \sum_{\substack{i \in [1, j] \\ k \in [j+1, m]}}\Pr(L_j=i)  \Pr(R_j=k) (\frac{2(x-B_{i})(B_{k}-x)}{B_{k}-B_{i}}) \\
     &\leq&  \sum_{\substack{i \in [1, j] \\ k \in [j+1, m]}}\Pr(L_j=i) \Pr(R_j=k) (\frac{B_{k}-B_{i}}{2}) \\
     &=&  \frac{1}{2}\mathbb{E}(B_{r}-B_{l}) \\ 
     &=& \frac{1}{2}\big(\mathbb{E}(B_{r}-B_{j+1}) + \mathbb{E}(B_{j+1}-B_{j}) + \mathbb{E}(B_{j}-B_{l})\big),
\end{eqnarray*}}
where $B_r$ is the random variable denoting the bin selected on the right, and $B_l$ is the random variable denoting the bin selected on the left.

Consider the process of selecting one bin from $n$ bins $B_1, B_2, \cdots, B_n$ according to the selection probabilities $q_n(1), q_n(2), \cdots, q_n(n)$. Denote the expected distance between the output bin $B_{i}$ and $B_{n}$ as $\zeta_n$, i.e., $\zeta_n = \sum_{i \in [1,n]} q_n(i) (B_{n}-B_{i})$, which is a linear combination of the selection probabilities when the values of the bins are fixed. 
We know that $\mathbb{E}(B_{r}-B_{j+1}) = \zeta_{m-j}$, and $\mathbb{E}(B_{j}-B_{l}) = \zeta_{j}$.
Hence we obtain:
\begin{align*}
    \mathbb{E}(|\mathcal{M}(x)-x|) \leq \frac{1}{2} \big(\zeta_{m-j} + (B_{j+1}-B_{j}) + \zeta_{j}\big).
\end{align*}
\end{proof}

\begin{proof}
(of Theorem~\ref{thm:uniform}):

Assume that the positions of the bins are given (either uniformly or non-uniformly distributed), that the input $x \in [-c, c]$ follows a uniform distribution, and that the probability density function of $X$ is $f_X(x) = \frac{1}{2c}$. Then we find an upper bound for $\mathbb{E}(|\mathcal{M}(X)-X|)$ using Lemma~\ref{lemma:x_error} and the law of total expectation as follows:
{\small
\begin{align*}
     &\mathbb{E}(|\mathcal{M}(X)-X|) = \int_{-c}^c \frac{1}{2c}\mathbb{E}(|\mathcal{M}(x)-x|)dx \\  
     &= \int_{-c}^{B_s}\frac{1}{2c}\mathbb{E}(|\mathcal{M}(x)-x|)dx + \sum_{i=s}^{t-1}\int_{B_{i}}^{B_{i+1}}\frac{1}{2c}\mathbb{E}(|\mathcal{M}(x)-x|)dx +\int_{B_{t}}^{c}\frac{1}{2c}\mathbb{E}(|\mathcal{M}(x)-x|)dx \\
    &\leq  \frac{1}{2c} \big( (B_s+c)(\zeta_{m-s+1}+B_{s}-B_{s-1}+\zeta_{s-1})
    + \sum_{i=s}^{t-1} (B_{i+1}-B_{i})(\zeta_{m-i}+B_{i+1}-B_{i}+\zeta_{i}) 
    + (c-B_{t})(\zeta_{m-t}+B_{t+1}-B_{t}+\zeta_t) \big),
\end{align*}
}
 where $-c-\Delta \leq B_{s-1} < -c \leq B_s < B_{t} \leq c < B_{t+1}\leq c+\Delta $.
Discarding the constant terms, we can have the following objective function:
\begin{equation}
    \min_{q_j(i)} \sum_{s\leq n\leq  t+1} \big(\min(c, B_{n})-\max(-c, B_{n-1})\big) \big(\zeta_{n-1} + \zeta_{m-n+1}\big),
\end{equation}
where $\zeta_n$ is given in Lemma \ref{lemma:x_error} and Theorem~\ref{thm:uniform}.

\end{proof}

\begin{proof}
(of Lemma~\ref{lemma:pr_set}):

When $-c \leq B_i \leq c$ and $x \geq B_{i}$, according to \eqref{equ:output},  $p(x,i)=q_j(i)  {\sum}_{m\geq r \geq j+1} \left( q_{m-j}(m-r+1) \frac{B_{r}-x}{B_{r}-B_{i}}  \right)$. Since $\frac{B_{r}-x}{B_{r}-B_{i}} \leq 1$ (with equality when $x = B_{i}$), we obtain that:

\begin{equation*}
    p(x,i) \leq q_j(i)  {\sum}_{m\geq r \geq j+1} q_{m-j}(m-r+1) = q_j(i). 
\end{equation*}

$\forall j \in [m], j \geq i$,  we assume $q_i(i) \geq q_{j}(i)$, hence we have $q_i(i) \in \overline{\mathcal{S}}_i$, where $\overline{\mathcal{S}}_i$ is as defined in Lemma~\ref{lemma:pr_set}. Similarly, we can prove that when $-c \leq B_i \leq c$ and $x < B_{i}$, $q_{m+1-i}(m+1-i) \in \overline{\mathcal{S}}_i$.

If $B_{i} \leq x$, then $\forall x, x^\prime \in [B_k, B_{k+1}), x \leq x^\prime$, we have $p(x,i) \geq p(x^\prime,i)$. This indicates that $p(x,i)$ is monotonic within each interval delimited by the bins (e.g., $[-c, B_{i})$, $[B_{i}, B_{i+1})$, or $[B_{i}, c)$), and decreases as $x$ moves farther from $B_{i}$. The same can be shown when $x < B_i$. Hence if $B_i < -c$, $\max p(x,i)$ is achieved only when $x=-c$ or $x=B_{k}$ $(-c \leq B_{k} < c)$. If $B_{i} > c$, then $\max p(x,i)$ is achieved only when $x=c$ or $x=B_{k}$ $(-c \leq B_{k} < c)$. 
Similarly, $\min p(x,i)$ is achieved only when $x$ approaches the position of a bin $(B_{k})$, or is located at the edge ($c$ or $-c$) that is farther from $B_i$. 

\end{proof}

\begin{proof}
(of Theorem~\ref{thm:constraint2}):

According to Lemma~\ref{lemma:pr_set}, when $B_{i} \leq -c$, we have:

{ 
\begin{equation*}
\max_x p(x,i) \in \{p(-c,i)\} \cup \{p(B_{k},i)\} (k \in [m], -c \leq B_{k} \leq c).
\end{equation*}}

According to \eqref{equ:output}, when $B_{i} \leq x$:

{ 
\begin{equation*}
    p(x,i) = q_j(i)  {\sum}_{m\geq r \geq j+1} \left( q_{m-j}(m-r+1) \frac{B_{r}-x}{B_{r}-B_{i}}  \right) .
\end{equation*}}

Hence we can get:

{ 
\begin{equation*}
    p(B_{k},i) = q_k(i)  {\sum}_{m\geq r \geq k+1} \left( q_{m-k}(m-r+1) \frac{B_{r}-B_{k}}{B_{r}-B_{i}}  \right) .
\end{equation*}}

{ 
\begin{equation*}
    p(B_{k+1},i) = q_{k+1}(i)  {\sum}_{m\geq r \geq k+2} \left( q_{m-k-1}(m-r+1) \frac{B_{r}-B_{k+1}}{B_{r}-B_{i}}  \right) .
\end{equation*}}

Assume that $\forall i,j \in [m], i \leq j$:

{ 
\begin{equation}\label{equ:extra1}
    q_j(i) \geq q_{j+1}(i) , 
\end{equation}}

and $\forall k,r \in [m], s \leq k \leq t, r > k+1$ ($s$ and $t$ are as defined in Theorem~\ref{thm:uniform}), we assume:

\begin{equation*}
    q_{m-k}(m-r+1) \cdot (B_{r}-B_{k}) \geq q_{m-k-1}(m-r+1) \cdot (B_{r}-B_{k+1}),
\end{equation*}

then we get: 

{ 
\begin{equation}\label{equ:extra2}
    p(B_{k},i) \geq p(B_{k+1},i).
\end{equation}}

From \eqref{equ:output}, we can also know that:

{ 
\begin{equation*}
    p(x,i-1) = q_j(i-1)  {\sum}_{m\geq r \geq j+1} \left( q_{m-j}(m-r+1) \frac{B_{r}-x}{B_{r}-B_{i-1}}  \right) .
\end{equation*}}

Assume that $\forall i,j \in [m], i \leq j$, we have:
\begin{equation}\label{equ:extra3}
    q_j(i-1) \leq q_j(i),
\end{equation}

hence we can know that:

{ 
\begin{equation}\label{equ:extra4}
    p(x,i-1) \leq p(x, i). 
\end{equation}}

Through \eqref{equ:extra2}, \eqref{equ:extra4}, and Lemma~\ref{lemma:pr_set}, we can know that when $B_{i} < -c$, we have $p(-c, s-1) \in \overline{\mathcal{S}}_i$, where $\overline{\mathcal{S}}_i$ is as defined in Lemma~\ref{lemma:pr_set}, $s$ is as defined in Theorem~\ref{thm:uniform}.

When $-c \leq B_{i} \leq c$, we have $\max_x p(x,i) \in \{q_i(i), q_{m+1-i}(m+1-i)\}$. Assume that $\forall i \in [m], q_{i}(i) \geq q_{i+1}(i+1)$, we have $q_{s}(s) \in \overline{\mathcal{S}}_i$. Now we have $\overline{\mathcal{S}}_i = \{ p(-c, s-1), q_{s}(s) \}$.

According to Lemma~\ref{lemma:pr_set} and \eqref{equ:extra4}, $\min_{x,i} p(x,i) \in \{ \lim_{x \to B_{k}} p(x,1) |-c\leq B_{k} \leq c\} \cup \{p(c,1)\}$. We have:

{ 
\begin{equation}
    \lim_{x \to B_{k}}p(x,1)
    =  q_{k-1}(1)  {\sum}_{r \in [k+1, m]}\bigg( q_{m-k+1}(m-r+1) \frac{B_{r}-B_{k}}{B_{r}-B_{1}} \bigg) ,
\end{equation}}

{ 
\begin{equation}
    \lim_{x \to B_{k+1}}p(x,1)   
    =  q_{k}(1)  {\sum}_{r \in [k+2, m]}\bigg( q_{m-k}(m-r+1) \frac{B_{r}-B_{k+1}}{B_{r}-B_{1}} \bigg) .
\end{equation}}

According to \eqref{equ:extra1}, we have $q_{k-1}(j) \geq q_{k}(j)$, hence by requiring that for any $r, k  \in [m], s \leq k \leq t, r > k+1$:

{ 
\begin{equation}\label{equ:extra6}
    q_{m-k+1}(m-r+1)(B_{r}-B_{k}) \geq q_{m-k}(m-r+1)(B_{r}-B_{k+1}) ,
\end{equation}}

we obtain:

{ 
\begin{equation}\label{equ:extra7}
    \lim_{x \to B_{k}}p(x,1) >  \lim_{x \to B_{k+1}}p(x,1).
\end{equation}}

Combining \eqref{equ:extra7} with Lemma~\ref{lemma:pr_set}, we can know that:

\begin{align}
    \min_x p(x,i) \in \{ \lim_{x \to B_t} p(x,1), p(c,1)\}.
\end{align}

\end{proof}

\section{Experimental details}\label{exp_detail}

The hyperparameters used in each experiment are given as follows.

\begin{minipage}[t]{0.25\textwidth}

\begin{table}[H]\small
\setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
         \toprule
        Hyperparameter & Value \\
        \midrule
        OPTM bins & [-6.00, -0.40, 0.40, 6.00] \\
        MVU bins & [-4.34, -3.60, 3.60, 4.34] \\
        \bottomrule
    \end{tabular}
    \caption{Hyperparameters for scalar inputs when $\epsilon = 0.5$}
    \label{tab:hp_eps0.5}
\end{table}
\end{minipage}
\qquad
\qquad
\begin{minipage}[t]{0.25\textwidth}
    \begin{table}[H]\small
    \setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
    \toprule
    Hyperparameter & Value \\
    \midrule
        OPTM bins & [-3.00, -0.50, 0.50, 3.00] \\
        MVU bins & [-2.42, -1.69, 1.69, 2.42] \\
        ERM $\gamma$ & 0.026 \\
        ERM bins & [-5.10, -0.10, 0.10, 5.10] \\
        RQM $q$ & 0.220 \\
        RQM bins & [-2.70, -0.90, 0.90, 2.70] \\
   \bottomrule
\end{tabular}
    \caption{Hyperparameters for scalar inputs when $\epsilon = 1.0$}
    \label{tab:hp_eps1.0}
\end{table}
\end{minipage}
\qquad
\qquad
\begin{minipage}[t]{0.25\textwidth}
    \begin{table}[H]\small
    \setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
        \toprule
        Hyperparameter & Value \\
        \midrule
        OPTM bins & [-3.00, -0.50, 0.50, 3.00] \\
        MVU bins & [-1.83, -1.11, 1.11, 1.83] \\
        ERM $\gamma$ & 0.043 \\
        ERM bins & [-2.70, -0.40, 0.40, 2.70] \\
        RQM $q$ & 0.498 \\
        RQM bins & [-2.60, -0.87, 0.87, 2.60] \\
        \bottomrule
    \end{tabular}
    \caption{Hyperparameters for scalar inputs when $\epsilon = 1.5$}
    \label{tab:hp_eps1.5}
\end{table}
\end{minipage}

\begin{minipage}[t]{0.25\textwidth}
    \begin{table}[H]\small
    \setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
    \toprule
    Hyperparameter & Value \\
    \midrule
        OPTM bins & [-4.00, 0.20, 0.60, 4.00] \\
        MVU bins & [-2.42, -1.69, 1.69, 2.42] \\
        RQM $q$ & 0.220 \\
        RQM bins & [-2.70, -0.90, 0.90, 2.70] \\
   \bottomrule
\end{tabular}
    \caption{Hyperparameters for truncated Gaussian distribution}
    \label{tab:hp_gauss}
\end{table}
\end{minipage}
\qquad
\qquad
\begin{minipage}[t]{0.25\textwidth}
\begin{table}[H]\small
\setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
         \toprule
        Hyperparameter & Value \\
        \midrule
        OPTM bins & [-3, -0.5, 0.5, 3] \\
        RQM bins & [-3, -1, 1, 3] \\
        \bottomrule
    \end{tabular}
    \caption{Hyperparameters for vector inputs}
    \label{tab:dp_vector}
\end{table}
\end{minipage}
\qquad
\begin{minipage}[t]{0.25\textwidth}
    \begin{table}[H]\small
    \setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
    \toprule
    Hyperparameter & Value \\
    \midrule
    Batch size & 8 \\
    DP budget $\epsilon$ & 1 \\
    Gradient norm clip & 0.1 \\
    OPTM bins & [-2.2, -0.4, 0.4, 2.2] \\
    RQM bins & [-2.7, -0.9, 0.9, 2.7] \\
    RQM $q$ & 0.22 \\
   \bottomrule
\end{tabular}
    \caption{Hyperparameters for DP-SGD on Breast Cancer dataset}
    \label{tab:dp_cancer}
\end{table}
\end{minipage}

\begin{minipage}[t]{0.25\textwidth}
    \begin{table}[H]\small
    \setlength\tabcolsep{1pt}
    \centering
    \begin{tabular}{cc}
        \toprule
        Hyperparameter & Value \\
        \midrule
        Batch size & 32 \\
        DP budget $\epsilon$ & 1 \\
        Gradient norm clip & 0.01 \\
        OPTM bins & [-2.6, -0.4, 0.4, 2.6] \\
        RQM bins & [-2.7, -0.9, 0.9, 2.7] \\
        RQM $q$ & 0.22 \\
        \bottomrule
    \end{tabular}
    \caption{Hyperparameters for DP-SGD on MNIST}
    \label{tab:dp_mnist}
\end{table}
\end{minipage}


