\section{Weak to Strong Classification in LLP} \label{sec:weak_to_strong_alg}

Given $\alpha, \eps > 0$ we set $t$ to be $\frac{32}{\eps}\left(\frac{C_0}{\alpha}\right)^2$ where $C_0 > 0$ is an absolute constant to be decided. 
We begin by defining in Fig. \ref{algo:DistnDbar} a distribution $\ol{D}$ over bags $(\ol{B}, \ol{\sigma})$ where $\ol{B}$ is the union of at most $t$ bags from $\mc{B}$ and $\ol{\sigma}$ is the sum of their aggregate labels. 

\begin{figure}[!htb]
\begin{mdframed}
\small
\textbf{Input:} Bags $\mc{B}$, $t$.\\
\textbf{Steps:}
\begin{enumerate}[leftmargin=*]
    \item Independently for $i = 1, \dots, t$, let $\mc{P}_i = (B_i, \sigma_i)$ where $(B_i, \sigma_i)$ is sampled u.a.r. from $\mc{B}$.
    \item Independently for $i = 1, \dots, t$: set $\mc{Q}_i = \mc{P}_i$ w.p. $1/2$ and set $\mc{Q}_i = \star$ w.p. $1/2$.
    \item Output $(\ol{B}, \ol{\sigma})$ where
    \begin{equation}\label{eqn:olB}
    \displaystyle \ol{B} = \underset{\{i\,\mid\,\mc{Q}_i = (B_i, \sigma_i) \neq \star\}}{\bigcup} B_i, \ \ \ol{\sigma} = \underset{\{i\,\mid\,\mc{Q}_i = (B_i, \sigma_i) \neq \star\}}{\sum}\sigma_i
    \end{equation}
\end{enumerate}
\end{mdframed}
\caption{Distribution $\ol{D}$.}\label{algo:DistnDbar}
\end{figure}

To aid our subsequent analysis we shall use the following straightforward lemma.
\begin{lemma}\label{lem:chernofappl}
   For $\kappa \in [0,1]$ and any subset $\mc{S} \subseteq \mc{B}$ s.t. $|\mc{S}| \geq \kappa |\mc{B}|$, in Step 1 of Fig.~\ref{algo:DistnDbar}, $\Pr\left[|\{i\,\mid\,(B_i, \sigma_i) \in \mc{S}\}| < \kappa t/2\right] \leq \tn{exp}(-\kappa t/8)$.
\end{lemma}
\begin{proof}
    Since each $(B_i, \sigma_i)$ is sampled independently and u.a.r. from $\mc{B}$ and $|\mc{S}| \geq \kappa |\mc{B}|$, we have $\Pr[(B_i, \sigma_i) \in \mc{S}] \geq \kappa$ for each $i$, and therefore $\mu := \E\left[|\{i\,\mid\,(B_i, \sigma_i) \in \mc{S}\}| \right] \geq \kappa t$. Thus, $\Pr\left[|\{i\,\mid\,(B_i, \sigma_i) \in \mc{S}\}| < \kappa t/2\right] \leq \Pr\left[|\{i\,\mid\,(B_i, \sigma_i) \in \mc{S}\}| < \mu/2\right] \leq \tn{exp}(-\mu/8) \leq \tn{exp}(-\kappa t/8)$, where we use the Chernoff Tail Bound (Lemma~\ref{lemma:chernoff_bounds}) with $\eta = 1/2$ and the lower bound of $\kappa t$ for $\mu$.
\end{proof}

\subsection{Analysis for a fixed classifier $h$}
We prove the following lemma.
\begin{lemma}\label{lem:errorampl}
    Let $h: \bm{\mc{X}} \to \{0,1\}$ be a classifier such that $h$ has accuracy $< (1- \zeta)$ on $\mc{B}$. Then, 
$$\Pr_{(\ol{B}, \ol{\sigma}) \leftarrow \ol{D}}\left[\sum_{\bx \in \ol{B}}h(\bx) = \ol{\sigma} \right] \leq C_0/\sqrt{\zeta t} + \tn{exp}(-\zeta t/8)$$
for some absolute constant $C_0 > 0$.
\end{lemma}
\begin{proof}
    Let $\mc{B}_{\tn{err}}$ be the \emph{error} bags $(B, \sigma) \in \mc{B}$ on which $\sum_{\bx \in B} h(\bx) \neq \sigma$, so that $|\mc{B}_{\tn{err}}| \geq \zeta |\mc{B}|$. For convenience, we shall abuse the notation $h(B)$ to denote $\sum_{\bx \in B}h(\bx)$, and therefore, for an error bag $(B, \sigma)$, $\left|h(B) - \sigma\right| \geq 1$. Depending on the choices in Step 1 of Fig.~\ref{algo:DistnDbar}, define the set $I := \{i\,\mid\,(B_i, \sigma_i) \in \mc{B}_{\tn{err}}\}$ and let $E_0$ be the event that the following occurs: $\left\{ |I| \geq \zeta t/2\right\}$. Further, let $E_1$ be the event that the LHS of the following equivalence occurs:
\begin{equation}
        h(\ol{B}) = \ol{\sigma} \Leftrightarrow \underset{\{i\,\mid\,\mc{Q}_i = (B_i, \sigma_i) \neq \star\}}{\sum}\left(h(B_i) - \sigma_i\right) = 0 \label{eq:E1event}
\end{equation}
where $(\ol{B}, \ol{\sigma})$ is the output in Step 3. Now, 
\begin{align*}
\Pr[E_1] &= \Pr[E_1 | E_0]\Pr[E_0] + \Pr[E_1 | \neg E_0]\Pr[\neg E_0] \\
&\leq \Pr[E_1 | E_0] + \Pr[\neg E_0].
\end{align*}
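Since $|\mc{B}_{\tn{err}}| \geq \zeta |\mc{B}|$, Lemma~\ref{lem:chernofappl} yields that $\Pr[\neg E_0] \leq \tn{exp}(-\zeta t/8)$. On the other hand, fix the set $I$ and bags $\{(B_i, \sigma_i)\}_{i\in I}$ and let $a_i := h(B_i) - \sigma_i$ ($i \in I$), so that $|a_i| \geq 1$ for each $i \in I$, while $h(B_i) - \sigma_i = 0$ for each $i \notin I$ and such indices do not contribute to the sum in \eqref{eq:E1event}. Defining $\{X_i\,\mid\, i \in I\}$ to be the i.i.d. $\{0,1\}$-valued Bernoulli random variables which are $1$ w.p. $1/2$, with $X_i = 1$ exactly when $\mc{Q}_i \neq \star$ in Step 2, we obtain that, conditioned on this fixing, $\Pr[E_1] = \Pr[\sum_{i \in I}a_iX_i = 0] \leq C/\sqrt{|I|}$ by applying Lemma~\ref{lemma:littlewood_offord}. Since $|I| \geq \zeta t/2$ whenever $E_0$ occurs, $\Pr[E_1 | E_0] \leq C/\sqrt{(\zeta /2)t}$ and, using the above bounds, $\Pr[E_1] \leq  C/\sqrt{(\zeta/2) t} + \tn{exp}(-\zeta t/8) = C_0/\sqrt{\zeta t} + \tn{exp}(-\zeta t/8)$, where we take $C_0 := \sqrt{2}\,C$. %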
\end{proof}

\subsection{Deterministic algorithm $\mc{A}_1$}\label{sec:A_1}
\begin{figure}[!htb]
\begin{mdframed}
\small
\textbf{Input:} Bags $\mc{B}$, $k = \max_{(B,\sigma) \in \mc{B}} |B|$, $\alpha > 0$, $t$, oracle $\mc{O}_{kt, \alpha}$.\\
\textbf{Steps:}
\begin{enumerate}
    \item Let ${\sf supp}(\ol{D})$ be the support of $\ol{D}$ (Fig.~\ref{algo:DistnDbar}), and for each $(\ol{B}, \ol{\sigma}) \in {\sf supp}(\ol{D})$ let its weight $w_{(\ol{B}, \ol{\sigma})}$ be its probability under $\ol{D}$. Let $\ol{\mc{B}}$ be ${\sf supp}(\ol{D})$ with weights  $w_{(\ol{B}, \ol{\sigma})}$.
    \item Output the classifier $h^*$ given by  $\mc{O}_{kt,\alpha}(\ol{\mc{B}})$.
\end{enumerate}
\end{mdframed}
\caption{Algorithm $\mc{A}_1$.}\label{algo:A1}
\end{figure}
Figure~\ref{algo:A1} describes algorithm $\mc{A}_1$ using\footnote{We include in Appendix~\ref{sec:suppD} an explanation on computing the probabilities under $\ol{D}$.} the distribution $\ol{D}$ defined in Figure~\ref{algo:DistnDbar}. Suppose for a contradiction that the output $h^*$ of $\mc{A}_1$ has accuracy $< (1- \eps)$ on $\mc{B}$. Then, from Lemma~\ref{lem:errorampl} (with $\zeta = \eps$) we obtain that the probability that $(\ol{B}, \ol{\sigma})$ sampled from $\ol{D}$ is satisfied by $h^*$ is at most $C_0/\sqrt{\eps t} + \tn{exp}(-\eps t/8)$ which --- upon plugging in the value of $t$ --- is at most $\alpha/2$, which contradicts the accuracy of $h^*$ on $\ol{\mc{B}}$.

We next describe a more efficient, albeit randomized, variant of the algorithm.











