\documentclass[accepted]{uai2023} % for initial submission
% \documentclass[accepted]{uai2023} % after acceptance, for a revised
                                    % version; also before submission to
                                    % see how the non-anonymous paper
                                    % would look like
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2023} % ptmx math instead of Computer
                                         % Modern (has noticable issues)
% \documentclass[mathfont=newtx]{uai2023} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
    \bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams
\usepackage{amsmath,amsfonts}
\usepackage{IEEEtrantools}
\usepackage{amsthm}
\usepackage{graphicx}
\usepackage{tikz}
\usetikzlibrary{positioning,chains,fit,shapes,calc}
\usetikzlibrary{arrows}
\usepackage{algorithm}
\usepackage{algorithmic}
% \usepackage{algpseudocode} %%% NOT COMPATIBLE WITH algorithmic package
\usepackage{xspace}
\usepackage{pgfplots}
\usepackage{pgfplotstable}
\usepackage{comment}
% \usepackage[compact]{titlesec}
%\newcomand{\comment}[1]{}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%         COMMANDS specific to the paper
\newtheorem{problem}{Problem}
\newtheorem{observation}{\textsc{Observation}}
\newtheorem{theorem}{\textsc{Theorem}}
\newtheorem{lemma}{\textsc{Lemma}}
\newtheorem{corollary}{\textsc{Corollary}}
\newtheorem{remark}{\textsc{Remark}}
\newtheorem{claim}{\textsc{Claim}}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\newcommand{\greedy}{\textsc{Greedy}\xspace}
\newcommand{\mcge}{{\em Maximum Constrained Gain Element }}
\newcommand{\mcg} {{\em Maximum Constrained Gain }}
\newcommand{\MCGE}{{\rm MCGE}}
\newcommand{\MCG}{{\rm MCG}}
\newcommand{\OPT}{{\rm OPT}}
\newcommand{\NP}{{\rm NP}}
\newcommand{\OptimalS}{{\rm OptimalSeq}}
\newcommand{\SCSKC}{{\sc SCSK-C}\xspace}
\newcommand{\DiffC}{{\sc Diff-C}\xspace}
\newcommand{\SCSK}{{\sc SCSK}\xspace}
\newcommand{\Diff}{{\sc Diff}\xspace}
\newcommand{\lineara}{{\sc Linear-Approx}\xspace}
\newcommand{\loga}{{\sc Log-Approx}\xspace}
%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{Maximizing Submodular Functions under Submodular Constraints}

% The standard author block has changed for UAI 2023 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is authomatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%
% Add authors
\author[1]{\href{mailto:<madhavrp@iastate.edu>?Subject=Maximizing Submodular Functions under Submodular Constraints}{Madhavan R. Padmanabhan}{}}

\author[1]{\href{mailto:<yanhui@iastate.edu>?Subject=Maximizing Submodular Functions under Submodular Constraints}{Yanhui Zhu}{}}

\author[1]{\href{mailto:<sbasu@iastate.edu>?Subject=Maximizing Submodular Functions under Submodular Constraints}{Samik Basu}{}}

\author[1]{\href{mailto:<pavan@iastate.edu>?Subject=Maximizing Submodular Functions under Submodular Constraints}{A. Pavan}{}}


% \author[1]{Madhavan R. Padmanabhan}
% \author[1]{Yanhui Zhu}
% \author[1]{Samik Basu}
% \author[1]{A. Pavan}
% Add affiliations after the authors
\affil[1]{%
    Department of Computer Science\\

    Iowa State University\\
    
    Ames, Iowa, USA
}
% \affil[2]{%
%     Second Affiliation\\
%     Address\\
%     …
% }
% \affil[3]{%
%     Another Affiliation\\
%     Address\\
%     …
%   }
  
  \begin{document}
\maketitle

\begin{abstract}
  
We consider the problem of maximizing submodular functions under submodular constraints by formulating the problem in two ways: \SCSKC and \DiffC. Given two submodular functions $f$ and $g$ where $f$ is monotone, the objective of \SCSKC problem is to find a  set $S$ of size at most $k$  that maximizes $f(S)$ under the constraint that $g(S)\leq \theta$, for a given value of $\theta$.
The problem of \DiffC focuses on finding a set $S$ of size at most $k$ such that $h(S) = f(S)-g(S)$ is maximized. It is known that these problems are highly inapproximable and do not admit any constant factor multiplicative approximation algorithms unless NP is easy. Known approximation algorithms involve  data-dependent approximation factors that are not efficiently computable.

We initiate a study of the design of approximation algorithms where the approximation factors are efficiently computable. For the problem of \SCSKC, we prove that the greedy algorithm produces a solution whose value is at least $(1-1/e)f(\OPT) - A$, where $A$ is the data-dependent additive error. For the \DiffC problem, we design an algorithm that uses the \SCSKC greedy algorithm as a subroutine. This algorithm produces a solution whose value is at least $(1-1/e)h(\OPT)-B$, where $B$ is also a data-dependent additive error. A salient feature of our approach is that the additive error terms can be computed efficiently, thus enabling us to ascertain the quality of the solutions produced.

\end{abstract}

% \input{intro.tex}
\section{Introduction}
\label{sec:intro}

%% Several optimization problems possess a very interesting property
%% known as {\em submodularity}.

For a ground set $V$ of size $n$, a function $f:2^V\rightarrow \mathbb{R}$ is {\em
  submodular} if for every $S \subseteq T \subseteq V$, and for every
$x \in V - T$, $f(S\cup \{x\}) - f(S) \geq f(T \cup \{x\}) - f(T)$. I.e.,
the gain in the value of the function when $x$ is added to $S$ is at
least the gain when $x$ is added to a superset of $S$. 
\begin{comment}
Optimization
problems involving such submodular functions naturally arise in a wide
variety of application domains, such as machine
learning~\cite{Krause:JMLR08,Bach13}, sensor
placement~\cite{Krause:JMLR08,KrauseGGK06}, information
diffusion~\cite{Kempe:kdd03}, video and image
processing~\cite{BoykovJ01,KohliKT09,Jegelka:CVPR11},
document summarization~\cite{LinB11}, speech
processing~\cite{WeiLKB13,Lin:Interspeech09}, 
SAT-solvers~\cite{StreeterG08}, and combinatorial
auctions~\cite{Vondrak08,DobzinskiNS10}.
Submodular functions can be viewed as a discrete analog of convex
functions~\cite{Lovasz:MP83}. Even though various submodular
optimization problems are known to be NP-hard, quite a few variants
admit approximation algorithms and are amenable to rigorous
theoretical analysis.
\end{comment}
Optimizing submodular functions under various constraints has been studied
extensively. These problems are of the following form: For a
submodular function $f$, \emph{find a set $S \subseteq V$ that
  maximizes $f(S)$ subject to the constraint that $S
  \in \mathcal{F}$, where $\mathcal{F}$ is a family of sets}. A few of the
well-studied constraints are {\em cardinality constraint}, {\em
  knapsack/modular constraint}, and {\em matroid constraints}. Even for the  least restrictive constraint, cardinality constraint, the problem is known to be
NP-hard. The classical work of Nemhauser {\it et al.}  showed that a
greedy algorithm achieves a $(1-1/e)$ approximation ratio if the
submodular function $f$ is monotone~\cite{nemhauser:MP78}.  

\noindent
\textbf{Submodular Constraints.\ } Often, in submodular maximization problems, there is a conflicting minimization constraint.  The generic nature of these problems is of the following form: Given  a submodular function $f$, another function $g$, 
%and a natural number $k$ 
find a set $S$ of size at most  $k$ that {\em maximizes $f(S)$, while minimizing $g(S)$}.  In this work, we study the case where the function {\em $g$ is also a submodular function}. The problem of maximizing a submodular function under a submodular constraint appears in a few application domains. The works of~\cite{Iyer:UAI2012,Iyer:NIPS13} discuss several scenarios  where these problems arise naturally.  These application domains include  sensor placement, speech data set selection, probabilistic inference, and information diffusion~\cite{Kempe:kdd03,Lin:Interspeech09,Lin:Interspeech11,KSG08,JB11}.



\begin{comment}
{\em Sensor Placement.} Consider a location that needs to be under
surveillance (e.g., collecting weather-related data or monitoring
wildfire) using sensors. Each sensor, placed at a location, can cover a
certain area of the region. The function $f(A)$, which describes the
area covered by the sensor placed at location $A$, is submodular.
The goal is to place  at most $k$ sensors at locations that maximize coverage
area. However, we often have budgetary constraints that require
minimizing the cost of the sensors deployed. In many real-world
scenarios, the cost function $g$ of the sensors is submodular as bulk-cost is often
less than the sum of the cost of the individual entities.
Thus the goal is to find a set $A$ of $k$-sensor locations while minimizing the  cost function $g$.


%Furthermore,
%there may be restrictions on the total number of sensors that can be
%deployed (e.g., due to lack of biodegradability of sensor-equipments
%and its negative environmental impact). Thus, in this scenario, the
%problem is SCSK-C: maximize the submodular function $f$ (capturing the
%coverage), subject to minimizing the submodular cost function and
%satisfying the cardinality constraint (capturing the number of
%sensors).

{\em Information Diffusion.}  The diffusion of information in a social
network under various probabilistic diffusion models is captured as a
submodular function~\cite{Kempe:kdd03}. For a (seed) set $X$, if
information originates from $X$, the expected number of users in the
network who receives (or potentially get influenced by) the
information is captured by the submodular function $f(X)$. In the
context of advertising, it is profitable to identify a set $X$ of nodes, termed {\em social influencers}, for which the $f(X)$ is maximal, i.e., the
information related to the item that is being advertised reaches a large
proportion of the population in the network. On the other hand, there
is also some cost ($g(X)$) associated with engaging social
influencers in propagating information. Often it is the case that a friend of social influencer is more likely  to be  engaged at a lower cost and as a result, such a cost function could be
submodular. At the same time, it is desirable not to have a large set of social influencers.  Therefore, in a real-world scenario, the objective is to
maximize the value of the submodular function describing the expected
reach of advertisement subject to the  while minimizing the cost of engaging the social influencers engaged in the process. 

%budgetary constraints on the
%cost (submodular function) and number (cardinality) of influencers
%engaged in the process~\cite{Crawford:ICML19}. This problem is
%again cast as an instance of SCSK-C.  
%% {\bf not sure I Understand this sentence.  In certain
%% scenarios, it is also important to ``target'' the spread--target
%% customers of high-end wood-working tools are professional wood-workers
%% rather than casual weekend wood-working
%% hobbyists~\cite{Padmanabhan:Bigdata18}}. In this scenario, the problem
%% of maximizing the influence over the targets while keeping the
%% influence over the non-targets within certain limit subject to the
%% constraint over modular cost function for engaging social influencers
%% corresponds to SCSK-C as well.


%% However, it is also important to target the spread---target
%% customers of high-end wood-working tools are professional wood-workers
%% rather than casual weekend wood-working hobbyists.  The entities are
%% $X$ are referred to as social influencers.  Finally, there is often
%% some cost ($c(X)$) associated with engaging social influencers in
%% propagating information. Therefore, in real-world scenario, the objective
%% is to maximize the submodular function describing the expected number
%% of target users being influenced while (a) satisfying the constraint
%% over submodular function describing the number of non-target users
%% being influenced and (b) keeping the cost of engaging social influencers
%% within budget~\cite{Padmanabhan:Bigdata18}.  

{\em Speech Data Selection.} Consider the existence of a large
data-set of conversations, which includes utterances and mapping of
utterances to the words. For the evaluation of automated
speech recognition systems, it is desirable to produce a small
(cardinality constraint: the size of the set) representative subset of the
data-set, which is rich on one hand and does not suffer from
over-representation on the other. Motivated by this objective Lin and
Bilmes~\cite{Lin:Interspeech09,Lin:Interspeech11} have presented
techniques that rely on maximizing the richness by selecting the
representative utterances while limiting the mapping of selected
utterances to words, thus reducing over-representation. The function
capturing the notion of richness can be described using the facility
location function, which is submodular (say, $f(U)$); while the
mapping of utterances to words denotes a bipartite graph relationship,
another submodular function (say, $g(U)$).  Again, here the goal is to
to find a subset of utterances $U$, such that $|U|\leq k$ such that $f(U)$ is maximized while limiting the value of $g(U)$.
\end{comment}

\smallskip
\noindent {\bf \SCSKC and \DiffC.} Two of the standard ways to formalize the above-mentioned maximization-minimization problem are by introducing a submodular constraint~\cite{Iyer:NIPS13,Crawford:ICML19,Wan:COA10} and by maximizing the difference between submodular functions~\cite{Iyer:UAI2012, Narasimhan:UAI2005,JinYYSHX21, KawaharaW11}. The \emph{Submodular Cost Submodular Knapsack (SCSK)} problem is as follows: given two
non-negative, submodular functions $f$ and $g$ over a ground set
such that $f$ is monotone and a value $\theta$, the goal is to find a set $S$ that
maximizes $f(S)$ subject to the constraint that $g(S) \leq \theta$. The Diff problem is the following: Given non-negative, submodular functions $f$ and $g$ where $f$ is monotone, find a set $S$ such that $f(S) - g(S)$ is maximized. In this paper, we will consider  generalizations of these problems called \SCSKC and \DiffC. The \SCSKC problem is to find set $S$ that maximizes $f$ subject to the constraint $g(S) \leq \theta$ and $|S| \leq k$. Similarly, \DiffC problem is to find a set $S$ that maximizes the function $f-g$ such that $|S| \leq k$. 
In \SCSKC setting, we will refer to $g(X)\leq \theta$
 as submodularity budget constraint and $\theta$ as
 the submodular budget. In this work, we address the problem of obtaining approximation algorithms for these problems whose {\em approximation factors can be efficiently computed}.

 
\noindent
\textbf{Data Dependent Approximations.} Unfortunately, for both \SCSK and \Diff (and hence for both \SCSKC and \DiffC) it is known that efficient approximation algorithms are not feasible. From the work of Iyer and Bilmes~\cite{Iyer:NIPS13} it follows that if P does not equal NP, then \SCSK does not even admit $1/n^{1/2-\epsilon}$-multiplicative approximation algorithms, for any $\epsilon >0$. Narasimhan and Bilmes~\cite{Narasimhan:UAI2005} showed that every set function can be represented as the difference between two submodular functions, and thus  \Diff is inapproximable~\cite{Narasimhan:UAI2005, Iyer:UAI2012}.
Given the high inapproximability of these problems, it seems that there is no hope of theoretically analyzing the quality of the solutions produced by efficient algorithms for these problems. However, it turns out that {\em data-dependent approximation} guarantees can be obtained for the \SCSKC problem.

A $d$-multiplicative approximation for submodular maximization produces a solution $S$ such that $f(S) \geq d \times f(OPT)$, where $OPT$ is the optimal solution. Typically the value $d$ is independent of the actual function $f$ that is being maximized. This is either a universal constant (such as $1/2$ or $(1-1/e)$) or depends on the size of the ground set $V$.  On the contrary, algorithms with data-dependent approximation guarantees have the following flavor: For a given function $f$, the value of the solution produced by the algorithm is at least $d_f \times f(OPT)$, where $d_f$ depends on the function $f$ that we seek to maximize and the constraints.

For the problem of \SCSKC, \cite{Iyer:NIPS13,IJB13} presented data-dependent
approximation algorithms. However, we observe that this data-dependent approximation factor is NP-hard to compute. Given this, it is hard to judge the quality of the solution produced by the approximation algorithm (i.e., how close it is to the optimal solution).   We discuss this issue in detail in Section~\ref{sec:hardness}.  For the problem of \DiffC, to the best of our knowledge, no data-dependent approximation bounds have been established. The works of~\cite{Iyer:UAI2012,Narasimhan:UAI2005} provide a heuristic approach to solve the problem of maximizing the difference between two submodular functions $f$ and $g$ by replacing $g$ with a surrogate modular function $g'$ that bounds $g$ and instead maximizing $f-g'$, which is submodular. 



Our thesis is that {\em data-dependent approximation factors are  more meaningful only when they can be computed efficiently}. Motivated by this, we explore the possibility of designing algorithms with {\em efficiently computable} data-dependent approximation factors.



\noindent\textbf{Our Contributions.} 
To achieve efficiently computable approximation factors,  we study the notion of {\em multiplicative-additive error approximation} algorithms. We say that $\mathcal{A}$ is a $(d, A)$-multiplicative-additive
approximation algorithm for the maximization problem
if the solution $S$ output by $\mathcal{A}$ satisfies $f(S) \geq
d\times f(OPT)-A$. We refer to $d$ as the {\em multiplicative factor} and $A$ as the {\em additive error}.

{\em Algorithms with Efficiently Computable Approximation Factors for \SCSKC.}  We first consider the standard greedy algorithm for \SCSKC. We establish a new guarantee on the quality of the solution produced by the algorithm. Namely, we prove that if $S$ is the solution produced, then $f(S) \geq (1-1/e)f(OPT)-A_{fg}$. Here the additive error $A_{fg}$ is the data-dependent factor that depends on $f$ and $g$. A hallmark of our proof and analysis is that $A_{fg}$ can be computed while running the greedy algorithm with very little overhead, thus making the computation of $A_{fg}$ efficient. Combining this proof with ideas from~\cite{ConfortiC84}, we refine the multiplicative factor to $\frac{1}{c_f}(1- (1-\frac{c_f}{k})^k)$, while keeping the additive error the same. We remark that while our proofs start with the standard arguments known in the literature, there are critical departure points. The main contribution in the proofs is conceptual rather than technical, which enables us to obtain the desired bounds. 



{\em \DiffC via \SCSKC}. We first observe that the \DiffC problem reduces to \SCSKC problem when the range of $g$ is non-negative integers.  Building on this, we design an approximation algorithm for \DiffC that uses the natural greedy algorithm for \SCSKC as a subroutine. 
Building upon our theoretical analysis of the greedy algorithm for \SCSKC, we analyze the quality of the solution $S$ produced for the \DiffC problem and show that 
$f(S) - g(S) \geq (1-1/e) [f(OPT)-g(OPT)]- A_{fg}$, where $A_{fg}$ is the efficiently computable additive error.

{\em Experimental Validation: } As proof of concept, we have conducted experiments in the domain of information diffusion. For both problems, these experiments reveal that in practice the additive error is small, thus indicating that our proposed algorithms produce a solution whose value is close to $(1-1/e)$ of the optimal solution. 

\noindent\textbf{Prior and Related Work.} 
For submodular maximization with knapsack/modular constraint, \cite{sviridenko2004note} proposed a greedy algorithm with $1-1/e$ approximation ratio, albeit with time complexity of $O(n^5)$ oracle calls.  Later works improved the run-time~\cite{feldman2020practical,yaroslavtsev2020bring, li2022submodular} with a small sacrifice in the approximation quality. One of the well-studied constraints is the matroid constraint for which 
\cite{nemhauser1978analysis} provided a $1/2$-approximation algorithm. The breakthrough work of~\cite{calinescu2011maximizing}  presented a randomized algorithm with the optimal approximation ratio of $1-1/e$.
The work \cite{buchbinder2019deterministic} proposed the first deterministic algorithm with an approximation ratio of $0.5008$. 

The work in~\cite{harshaw2019submodular} studied maximizing $f-g$ under a cardinality constraint when $f$ is submodular and $g$ is modular, whereas~\cite{jin2021unconstrained} studied the problem without the cardinality constraint and provided a {\em multiplicative-additive} error approximation  algorithm. 

SCSC is a dual problem of SCSK studied in \cite{Iyer:NIPS13,Crawford:ICML19}. The problem involves minimizing a submodular function $g$ while ensuring that another submodular function $f$  is no less than a given threshold $\tau$. There has been a vast amount of prior and related work on submodular optimization. We refer the reader to the survey articles~\cite{KG14,BF18}.

\section{Hardness of Approximation Factors}
\label{sec:hardness}


In~\cite{Iyer:NIPS13,IJB13},  building on the work of \cite{ConfortiC84}, the authors show that for Submodular Maximization under a down-monotone constraint, the greedy algorithms can be analyzed using data-dependent approximation factors.  When applied to SCSK-C, it follows that the natural greedy algorithm   is a $\frac{1}{c_f}(1 - (\frac{(K_g-c_f)}{K_g})^{k_g})$-approximation algorithm where 
\begin{itemize}
    \item  $c_f$ is the curvature of the function $f$, \\
    $c_f = 1 - \min_{x \in V} \frac{f(x|V-\{x\})}{f(x)}$.
    
    \item $K_g$ is the size of the largest feasible set that satisfies both the  constraints, \\
    $
    K_g = \max \{ |X| : g(X) \leq \theta \text{ and } |X|\leq k\}
    $.
    
    \item $k_g$ is the size of the smallest feasible set $X$ that satisfies the constraints, but such that adding some element to $X$ violates the constraint, \\
    $
    k_g = \min \{ |X| : g(X) \leq \theta \text{ and } \exists j \notin X, g(X \cup \{j\}) > \theta \}
    $.
    \end{itemize}

To gauge the quality of the solution produced by the algorithm, one should be able to effectively compute the value of the expression $\frac{1}{c_f}(1 - (\frac{(K_g-c_f)}{K_g})^{k_g})$.  We observe that  it is NP-Hard to compute $K_g$.  

\begin{observation}
Given a submodular function $g$ and $\theta$, it is NP-Hard to calculate $K_g$ where $K_g = \max\{|X|: g(X)\leq \theta \text{ and } |X|\leq k\}$.
\label{thm:upperrankhardness}
\end{observation}
The proof is provided in the Appendix. 

This limitation implies that while we can run the algorithm knowing that it has an approximation factor of $\frac{1}{c_f}(1 - (\frac{(K_g-c_f)}{K_g})^{k_g})$, we cannot hope to effectively compute what this term evaluates to and thus we will not be able to ascertain the quality of the solution produced.  If we attempt to bound the $c_f$, $K_g$ and $k_g$, then in the worst case $c_f=1, K_g=k, k_g=1$, leading to  trivial $\frac{1}{k}$-approximation when applied to \SCSKC. 

{\em The above observation and discussion motivate the need for establishing guarantees with efficiently computable approximation factors. }


\section{Greedy Algorithm for SCSK-C}
\label{sec:greedy}
\begin{algorithm}[t]
\scriptsize
\caption{Basic Greedy Algorithm}
\label{algo:algorithm1}
\begin{algorithmic}[1]
  \STATE $X = \emptyset$
  \FOR { $i=1$ to $k$ } 
 % \STATE If there is no $v$ such that $g(X \cup \{v\}) \leq  \theta$, then  return $X$
 % \STATE \ \ \ \ \ \  return $X$.
  \STATE $X = X \cup \{ \argmax_v f(X\cup \{v\}) | g(X\cup \{v\})\leq \theta\} $
  
  \ENDFOR
  \RETURN $X$
\end{algorithmic}
\end{algorithm}

In this section, we provide approximation guarantees, with efficiently computable approximation factors, for the natural greedy algorithm for \SCSKC. The \greedy algorithm for \SCSKC problem is described in 
Algorithm~\ref{algo:algorithm1}. 


 The Algorithm~\ref{algo:algorithm1} computes $X$ by iteratively
 adding the ``best'' element to the partial
 solution. Given an element $v \in V$ and $X \subseteq V$, the marginal gain of $v$ with respect to $X$, denoted $f(v|X)$, is $f(X \cup \{v\}) - f(X)$.
Given a set $S \subseteq V$ and an integer $\gamma$, we define 
\mcge (denoted { \MCGE}) as the element $v$ that
achieves the maximal marginal gain $f(v|S)$ subject to the constraint $g(S
\cup \{v\}) \leq \gamma$. More formally,
%
\[
\MCGE(S, \gamma) = \argmax_{v \in V} \{f(v|S)~|~g(S \cup \{v\}) \leq \gamma\}
\]
%
where $\argmax\{\emptyset\}$ is considered as undefined.  Given $S$ and
$\gamma$, we define \mcg (denoted \MCG) as the marginal gain of $f$
due to $\MCGE(S, \gamma)$ with respect to $S$, i.e., 
$
\MCG(S, \gamma) = f(\MCGE(S, \gamma)|S). %= f(S \cup \{\MCGE(S, \gamma)\}) - f(S).
$
%

The following theorem characterizes the solution obtained using
Algorithm~\ref{algo:algorithm1} using additive and multiplicative errors. 

\begin{theorem} \label{thm:main}
Let $OPT_{k,\theta}$ be the optimal value of $f$ under the constraints, and 
$X$ be the solution returned by the
Algorithm~\ref{algo:algorithm1}, then the following holds
\[
\begin{array}{l}
f(X) \geq (1-1/e) [OPT_{k, \theta}] \ -\ \\
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 
 \sum_{i=1}^{k-1}\left[\displaystyle \MCG(X_i, 2\theta) - \MCG(X_i, \theta)\right]
\end{array}
\]
\label{thm:baseline}
\end{theorem}
\begin{proof}
%% The number of elements in the optimal solution is assumed to be $O$
%% and the number of elements in the solution computed by the greedy
%% algorithm is assumed to be $\ell$ (greedy algorithm terminates after
%% $\ell$ iterations).

Let  $X^*_{k,\theta}$ be an optimal solution such that $f(X^*_{k, \theta}) = OPT_{k,\theta}$ and let $O$ denote the size of $X^*_{k, \theta}$. Note that $O \leq k$.
Let $X_{i-1}$ denote the partial solution at the
start of the $i$th iteration of the greedy algorithm; and initially
$X_0 = \emptyset$.  

%\paragraph{$X'_i$: Extension with $2\theta$-submodularity budget.\ }
With each iteration $i$, we associate an additional set $X'_i$ as
follows.  During iteration $i$, let $u_i$ be an element that can
maximize $f(X_{i-1} \cup \{u\})$ such that $g(X_{i-1} \cup \{u\})\leq
2\theta$. More precisely, let $u_i = \MCGE(X_{i-1}, 2\theta)$, and we
say that $X'_{i} = X_{i-1} \cup \{u_i\}$. Note that the set $X'_{i}$
is not constructed by the greedy algorithm; indeed, $X'_i$ may violate the
submodular budget constraint, since $g(X'_{i}) \leq 2\theta$ does not imply
$g(X'_{i}) \leq \theta$. The set $X'_i$ is used only for the analysis of the algorithm.

For every $1 \leq i \leq k-1$, we have the following inequalities. 
%
\[
\begin{array}{rcl}
OPT_{k,\theta}  & \leq & f(X^*_{k,\theta} \cup X_i) 
\ \leq \ f(X_i) + \displaystyle\sum_{e \in X^*_{k,\theta}} f(e|X_i)\\
& \leq & f(X_i) + \displaystyle\sum_{e \in X^*_{k,\theta}}  [f(X'_{i+1}) - f(X_i)] \\ 
& \leq & f(X_i) + O\times f(X'_{i+1}) - O\times f(X_i)
\end{array}
\]
%
The first two inequalities follow since $f$ is monotone and
submodular.  We now explain the third inequality: a subtle point here
is that we cannot claim that $f(e|X_i) \leq f(X_{i+1}) - f(X_i)$ as it
might be possible that $g(X_i \cup \{e\}) > \theta$, in which case the element
$e$ is not considered by the greedy algorithm during iteration $i+1$. However, as
$X^*_{k,\theta}$ is an optimal solution, we have $g(X^*_{k,\theta})
\leq \theta$, which, in turn, implies that $g(e) \leq \theta$ for
every $e \in X^*_{k,\theta}$. Therefore, $g(X_i \cup \{e\}) \leq
2\theta$ due to submodularity of $g$.  Recall that $X'_{i+1}$ is
obtained by adding $u_{i+1}=\MCGE(X_i, 2\theta)$ to the set $X_i$.
Since $g(X_i \cup \{e\}) \leq 2\theta$, it must be the case that
$f(u_{i+1}|X_i) \geq f(e|X_i)$. Thus, $f(e|X_i) \leq f(X'_{i+1})-
f(X_i)$. The last inequality follows because the size of the optimal
solution is $O$.
  
By adding $(O-1)OPT_{k,\theta}$ on both sides of the last inequality
and rearranging terms, we obtain
\begin{eqnarray}
\label{eqn:eq2}
OPT_{k,\theta} - f(X'_{i+1}) &\leq& \frac{O-1}{O}
\left(OPT_{k,\theta}-f(X_i)\right)
\end{eqnarray}

This inequality relates $X'_{i+1}$ with $X_i$. However, if we
could relate $X_{i+1}$ with $X_i$ instead, then we could obtain a recurrence
relation. To achieve this, we now consider the relationship between
the sets $X_{i+1}$ and $X'_{i+1}$.

By our definitions of $X_{i+1}$ and $X'_{i+1}$, we have
\\
$f(X_{i+1}) \ = \ f(X_i) + \MCG(X_i, \theta)$.\\
$f(X'_{i+1}) \ = \  f(X_i) + \MCG(X_i, 2\theta)$.

Thus, \\
$
-f(X'_{i+1}) = -f(X_{i+1}) -[\MCG(X_i, 2\theta)-\MCG(X_i,\theta)]
$.

%
Substituting this in Equation~\ref{eqn:eq2}, we obtain the following recurrence relation.
%
\begin{equation}
\label{eqn:recurrence}
\begin{array}{rcl}
OPT_{k,\theta} - f(X_{i+1} ) & \leq & \displaystyle\frac{O-1}{O}
\left(OPT_{k,\theta}-f(X_i)\right) \\[0.5em] 
 & &  +\ [\MCG(X_i, 2\theta)-\MCG(X_i,\theta)].
\end{array}
\end{equation}
%
For notational brevity, we use $MCDG_i$ to denote
$\MCG(X_i, 2\theta)-\MCG(X_i,\theta)$.
%
%
\begin{claim}\label{clm:mainclm}
%For $1 \leq i \leq k$. 
\[
\begin{array}{rcl}
OPT_{k,\theta} - f(X_k ) & \leq & \displaystyle\left(\frac{O-1}{O}\right)^{k - 1} \left(OPT_{k,\theta}-f(X_1)\right) \\[1em]
 & &+ \displaystyle\sum_{i=1}^{k-1} MCDG_i
\end{array}
\]
\label{claim:1}
\end{claim}
The proof of the claim is provided in the Appendix.

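Since $f(X_1) \geq \frac{OPT_{k,\theta}}{O}$, it follows that $OPT_{k,\theta} - f(X_1) \leq \frac{O-1}{O}\cdot OPT_{k,\theta}$. Plugging this in the inequality from Claim~\ref{clm:mainclm} we obtain $OPT_{k,\theta} - f(X_k) \leq \left(\frac{O-1}{O}\right)^{k} OPT_{k,\theta} + \sum_{i=1}^{k-1} MCDG_i$. Since $O \leq k$, we have $\left(\frac{O-1}{O}\right)^{k} \leq 1/e$, and thus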
\[f(X_k) \geq (1-1/e)OPT_{k, \theta} -\sum_{i=1}^{k-1}MCDG_i\]
This concludes the proof.
\end{proof}

%We make the following observation about $OPT_{k,\theta} - f(X_1)$.
%\[
%\begin{array}{l}
%f(X_1)  \geq \displaystyle\frac{OPT_{k,\theta}}{O} \\
%\Rightarrow \
%OPT_{k,\theta} - f(X_1) \ \leq  \ OPT_{k,\theta} - \displaystyle\frac{OPT_{k,\theta}}{O} \\[1em]
%\Rightarrow \ OPT_{k,\theta} - f(X_1) \ %\leq \ \displaystyle\left( \frac{O-1}{O} \right)\cdot OPT_{k,\theta}  
%\end{array}
%\]

%We apply the upperbound for $OPT_{k,\theta} - f(X_1)$ in Claim~\ref{claim:1}.
%\begin{eqnarray*}
%OPT_{k,\theta} - f(X_k ) & \leq & \left( \frac{O-1}{O} \right)^k  \cdot OPT_{k,\theta}   + \sum_{i=1}^{k-1}  MCDG_{i} \\
%& \leq & (1/e) \cdot OPT_{k,\theta} + \sum_{i=1}^{k-1} MCDG_{i} 
%\\
%& & \mbox{Since } O\leq k.
%\end{eqnarray*}
%Thus
%\[f(X_k) \geq (1-1/e)OPT_{k, \theta} -\sum_{i=1}^{k-1}MCDG_i\]
%This concludes the proof.
%\end{proof}


\begin{algorithm}[t]
\scriptsize
\caption{Basic Greedy with Additive Error Computation}
\label{algo:additive}
\begin{algorithmic}[1]
  \STATE $X = \emptyset; A = 0$
  \FOR { $i =1$ to $k$} 
 % \STATE If there is no $v$ such that $g(X \cup \{v\}) \leq  \theta$,  then exit
  %\STATE \ \ \ \ \ \ then return $X$.
  \STATE $w = \argmax_v\{f(X \cup \{v\})~|~g(X\cup \{v\})\leq \theta\} $.
   \IF {($i \neq 1$)}
  \STATE $u = \argmax_v \{f(X\cup \{v\}) | g(X\cup \{v\})\leq 2\theta\} $.
  \STATE $A = A+ f(u|X)-f(w|X)$.
  \ENDIF
  \STATE $X = X \cup \{w\}$.
  \ENDFOR
  \RETURN $A$ and $X$.
    % \RETURN $A$
\end{algorithmic}
\label{algo:algorithm2}
\end{algorithm}


\subsection{Additive Error: Computation Interpretation and Tightness} 

\textbf{Computation.\ } We show that the additive error term $\sum_{i=1}^{k-1}\MCG(X_i, 2\theta) - \sum_{i=1}^{k-1}\MCG(X_i, \theta)$ can be computed very efficiently. Consider Algorithm~\ref{algo:additive}.  Consider an iteration $\ell \geq 2$ of this algorithm, and note that $f(w|X) = \MCG(X_{\ell-1}, \theta)$ and $f(u|X) = \MCG(X_{\ell-1}, 2\theta)$. Thus at the end of the algorithm $A$ equals $\sum_{i=1}^{k-1}\MCG(X_i, 2\theta) - \sum_{i=1}^{k-1}\MCG(X_i, \theta)$.  Clearly, the set $X$ is the greedy solution. Note that the total number of calls made by Algorithm~\ref{algo:additive} to $f$ and $g$ is $O(nk)$,  which is asymptotically the same as the number of calls made by the Algorithm~\ref{algo:algorithm1}.  Here $n$ is the size of the ground set.  As stated in the introduction,
this paves way
for a quick  understanding of the quality of the result generated by the greedy algorithm. 


{\bf Interpretation.} We now discuss  the interpretation of the additive error.  
Informally,
additive error captures the difference between the
solutions produced by the greedy algorithms that are run with
submodular budgets of $2\theta$ and $\theta$. More precisely, it is
the following. Let $X_i$ be the set at the end of the $i$th iteration
of the greedy algorithm (with submodular budget $\theta$). Let $w_i$
be the maximum marginal gain possible with respect to $X_i$ with
submodular budget of $\theta$ and $u_i$ be the maximum marginal gain
possible with respect to $X_i$ with submodular budget of
$2\theta$. The additive loss is the sum of the differences $u_i -
w_i$.  %The term $(1-(1-1/k)^\ell)$ from Theorem~\ref{thm:baseline} 

 

{\bf Tightness.} Next, we consider whether the approximation factors in the above analysis can be improved. In the above, the additive error is data-dependent, and it is natural to ask whether this is necessary. Our next result establishes that the additive error can not be made data-independent even if we settle for a multiplicative factor that is lower than $(1-1/e)$. We establish the following result whose proof appears in the appendix.

\begin{theorem}
There does not exist a polynomial time algorithm $\mathcal{A}$ for SCSK and SCSK-C such that it outputs a set $X$ with guarantee $f(X)\geq d\cdot OPT - A$ where $d<1, A>0$ are universal constants. 
\label{thm:datadependentA}
\end{theorem}

%\begin{proof}
%In ~\cite{Iyer:NIPS13}, it is shown that there exists no polynomial time algorithm for SCSK that achieves a constant factor approximation. We will use this to show that there also does not exist a $(d,A)$-multiplicative additive approximation algorithm with constants $d<1, A>0$ for SCSK. 

%Assume that there exists an algorithm $\mathcal{A}$ such that it outputs a set $f(X) \geq d\cdot OPT - A$. 

% Let the optimum solution be $X^*$. Consider an instance $f'$ such that $f'(X^*) = f(X^*) + D$. $f'(X) = f(X) \forall X\subseteq$ 
%Let $f'(X) = A\cdot f(X)$. Consider the same problem with $f'$. The optimum solution is $OPT' = A \cdot OPT$. 
%The guarantee is still $f'(X)\geq d\cdot OPT' - A$.  Thus
%$A\cdot f(X) \geq  d\cdot A\cdot OPT - A$ and this implies that $f(X) \geq  d\cdot OPT - 1$ 
%
%\begin{IEEEeqnarray*}{lCr}
%f'(X) &\geq & d\cdot OPT' - A \\
%A\cdot f(X) &\geq & d\cdot A\cdot OPT - A  \\
%f(X) &\geq & d\cdot OPT - 1  \\
%\end{IEEEeqnarray*}
%
%Thus, algorithm $\mathcal{A}$ has the guarantee with $A=1$. We can repeat the argument by defining $f'(X) = q\cdot f(X)$ leading to guarantee $f(X) \geq d\cdot OPT - \frac{1}{q}$. $\mathcal{A}$ has $(d,A)$-multiplicative additive guarantee for an arbitrarily small $A$. 

%Consider an instance $\mathcal{X}$ of SCSK. Let $\mathcal{I}$ be the family of feasible sets for this instance. Let $\mathcal{I'} = \{X \texttt{ s.t }  f(X)< d\cdot OPT\}$. Let $R\in \mathcal{I'}$ such that $\forall X\in \mathcal{I'}, f(R)\geq f(X)$. Let $\epsilon = d\cdot OPT - f(R)$. $\mathcal{A}$ has $(d,\epsilon')$-multiplicative additive guarantee where $\epsilon'<\epsilon$. In such an instance, the following inequalities hold.
%\begin{IEEEeqnarray*}{l}
%f(\mathcal{A(\mathcal{X})}) \geq  d\cdot OPT - \epsilon' >  d\cdot OPT - \epsilon = f(R) 
%\end{IEEEeqnarray*}
%If $f(\mathcal{A(\mathcal{X})})>f(R)$ then it must be the case that $A(\mathcal{X}) \in \mathcal{I}\setminus \mathcal{I'}$. By definition of $\mathcal{I'}$, $f(A(\mathcal{X})) \geq d\cdot OPT$. If $\mathcal{I'}=\phi$, we consider the feasible element $e^*$ that has the maximum value on $f$ among all feasible elements. In such cases, if we output $max\{f(e^*), f(A(\mathcal{X}))\}$, we obtain a $d-$approximate algorithm for SCSK. This is a contradiction as such an algorithm does not exist for SCSK ~\cite{Iyer:NIPS13} which invalidates our assumption. We can extend the argument for SCSK-C by varying the cardinality from $1..n$ to arrive at a similar contradiction. 
%\end{proof}

\subsection{Extensions}
\label{sec:multi-curve}

We extend the above proof and analysis in two different directions.  First, we can refine the above result and capture the multiplicative error using the curvature of the function $f$,
denoted by $c_f$ and defined as $1 - \min_{x} \frac{f_{V- \{x\}}(x)}{f(x)}$.
The proof of the following theorem is provided in the appendix.

\begin{theorem}\label{thm:cur}
Let $X$ be the solution produced by Algorithm~\ref{algo:algorithm1}, then 
\[f(X) \geq \frac{1}{c_f}\left (1 - (1-\frac{c_f}{k})^{k}\right )OPT_{k, \theta} - A,\]
%and when $\ell=k$, then
%$f(X) \geq \frac{1}{c_f}\left (1 - e^{-c_f}\right ) OPT_{k, \theta} - A,$
where $A$ is the additive error  same as in Theorem~\ref{thm:main}.
\end{theorem}


We next consider a slight modification of Algorithm~\ref{algo:algorithm2}.
Note that the for loop is executed exactly $k$ times. Suppose that during an iteration $i$,  there is no element $v$ such that $g(X \cup \{v\}) \leq \theta$. Once this happens the algorithm does not append any new elements to $X$ in future iterations; however, the value $A$ could keep changing (as there could be elements $u$ for which $g(X \cup \{u\}) \leq 2\theta$). Consider a modification where the algorithm stops when it fails to find an element $v$ such that $g(X \cup \{v\}) \leq \theta$. In this case, the algorithm will produce a set $X$ of size $\ell \leq k$. We can bound the quality of the solution produced as stated in the following theorem.

\begin{theorem} \label{thm:aux}
Let $OPT_{k,\theta}$ be the optimal value of $f$ under the constraints, and 
$X$ be the solution with $|X| = \ell$ obtained from the above-described modified version of 
Algorithm~\ref{algo:algorithm2}, then the following holds
\[
\begin{array}{l}
f(X) \geq (1 - (1-1/k)^\ell) [OPT_{k, \theta}] \ -\ \\
\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ 
 \sum_{i=1}^{\ell-1}\left[\displaystyle \MCG(X_i, 2\theta) - \MCG(X_i, \theta)\right]
\end{array}
\]
\label{thm:baseline}
\end{theorem}

The proof of the above theorem is exactly  the same as the proof of Theorem~\ref{thm:main}. Thus we omit the proof.
Note that both the additive error and the multiplicative error (which is $(1-(1-1/k)^\ell)$) can be computed efficiently in this case as well.  The main difference between Theorem~\ref{thm:main} and~\ref{thm:aux} is that Theorem~\ref{thm:main} has a higher (and thus better) multiplicative factor but also a higher (and thus worse) additive error compared to Theorem~\ref{thm:aux}.

\section{From \SCSKC to \DiffC}

%We now define the Difference of Submodular Functions problem under a cardinality constraint (\DiffC). 

%\begin{problem}
%\label{prob:diffC}
%Given monotone, non-negative, non-decreasing submodular %functions $f,g$, a positive integer $k$, maximize $f(S) - %g(S)$ subject $|S|\leq k$. 
%\end{problem}

In this section, we design algorithms for \DiffC, that use an algorithm for \SCSKC as a subroutine. 
 Algorithm~\ref{algo:diff} (\lineara algorithm) presents
the algorithm for \DiffC problem. 

The bound $\lambda$ on the iteration is based
on the maximum valuation of $g$; $\mathcal{A}$ denotes the algorithm
for addressing the \SCSKC problem. 
In each iteration $i$ (i.e., for each
valuation of $g$), $\mathcal{A}$ is
used to compute the set $X$ for which $f$ 
is maximal under the constraint that $g$'s valuation is $\leq i$ and 
$|X|\leq k$. The difference between $f$
and $g$ at $X$ is then compared
against
the prior computed difference and the
larger of the two is considered
as the current maximal difference. 


\begin{theorem}\label{thm:diff-exact}

Let $f$ and $g$ be two submodular functions where $f$ is monotone, and let $h = f-g$. In Algorithm~\ref{algo:diff},
if the subroutine $\mathcal{A}$ can solve \SCSKC exactly, then the algorithm produces a set $S$ such that
$h(S) \geq h(OPT) - 1$.  Algorithm~\ref{algo:diff} makes $O(\lambda)$ calls to  $\mathcal{A}$, where $\lambda = k\times \max_{e\in V} g(e)$.
\end{theorem}

The proof is provided in the Appendix.

\begin{algorithm}[t]\label{algo:diffc}
\scriptsize
\caption{Algorithm for \DiffC: \lineara}
\label{algo:diff}
\begin{algorithmic}[1]
  \STATE $S = \emptyset$
  \FOR { $i= 0$ to $\lambda$ } 
    \STATE $X = \mathcal{A}(f, g, k, i)$
    \IF{$f(X) - g(X) > f(S) - g(S)$}
        \STATE $S = X$
    \ENDIF
  \ENDFOR
  \RETURN $S$
\end{algorithmic}
\end{algorithm}

\begin{theorem}
In Algorithm~\ref{algo:diff}, suppose that Algorithm~$\mathcal{A}$ is the Basic Greedy Algorithm (Algorithm~\ref{algo:algorithm1}) for \SCSKC, let $h = f - g$. If Algorithm~\ref{algo:diff} outputs a set $G$ then 
\[h(G) \geq \left(1- 1/e\right) h(OPT) - A,\] where the additive error $A$ can be computed efficiently.        
\end{theorem}

\begin{proof}
We will start with some notation. Let $S^*_i$ be the optimal solution to the \SCSKC instance with $\theta = i$. Let $G_i$ be the set returned by the Basic Greedy Algorithm for \SCSKC instance with $\theta = i$. Let $A(i)$ be the  corresponding additive error. We first consider the case when the range of $g$ is integers. By Theorem~\ref{thm:main}, we have for $1 \leq i \leq \lambda$,
\begin{equation}\label{eqn:3}
f(G_i) \geq (1-1/e)f(S^*_i) - A(i)
\end{equation}
Let $OPT$ be the optimal solution for $h = f-g$, and let $\theta^* = g(OPT)$. Note that $h(OPT) = f(OPT) - \theta^*$. Let the solution returned by the Algorithm~\ref{algo:diff} occur at $i = \beta$. Thus the set $G$ returned by the algorithm is $G_\beta$ and $h(G) = h(G_\beta) = f(G_\beta) - g(G_\beta)$. Note that $g(G_\beta)$ must equal $\beta$, otherwise the algorithm would not have returned the set $G_\beta$.
 

Since the algorithm returned the set $G_{\beta}$, we have $f(G_\beta) - \beta \geq f(G_{\theta^*}) - \theta^*$. And we also know that  by Inequality~\ref{eqn:3} $f(G_{\theta^*}) \geq \left(1 - 1/e\right) f(OPT) - A(\theta^*)$. Thus 
\[
\begin{array}{ll}
& f(G_\beta) - \beta \\
\geq &f(G_{\theta^*}) - \theta^*\\
\geq &\left(1 - 1/e\right) f(OPT) - A(\theta^*)- \theta^*\\
=&\left(1-1/e\right)\left(f(OPT)- \theta^*\right)- \left(\theta^*/e + A(\theta^*)\right)\\
= & \left(1-1/e\right )h(OPT)- \left(\theta^*/e + A(\theta^*)\right)
\end{array}
\]


In the above we can view $\frac{\theta^*}{e}+A(\theta^*)$ as additive error. However, since we do not know the value of $\theta^*$, we do not know how to compute this value efficiently; instead, we will exhibit an upper bound on this quantity that can be computed efficiently. One way to achieve this is to compute $i/e+A(i)$ for $1 \leq i \leq \lambda$ and take the maximum of these values. This will be an upper bound on the additive error and clearly, this quantity can be computed efficiently. Below we employ another approach to bound the above quantity.
We will first derive a  bound on $\theta^*$. Building on this, we derive an efficiently computable upper bound on $\frac{\theta^*}{e} +A(\theta^*)$.

We know that $f(S^*_\beta) - \beta$ is at most $f(OPT) - \theta^*$ and  $f(G_\beta) - \beta$ is at least $f(G_{\theta^*} ) - \theta^*$. A worst possible scenario at which this happens is $f(G_\beta)$ is as large as possible and $f(G_{\theta^*})$ is as small as possible. This happens when $f(G_\beta) = f(S^*_\beta)$ and $f(G_{\theta^*})$ equals $(1-1/e)f(OPT) - A(\theta^*)$. Thus in this scenario
\[f(G_{\beta}) - \beta = f(S^*_\beta) - \beta \leq f(OPT) - \theta^*\]
Since $f(G_{\theta^*}) = (1-1/e)f(OPT) - A(\theta^*)$, we obtain that
\[f(G_{\beta}) - \beta \leq \frac{f(G_{\theta^*}) + A(\theta^*)}{1-1/e} - \theta^*\]
Thus
\[\theta^* \leq \frac{f(G_{\theta^*}) + A(\theta^*)}{1-1/e} - f(G_\beta) + \beta\]
From this it follows that
\[\theta^* \leq B = \max_{i} \frac{f(G_i)+A(i)}{1-1/e}- f(G_\beta) + \beta\]

Thus $B$ is the desired upperbound on $\theta^*$. Note that for every $i$, we can compute $f(G_i) + A(i)$ while running Algorithm~\ref{algo:diff}. Thus the bound $B$ can be efficiently computed. Let $A = \max_{i\leq B} (A(i)+ i/e)$. Note that 
$\frac{\theta^*}{e} + A(\theta^*) \leq A$. Thus we have
\[h(S) = h(G_{\beta}) \geq \left(1-\frac{1}{e}\right)h(OPT) - A\]
When the range of $g$ is not necessarily positive integers, then, as in the proof of Theorem~\ref{thm:diff-exact} the additive error will have  an additional factor of $1$. 

{\bf Computing the Additive Error.} We note that the additive error $A$ can be computed efficiently as follows: When we call the Greedy algorithm for SCSK-C in Step 3, we can compute $A(i)$. Thus we keep track of $A(i) + i/e$ for every $1 \leq i \leq \lambda$. As discussed above we can compute the value $B$ while running the algorithm. This implies that $A = \max_{i \leq B} (A(i)+i/e)$ can be computed efficiently.
\end{proof}

 
%\paragraph
{\bf \loga Algorithm: a faster approximation for \DiffC.} We now make a few remarks about improving the runtime of Algorithm~\ref{algo:diff}.  The run time of the algorithm is proportional to $\lambda$, which in turn depends on the range of $g$ --- the algorithm is invoking $\mathcal{A}(f, g, k, i)$ for every $i$, $1 \leq i \leq \lambda$. This could be expensive in practice. Thus we propose a modification to the Algorithm; we refer to the modified version as \loga algorithm. 
This algorithm calls $\mathcal{A}(f, g, k, 2^i)$ for every $i$, $1 \leq i \leq \log\lambda$. This will ensure that we make only $\log \lambda$ invocations of the subroutine $\mathcal{A}$ and thus drastically reduce the run time. By doing the same analysis as above we can prove that $h(S) \geq \frac{1}{2}(1 - 1/e) h(OPT) - A$. 

\section{Experiments}
\label{sec:expt}

% \input{experimentsCommand.tex}
% ---------------------------------------------------------------------
% SCSK-C figure settings
% ---------------------------------------------------------------------
% \thetaVObjective{<csv file>}{<title>}
%   #1: comma-separated data file, read from datasets/#1
%   #2: text typeset in bold as the plot title (may be empty)
% Draws a single axis labelled $\theta$ (x) and $f(X)$ (y) with three curves
% taken from the table columns Greedy, Random and Equal, in that order.
% No x column is named in the \addplot calls, so the pgfplots default is used
% (presumably the first column of the file -- TODO confirm against the CSVs).
% Every \addlegendentry below is commented out, so no legend entries are
% registered inside the axis itself.
\newcommand{\thetaVObjective}[2]{
\begin{tikzpicture}
    \begin{axis}[xlabel=\textbf{$\theta$}, ylabel=\textbf{\Large $f(X)$}, title=\textbf{#2}, legend columns=2, legend style={at={(0.5,-0.1)},anchor=north,draw=none}, transpose legend]
        \pgfplotstableread[col sep=comma]{datasets/#1}
        \datatable
        
        \addplot table[y = Greedy] from \datatable ;
        % \addplot table[y = dynamic] from \datatable ;
        \addplot table[y = Random] from \datatable;
        \addplot table[y = Equal] from \datatable;
        % \addplot table[y = 0.63OPTUpperBound] from \datatable;
        % \addplot table[y = maxsingle] from \datatable;
        % \addplot table[y = randomizedgreedy] from \datatable;
        % \addlegendentry{\Large Greedy}
        % \addlegendentry{\Large Dynamic Programming}
        % \addlegendentry{$0.63 OPT Bound$}
        % \addlegendentry{\Large MaxSingle}
        % \addlegendentry{\Large Random Part.}
        % \addlegendentry{\Large Equal Part.}
        % \addlegendentry{\Large Rand Greedy}
        
    \end{axis}
\end{tikzpicture}}

% \thetaVError{<csv file>}{<title>}
%   #1: comma-separated data file, read from datasets/#1
%   #2: text typeset in bold as the plot title (may be empty)
% Draws a single axis labelled $\theta$ (x) and "Additive Error" (y) with
% three curves taken from the table columns Greedy, Random and Equal, in that
% order. The legend entries are commented out, so the axis registers none.
\newcommand{\thetaVError}[2]{
\begin{tikzpicture}
    \begin{axis}[xlabel=\textbf{$\theta$}, ylabel=\textbf{\Large Additive Error}, title=\textbf{#2}, legend columns=-1, legend style={at={(0.5,-0.1)},anchor=north,draw=none}]
        \pgfplotstableread[col sep=comma]{datasets/#1}
        \datatable
        
        \addplot table[y = Greedy] from \datatable ;
        % \addplot table[y = dpError] from \datatable ;
        \addplot table[y = Random] from \datatable ;
        \addplot table[y = Equal] from \datatable ;
        % \addlegendentry{\Large Greedy}
        % \addlegendentry{\Large Dynamic Programming}
        % \addlegendentry{\Large Random Part.}
        % \addlegendentry{\Large Equal Part.}
        
    \end{axis}
\end{tikzpicture}}

% \thetaVTime{<csv file>}{<title>}
%   #1: comma-separated data file, read from datasets/#1
%   #2: text typeset in bold as the plot title (may be empty)
% Draws a single axis labelled $\theta$ (x) and "Runtime(s)" (y) with three
% curves taken from the table columns Greedy, Random and Equal, in that order.
% The legend entries are commented out, so the axis registers none.
\newcommand{\thetaVTime}[2]{
\begin{tikzpicture}
    \begin{axis}[xlabel=\textbf{$\theta$}, ylabel=\textbf{\Large Runtime(s)}, title=\textbf{#2}, legend columns=-1, legend style={at={(0.5,-0.1)},anchor=north,draw=none}]
        \pgfplotstableread[col sep=comma]{datasets/#1}
        \datatable
        
        \addplot table[y = Greedy] from \datatable ;
        % \addplot table[y = dpError] from \datatable ;
        \addplot table[y = Random] from \datatable ;
        \addplot table[y = Equal] from \datatable ;
        % \addlegendentry{\Large Greedy}
        % \addlegendentry{\Large Dynamic Programming}
        % \addlegendentry{\Large Random Part.}
        % \addlegendentry{\Large Equal Part.}
        
    \end{axis}
\end{tikzpicture}}

% ---------------------------------------------------------------------
% DIFF-C figure settings
% ---------------------------------------------------------------------

% \diffValuesSubmodularCost{<csv file>}{<title>}
%   #1: comma-separated data file, read from datasets/#1
%   #2: text typeset in bold as the plot title (may be empty)
% Draws a single axis labelled $k$ (x) and $f(X) - g(X)$ (y) with three curves
% taken from the table columns Exponential, Linear and supSub, in that order.
% The legend entries are commented out, so the axis registers none.
\newcommand{\diffValuesSubmodularCost}[2]{
\begin{tikzpicture}
    \begin{axis}[xlabel=\textbf{$k$}, ylabel=\textbf{\Large $f(X) - g(X)$}, title=\textbf{#2}, legend columns=3, legend style={at={(0.5,-0.1)},anchor=north,draw=none}, transpose legend]
        \pgfplotstableread[col sep=comma]{datasets/#1}
        \datatable
        
        \addplot table[y = Exponential] from \datatable ;
        \addplot table[y = Linear] from \datatable ;
        \addplot table[y = supSub] from \datatable;
        % \addlegendentry{Algorithm $\mathcal{B}$ - Exponential Search}
        % \addlegendentry{Algorithm $\mathcal{B}$ - Linear Search}
        % \addlegendentry{SupSub}
        
    \end{axis}
\end{tikzpicture}}

% \diffTimeSubmodularCost{<csv file>}{<title>}
%   #1: comma-separated data file, read from datasets/#1
%   #2: text typeset in bold as the plot title (may be empty)
% Draws a single axis labelled $k$ (x) and log(runtime) (y) with three curves
% taken from the table columns Exponential, Linear and supSub, in that order.
% The word "runtime" in the y label is set upright with \mathrm; as bare math
% italics it would be typeset as a product of seven single-letter variables.
% The legend entries are commented out, so the axis registers none.
\newcommand{\diffTimeSubmodularCost}[2]{
\begin{tikzpicture}
    \begin{axis}[xlabel=\textbf{$k$}, ylabel=\textbf{\Large $\log(\mathrm{runtime})$}, title=\textbf{#2}, legend columns=3, legend style={at={(0.5,-0.1)},anchor=north,draw=none}, transpose legend]
        \pgfplotstableread[col sep=comma]{datasets/#1}
        \datatable
        
        \addplot table[y = Exponential] from \datatable ;
        \addplot table[y = Linear] from \datatable ;
        \addplot table[y = supSub] from \datatable;
        % \addlegendentry{Algorithm $\mathcal{B}$ - Exponential Search}
        % \addlegendentry{Algorithm $\mathcal{B}$ - Linear Search}
        % \addlegendentry{SupSub}
        
    \end{axis}
\end{tikzpicture}}

% \diffErrorsSubmodularCost{<csv file>}{<title>}
%   #1: comma-separated data file, read from datasets/#1
%   #2: text typeset in bold as the plot title (may be empty)
% Draws a single axis labelled $k$ (x) and "Additive Error" (y) with two
% curves taken from the table columns Exponential and Linear, in that order
% (there is no supSub curve here, unlike the sibling DIFF-C macros).
% The legend entries are commented out, so the axis registers none.
\newcommand{\diffErrorsSubmodularCost}[2]{
\begin{tikzpicture}
    \begin{axis}[xlabel=\textbf{$k$}, ylabel=\textbf{\Large Additive Error}, title=\textbf{#2}, legend columns=2, legend style={at={(0.5,-0.1)},anchor=north,draw=none}, transpose legend]
        \pgfplotstableread[col sep=comma]{datasets/#1}
        \datatable
        \addplot table[y = Exponential] from \datatable ;
        \addplot table[y = Linear] from \datatable ;
        % \addlegendentry{Algorithm $\mathcal{B}$ - Exponential Search}
        % \addlegendentry{Algorithm $\mathcal{B}$ - Linear Search}
    \end{axis}
\end{tikzpicture}}

In this section, we empirically examine the performance of \SCSKC and \DiffC on the application of Information Diffusion in social networks. All the algorithms are implemented in C++ and run on a Linux server with AMD Opteron 6320 CPU (8 cores and 2.8 GHz) and 64GB RAM. 

\textbf{Information Diffusion.\ }
The diffusion of information in a social
network under various probabilistic diffusion models is captured as a
submodular function~\cite{Kempe:kdd03}. 
For a (seed) set $X \subseteq V$,  the submodular function $f(X)$ is the expected number of users influenced by $X$. On the other hand, there is often some cost function $g$ associated with each seed set; a candidate $g$, in the context of social influence, quantifies
the value of a set of entities in the network based on the number of followers of the set. We use such a submodular cost function in our experiments. 
The goal is to find a seed set of size $\leq k$ that maximizes $f$ (influence) while minimizing $g$ (cost). 


\textbf{Datasets.\ } For the application of information diffusion, we collect six directed networks to conduct experiments: NetHept \cite{NetGraph}, p2p-Gnutella31 \cite{ripeanu2002mapping}, Facebook \cite{leskovec2012learning}, Bitcoin \cite{kumar2016edge}, Wikipedia \cite{leskovec2010predicting} and DBLP \cite{yang2012defining}. The number of nodes of them ranges from 3,783 to 317,080. Due to space limitations, we present the plots only for three of these graphs.
%in the main body
%; plots for the additional graphs are in the appendix.


\subsection{Experiments for \SCSKC}
\label{sec:exp:scsk}

The main objective we seek in these experiments is to demonstrate that the approximation factors can be computed efficiently, which helps to gain an understanding of the quality of the solution.  For the Natural greedy algorithm (Algorithm~\ref{algo:algorithm2}), we compute the additive error produced and also study how the additive error changes as the submodular budget $\theta$ increases.
 

 
\begin{algorithm}[t]
\scriptsize
\caption{Budget-Conscious Greedy Algorithm}
\label{algo:Generic}
\begin{algorithmic}[1]
\STATE Input: $\theta_1, \cdots, \theta_k$.
  \STATE $X = \emptyset$
  \FOR {$~i= 1$ to $k$ }
  \STATE \!\!\!\!\!\!\!\textbf{If} there is no $v$ such that $g(X \cup \{v\}) \leq \theta_i$, then 
  \STATE \ \ \ \ \ \ \ $X$ remains unchanged
  \STATE \!\!\!\!\!\!\!\textbf{Else} $X\!=\!X \cup \{{\small\argmax_v} f(X\cup \{v\}) | g(X\cup \{v\})\leq \theta_i\} $
  \ENDFOR
  \RETURN $X$
\end{algorithmic}
\end{algorithm}

\textbf{Comparison Algorithms.\ } We compare the solutions produced by the Natural Greedy algorithm (Algorithm~\ref{algo:algorithm2}) with two variants. Note that during each iteration of the Algorithm~\ref{algo:algorithm2}, the entire submodular budget $\theta$ is made available. We obtain a {\em budget-conscious} variant of this algorithm that allows iteration $i$ to spend at most $\theta_i < \theta$ budget. 
Algorithm \ref{algo:Generic} describes this strategy. By following an analysis that is very similar to that of Theorem~\ref{thm:main}, we can show that this algorithm produces a set $X$ such that $f(X)$ is at least $(1-1/e)f(OPT)-A$, and $A$ can be computed efficiently.
We use the following budget-conscious algorithms (Algorithm \ref{algo:Generic}).
%\begin{itemize}
     \textbf{Equal Partition:} Use $\theta/k, 2\theta/k, \cdots \theta$ as input to the budget-conscious  Greedy algorithm. 
   \textbf{Random Partition:} Select a random sequence of thresholds to use in the budget-conscious Greedy Algorithms. 
   %We run Random Partition for five times and report the average objective function values, additive errors, and running time.
%\end{itemize}

% We also compare the above algorithms with two baseline algorithms: \textbf{Maximum Single Element} where we select an element with the largest value on $f$, and \textbf{Random Greedy}  where at each iteration, select a random element that satisfies the submodular constraint.

% \paragraph{Information Diffusion}

 % argument #1: any options
    % customlegend: environment that draws a pgfplots legend outside of an
    % axis; in this file it is used inside a bare tikzpicture.
    % The optional argument (default: empty) is handed to \pgfplotsset, so
    % legend keys such as `legend entries' or `legend columns' can be set.
    % The pgfplots macros whose names contain @ are reached through
    % \csname ... \endcsname, so no \makeatletter is used here.
    % NOTE(review): these are pgfplots internals, not documented interface --
    % confirm they still exist when upgrading pgfplots.
    \newenvironment{customlegend}[1][]{%
        \begingroup
        % inits/clears the lists (which might be populated from previous
        % axes):
        \csname pgfplots@init@cleared@structures\endcsname
        \pgfplotsset{#1}%
    }{%
        % draws the legend:
        \csname pgfplots@createlegend\endcsname
        \endgroup
    }%

    % makes \addlegendimage available (typically only available within an
    % axis environment):

    % \addlegendimage expands to the pgfplots macro \pgfplots@addlegendimage
    % (reached through \csname because of the @ in its name).
    \def\addlegendimage{\csname pgfplots@addlegendimage\endcsname}
% Cycle list for the SCSK-C plots: three active styles (blue/square, red/o,
% green/+). The SCSK-C plotting macros issue three \addplot calls in the order
% Greedy, Random, Equal, and the hand-built legend of the NetHept SCSK-C figure
% lists the same three styles in the same order.
% NOTE(review): the trailing "% Random Partition" / "% Random Greedy" remarks
% sit on commented-out entries and do not describe the active styles.
\pgfplotsset{
cycle list={%
% {draw=green,mark=star,solid}, % 
{draw=blue, mark=square,solid},%densely dashed}, % 
% {draw=black,mark=+,solid},%dashdotted}, %every mark/.append style={rotate=90}, % Random Partition
{draw=red,mark=o,solid}, % 
% {draw=red,mark=square,solid}, % 0.63OPT Upper Bound
{draw=green,mark=+,solid}, % 
% {draw=pink,mark=square,solid} % Random Greedy
}}



\begin{figure}[tbh]
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\thetaVObjective{SCSK-C/objective/nethept-obj-scsk-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\thetaVError{SCSK-C/error/nethept-error-scsk-c.csv}{}
}
\end{tabular}
\begin{tikzpicture}
        \begin{customlegend}[legend columns=2,legend style={align=center,draw=none,column sep=2ex},legend entries={Basic Greedy, Random Partition, Equal Partition}]
        % \addlegendimage{draw=green,mark=star,solid}
        \addlegendimage{draw=blue, mark=square,solid}
        % \addlegendimage{draw=black,mark=+,solid}
        \addlegendimage{draw=red,mark=o,solid}
        % \addlegendimage{draw=red,mark=square,solid}
        \addlegendimage{draw=green,mark=+,solid}
        % \addlegendimage{draw=pink,mark=square,solid}
        \end{customlegend}
     \end{tikzpicture}
\vspace{-1em}
\caption{\SCSKC, NetHept; $k=50$, a) $\theta$ vs. $f(X)$; b) $\theta$ vs. Additive Error with submodular cost on Basic Greedy, Random Partition and Equal Partition}
\label{fig:nethept-scsk}
\end{figure}


\begin{figure}[t]
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\thetaVObjective{SCSK-C/objective/p2p-obj-scsk-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\thetaVError{SCSK-C/error/p2p-error-scsk-c.csv}{}
}
\end{tabular}
\vspace{-1em}
\caption{\SCSKC, p2p-Gnutella31; $k=50$, a) $\theta$ vs. $f(X)$; b) $\theta$ vs. Additive Error with submodular cost on Basic Greedy, Random Partition and Equal Partition}
\label{fig:p2p-scsk}
\vspace{-1em}
\end{figure}


\begin{figure}[t]
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\thetaVObjective{SCSK-C/objective/wiki-obj-scsk-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\thetaVError{SCSK-C/error/wiki-error-scsk-c.csv}{}
}
\end{tabular}
\vspace{-1em}
\caption{\SCSKC, Wikipedia; $k=50$, a) $\theta$ vs. $f(X)$; b) $\theta$ vs. Additive Error with submodular cost on Basic Greedy, Random Partition and Equal Partition}
\label{fig:wiki-scsk}
\vspace{-1em}
\end{figure}


\begin{figure}[t]
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\thetaVObjective{SCSK-C/objective/dblp-obj-scsk-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\thetaVError{SCSK-C/error/dblp-error-scsk-c.csv}{}
}
\end{tabular}
\vspace{-1em}
\caption{\SCSKC, DBLP; $k=50$, a) $\theta$ vs. $f(X)$; b) $\theta$ vs. Additive Error with submodular cost on Basic Greedy, Random Partition and Equal Partition}
\label{fig:dblp-scsk}
\vspace{-1em}
\end{figure}





\textbf{Results Analyses.\ } We chose $k = 50$ and varied the submodularity budget $\theta$ from $10$ to $300$. The results are shown in Figs.~\ref{fig:nethept-scsk} to~\ref{fig:dblp-scsk}. As can be seen, the Basic Greedy, Equal Partition and Random Partition algorithms produce very similar results, except for Facebook. It can be seen  from Fig.~\ref{fig:nethept-scsk}b,~\ref{fig:p2p-scsk}b,~\ref{fig:dblp-scsk}b,  that as the submodular budget increases, the additive error decreases. Recall that the additive factor is approximately the difference between the quality of the seed sets produced with submodular constraints $\theta$ and $2\theta$. Thus as $\theta$ grows larger, there may not be much difference between the constraints $g(X) \leq \theta$ and $g(X) \leq 2\theta$. It is likely that a set that satisfies the latter constraint will also satisfy the former constraint.

We analyze the quality of the produced solutions. For NetHept, p2p-Gnutella31 and DBLP, the additive error is less than $10\%$ of $f(X)$ most of the time and much smaller many times. When this happens, we can conclude that for all these sets $f(X) \geq 0.53f(OPT)$.
For example, for NetHept, when $k = 50, \theta =200$, the greedy algorithm produced a solution $X$ of size $50$, and the additive error is $0$ and $f(X)  = 968.21$. This implies that $f(X) \geq 0.63f(OPT)$. Another example is DBLP, at $\theta = 20$, Basic Greedy produced a solution with value $13810$ and the additive error is $1044$. This implies that additive error is less than $7.5\%$ of the optimal value. Thus we can be guaranteed that the value produced by the algorithm is at least $0.55f(OPT)$. For graphs such as Facebook, Bitcoin, and Wikipedia, additive errors are higher. For example, for Bitcoin with $\theta = 160$, the Basic Greedy produced a solution with value $110$, whereas the additive error is $26$. This implies that the value of the solution is at least $0.4f(OPT)$. The density of the graphs could explain this phenomenon. The Average degrees of Facebook, Bitcoin, and Wikipedia graphs are 43, 12, and 29, whereas, for the other graphs, the average degree is less than 8. For higher average degree graphs, there is a larger difference between the constraints $g(X) \leq \theta$ and $g(X) \leq 2\theta$.
 

In terms of running time, all the three algorithms can finish in 12 seconds on the NetHept network with over 15,000 nodes, demonstrating the time-efficiency of our algorithm (the details are presented in supplementary materials). Compared to Random Partition and Basic Greedy algorithms, Equal Partition is faster because it started from a small cost, which allows for faster identification of the element incurring maximal marginal gain within the cost budget (at a specific iteration). In contrast, Random Partition can generate various cost sequences while the submodular cost of each iteration for Basic Greedy is fixed.

\subsection{Experiments for \DiffC}
\label{sec:exp:diff}


% Same definition of \addlegendimage as given earlier in the file, repeated
% before the DIFF-C figures.
\def\addlegendimage{\csname pgfplots@addlegendimage\endcsname}
% Cycle list for the DIFF-C plots: three active styles (blue/square, brown/o,
% black/+). The DIFF-C plotting macros issue their \addplot calls in the order
% Exponential, Linear, supSub, and the hand-built legend of the NetHept DIFF-C
% figure lists the same three styles in the same order.
% NOTE(review): the trailing "% Random Partition" / "% Random Greedy" remarks
% sit on commented-out entries and do not describe the active styles.
\pgfplotsset{
cycle list={%
% {draw=green,mark=star,solid}, % 
{draw=blue, mark=square,solid},%densely dashed}, % 
% {draw=black,mark=+,solid},%dashdotted}, %every mark/.append style={rotate=90}, % Random Partition
{draw=brown,mark=o,solid}, % 
% {draw=red,mark=square,solid}, % 0.63OPT Upper Bound
{draw=black,mark=+,solid}, % 
% {draw=pink,mark=square,solid} % Random Greedy
}}


\begin{figure}[h]
\vspace{-1em}
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\diffValuesSubmodularCost{DIFF-C/objective/nethept-obj-diff-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\diffErrorsSubmodularCost{DIFF-C/error/nethept-error-diff-c.csv}{}
}
\end{tabular}
\begin{tikzpicture}
        \begin{customlegend}[legend columns=3,legend style={align=center,draw=none,column sep=1ex},legend entries={\loga, \lineara, supSub}]
        % \addlegendimage{draw=green,mark=star,solid}
        \addlegendimage{draw=blue, mark=square,solid}
        % \addlegendimage{draw=black,mark=+,solid}
        \addlegendimage{draw=brown,mark=o,solid}
        % \addlegendimage{draw=red,mark=square,solid}
        \addlegendimage{draw=black,mark=+,solid}
        % \addlegendimage{draw=pink,mark=square,solid}
        \end{customlegend}
     \end{tikzpicture}
     \vspace{-1em}
\caption{\DiffC, NetHept; a) Budget vs Difference on \loga, \lineara and supSub; b) Budget vs Additive error on \loga and \lineara}
\label{fig:nethept-diff}
\vspace{-1em}
\end{figure}


\begin{figure}[h]
\vspace{-0.5em}
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\diffValuesSubmodularCost{DIFF-C/objective/p2p-obj-diff-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\diffErrorsSubmodularCost{DIFF-C/error/p2p-error-diff-c.csv}{}
}
\end{tabular}
\vspace{-1em}
\caption{\DiffC, p2p-Gnutella31; a) Budget vs Difference on \loga, \lineara and supSub; b) Budget vs Additive error on \loga, \lineara}
\label{fig:p2p-diff}
\vspace{-1em}
\end{figure}

% \textbf{Submodular Cost Function}
We use Basic Greedy of \SCSKC (Algorithm \ref{algo:algorithm1}) as a subroutine of \loga and \lineara. 

\textbf{Baseline Algorithm.\ } We compare our methods against the supSub method proposed by~\cite{Iyer:CORR12}.
This method replaces the submodular function $g$ with a surrogate modular function $g'$ and attempts to maximize $f-g'$. In addition, it iteratively updates the surrogate modular function $g'$ and the seed set until convergence.
The work of~\cite{jin2021unconstrained} presents the best known  algorithm (called ROI-Greedy)  to maximize  
$f-g'$, when $f$ is submodular and $g'$ is modular. 
In our implementation of supSub, we use this algorithm.
 We vary the cardinality constraint $k$ from $10$ to $100$ to compare our \loga and \lineara with supSub.


\textbf{Results Analyses.\ } As we see in Figs.~\ref{fig:nethept-diff} to~\ref{fig:dblp-diff}, Algorithms \loga and \lineara perform better than the supSub method. Interestingly, we observe that \loga and \lineara produced similar results on NetHept, p2p-Gnutella31, Bitcoin and DBLP. The plots of the Bitcoin network are presented in the supplementary materials. Based on this observation, it is sufficient to use Algorithm \loga when the cost function is submodular, as it is fast and only sacrifices a small amount of objective value. While supSub performed well on Wikipedia, it required more time to converge on the Bitcoin network. Overall, there is still a substantial performance gap between our \lineara/\loga and supSub. Details of the timing results are presented in
the supplementary materials. When we examine the additive errors, we find the same pattern as for \SCSKC. For low average degree graphs, the average (over all choices of $k$) additive errors are small ($8\%, 4\%, 6\%$ for NetHept, P2P and DBLP) and are larger for denser graphs ($29\%, 43\%, 13\%$ for Wiki, Facebook, and Bitcoin). This implies that for the NetHept graph, the (average) quality of the solution produced is at least $0.55OPT$ whereas for the Wiki graph, the (average) quality of the solution is at least $0.34OPT$.





% \begin{figure}[H]
% \vspace{-0.5em}
% \begin{tabular}{c c}
% \resizebox {0.4\linewidth} {!} {
% \diffValuesSubmodularCost{DIFF-C/objective/facebook-obj-diff-c.csv}{}
% }
% &
% \resizebox {0.4\linewidth} {!} {
% \diffErrorsSubmodularCost{DIFF-C/error/facebook-error-diff-c.csv}{}
% }
% \end{tabular}
% \caption{Facebook a) Budget vs Difference on $\mathcal{B}$-Exponential, $\mathcal{B}$-Linear and supSub; b) Budget vs Additive error on $\mathcal{B}$-Exponential and $\mathcal{B}$-Linear}
% \label{fig:facebook-diff}
% \vspace{-0.5em}
% \end{figure}


% \begin{figure}[H]
% \vspace{-0.5em}
% \begin{tabular}{c c}
% \resizebox {0.4\linewidth} {!} {
% \diffValuesSubmodularCost{DIFF-C/objective/bitcoin-obj-diff-c.csv}{}
% }
% &
% \resizebox {0.4\linewidth} {!} {
% \diffErrorsSubmodularCost{DIFF-C/error/bitcoin-error-diff-c.csv}{}
% }
% \end{tabular}
% \caption{Bitcoin a) Budget vs Difference on $\mathcal{B}$-Exponential, $\mathcal{B}$-Linear and supSub; b) Budget vs Additive error on $\mathcal{B}$-Exponential and $\mathcal{B}$-Linear}
% \label{fig:bitcoin-diff}
% \vspace{-0.5em}
% \end{figure}



\begin{figure}[h]
\vspace{-0.5em}
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\diffValuesSubmodularCost{DIFF-C/objective/wiki-obj-diff-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\diffErrorsSubmodularCost{DIFF-C/error/wiki-error-diff-c.csv}{}
}
\end{tabular}
\vspace{-1em}
\caption{\DiffC, Wikipedia; a) Budget vs Difference on \loga, \lineara and supSub; b) Budget vs Additive error on \loga and \lineara}
\label{fig:wiki-diff}
\vspace{-0.5em}
\end{figure}


\begin{figure}[h]
\vspace{-0.5em}
\begin{tabular}{c c}
\resizebox {0.4\linewidth} {!} {
\diffValuesSubmodularCost{DIFF-C/objective/dblp-obj-diff-c.csv}{}
}
&
\resizebox {0.4\linewidth} {!} {
\diffErrorsSubmodularCost{DIFF-C/error/dblp-error-diff-c.csv}{}
}
\end{tabular}
\vspace{-1em}
\caption{\DiffC, DBLP; a) Budget vs Difference on \loga, \lineara and supSub; b) Budget vs Additive error on \loga and \lineara}
\label{fig:dblp-diff}
\vspace{-0.5em}
\end{figure}


\section{Conclusions}
In this work, for \SCSKC and \DiffC, we designed algorithms and established multiplicative-additive approximation guarantees on the quality of the solutions produced, while ensuring that the multiplicative factor and the additive error can be computed efficiently.
An interesting research direction is to extend this methodology to other submodular optimization problems.

% \section*{Acknowledgements}
\begin{acknowledgements}
The work was supported in part by the NSF grants 1934884 and 2130536.
\end{acknowledgements}



% \input{hardnessApproximation}
% \input{greedyAdditiveError}

% \input{scskcVsDiffC}

% \input{experiments}
% \input{conclusions}
% \newpage
%\input{related}
%\input{broader}
% \bibliographystyle{plain}
\bibliography{padmanabhan_530}

% \input{appendixProofs}
%\newpage
%\input{kddAppendix}

% References
% \bibliography{uai2023-template}
\end{document}
