%%%%%% Macros
% ---- Draft annotations: coloured inline markers used while writing ----
% Flags a statement that still lacks a citation.
\newcommand*\needsCite{%
  \textcolor{magenta}{(needs cite)}%
}
% Placeholder paragraph: \blindtext typeset in the colour fillerTextColor
% (HTML 000080).
\definecolor{fillerTextColor}{HTML}{000080}
\newcommand*\fillerPar{%
  \textcolor{fillerTextColor}{\blindtext}%
}
% Inline notes, one macro per commenter; the argument is printed after a
% "ch: " / "ts: " / "jn: " tag (presumably author initials -- confirm).
\newcommand\ch[1]{%
  \textcolor{magenta}{ch: #1}%
}
\newcommand\ts[1]{%
  \textcolor{green}{ts: #1}%
}
\newcommand\jn[1]{%
  \textcolor{blue}{jn: #1}%
}
% Marks material that the text promises to provide in an appendix.
\newcommand\promisedApp[1]{%
  \textcolor{red}{promised appendix: #1}%
}
% Values kept in one place and printed in magenta
% (presumably because they may still change -- confirm before removing colour).
\newcommand*\llmNames{%
  \textcolor{magenta}{GPT-3.5 Turbo, GPT-4, and Llama 2}%
}
\newcommand*\meExpTwoQueryN{%
  \textcolor{magenta}{600}%
}
% ---- Math operators ----
% Starred form: in display style, sub/superscripts are set as limits
% underneath/above the operator name.
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}

% Experiment-related params
% NOTE: both macros deliberately keep the trailing space of the original
% definition (they expand to "$3$ " and "$4$ "), so existing call sites keep
% their spacing. Directly before punctuation this yields a stray space.
\newcommand{\motivExpk}{$3$ }
\newcommand{\motivExpNumParamSlots}{$4$ }
% "Uncertain preference" symbol: a question mark stacked over \succ, which in
% turn sits over \prec. Math mode only.
% \scriptstyle is a declaration, not a command with an argument, so it is
% scoped inside its own group here; otherwise everything following the symbol
% in the same formula would also be set in script size.
\newcommand{\uncertainPrefOrdering}{{\scriptstyle\stackrel{?}{\underset{\prec}{\succ}}}}
% Colour palette (names suggest fill colours for diagram nodes).
\definecolor{fullQueryNode}{HTML}{FFE680} % darker pastel yellow
\definecolor{partialQueryNode}{HTML}{FFFFCC} % lighter pastel yellow
\definecolor{llmNode}{HTML}{B3E6FF} % pastel blue
\definecolor{fullQResponseNode}{HTML}{CCFFD9} % darker pastel green
\definecolor{partialQResponseNode}{HTML}{CCE6B3} % lighter pastel green
\definecolor{judgeNode}{HTML}{F2E6FF} % pastel purple (llm (blue) \cup people (red))
\definecolor{translationFunc}{HTML}{F5F5F5} % light gray
\definecolor{brainColor}{HTML}{daa2a5}

% Naming-related macros
% Product/term names (text mode). Four capitalisation/number variants so the
% term can be changed in one place.
\newcommand{\Copilot}{LLM-based Chatbot}
\newcommand{\Copilots}{LLM-based Chatbots}
\newcommand{\copilot}{LLM-based chatbot}
\newcommand{\copilots}{LLM-based chatbots}
% Notation (math mode): each quantity is paired with the set it ranges over.
\newcommand{\intent}{\theta}
\newcommand{\intentdomain}{\Theta}
\newcommand{\query}{q}
\newcommand{\querydomain}{\mathcal{Q}}
\newcommand{\action}{a}
\newcommand{\actiondomain}{\mathcal{A}}
\newcommand{\observation}{o}
\newcommand{\observationdomain}{\mathcal{O}}
\newcommand{\prompt}{\mathbf{p}}
\newcommand{\utility}{\mathcal{U}}
\newcommand{\policy}{\pi}
\newcommand{\data}{\mathcal{D}}
% "Response strategy" terminology (text mode), again in four variants.
\newcommand{\ActionTypes}{Response Strategies}
\newcommand{\ActionType}{Response Strategy}
\newcommand{\actiontypes}{response strategies}
\newcommand{\actiontype}{response strategy}
% Symbols for the set of response strategies and a sampled strategy (math mode).
\newcommand{\ActionTypeSet}{\mathcal{T}}
\newcommand{\sampledactiontype}{\tau}
% Typewriter name "RecSet".
\newcommand{\rs}{\texttt{RecSet}}
% Conversation history: a starred bracket of (action, observation), and the
% same construction over the corresponding domains.
\newcommand{\conversationHistory}{\left[\action, \observation\right]^\ast}
\newcommand{\conversationHistoryDomain}{\left[\actiondomain, \observationdomain\right]^\ast}
% Default policy: \policy with an upright "RLHF" label. \mathrm (rather than
% \text) keeps the label upright even inside italic contexts such as theorem
% statements, where \text would inherit the surrounding italic font.
\newcommand{\defaultPolicy}{\policy^{\mathrm{RLHF}}}
\newcommand{\recalibratedPolicy}{\beta}

%%%%%%%%%%%%%
%% NOTATIONS
%%%%%%%%%%%%%
% User query, Q
% Chatbot answer, A
% Chatbot response policy, \pi: \pi(Q) = A
% Subsequent turns of the conversation: A^u (user-turn), A^c (chatbot-turn)
% Conversation Conv = Q, A, ... { A^u, A^c ... }
% User utility, U | Conv
% User cost, C | Conv
% Answer types or meta-strategies: \mathbb{A}
% Strategy 1: RESPOND (using LLM prior)
% Strategy 2: (TODO: left unspecified in these notes)
% Meta-policy: \Pi(Q) = \mathbb{A}

% Define JSON formatting
% Registers a listings language named "json" (select it with language=json).
% No keywords or string delimiters are declared; all colouring is done through
% the `literate` replacement table below: digits 0-9 are printed in blue and
% the punctuation characters : , { } [ ] are printed in red.
% NOTE(review): \color{lightgray} needs a colour package that defines the
% name `lightgray` (xcolor does) -- confirm it is loaded before this point.
\lstdefinelanguage{json}{
    % Typography: footnote-sized typewriter text.
    basicstyle=\footnotesize\ttfamily,
    % Line numbers: on the left, tiny, on every line, 8pt away from the code.
    numbers=left,
    numberstyle=\tiny,
    stepnumber=1,
    numbersep=8pt,
    % Do not print the visible-space glyph inside strings.
    showstringspaces=false,
    % Wrap long lines instead of letting them run into the margin.
    breaklines=true,
    % Horizontal rules above and below the listing, on a light gray background.
    frame=lines,
    backgroundcolor=\color{lightgray},
    % Replacement table; each entry is {input}{output}{width in characters}.
    % The extra brace levels around each output keep the \color change local
    % to that single character. The leading * asks listings not to apply the
    % replacements inside strings/comments.
    % NOTE(review): no string delimiters are declared for this language, so
    % digits and punctuation inside JSON string values are presumably
    % coloured as well -- confirm this is intended.
    literate=
     *{0}{{{\color{blue}0}}}{1}
      {1}{{{\color{blue}1}}}{1}
      {2}{{{\color{blue}2}}}{1}
      {3}{{{\color{blue}3}}}{1}
      {4}{{{\color{blue}4}}}{1}
      {5}{{{\color{blue}5}}}{1}
      {6}{{{\color{blue}6}}}{1}
      {7}{{{\color{blue}7}}}{1}
      {8}{{{\color{blue}8}}}{1}
      {9}{{{\color{blue}9}}}{1}
      {:}{{{\color{red}{:}}}}{1}
      {,}{{{\color{red}{,}}}}{1}
      {\{}{{{\color{red}{\{}}}}{1}
      {\}}{{{\color{red}{\}}}}}{1}
      {[}{{{\color{red}{[}}}}{1}
      {]}{{{\color{red}{]}}}}{1},
}
