 % \def\isarxiv{1} %%% keep this line commented out for the UAI submission version; uncomment it for the arXiv version

\ifdefined\isarxiv
\documentclass[11pt]{article}

\usepackage[numbers]{natbib}

\else
\documentclass[accepted]{uai2025} % [accepted]: the non-anonymous variant described on the commented line below
% NOTE(review): this line used to be labelled "for initial submission"; an
% anonymous initial submission presumably needs the class without [accepted] -- confirm.
%\documentclass[accepted]{uai2025} % after acceptance, for a revised version;
% also before submission to see how the non-anonymous paper would look like
                        
%% There is a class option to choose the math font
% \documentclass[mathfont=ptmx]{uai2025} % ptmx math instead of Computer
                                         % Modern (has noticeable issues)
% \documentclass[mathfont=newtx]{uai2025} % newtx fonts (improves upon
                                          % ptmx; less tested, no support)
% NOTE: Only keep *one* line above as appropriate, as it will be replaced
%       automatically for papers to be published. Do not make any other
%       change above this note for an accepted version.

%% Choose your variant of English; be consistent
\usepackage[american]{babel}
% \usepackage[british]{babel}

%% Some suggested packages, as needed:
\usepackage{natbib} % has a nice set of citation styles and commands
\bibliographystyle{plainnat}
    \renewcommand{\bibsection}{\subsubsection*{References}}
\usepackage{mathtools} % amsmath with fixes and additions
% \usepackage{siunitx} % for proper typesetting of numbers and units
\usepackage{booktabs} % commands to create good-looking tables
\usepackage{tikz} % nice language for creating drawings and diagrams

%% Provided macros
% \smaller: Because the class footnote size is essentially LaTeX's \small,
%           redefining \footnotesize, we provide the original \footnotesize
%           using this macro.
%           (Use only sparingly, e.g., in drawings, as it is quite small.)

%% Self-defined macros
\newcommand{\swap}[3][-]{#3#1#2} % just an example

\title{A Fast Optimization View: Reformulating Single Layer Attention in LLM Based on Tensor and SVM Trick, and Solving It in Matrix Multiplication Time}

% The standard author block has changed for UAI 2025 to provide
% more space for long author lists and allow for complex affiliations
%
% All author information is automatically removed by the class for the
% anonymous submission version of your paper, so you can already add your
% information below.
%


% Add authors
% Author list. The optional argument carries the marks that tie each author to
% the \affil[...] entries below (numbers for institutions, symbols for e-mails).
\author[1]{Yeqi Gao}
\author[2,*]{Zhao Song}
\author[3,$\dagger$]{Weixin Wang}
\author[4,$\ddagger$]{Junze Yin}

% Add affiliations after the authors
% Affiliations 2 and 4 have no \affil entry of their own: they are typeset by
% hand with \textsuperscript inside the entries for 1 and 3.
\affil[1]{%
    University of Washington, \textsuperscript{2}University of California, Berkeley
}
\affil[3]{%
    Johns Hopkins University, \textsuperscript{4}Boston University
}
% Contact e-mails for the authors marked *, \dagger and \ddagger.
% Entries are separated by ", " throughout (the last one previously lacked the space).
\affil[*]{%
    \texttt{magic.linuxkde@gmail.com}, \textsuperscript{$\dagger$}\texttt{weixinw1@uci.edu}, \textsuperscript{$\ddagger$}\texttt{junze@bu.edu}
}
\fi


\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amssymb}
\usepackage{algorithm}
\usepackage{subfig}
\usepackage{algpseudocode}
\usepackage{graphicx}
\usepackage{grffile}
\usepackage{wrapfig,epsfig}
\usepackage{url}
\usepackage{xcolor}
\usepackage{epstopdf}


\usepackage{bbm}
\usepackage{dsfont}

 %%% print refs in table of contents
%\displaybreak
\allowdisplaybreaks

%\usepackage[lmargin=1in,rmargin=1in,tmargin=0.8in,bmargin=0.8in]{geometry}

% Build-specific packages: the arXiv build (\isarxiv defined) and the
% conference build load different extras.
\ifdefined\isarxiv

% NOTE(review): presumably clears \C so that the \DeclareMathOperator*{\C}
% declaration later in this preamble does not fail with "already defined" --
% confirm which package or class defines \C in this build.
\let\C\relax
\usepackage{tikz}
\usepackage{hyperref}  %%% NOTE(review): the original remark said arXiv does not allow this -- unclear what restriction is meant; confirm.
%\hypersetup{colorlinks=true,citecolor=blue,linkcolor=blue} %%% Zhao : maybe we should comment this in submission.
\usetikzlibrary{arrows}
\usepackage[margin=1in]{geometry}

\else

\usepackage{microtype}
\usepackage{hyperref}
% Colour referenced only by the (currently disabled) \hypersetup line below.
\definecolor{mydarkblue}{rgb}{0,0.08,0.45}
%\hypersetup{colorlinks=true, citecolor=mydarkblue,linkcolor=mydarkblue}
%\usepackage[capitalize,noabbrev]{cleveref}
%\usepackage{colortbl}

\fi
%\linespread{1}
%\newcommand{\QED}{\hfill$\qed$}
\graphicspath{{./figs/}}


% Theorem-like environments.
% `theorem' owns the counter and is numbered within sections; every other
% environment below reuses that counter, so all of them share one numbering.
% No `proof' environment is declared here (amsthm already provides one).
\newtheorem{theorem}{Theorem}[section]

% -- statements that are proved --
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{fact}[theorem]{Fact}
\newtheorem{observation}[theorem]{Observation}
\newtheorem{property}[theorem]{Property}

% -- definitions and standing assumptions --
\newtheorem{definition}[theorem]{Definition}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{assumption}[theorem]{Assumption}
\newtheorem{hypothesis}[theorem]{Hypothesis}

% -- discussion, examples and open questions --
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{open}[theorem]{Open Problem}
\newtheorem{conjecture}[theorem]{Conjecture}

% Shorthand macros.

% Wide accents.
\newcommand{\wh}{\widehat}
\newcommand{\wt}{\widetilde}
\newcommand{\ov}{\overline}

% Frequently used symbols.
\newcommand{\N}{\mathcal{N}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\RHS}{\mathrm{RHS}}
\newcommand{\LHS}{\mathrm{LHS}}

% CAUTION: the next two lines replace LaTeX's text commands \d (under-dot
% accent) and \i (dotless i) with math-mode symbols, so the text versions are
% no longer available after this point.
\renewcommand{\d}{\mathrm{d}}
\renewcommand{\i}{\mathbf{i}}

% \tilde and \hat are redirected to the wide variants defined above.
\renewcommand{\tilde}{\wt}
\renewcommand{\hat}{\wh}

% Calligraphic T with an upright "mat" subscript. Written with \mathcal
% rather than the obsolete font switch {\cal T}.
\newcommand{\Tmat}{\mathcal{T}_{\mathrm{mat}}}

% Math operators (amsmath's \DeclareMathOperator).

% Starred form: sub- and superscripts are placed as limits in display style.
\DeclareMathOperator*{\C}{\mathbb{C}}
\DeclareMathOperator*{\D}{\mathcal{D}}
\DeclareMathOperator*{\E}{{\mathbb{E}}}
\DeclareMathOperator*{\mean}{mean}
\DeclareMathOperator*{\median}{median}
\DeclareMathOperator*{\var}{\mathrm{Var}}
\DeclareMathOperator*{\Z}{\mathbb{Z}}

% Unstarred form: sub- and superscripts stay beside the operator name.
\DeclareMathOperator{\A}{\mathsf{A}}
\DeclareMathOperator{\cost}{cost}
\DeclareMathOperator{\cts}{cts}
\DeclareMathOperator{\diag}{diag}
\DeclareMathOperator{\dis}{dis}
\DeclareMathOperator{\dist}{dist}
\DeclareMathOperator{\nnz}{nnz}
\DeclareMathOperator{\OPT}{OPT}
\DeclareMathOperator{\poly}{poly}
\DeclareMathOperator{\rank}{rank}
\DeclareMathOperator{\reg}{reg}
\DeclareMathOperator{\sparsity}{sparsity}
\DeclareMathOperator{\supp}{supp}
\DeclareMathOperator{\tr}{tr}
\DeclareMathOperator{\vect}{vec}


% \RN{<number>}: typesets <number> as an upper-case Roman numeral.
% \romannumeral is expanded first and yields the lower-case numeral; the
% kernel-internal \@slowromancap then converts it letter by letter, with the
% trailing `@' serving as its end marker.
% \makeatletter/\makeatother are required because the internal macro name
% contains `@'.
\makeatletter
\newcommand*{\RN}[1]{\expandafter\@slowromancap\romannumeral #1@}
\makeatother
% \newcommand{\Zhao}[1]{{\color{red}[Zhao: #1]}}  
%%%Change to intern name


% Line-numbering support. Nothing in this file switches numbering on; this
% only loads the package and sets the font used for the numbers.
\usepackage{lineno}
% \renewcommand instead of \def: if \linenumberfont were misspelled or not
% provided by the package, LaTeX reports an error instead of silently
% defining a new macro that nothing uses.
\renewcommand{\linenumberfont}{\normalfont\small}





\begin{document}

% Front matter, part 1: title metadata.
\ifdefined\isarxiv

% arXiv build: clear the date and declare title/author here. \maketitle is
% not called in this branch; it is issued inside the titlepage environment
% further down.
\date{}


\title{A Fast Optimization View: Reformulating Single Layer Attention in LLM Based on Tensor and SVM Trick, and Solving It in Matrix Multiplication Time}

% NOTE(review): the author list is empty in this branch -- confirm this is intended.
\author{
}




\else

% Conference build: title and authors were already declared in the preamble,
% so only \maketitle is needed here.
% \title{Intern Project} 
\maketitle 

\fi





% Front matter, part 2: abstract (plus the title page in the arXiv build).
\ifdefined\isarxiv
% arXiv build: title and abstract go together on a separate title page that
% uses the empty page style.
\begin{titlepage}
  \maketitle
  \begin{abstract}
\input{abstract}

  \end{abstract}
  \thispagestyle{empty}
\end{titlepage}

% Empty group left over from a disabled table of contents.
{%\hypersetup{linkcolor=black}
%\tableofcontents
}
\newpage

\else

% Conference build: the abstract follows the title block directly.
\begin{abstract}
\input{abstract}
\end{abstract}

\fi


% Main paper body, read from one file per section, in this order.
\input{intro} %%% Section 1. Introduction

\input{related_work}

\input{tech}

\input{discussion}

\input{conclusion}

% Back matter of the main paper.
\ifdefined\isarxiv
% arXiv build: nothing is emitted here; the bibliography for this build is
% printed at the end of the file, after the appendix.
%\section*{Acknowledgments}
% \bibliographystyle{alpha}
% \bibliography{ref}
\else
% Conference build: acknowledgements and references close the main paper,
% before the supplementary material starts.
\newpage
\subsubsection*{Acknowledgements}
The authors would like to thank the anonymous reviewer of UAI 2025 for their highly insightful suggestions.

\bibliography{ref}
%\bibliographystyle{icml2022}
% \bibliographystyle{alpha}

\fi

% Supplementary material starts on a fresh page in one-column layout.
\newpage
\onecolumn

% Only the conference build gets a second title block.
% In the arXiv build (article class) \maketitle has already been used once.
% The standard classes disable \title and \maketitle after the first use, so
% repeating them there would typeset the title text as an ordinary paragraph
% in the body instead of a heading.
\ifdefined\isarxiv\else
\title{A Fast Optimization View: Reformulating Single Layer Attention in LLM Based on Tensor and SVM Trick, and Solving It in Matrix Multiplication Time\\(Supplementary Material)}
\maketitle
\fi

% Appendix: everything after \appendix is sectioned as appendix material.
% Each part is read from its own file, in this order; the commented-out
% \newpage lines are disabled page breaks between parts.
\appendix

%\newpage
\input{preli}

%\newpage
\input{gradient}

%\newpage
\input{hessian}

%\newpage
\input{hessian_X}

%\newpage 
\input{lipschitz_x}

%\newpage
\input{psd_x}

%\newpage
\input{hessian_Y}

%\newpage
\input{hessian_XY}

%\newpage
\input{lipschitz_xy}

%\newpage
\input{psd_xy}

%\newpage
\input{tensorsketch}

%\newpage
\input{newton}

% References for the arXiv build, printed after the appendix with the `alpha'
% style. The conference build prints its bibliography earlier (before the
% supplementary material), so its branch here is intentionally empty.
\ifdefined\isarxiv
%\section*{Acknowledgments}
\bibliographystyle{alpha}
\bibliography{ref}
\else
% \bibliography{ref}
%\bibliographystyle{icml2022}
% \bibliographystyle{alpha}

\fi









%%%% Cut-line between first 10 pages and appendix







%%% some writing rules

%% Writing rule for creating tags.
%% Tags :
%% Theorem    \ref{thm:bla_bla}
%% Lemma      \ref{lem:bla_bla}
%% Claim      \ref{cla:bla_bla}
%% Corollary  \ref{cor:bla_bla}
%% Fact       \ref{fac:bla_bla}
%% Definition \ref{def:bla_bla}
%% Section    \ref{sec:bla_bla}
%% Subsection \ref{sub:bla_bla}
%% Equation   \ref{eq:bla_bla}



\end{document}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
