






\begin{table*}[t]
% Benchmark table with two independent halves placed side by side:
%   columns 1-4: methods evaluated on C3VD split 2 (AbsRel, SqRel, RMSE)
%   columns 5-8: methods evaluated on SimCol III   (L1, RMSE, AbsRel)
% Each source line below is one complete table row (left half & right half).
% Note that the metric order differs between the two halves.
\footnotesize
\caption{Benchmarking comparison on C3VD and SimCol.\ $^{*}$ denotes methods that rely solely on supervised learning, while $^{**}$ denotes models trained with additional data using self-supervised learning (SSL). Superscripts 1--4 indicate the challenge ranking (1\textsuperscript{st} to 4\textsuperscript{th} place). The \textbf{best} result for each metric is highlighted in bold. EndoStreamDepth outperforms the compared methods on all metrics except the absolute relative error (AbsRel) on C3VD.}
\label{benchmark_table}
\centering
\begin{tabular}{lccc | lccc}
\toprule
 & \multicolumn{3}{c|}{C3VD split 2} & & \multicolumn{3}{c}{SimCol III} \\
\cmidrule(lr){2-4} \cmidrule(lr){6-8}
Methods & AbsRel$\downarrow$ & SqRel$\downarrow$ & RMSE$\downarrow$ & Methods & L1$\downarrow$ & RMSE$\downarrow$ & AbsRel$\downarrow$ \\
\midrule
% Compared methods.
LightDepth            & 0.078          & 1.81          & 6.55          & CVML$^1$        & 0.099          & 0.141          & 0.025          \\
NormDepth+$^{**}$     & 0.155          & 1.53          & 7.51          & MIVA$^2$        & 0.107          & 0.163          & 0.025          \\
PPSNet-Teacher$^{*}$  & 0.053          & 0.15          & 2.15          & EndoAI$^3$      & 0.111          & 0.168          & 0.028          \\
PPSNet-Student$^{**}$ & \textbf{0.049} & 0.14          & 2.06          & IntuitiveIL$^4$ & 0.167          & 0.233          & 0.047          \\
% booktabs rule (instead of \hline) so spacing and weight match the other rules.
\midrule
% Proposed method: per-frame and video variants.
Ours-frame$^{*}$      & 0.077          & 0.27          & 1.74          & Ours-frame      & 0.099          & 0.140          & 0.028          \\
Ours-video$^{*}$      & 0.052          & \textbf{0.11} & \textbf{1.72} & Ours-video      & \textbf{0.087} & \textbf{0.126} & \textbf{0.023} \\
\bottomrule
\end{tabular}
\end{table*}


% C3VD, with calibration (columns: AbsRel, SqRel, RMSE); rounded, these are the Ours-frame values in the table:
% 0.0765238615119017	0.271384403837065	1.73512285934615



% without calibration (same column order; not reported in the table):
% 0.0905062739998546	0.320547714490517	1.84028871107282



% video:

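% with calibration (columns: AbsRel, SqRel, RMSE); rounded, these are the Ours-video C3VD values in the table: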
% 0.0520        0.1103        1.7156


% without calibration (same column order; not reported in the table):
% 0.0657178741665573	0.154828725635394	1.79771348248227



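% simcol (columns here: AbsRel, RMSE, L1 -- NOTE: the table lists them as L1, RMSE, AbsRel):
% image (frame model); the last row, rounded, gives the Ours-frame SimCol III values in the table
% (the first two rows are presumably other SimCol subsets -- TODO confirm):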
% 0.0139918663644271	0.0742857149134299	0.0393415479548473
% 0.0111874283154752	0.0734810130428871	0.041094000386345
% 0.027980957398963	0.140394103892075	0.0990352414323067


% video:

% 0.020187454950225	0.0824733238460018	0.054063691570454

% 0.0168168839897015	0.0888289121681533	0.0612199726327667

% 0.0234709419201952	0.125922154835864	0.0866599464561217