\begin{table*}[t]
\caption{Comparison with state-of-the-art methods for metric depth estimation on the first split of the C3VD dataset (Sec.~\ref{C3VD dataset}).
Distance-based metrics are in $\mathrm{mm}$.
The \textbf{best} result in each category is highlighted.
The markers * and ** tie each baseline to the ablation rows that build on it.
The ablation is incremental (each row adds one component); the detailed settings are given in Tab.~\ref{tab:ablation_settings} and Appendix~\ref{blation setting}.
Shaded rows denote the methods used for further public benchmarking.
Our model outperforms the compared baselines in every metric.}
% NOTE(review): the reference key "blation setting" looks like a typo for "ablation setting"; confirm against the appendix \label before renaming.
\label{main_table}
\footnotesize
\begin{center}
    \begin{tabular}{ l  | c |c |c |c |c |c |c}
    \toprule
    \multicolumn{1}{l}{Methods} &  \multicolumn{1}{c}{$\delta_1\uparrow$} & \multicolumn{1}{c}{AbsRel$\downarrow$} & \multicolumn{1}{c}{SqRel$\downarrow$} & \multicolumn{1}{c}{RMSE$\downarrow$} &  \multicolumn{1}{c}{RMSE log$\downarrow$} & \multicolumn{1}{c}{L1$\downarrow$} &  \multicolumn{1}{c}{F1$\uparrow$} \\
    
    
            
    \midrule
    DepthAnything v2 & 0.847  & 0.158  & 0.635
            & 3.497 & 0.169 & 2.503 & 0.089 \\
    Metric DAv2 * & 0.850  & 0.149  & 0.566
            & 3.538 & 0.166 & 2.492 & 0.095 \\
    EndoOmni & 0.836 & 0.154  & 0.610
            & 3.623 & 0.170 & 2.596 & 0.109 \\
    
    DINOv3 depth & 0.731 & 0.192  & 1.188
            & 5.457 & 0.194 & 3.955 & 0.070 \\
    
    FlashDepth ** & 0.730 & 0.188  & 1.046
            & 4.989 & 0.190 & 3.780 & 0.116 \\
            
    EndoStreamDepth (Ours) & \textbf{0.952} & \textbf{0.085}  & \textbf{0.246}
            & \textbf{2.739} & \textbf{0.107} &\textbf{1.780} & \textbf{0.143} \\
    
    % \bottomrule
    
    \midrule
    
    \multicolumn{8}{c}{Ablation study (single-frame depth network)} \\
    
    \midrule
    \rowcolor{lavender} 
    * + EST & 0.948  & 0.109  & 0.402
            & 3.081 & 0.122 & 1.928 & 0.114 \\
    
    
    \midrule
    \multicolumn{8}{c}{Ablation study (video-stream depth network)} \\
    \midrule
    
    ** with SiLog & 0.853  & 0.139  & 0.593
            & 3.774 & 0.151 & 2.695 & 0.112 \\

    + EST & 0.952  & 0.109  & 0.395
            & 3.023 & 0.122 & 1.872 & 0.134 \\
    \midrule
    
    + Metric loss & \textbf{0.954}  & 0.107  & 0.397
            & 3.078 & 0.121 & 1.871 & 0.132 \\
    
    
    + Edge loss & 0.953  & 0.105  & 0.387
            & 2.917 & 0.118 & 1.774 & 0.135 \\
  \midrule
    
    
    + Multi-level temp. & 0.952  & 0.106  & 0.379
            & 2.810 & 0.120 & \textbf{1.748} & 0.123 \\
    
    
    
    + Multi-scale sup. & \textbf{0.954}  & 0.086  & 0.254
            & 2.866 & \textbf{0.106} & 1.806 & 0.134 \\
    
    
    \rowcolor{lavender}      
    + Temporal reg. (Ours) & 0.952 & \textbf{0.085}  & \textbf{0.246}
            & \textbf{2.739} & 0.107 &1.780 & \textbf{0.143} \\
    
    \bottomrule
    
     
\end{tabular} 

\end{center}
\end{table*}









