% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.21 of 2022/01/12
%
\documentclass[runningheads]{llncs}
%
\usepackage[T1]{fontenc}
% T1 fonts will be used to generate the final print and online PDFs, 
% so please use T1 fonts in your manuscript whenever possible.
% Other font encodings may result in incorrect characters.
%
\usepackage{orcidlink}
\usepackage{multirow}
\usepackage{graphicx}
\usepackage{float}
\usepackage{amsmath}
\usepackage[numbers,sort&compress]{natbib}
\usepackage{hyperref}
% Used for displaying a sample figure. If possible, figure files    should
% be included in EPS format.
%
% If you use the hyperref package, please uncomment the following two lines
% to display URLs in blue roman font according to Springer's eBook style:
%\usepackage{color}
%\renewcommand\UrlFont{\color{blue}\rmfamily}
% Link appearance is configured through \hypersetup rather than package options,
% so it still takes effect if hyperref was already loaded by another package.
\hypersetup{hidelinks, pdfborder={0 0 0}} % no colored borders, no boxes
\urlstyle{same}  


%
\begin{document}
%
\title{Semi-supervised CBCT–IOS Registration Using PointNetLK}
%
\titlerunning{Semi-supervised CBCT--IOS Registration}
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
%
\author{%
Ajo Babu George\inst{1}\orcidlink{0009-0005-3026-0959} \and
Gadha Lekshmi P\inst{2}\orcidlink{0009-0006-4741-7286} \and
Sadhvik Bathini\inst{3}\orcidlink{0009-0007-1011-3761} \and
Govind A\inst{3}\orcidlink{0009-0006-4741-7286}% TODO(review): this ORCID is identical to the second author's -- confirm the correct iD
}

\authorrunning{George et al.}

\institute{%
DiceMed, India \and
Indira Gandhi National Open University, India \and
Indian Institute of Technology Kharagpur, West Bengal, India \\
\email{drajo\_george@DiceMed.in}
}
%
\maketitle              % typeset the header of the contribution
%
\begin{abstract}
Accurate alignment of intraoral scans (IOS) with cone-beam computed tomography (CBCT) is essential for integrated dental diagnostics and surgical planning. A semi-supervised registration framework was developed, combining PointNetLK for feature-based initialization with iterative closest point (ICP) refinement. Pseudo-labels were incorporated to enhance supervision while mitigating the limited availability of annotated datasets. Chamfer distance and clinical registration metrics were used to evaluate alignment quality. Across the test cohort, the approach yielded a mean translation error of 41.67 mm and a mean rotation error of 33.96°, highlighting the challenge of partial-arch fusion. Despite substantial errors relative to clinical requirements, the framework demonstrates feasibility of semi-supervised deep learning for IOS–CBCT registration and establishes a foundation for future refinement toward clinically viable integration.

\keywords{Point cloud registration  \and CBCT \and IOS \and PointNetLK \and Dental Imaging \and Semi-supervision.}
\end{abstract}
\section{Introduction}
\vspace{-5pt}
\subsection{Background and Challenge Overview}
Rigid registration of 3D dental models obtained from CBCT scans and intraoral optical scans (IOS—STL format) is foundational for precise diagnosis, treatment planning, and surgical simulation in maxillofacial radiology. However, differences in modality (volumetric vs. surface), anatomical asymmetry, and noise make alignment challenging. The MICCAI STSR 2025 challenge highlights these difficulties, bringing together state-of-the-art medical imaging and machine learning techniques targeting accurate multi-modal dental registrations.
CBCT beam hardening and IOS occlusal shadowing degrade correspondence search. Recent advances in oriented bounding-box normalization and curvature-aware features partially mitigate such variability \cite{alsheghri2024robust, Zhang2019MSLesion}. Semi-supervised and self-supervised strategies have shown promise in stabilizing geometric representations under limited annotation \cite{Wang2024STSM2, Wang2025, jing2024usct, liu2022hierarchical}.
\vspace{-7pt}
\subsection{Related Work}
\vspace{-5pt}
Prior research has explored classical methods like Iterative Closest Point (ICP) and multi-point registration, which struggle with large initial misalignments in CBCT-IOS scenarios. Deep learning approaches, particularly PointNet and PointNet++, have demonstrated superior feature extraction for irregular point clouds. The Lucas--Kanade algorithm, adapted for 3D point cloud registration via PointNetLK, enables global feature-based transformations with robust performance in noisy conditions. Semi-supervised approaches are emerging for better clinical generalizability \cite{Wang2024SemiSupervised, Wang2025, Wang2024STSM2}. For segmentation conditioning in CBCT--IOS registration, ArchSeg achieved Dice scores of 
$0.936 \pm 0.008$ (mandible) and $0.948 \pm 0.007$ (maxilla) using Point Transformer V2 with 
curvature cues and graph-cut refinements \cite{alsheghri2024robust}. Multi-phase semi-supervised 
training with entropy-confidence-aware pseudo-label refinement has improved generalization 
\cite{Wang2024STSM2,Wang2025}. In CBCT, uncertainty-regularized symmetric consistency learning 
(USCT) outperforms semi-supervised baselines \cite{jing2024usct}, while hierarchical 
self-supervised contrastive pretraining (STSNet) enhances IOS mesh processing 
\cite{liu2022hierarchical}. Teacher--student SAM adaptations leverage LoRA fine-tuning for 
improved performance \cite{gan2025segmentation}.
\vspace{-7pt}
\subsection{Motivation and Contributions}
\vspace{-5pt}
This work presents a unified PointNetLK pipeline optimized for noisy dental CBCT and IOS data. Our contributions include: (i) end-to-end alignment leveraging both unlabeled and labeled data for feature generalization; (ii) ICP post-processing for enhanced registration refinement; and (iii) automated point cloud normalization and anatomical variability management.
\par\textbf{Methodological Rationale.} Segmentation reliability directly conditions rigid alignment stability under metal artifacts, partial arches, and age-related morphology \cite{alsheghri2024robust, jing2024usct,Wang2025}. PointNetLK's permutation-invariant global features enable robust initialization under occlusion and partial overlap conditions typical of IOS–CBCT pairs, while ICP refinement subsequently optimizes local geometric consistency. Our semi-supervised pipeline comprises three components:
\begin{itemize}
\item \textbf{Semi-supervised IOS--CBCT registration:} A unified framework combines PointNetLK-based initialization with ICP refinement, enabling alignment under limited annotated data conditions.
\item \textbf{Pseudo-label enhanced supervision:} Pseudo-labels generated from a teacher network are integrated to expand training signals and mitigate annotation scarcity.
\item \textbf{Clinically relevant evaluation and baseline:} Registration accuracy is quantified using Chamfer distance, translation and rotation errors to establish a reproducible benchmark; experimental results highlight partial-arch fusion challenges and provide a foundation for future clinical integration.
\end{itemize}

\vspace{-20pt}
\section{Resources}
\vspace{-5pt}
\subsection{Dataset}
\vspace{-5pt}
The dataset originates from the MICCAI STSR 2025 Challenge Task 2, providing paired CBCT and intraoral scan (IOS) data for cross-modal dental registration, emphasizing alignment of IOS-derived crown structures with CBCT-derived root anatomies.
The dataset comprises three subsets:
\begin{itemize}
\item Training Set (Labeled): 30 CBCT–IOS pairs with ground truth rigid transformations.
\item Training Set (Unlabeled): 300 CBCT–IOS pairs without annotation for semi-supervised strategies.
\item Validation Set: 50 CBCT–IOS pairs with hidden ground truth for evaluation.
\end{itemize}
The limited availability of richly annotated 3D dental datasets mirrors broader trends in dental imaging, where most publicly available resources—particularly in pediatric populations—focus on 2D panoramic radiographs rather than volumetric CBCT or cross-modal data \cite{zhang2023children}. This disparity further motivates semi-supervised learning strategies for 3D dental applications.
\vspace{-15pt}
\subsection{Models}
\vspace{-5pt}
The proposed model, illustrated in Figure~\ref{model}, implements a PointNetLK-based registration pipeline tailored for CBCT–IOS point cloud alignment.
\begin{figure}[H]
    \centering
\includegraphics[width=\linewidth]{figure (7).pdf}
    \caption{PointNetLK registration pipeline: PointNet extracts features, the feature difference feeds the Update Network to predict 6-DOF $\Delta T$, applied iteratively to the source to produce the final 4$\times$4 transform.}
    \label{model}
\end{figure}
\vspace{-20pt}
The green blocks correspond to dual PointNet encoders, which independently process the source (IOS crown) and target (CBCT root) point clouds. Each encoder is composed of three successive pointwise convolutional layers: the first maps 3D coordinates to 64-dimensional features, the second expands to 128 dimensions, and the final layer produces a 1024-dimensional global descriptor. Batch normalization and ReLU activations follow each convolution, while global max pooling ensures permutation-invariant feature vectors, summarizing the overall geometric structure of the point clouds.
The magenta ball represents the feature difference $f_t - f_s$ between the target and the transformed source, which serves as the input to the Update Network (orange box). This multi-layer perceptron predicts incremental 6-DOF transformations, applied to the source in the blue Apply Transform box. The red arrow looping back from Apply Transform to the feature difference visually indicates the iterative Lucas–Kanade refinement, repeated multiple times to improve alignment.
Finally, the purple box outputs the cumulative 4$\times$4 transformation matrix, which maps the IOS crown onto the CBCT root. This modular design allows for task-specific heads or post-hoc refinement steps while maintaining efficiency and stability across variable point cloud sizes.
\vspace{-15pt}
\section{Methodology}
\vspace{-5pt}
The overall data flow, preprocessing strategy, training protocol, and evaluation metrics are summarized in Fig.~\ref{fig:exp_setup}.
\begin{figure}[H]
    \centering
    \includegraphics[width=0.9\linewidth]{diagram-export-9-22-2025-4_22_27-PM.png}
    \caption{Overview of the methodological setup, including dataset splits, preprocessing of CBCT and IOS data, two-stage training protocol, and evaluation metrics.}
    \label{fig:exp_setup}
\end{figure}
\vspace{-20pt}
Our approach adapts the established PointNetLK framework \cite{aoki2019pointnetlk} for dental-specific CBCT-IOS registration through three key contributions: (i) a two-stage semi-supervised training protocol leveraging pseudo-labels, (ii) dental-specific preprocessing tailored for multimodal point clouds, and (iii) hybrid neural-classical refinement combining learned features with ICP optimization \cite{besl1992method}.\\
\textbf{Two-Stage Semi-Supervised Training Protocol}\\
Stage 1 employs supervised learning on labeled pairs to establish baseline registration capability. Stage 2 generates pseudo-labels on unlabeled data using the trained model, applies confidence-based filtering, and performs semi-supervised fine-tuning on the combined dataset. This approach addresses the scarcity of annotated CBCT-IOS pairs in clinical settings.\\
\textbf{Dental-Specific Preprocessing}\\
Point cloud preprocessing incorporates domain knowledge for dental registration. CBCT volumes undergo threshold-based segmentation at 800~HU to isolate dental structures, followed by coordinate normalization to the $[-1,1]$ range. STL meshes are processed through vertex extraction and subsampling. Ground truth transformations are consistently mapped to normalized coordinate space to ensure training stability.\\
\textbf{Hybrid Neural-Classical Refinement}\\
The framework combines the global feature learning of PointNetLK \cite{aoki2019pointnetlk} with classical ICP refinement \cite{besl1992method}. PointNet encoders \cite{qi2017pointnet} extract permutation-invariant features, PointNetLK performs iterative pose estimation through differentiable Lucas--Kanade optimization \cite{lucas1981iterative}, and multi-stage ICP provides final geometric consistency. This hybrid approach balances robustness to initialization with precise local alignment.
\vspace{-10pt}
\section{Experimental Setup}
\vspace{-5pt}
All experiments were conducted on the MICCAI STSR 2025 Challenge Task 2 dataset using the three-subset division for supervised training, pseudo-label generation, and validation.
\vspace{-10pt}
\subsection{Data Preprocessing}
\vspace{-5pt}
CBCT volumes were thresholded at 800 HU to isolate dental structures and converted to world coordinates using the NIfTI affine matrix. STL meshes were processed by extracting vertices directly from triangle meshes. Point clouds were subsampled to 50,000 points maximum for memory efficiency. During training, 1024 points were randomly sampled from both CBCT-derived point clouds and STL meshes.
Normalization was applied by computing a global center and scale from the combined CBCT and STL point clouds, mapping coordinates to approximately the $[-1, 1]$ range. Ground truth transformation matrices were transformed to normalized coordinate space to ensure consistency between predicted and target transformations. Preprocessing was performed inline during training to maintain coordinate system integrity.

\textbf{Preprocessing Limitations.} The fixed 800 HU threshold may not generalize across CBCT scanners with varying grayscale characteristics or patients with metallic restorations. Future work should validate robustness across multi-vendor datasets and explore adaptive thresholding or learned segmentation networks to improve scanner independence.

\vspace{-10pt}
\subsection{Training Setup}
\textbf{Stage 1: Supervised Training.} The PointNet-based feature extraction combined with PointNetLK iterative alignment was trained using a combined loss function incorporating both Chamfer distance and direct transformation supervision:
\[
L_{\text{total}} = L_{\text{chamfer}} + L_{\text{transform}},
\]
where $L_{\text{transform}}$ includes both translation and rotation errors between predicted and ground truth transformation matrices in normalized coordinate space.\\
\textbf{Pseudo-Label Generation.} The trained Stage 1 model generated predictions for unlabeled cases. ICP post-processing refined initial neural network predictions. High-confidence pseudo-labels were selected by filtering predictions with Chamfer distance below the median of Stage 1 validation performance and excluding cases with extreme transformation magnitudes indicative of failed alignment. This filtering retained approximately 60\% of unlabeled cases for Stage 2 training.

\textbf{Stage 2: Semi-Supervised Training.} The model was fine-tuned using the combined dataset of original labeled cases plus selected high-confidence pseudo-labeled cases from the unlabeled set.
\vspace{-15pt}
\subsection{Training Protocols}
\vspace{-25pt}
\begin{table*}[h]
\caption{Training protocols.}
\label{table:training}
\begin{center}
% \resizebox{0.47\textwidth}{!}{
\begin{tabular}{ll} 
\hline
Batch size                    & 4--8 \\
\hline 
Total epochs & 100 (Stage 1, early stopping), 50 (Stage 2)\\
\hline
Optimizer          & Adam \\ 
\hline
Initial learning rate (lr)  & $5 \times 10^{-4}$ \\ 
\hline
Lr decay schedule & ReduceLROnPlateau (patience=10, factor=0.5) \\
\hline
Training time                                           & $\sim$4.2 hours (Stage 1), $\sim$12 hours \\ % TODO(review): the second figure has no stage label -- presumably Stage 2; confirm and label it
\hline 
Loss function & Combined Chamfer + Transformation Loss \\     
\hline
Number of model parameters    & 0.81M \\ 
\hline
Number of FLOPs & 2.37G \\ 
\hline
\end{tabular}
%}
\end{center}
\end{table*}
\vspace{-40pt}
\subsection{Evaluation Metrics}
\vspace{-5pt}
Performance assessment employed multiple metrics to comprehensively evaluate registration quality. Translation error measures the Euclidean distance between predicted and ground truth translation vectors in millimeters, while rotation error quantifies the angular difference between predicted and ground truth rotation matrices in degrees. Chamfer distance evaluates symmetric point-to-point distances between aligned point clouds, and Surface Dice Coefficient assesses overlap for registration quality. Additional point cloud similarity metrics including RMSE, NCC, and NMI provide supplementary performance indicators.
\vspace{-15pt}
\subsection{Post-Processing Pipeline}
\vspace{-5pt}
The post-processing pipeline consists of four sequential steps to refine neural network predictions. Initially, the neural network generates a 4$\times$4 transformation matrix in normalized coordinate space. These predictions are then transformed back to original coordinate space through coordinate denormalization. Multi-stage ICP refinement follows, incorporating correspondence finding with statistical outlier rejection (distance-based trimming to suppress artifact-induced false correspondences), SVD-based optimal rotation estimation, and iterative refinement with convergence criteria. Finally, the refined transformation is applied to original point clouds to achieve final alignment.
\vspace{-5pt}
\section{Results and discussion}
\vspace{-10pt}
\subsection{Quantitative Results and Ablation Study}
\vspace{-5pt}
Table~\ref{tab:results} presents registration performance and component contributions on the MICCAI STSR 2025 validation set.
\vspace{-10pt}
\begin{table}[H]
\caption{Registration performance and ablation study on MICCAI STSR 2025 validation set.}
\label{tab:results}
\centering
\begin{tabular}{lcccc}
\hline
Method Configuration & Translation (mm) & Rotation (°) & Chamfer & DSC \\
\hline
Stage 1 (Supervised only) & 47.23 & 39.84 & 2.05 & 0.798 \\
Stage 2 (Semi-supervised) & 43.91 & 36.72 & 1.92 & 0.824 \\
\textbf{Full Pipeline (Stage 2 + ICP)} & \textbf{41.67} & \textbf{33.96} & \textbf{1.83} & \textbf{0.846} \\
\hline
\end{tabular}
\end{table}
\vspace{-20pt}
The ablation study quantifies each component's contribution: semi-supervised training improved translation accuracy by 3.32 mm (47.23$\rightarrow$43.91 mm) and rotation by 3.12° (39.84°$\rightarrow$36.72°), while ICP refinement provided further gains of 2.24 mm and 2.76°, demonstrating the value of the hybrid approach.
\vspace{-15pt}
\subsection{Training Analysis}
\vspace{-5pt}
Figure~\ref{fig:training} shows error curves for both stages. Stage 1 converged after 50 epochs, while Stage 2 demonstrated further improvement through pseudo-label utilization.
\begin{figure}[H]
\centering
% Insert your actual training loss plot here
\includegraphics[width=1\linewidth]{Blank diagram - Page 4 (8).png}
\caption{Stage~1 training (red, solid) and validation (red, dotted) curves, and Stage~2 training (blue, solid) and validation (blue, dotted) curves.}
\label{fig:training}
\end{figure}
\vspace{-15pt}
\subsection{Qualitative Results}
Figure~\ref{fig:visual_result} shows a representative registration result demonstrating successful alignment between IOS crown surfaces and the CBCT-derived anatomy. The semi-supervised training effectively utilizes unlabeled data, addressing limited annotated CBCT-IOS pairs in clinical practice.
\begin{figure}[H]
\centering
% Insert your actual visual result here
\includegraphics[width=1\linewidth]{Screenshot 2025-09-23 155859.png}
\caption{Registration result: (left) initial misalignment, (right) aligned output.}
\label{fig:visual_result}
\end{figure}
However, the reported translation (41.67 mm) and rotation (33.96°) errors substantially exceed clinical tolerances for surgical planning. Analysis identifies three primary failure modes: (i) partial-arch cases with limited overlapping geometry lack sufficient alignment constraints and exhibit disproportionately high errors; (ii) CBCT metal artifacts introduce non-anatomical points that corrupt PointNet features and bias ICP correspondence; and (iii) large initial misalignments prevent convergence within the fixed iteration budget. Future refinement will prioritize scale-consistent normalization, keypoint-based coarse alignment for improved initialization, and artifact-aware feature weighting to suppress corrupted descriptors.
\vspace{-10pt}
\section{Conclusion and Future Work}
\vspace{-5pt}
This study demonstrates a semi-supervised PointNetLK framework for CBCT-IOS dental registration, combining pseudo-label-enhanced training with hybrid neural-classical refinement. The two-stage protocol effectively leverages unlabeled data, achieving mean translation errors of 41.67 mm and rotation errors of 33.96°. While these errors exceed clinical thresholds, the ablation study validates each component's contribution, establishing feasibility for semi-supervised deep learning in cross-modal dental registration.

Translation and rotation errors remain above clinical requirements primarily due to partial-arch coverage limitations and initialization sensitivity. The fixed 800 HU threshold constrains scanner generalization. Future work will explore uncertainty quantification for pseudo-label selection, transformer-based architectures for long-range context modeling \cite{vaswani2017attention}, multi-scale feature fusion \cite{ronneberger2015u}, and integration with foundation models \cite{kirillov2023segment}. Validation on larger multi-center datasets with diverse scanner protocols will be essential for clinical deployment.
% ---- Bibliography ----
%
% BibTeX users should specify bibliography style 'splncs04'.
% References will then be sorted and formatted in the correct style.
%
\bibliographystyle{splncs04}
\bibliography{ref}



\end{document}
