\documentclass{midl} % Include author names

% The following packages will be automatically loaded:
% jmlr, amsmath, amssymb, natbib, graphicx, url, algorithm2e
% ifoddpage, relsize and probably more
% make sure they are installed with your latex distribution

\usepackage{mwe} % to get dummy images
\usepackage[inkscapelatex=false]{svg}
\usepackage{booktabs} % for \toprule, \midrule, etc.
\usepackage{xcolor}
\usepackage{soul}
% set highlight color (light yellow)
% \sethlcolor{yellow!30}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026}
\jmlrvolume{-- 23}
\editors{Accepted for publication at MIDL 2026}


\title[Morphometric Analysis of Stratum Corneum Nanotexture]{Multicenter Morphometric Analysis of Stratum Corneum Nanotexture for Skin Barrier Assessment}

%\footnotetext[1]{Contributed equally}

% More complicated cases, e.g. with dual affiliations and joint authorship
\midlauthor{
\Name{Jen-Hung Wang\nametag{$^{1,2}$}} \orcid{0000-0001-9214-5837} \Email{jen-hung.wang@scilifelab.se}\\
\Name{Chia-Yu Chu\nametag{$^{3}$}} \orcid{0000-0002-9370-3279} \Email{chiayu@ntu.edu.tw}\\
\Name{Felipe Colombelli\nametag{$^{1}$}} \orcid{0000-0002-8684-8955} \Email{felco@scilifelab.se}\\
\Name{Ching-Wen Du\nametag{$^{3}$}} \orcid{0009-0006-5517-1845} \Email{judydu@ntuh.gov.tw}\\
\Name{Maria Oberl\"ander Christensen\nametag{$^{4}$}} \orcid{0000-0001-7177-1054} \Email{maria.oberlaender.christensen@regionh.dk}\\
\Name{Jorge Pereda\nametag{$^{2}$}} \orcid{0000-0002-4802-0591} \Email{jorgepereda@gmail.com}\\
\Name{Ivone Jakasa\nametag{$^{5}$}} \orcid{0000-0002-7961-4069} \Email{ijakasa@pbf.hr}\\
\Name{Sanja Kezic\nametag{$^{6}$}} \orcid{0000-0002-1063-4547} \Email{s.kezic@amsterdamumc.nl}\\
\Name{Jacob P. Thyssen\nametag{$^{4}$}} \orcid{0000-0003-3770-1743} \Email{jacob.pontoppidan.thyssen@regionh.dk}\\
\Name{Edwin En-Te Hwu\midljointauthortext{Contributed equally}\nametag{$^{2}$}} \orcid{0000-0002-5971-4978} \Email{etehw@dtu.dk}\\
\Name{Gisele Miranda\midlotherjointauthor\nametag{$^{1}$}} \orcid{0000-0001-6079-0452} \Email{gisele.miranda@scilifelab.se}\\
\addr $^{1}$ Science for Life Laboratory, Department of Computational Science and Technology, KTH Royal Institute of Technology, Stockholm, Sweden \\
\addr $^{2}$ Department of Health Technology, Technical University of Denmark, Kongens Lyngby, Denmark \\
\addr $^{3}$ Department of Dermatology, National Taiwan University Hospital and National Taiwan University College of Medicine, Taipei, Taiwan \\
\addr $^{4}$ Department of Dermatology, Bispebjerg and Frederiksberg Hospital (BFH), University Hospitals of Copenhagen, Copenhagen, Denmark \\
\addr $^{5}$ Laboratory for Analytical Chemistry, Department of Chemistry and Biochemistry, Faculty of Food Technology and Biotechnology, University of Zagreb, Zagreb, Croatia \\
\addr $^{6}$ Department of Public and Occupational Health, Amsterdam Public Health Research Institute, Amsterdam University Medical Center, Amsterdam, The Netherlands
}

\begin{document}

\maketitle

\begin{abstract}
Stratum corneum nanotexture (SCN) has emerged as a promising non-invasive biomarker for quantifying skin barrier impairment and the severity of inflammatory skin diseases such as atopic dermatitis (AD). In this multicenter study, we analyzed stratum corneum tape-strip samples from 90 patients with AD and 30 healthy controls recruited in Taiwan and Denmark, yielding a heterogeneous dataset of more than 2,000 SCN images. Participants were evenly stratified into four AD severity groups defined by the Eczema Area and Severity Index (EASI), enabling robust evaluation of SCN-derived metrics across the full spectrum of disease severity. Previous studies have primarily relied on count-based measures to quantify the density of circular nano-size objects (CNOs) in SCN images from single-center cohorts, without leveraging instance-level segmentation or comprehensive morphometric profiling. In this study, we propose and validate a segmentation-based SCN analysis pipeline that integrates YOLOv12 with Segment Anything Model 3 (SAM3) for accurate CNO delineation in a multicenter setting. This framework enables the extraction of detailed morphometric descriptors and facilitates systematic evaluation of SCN-derived biomarkers for quantitative skin barrier assessment in AD. Our code is available at \url{https://github.com/mirandaresearchlab/SCN-SAM}.


\end{abstract}

\begin{keywords}
Atopic Dermatitis, Object Detection, Instance Segmentation, Morphometric Analysis, Stratum Corneum Nanotexture
\end{keywords}

\begin{figure}[t!]
    \centering
    \includegraphics[width=1\textwidth]{figures/Fig1.pdf}
    \caption{Overview of the proposed segmentation-based morphometric analysis pipeline for stratum corneum nanotexture (SCN).}
    \label{fig:1}
\end{figure}

\section{Introduction}

Atopic dermatitis (AD) is a chronic, relapsing inflammatory skin disease characterized by intense pruritus, eczematous lesions, and a substantial impact on quality of life \cite{Langan2020}. Globally, AD is estimated to affect approximately 790 million individuals, including about 20\% of children and 10\% of adults \cite{Silverberg2021}. In clinical practice, disease severity is most commonly assessed using the Eczema Area and Severity Index (EASI), which combines the extent of body surface involvement with the intensity scores of key clinical signs (i.e., erythema, edema/papulation, excoriation, and lichenification) across four body regions \cite{Hanifin2001}. Despite its widespread use, EASI relies on subjective visual assessment and demonstrates only moderate interrater reliability \cite{Schmitt2013}. Consequently, there is growing interest in objective, quantitative biomarkers that can capture epidermal barrier function and disease severity in a reproducible and scalable manner.

In recent years, stratum corneum nanotexture (SCN), the nanoscale topography of corneocyte surfaces, has emerged as a promising non-invasive biomarker of skin barrier function and disease activity, with potential to assess inflammatory skin disorders such as AD \cite{Thyssen2020}. SCN can be measured from stratum corneum (SC) tape strips \cite{Clausen2016}, a minimally invasive sampling method that can be combined with high-speed dermal atomic force microscopy (HS-DAFM) \cite{Liao2022} to obtain high-resolution images of corneocyte surface architecture and its nanoscale alterations \cite{Pereda2024}. In particular, the presence and distribution of circular nano-size objects (CNOs) on the corneocyte surface have been associated with impaired barrier function and filaggrin-related abnormalities, indicating that SCN-derived metrics could serve as quantitative biomarkers of disease severity in AD \cite{Franz2015}.

However, prior SCN studies have mainly relied on simple count-based measures of CNO density, without leveraging instance-level segmentation to capture the detailed morphology of individual CNOs \cite{Riethmuller2015, Wang2024}. When segmentation has been attempted, approaches have typically employed heuristic thresholding or handcrafted image-processing pipelines, which are vulnerable to noise, artifacts, and variations in imaging conditions \cite{Riethmller2018}. Moreover, most existing methods were developed and evaluated on single-center cohorts, which limits their generalizability and does not capture the heterogeneity of clinical practice, imaging protocols, and patient populations \cite{Engebretsen2018ad, Engebretsen2018}.

In this study, we validate SCN-derived metrics in a multicenter AD cohort comprising 90 patients with AD and 30 healthy controls recruited in Taiwan and Denmark, yielding a heterogeneous dataset of more than 2,000 SCN images that covers the full spectrum of disease severity. Building on recent advances in deep learning, we adopt a two-stage pipeline in which a YOLOv12 \cite{tian2025yolov12} model generates CNO detection bounding boxes as prompts for the Segment Anything Model 3 (SAM3) \cite{sam3}, enabling accurate instance-level segmentation and subsequent morphometric profiling of individual CNOs. To evaluate the clinical utility of the proposed segmentation-based SCN morphometrics, we assess their robustness across centers and Fitzpatrick skin phototypes \cite{Fitzpatrick1988}, as well as their ability to capture skin barrier impairment and AD severity.


\section{Method}
\subsection{Study cohort and sample collection}

This multicenter study enrolled 120 adults (age $\geq$ 18 years), comprising 90 patients with AD and 30 healthy controls, recruited in equal numbers from National Taiwan University Hospital (Taipei, Taiwan) and Bispebjerg and Frederiksberg Hospital (Copenhagen, Denmark). The cohorts have been described previously in studies with different objectives \cite{Wang2024,Du2025}. Participants were evenly divided into four groups (n=30 per group) based on AD history and EASI scores: healthy controls (no AD), mild AD (0 $<$ EASI $\leq$ 7.0), moderate AD (7.0 $<$ EASI $\leq$ 21.0), and severe AD (EASI $>$ 21.0). Patients with other chronic skin diseases, active skin infections, or recent application of topical corticosteroids to the sampling sites (within the preceding three days) were excluded. No specific instructions were given to discontinue topical treatments, ensuring that the collected SC samples reflected routine clinical practice.

For each participant, SC samples were collected using a standardized tape-stripping procedure \cite{Dapic2013} with circular adhesive tape strips (D-Squame D101; 1.54 cm$^2$; Clinical \& Derm). Each strip was applied to the skin under controlled pressure using a disc pressure instrument (D-Squame D500; 255 g/cm$^2$; Clinical \& Derm) for 5--10 seconds, then gently removed with tweezers and stored individually in labeled sampling vials. In patients with AD, SC sampling was performed at a clinically defined lesional site on the volar forearm (approximately 10 cm below the elbow crease) and at a contralateral, anatomically matched non-lesional site. Healthy controls were sampled at corresponding volar forearm sites.

At each sampling site, five consecutive tape strips were collected. The first two strips were discarded to minimize potential surface contamination. The third strip was designated for RNA analysis \cite{Shima2022}, the fourth strip for surface topography imaging using HS-DAFM, and the fifth strip for quantification of natural moisturizing factors (NMFs) \cite{Kezic2009}. Tape strips allocated for HS-DAFM topographic imaging were stored at room temperature, whereas all remaining strips were immediately stored at $-80^\circ\mathrm{C}$ until analysis.

\subsection{Image acquisition and dataset preparation}
SCN images were acquired using HS-DAFM equipped with an aluminium-coated silicon nitride probe (spring constant 30 pN/nm, CSC38/Al; MikroMasch) with a nominal tip radius of 8 nm. All measurements were performed in contact mode at constant height, with the contact force maintained below 10 nN to ensure consistent image quality and minimize sample deformation. For each SC sample, ten randomly selected regions were imaged to characterize corneocyte surface topography. Each SCN image was acquired at a resolution of $512 \times 512$ pixels, covering an area of $20 \times 20$ $\mu\mathrm{m}^2$. All images were visually inspected by trained experts, and scans with visible artifacts (e.g., excessive noise, motion blur, or probe defects) were excluded.

Preprocessing was applied to mitigate common AFM imaging challenges, including low contrast and striping artifacts, while enhancing CNO visibility without introducing bias. Raw images first underwent Gaussian smoothing ($\sigma$=10 pixels) to reduce high-frequency noise \cite{6044249}. Row-wise mean subtraction was then performed to correct horizontal striping caused by scanner drift \cite{Canale2011}. Image intensities were normalized to the [0, 1] range across all samples to ensure a consistent dynamic range. Finally, percentile-based local contrast enhancement was applied using disk-shaped morphological elements (diameters 9 and 15 pixels) as percentile filters, systematically scanning each image to amplify subtle features such as CNOs while suppressing background variation \cite{Soille2004, Mukhopadhyay2000, Kimori2011}.

The resulting SCN dataset comprised 2,100 images, distributed across four clinical groups (600 from mild AD, 600 from moderate AD, 600 from severe AD, and 300 from healthy controls) and equally split between the Taiwan and Denmark cohorts (1,050 images each). This balanced multicenter design provided a heterogeneous yet well-controlled dataset for robust evaluation of SCN-derived biomarkers across disease severities and study sites.

\subsection{Two-stage deep learning pipeline for CNO detection and segmentation}
To obtain instance-level segmentations of CNOs, we implemented a two-stage deep learning pipeline that integrates real-time object detection with promptable segmentation (\figureref{fig:1}). First, candidate CNOs were localized using YOLOv12, an attention-centric one-stage detector that achieves state-of-the-art accuracy-latency trade-offs by combining area attention with residual efficient layer aggregation networks (R-ELAN). Second, the resulting bounding boxes were used as box prompts for SAM3, a foundation model for promptable image and video segmentation that supports point-, box-, and mask-based queries, thereby generating instance-level CNO masks.

\subsubsection{YOLO-based CNO detection}
For CNO detection, we trained YOLOv12 models on a previously curated dataset of 300 SCN images with expert-annotated CNO bounding boxes, yielding an average of approximately 250 annotations per image and more than 74,000 labeled instances in total. The dataset was randomly partitioned into a training set (90\%) and a held-out test set (10\%). Within the training set, we employed 10-fold cross-validation to obtain robust performance estimates across YOLOv12 variants and to select the model used for subsequent instance-level segmentation. In each fold, the training images were expanded three-fold using data augmentation, including adjustments to brightness ($-25\%$ to 25\%), exposure ($-15\%$ to 15\%), blur (up to 1 pixel), noise (up to 2\% of pixels), and Mosaic augmentation, following the protocol described in previous work \cite{Wang2024}.

All YOLOv12 variants (N, S, M, L, X) were fine-tuned from checkpoints pretrained on the MS COCO dataset \cite{Lin2014}. Each variant was trained for 600 epochs on the SCN training set using stochastic gradient descent (SGD) with momentum (see \appendixref{app:yolo} for detailed hyperparameter settings). Model complexity was quantified by the number of parameters (in millions, M) and floating-point operations (FLOPs, in gigaflops, G), and detection performance on the held-out test set was evaluated using precision, recall, AP@50, and AP@50--95. The variant achieving the highest AP@50--95 was subsequently selected to generate CNO detection bounding boxes for the multicenter AD dataset. All models were trained and evaluated on a single NVIDIA GeForce RTX 3090 GPU.


\subsubsection{SAM-based instance-level segmentation}
For instance-level segmentation of CNOs, we employed SAM-based models with YOLOv12 detections provided as box prompts. Specifically, we evaluated all SAM2 variants (SAM2.1-Tiny, SAM2.1-Small, SAM2.1-Base, and SAM2.1-Large), SAM3, and Cellpose-SAM, a widely used foundation model for biological image segmentation included as an off-the-shelf baseline. 

To quantitatively benchmark these segmentation approaches, we curated a held-out test subset comprising 10 SCN images with expert-annotated instance masks ($\approx$2,700 labeled CNO instances in total). Model performance was evaluated using object-level metrics (precision, recall, and F1 score at mask IoU $\ge$ 0.5) and pixel-level metrics. We assessed segmentation accuracy using the Dice Similarity Coefficient (DSC) for spatial overlap, and the Average Symmetric Surface Distance (ASSD) and the 95th percentile Hausdorff Distance (HD95) to quantify boundary error. No additional morphological operations (e.g., opening or closing) were applied, as image-level quality control had already excluded scans with severe artifacts \cite{MaierHein2024}.


\subsection{Morphometric feature extraction and sample-level profiling}
Instance-wise morphometric descriptors were computed from predicted CNO masks using region-based morphological analysis. For each CNO instance, we quantified area, perimeter, eccentricity, solidity (ratio of object area to convex hull area), major and minor axis lengths of the best-fitting ellipse, orientation, and centroid coordinates. From these primary measurements, we derived an aspect ratio as a measure of elongation, defined as
\begin{equation}\label{eq:aspect_ratio}
\text{aspect ratio} = \frac{\text{major axis length}}{\text{minor axis length}}
\end{equation}
and a compactness index (``circularity'') defined as
\begin{equation}\label{eq:circularity}
\text{circularity} = \frac{4\pi\,\text{area}}{\text{perimeter}^2}
\end{equation}
which approaches 1 for a perfect circle. Instances with degenerate geometry (e.g., zero minor axis length or zero perimeter) that yielded undefined shape descriptors were excluded from further analysis.

For subsequent morphometric analysis, we characterized individual CNOs using six geometric descriptors: area, perimeter, eccentricity, solidity, aspect ratio, and circularity. CNO masks were first filtered by area, excluding instances below the 5\textsuperscript{th} percentile or above the 95\textsuperscript{th} percentile of the global area distribution to suppress spurious detections. For each SC tape-strip sample, the remaining per-instance features from all CNOs across the ten fields of view (FOVs) were then pooled and aggregated using the per-feature median to obtain a sample-level morphometric profile. To assess robustness of sample-level profiling to the choice of aggregation statistic, we additionally report mean-aggregated morphometric results in \appendixref{app:mean}. The Wilcoxon rank-sum test \cite{Wilcoxon1945} was used to assess differences between independent sample groups. Samples with missing data were excluded from the analysis.


\section{Results}

\subsection{Detection performance of YOLOv12 variants on SCN images}

Across all YOLOv12 variants, CNO detection on SCN images achieved consistently high performance, with AP@50 exceeding 80\% for every model (\tableref{tab:1}). Increasing model complexity from YOLOv12-N (2.6M parameters, 6.3G FLOPs) to YOLOv12-L (26.4M parameters, 88.5G FLOPs) led to a gradual increase in AP@50--95 from 38.37\% to 41.01\%, indicating improved localization performance under stricter IoU thresholds. Detection performance saturated for the largest variant, with YOLOv12-X incurring substantially higher computational cost without further improvement in AP@50--95. Considering this accuracy-efficiency trade-off, we selected YOLOv12-L as the default detector for generating CNO bounding boxes in the subsequent SAM-based segmentation and morphometric analyses.


\begin{table}[htbp]
\floatconts
  {tab:1}%
  {\caption{Comparison of detection performance and model complexity for YOLOv12 variants on the held-out test set (n = 30 SCN images). Precision, recall, AP@50, and AP@50--95 are reported as mean $\pm$ standard deviation over 10-fold cross-validation.}}%
  {%
    \centering% Explicit centering (optional, as it's default)
    \resizebox{\textwidth}{!}{%
      {\renewcommand{\arraystretch}{1.3}% increase row height
      \begin{tabular}{lcccccc}
      \hline
      \bfseries Model 
        & \bfseries Parameters (M) 
        & \bfseries FLOPs (G) 
        & \bfseries Precision $\uparrow$ (\%) 
        & \bfseries Recall $\uparrow$ (\%) 
        & \bfseries AP@50 $\uparrow$ (\%) 
        & \bfseries AP@50--95 $\uparrow$ (\%) \\
      \hline
      YOLOv12-N & 2.6  & 6.3   & 78.32 $\pm$ 0.56 & 78.04 $\pm$ 0.57 & 81.99 $\pm$ 0.28 & 38.37 $\pm$ 0.35 \\
      YOLOv12-S & 9.3  & 21.2  & 79.12 $\pm$ 0.42 & 78.70 $\pm$ 0.41 & 82.77 $\pm$ 0.23 & 39.93 $\pm$ 0.25 \\
      YOLOv12-M & 20.2 & 67.1  & 79.60 $\pm$ 0.43 & 78.10 $\pm$ 0.43 & 82.79 $\pm$ 0.49 & 40.46 $\pm$ 0.33 \\
      YOLOv12-L & 26.4 & 88.5  & 79.77 $\pm$ 0.47 & 78.26 $\pm$ 0.52 & 82.90 $\pm$ 0.49 & 41.01 $\pm$ 0.32 \\
      YOLOv12-X & 59.1 & 198.5 & 79.73 $\pm$ 0.29 & 77.51 $\pm$ 0.68 & 82.23 $\pm$ 0.44 & 40.70 $\pm$ 0.28 \\
      \hline
      \end{tabular}%
      }% end renewcommand scope
    }%
  }%
\end{table}


\begin{table}[ht!]
\floatconts
  {tab:2}%
  {\caption{Comparison of segmentation performance and model complexity for Cellpose-SAM, SAM2 variants, and SAM3 on the held-out test subset (n = 10 SCN images). Models were evaluated using object-level metrics (mask IoU $\geq$ 0.5) alongside pixel-level overlap and boundary metrics.}}%
  {%
    \centering
    \resizebox{\textwidth}{!}{%
      {\renewcommand{\arraystretch}{1.3}% increase row height
      \begin{tabular}{lccccccc}
      \hline
      \rule{0pt}{2ex} & & \multicolumn{3}{c}{\bfseries Object-Level Metrics} & \multicolumn{3}{c}{\bfseries Pixel-Level Metrics} \\
      \cmidrule(lr){3-5} \cmidrule(lr){6-8}
      \bfseries Model 
        & \bfseries \shortstack{Parameters (M)}
        & \bfseries Precision $\uparrow$ (\%)
        & \bfseries Recall $\uparrow$ (\%)
        & \bfseries F1 $\uparrow$ (\%)
        & \bfseries DSC $\uparrow$ (\%)
        & \bfseries ASSD $\downarrow$ (pixels)
        & \bfseries HD95 $\downarrow$ (pixels) \\
      \hline
      Cellpose-SAM & -     & 41.34 & 40.50 & 40.91 & 82.34 & 0.77 & 1.91 \\
      SAM2.1-Tiny  & 38.9  & 78.82 & 75.55 & 77.15 & 81.32 & 0.87 & 1.96 \\
      SAM2.1-Small & 46.0  & 80.29 & 76.96 & 78.59 & 81.86 & 0.84 & 1.90 \\
      SAM2.1-Base  & 80.8  & 78.36 & 75.11 & 76.70 & 81.53 & 0.86 & 1.96 \\
      SAM2.1-Large & 224.4 & 76.85 & 73.67 & 75.23 & 81.02 & 0.88 & 2.04 \\
      SAM3         & 84.8  & 82.22 & 79.33 & 80.75 & 83.07 & 0.76 & 1.75 \\
      \hline
      \end{tabular}%
      }% end renewcommand scope
    }%
  }%
\end{table}

\subsection{Segmentation performance of SAM-based models}

\subsubsection{Pixel-level benchmark of CNO instance segmentation}

Using YOLOv12-L detections as box prompts, we evaluated the CNO segmentation performance of SAM2 variants and SAM3 on the held-out test subset (\tableref{tab:2}), and included the off-the-shelf Cellpose-SAM baseline for comparison. Among the promptable models, SAM3 achieved the best overall performance, attaining the highest object-level F1 score (80.75\%) and pixel-level DSC (83.07\%). It also yielded the most accurate boundary delineation, with the lowest boundary errors (ASSD = 0.76 pixels; HD95 = 1.75 pixels). The SAM2 variants showed comparable performance across both object- and pixel-level metrics. Notably, SAM2.1-Small was the best-performing SAM2 variant (F1 = 78.59\%; DSC = 81.86\%), while performance declined marginally with larger architectures.

In contrast, Cellpose-SAM exhibited substantially lower object-level performance (F1 = 40.91\%), indicating limited ability to distinguish individual instances. Nevertheless, its pixel-level overlap remained competitive (DSC = 82.34\%), with boundary errors comparable to the top-performing models (ASSD = 0.77 pixels; HD95 = 1.91 pixels). These quantitative results are consistent with the qualitative examples in \figureref{fig:2}, in which Cellpose-SAM exhibits frequent false detections and notable underdetection of CNOs, whereas SAM-based models generate contours that more closely align with the ground-truth annotations.


\subsubsection{Effect of prompt quality on SAM3 segmentation performance}

To assess SAM3's sensitivity to prompt quality, we generated box prompts using YOLOv12 detectors of varying scales (N, S, M, L, X) and evaluated the resulting segmentation performance on the held-out test subset (\tableref{tab:3}). Overall, segmentation accuracy tended to improve with detector capacity up to YOLOv12-L, which achieved the highest precision and F1 score as well as the best pixel-level metrics, whereas recall was highest with YOLOv12-N (81.07\%) and performance did not improve further with YOLOv12-X. However, increased detection accuracy resulted in only marginal reductions in boundary error, as ASSD remained within a narrow range (0.76--0.81 pixels) despite object-level F1 scores varying from 75.33\% to 80.75\%. This indicates that while robust prompts are crucial for accurate object localization, the intrinsic boundary delineation capability of SAM3 remains largely invariant to minor variations in prompt precision.


\begin{figure}[h!]
    \centering
    \includegraphics[width=1\textwidth]{figures/Fig2.pdf}
    \caption{Qualitative comparison of CNO segmentation by Cellpose-SAM, SAM2 variants, and SAM3. Representative SCN images are shown with model predictions overlaid on ground-truth masks (black contours). Green contours denote true-positive segmentations (mask IoU $\geq$ 0.5), whereas red contours indicate false-positive predictions (mask IoU $<$ 0.5 or unmatched detections).}
    \label{fig:2}
\end{figure}



\begin{table}[ht!]
\floatconts
  {tab:3}%
  {\caption{Evaluation of SAM3 segmentation performance using box prompts generated by YOLOv12 detectors of varying scales (N, S, M, L, X). Performance is reported using object-level metrics (mask IoU $\geq$ 0.5) alongside pixel-level overlap and boundary metrics.}}%
  {%
    \centering
    \resizebox{\textwidth}{!}{%
      {\renewcommand{\arraystretch}{1.3}% increase row height
      % CHANGED: Reduced to 7 columns (l + 6 c's)
      \begin{tabular}{lcccccc}
      \hline
      \rule{0pt}{2ex} & \multicolumn{3}{c}{\bfseries Object-Level Metrics} & \multicolumn{3}{c}{\bfseries Pixel-Level Metrics} \\
      % CHANGED: cmidrules now span 2-4 and 5-7 correctly
      \cmidrule(lr){2-4} \cmidrule(lr){5-7}
      \bfseries Model (+SAM3) 
        & \bfseries Precision $\uparrow$ (\%)
        & \bfseries Recall $\uparrow$ (\%)
        & \bfseries F1 $\uparrow$ (\%)
        & \bfseries DSC $\uparrow$ (\%)
        & \bfseries ASSD $\downarrow$ (pixels)
        & \bfseries HD95 $\downarrow$ (pixels) \\
      \hline
      YOLOv12-N & 70.35 & 81.07 & 75.33 & 82.06 & 0.81 & 1.88 \\
      YOLOv12-S & 73.84 & 79.14 & 76.40 & 82.06 & 0.81 & 1.87 \\
      YOLOv12-M & 77.32 & 76.04 & 76.67 & 82.30 & 0.80 & 1.84 \\
      YOLOv12-L & 82.22 & 79.33 & 80.75 & 83.07 & 0.76 & 1.75 \\
      YOLOv12-X & 79.00 & 75.26 & 77.08 & 82.63 & 0.78 & 1.83 \\
      \hline
      \end{tabular}%
      }% end renewcommand scope
    }%
  }%
\end{table}


\subsection{SCN morphometric analysis of healthy, non-lesional AD, and lesional AD skin}

We assessed the association between SCN-derived morphometric profiles and clinical disease status by comparing three sample types within each center: healthy control skin, clinically non-lesional skin from patients with AD, and lesional AD skin. For this analysis, AD samples were pooled across mild, moderate, and severe disease (\figureref{fig:3}). 

In both the Taiwanese and Danish AD cohorts, the shape-related descriptors (eccentricity, solidity, circularity, and aspect ratio) exhibited consistent and statistically significant differences across the three sample types. Eccentricity and aspect ratio increased from healthy control skin to non-lesional AD skin and were highest in lesional AD skin, indicating that CNOs became progressively more elongated with increasing disease involvement. Conversely, solidity and circularity decreased along the same gradient, reflecting a shift from compact, nearly circular CNOs in healthy skin toward less compact, more irregular structures in diseased skin. Differences in area and perimeter were modest and did not reach statistical significance across sample types.

Collectively, these center-specific analyses demonstrate that segmentation-based SCN morphometric profiles capture consistent, directional alterations in CNO geometry associated with disease involvement across geographically and ethnically distinct cohorts. Elongation and loss of compactness of CNOs are already detectable in clinically non-lesional AD skin and become further accentuated in lesional sites, in line with the progressive gradients observed in the established count-based metric, the Effective Corneocyte Topographical Index (ECTI) \cite{Wang2024}.


\subsection{SCN morphometric gradients across AD severity at clinically non-lesional sites}

We evaluated whether SCN-derived morphometric profiles capture graded differences in disease severity at clinically non-lesional sites. Within each center, four clinical severity groups were compared: healthy controls and patients with mild, moderate, or severe AD (\figureref{fig:4}).

In the Taiwanese cohort, the shape-related descriptors (eccentricity, solidity, circularity, and aspect ratio) exhibited statistically significant differences between healthy controls and AD severity groups, with most metrics demonstrating graded shifts across mild, moderate, and severe disease. Consistently, the Danish cohort showed a similar directional pattern, characterized by progressive elongation and reduced compactness of CNOs with increasing AD severity, albeit with smaller effect sizes and fewer statistically significant differences between healthy controls and the individual AD severity groups.

Across centers, the circularity of CNOs emerged as the most consistent discriminator, exhibiting significant differences between healthy controls and AD severity groups in both cohorts, whereas area and perimeter showed no clear association with disease severity. Notably, severe AD samples tended to contain a higher number of detected CNOs, which may have contributed to the increased variability observed in their morphometric profiles. These results indicate that segmentation-based SCN morphometric profiling aligns closely with the established count-based ECTI metric across two geographically distinct centers.

\begin{figure}[htp!]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:3}
  {\caption{Center-specific analysis of SCN morphometry across healthy control skin, clinically non-lesional AD skin, and lesional AD skin in (a) Taiwanese and (b) Danish cohorts. For each cohort, the segmentation-based analysis (left) presents sample-level boxplots of CNO eccentricity, solidity, circularity, area, perimeter, and aspect ratio derived from SAM3-based instance segmentations. The count-based analysis (right) shows boxplots of the Effective Corneocyte Topographical Index (ECTI). Boxplot notation: *p $<$ 0.05, **p $<$ 0.01, ***p $<$ 0.001, ****p $<$ 0.0001; ns, not significant.}}
  {\includegraphics[width=1.02\linewidth]{figures/Fig3.pdf}}
\end{figure}

\begin{figure}[htbp]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:4}
  {\caption{Center-specific analysis of SCN morphometry at clinically non-lesional sites in healthy controls and patients with mild, moderate, or severe AD in (a) Taiwanese and (b) Danish cohorts. For each cohort, the segmentation-based analysis (left) presents sample-level boxplots of CNO eccentricity, solidity, circularity, area, perimeter, and aspect ratio derived from SAM3-based instance segmentations. The count-based analysis (right) shows boxplots of the Effective Corneocyte Topographical Index (ECTI). Boxplot notation: *p $<$ 0.05, **p $<$ 0.01, ***p $<$ 0.001, ****p $<$ 0.0001; ns, not significant.}}
  {\includegraphics[width=1.02\linewidth]{figures/Fig4.pdf}}
\end{figure}


\subsection{SCN morphometry across Fitzpatrick skin phototypes}

We evaluated the influence of skin phototype on SCN morphometry by examining sample-level morphometric profiles stratified by Fitzpatrick type across healthy controls, clinically non-lesional AD skin, and lesional AD skin within each center (\figureref{fig:5}). For the stratified analyses, we restricted the Taiwanese cohort to phototypes III--V and the Danish cohort to phototypes II--IV (\tableref{tab:4}). Phototypes I and II in Taiwan and phototypes I and V in Denmark were either absent or represented by only 1--2 participants per clinical group, thereby precluding reliable estimation of phototype-specific differences.

Across the retained phototypes, both cohorts exhibited similar disease-related gradients in most morphometric descriptors. Eccentricity and aspect ratio increased from healthy controls to clinically non-lesional and lesional AD skin, whereas solidity and circularity decreased. Changes in area and perimeter were minimal. For a given sample type, mean morphometric values were comparable across Fitzpatrick types, and no consistent monotonic trends with increasing phototype were observed. These findings indicate that, within the represented phototype range, SCN morphometry is driven primarily by clinical disease status rather than skin phototype.


\begin{table}[hb!]
\floatconts
  {tab:4}%
  {\caption{Distribution of Fitzpatrick skin phototypes (Types I--V) among healthy controls and patients with AD in the Taiwanese and Danish cohorts. Values indicate the number of participants per center, clinical group, and skin phototype.}}%
  {%
    \centering
    \resizebox{0.9\textwidth}{!}{%
      {\renewcommand{\arraystretch}{1.2}% increase row height
      \begin{tabular}{lcccccc}
      \hline
      \rule{0pt}{2ex}\bfseries Center / Clinical group
        & \multicolumn{5}{c}{\bfseries Fitzpatrick skin phototype}
        & \bfseries Total \\
      \cline{2-6}
        & \bfseries Type I
        & \bfseries Type II
        & \bfseries Type III
        & \bfseries Type IV
        & \bfseries Type V
        & \\
      \hline
      \multicolumn{7}{l}{\textit{Taiwan}} \\
      \quad Healthy controls   & 0 & 2 & 4  & 9  & 0  & 15 \\
      \quad Patients with AD   & 0 & 2 & 12 & 15 & 16 & 45 \\
      \hline
      \multicolumn{7}{l}{\textit{Denmark}} \\
      \quad Healthy controls   & 1 & 5 & 5  & 4  & 0  & 15 \\
      \quad Patients with AD   & 1 & 16 & 16 & 11 & 1  & 45 \\
      \hline
      \end{tabular}%
      }% end renewcommand scope
    }%
  }%
\end{table}

\begin{figure}[htbp!]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:5}
  {\caption{SCN morphometry stratified by Fitzpatrick skin phototype in (a) Taiwanese and (b) Danish cohorts. For each cohort, bar plots show sample-level mean values of CNO eccentricity, solidity, circularity, area, perimeter, and aspect ratio for healthy controls, clinically non-lesional AD skin, and lesional AD skin across the retained Fitzpatrick types (III--V for Taiwan; II--IV for Denmark). Bars indicate group means and error bars denote standard errors.}}
  {\includegraphics[width=1\linewidth]{figures/Fig5.pdf}}
\end{figure}

\newpage

\section{Discussion}

In this multicenter study, we demonstrate that segmentation-based SCN analysis provides robust, clinically meaningful morphometric descriptors of CNO geometry that are reproducible across centers and disease severities and appear largely independent of Fitzpatrick skin phototype within the represented range. Building on prior work that quantified CNO density using count-based metrics, we show that instance-level segmentation with SAM3 enables the extraction of complementary morphometric features that capture subtle alterations in corneocyte surface architecture associated with AD. Across the Taiwanese and Danish cohorts, shape-related descriptors (eccentricity, solidity, circularity, and aspect ratio) exhibited consistent gradients from healthy control skin to non-lesional and lesional AD skin, and from mild to severe disease at clinically non-lesional sites, whereas size-related metrics (area and perimeter) contributed limited discriminative value. Overall, the findings indicate that SCN morphometry can serve as an objective, quantitative biomarker of skin barrier impairment.

The observed pattern of progressive CNO elongation and loss of compactness from healthy to clinically non-lesional and lesional AD skin indicates that segmentation-based SCN morphometry is sensitive to subtle disease-related alterations that may not be apparent on routine clinical examination. The finding that SCN morphometric profiles in non-lesional AD skin are intermediate between those of healthy and lesional AD skin supports the presence of subclinical barrier abnormalities in clinically normal-appearing skin. These gradients were consistent across centers and closely aligned with the established ECTI metric, indicating that SCN morphometry provides complementary information to count-based measures and enhances the characterization of barrier involvement across the AD severity spectrum.

Methodologically, the proposed YOLOv12-SAM3 pipeline advances SCN analysis beyond heuristic, count-based approaches by establishing a fully automated, instance-level segmentation framework based on modern deep learning. YOLOv12-L achieved a high AP@50 of approximately 82.9\% for CNO detection, providing accurate and computationally efficient bounding boxes as prompts for SAM3. This two-stage pipeline outperformed the off-the-shelf Cellpose-SAM baseline on the pixel-level segmentation benchmark, achieving a DSC of 83.07\% with sub-pixel boundary accuracy (ASSD = 0.76 pixels). The subsequent extraction of region-based morphometric descriptors transforms SCN from a single scalar measure (e.g., CNO counts or ECTI) into a multivariate profile that can be analyzed at both the instance and sample level, enabling more nuanced characterization of barrier-related alterations and facilitating integration into downstream statistical and machine learning models.

Several limitations should be acknowledged. First, all participants were adults recruited from two medical centers, and sampling was restricted to a single skin site, which limits generalizability to pediatric populations, other anatomical regions, and additional ethnic groups. Second, the proposed segmentation pipeline was trained and evaluated on a curated set of expert-annotated images and relied on manual quality control to exclude scans with severe artifacts. Although this reflects current best practice in SCN image analysis, translation into routine clinical workflows will require further refinement, including automated image quality assessment, reduced reliance on manual annotation, and standardized image acquisition and analysis protocols. Third, this study focused on a limited set of handcrafted features and sample-level aggregation. More sophisticated modeling of SCN morphometric distributions, spatial organization, and multiscale texture may further enhance the clinical utility and robustness of SCN-derived biomarkers for specific clinical endpoints. Future work may benefit from longitudinal studies integrating SCN morphometrics, clinical severity scores, and biochemical markers (e.g., NMF levels) to determine whether CNO geometry can serve not only as a cross-sectional biomarker of current barrier impairment but also as a predictor of disease trajectory and therapeutic response.


\section{Conclusion}

This multicenter study demonstrates that segmentation-based analysis of SCN provides a robust and scalable framework for quantitative skin barrier assessment in AD, complementing established count-based metrics such as ECTI. By integrating the YOLOv12-SAM3 segmentation pipeline with downstream morphometric profiling, we show that geometric descriptors of CNOs sensitively capture subclinical barrier impairment at clinically non-lesional sites across two geographically and ethnically distinct centers, while remaining largely independent of Fitzpatrick skin phototype within the examined range. These findings support SCN morphometry as a non-invasive, objective biomarker for quantitative skin barrier assessment and establish a methodological foundation for future applications in longitudinal monitoring, treatment response assessment, and extension to other inflammatory or barrier-related skin diseases.


\clearpage  % Acknowledgements, references, and appendix do not count toward the page limit (if any)
% Acknowledgments---Will not appear in anonymized version
\midlacknowledgments{This work was supported by the LEO Foundation (LF-OC-20-000370 and LF-OC-24-001760), the Novo Nordisk Foundation (NNF22OC0076607), the National Science and Technology Council of Taiwan (NSTC 112-2314-B-002-074-MY3), and the Chan Zuckerberg Initiative grant (DAF2021-225261, \url{https://doi.org/10.37921/644085ggkbos}), an advised fund of Silicon Valley Community Foundation (funder \url{https://doi.org/10.13039/100014989}). We thank all study participants and collaborating clinical staff for their contributions to this work.}



\bibliography{midl26_23}

\newpage
\appendix
\setcounter{table}{0}
\renewcommand{\thetable}{\Roman{table}}
\setcounter{figure}{0}          
\renewcommand{\thefigure}{\Roman{figure}}

\section{YOLOv12 training and augmentation hyperparameters}\label{app:yolo}

\tableref{tab:yolo-hparams} summarizes the training configuration, loss weights, and data augmentation settings used for the YOLOv12-N/S/M/L/X models in our CNO detection pipeline, following the hyperparameter configuration described in the YOLOv12 paper~\citep{tian2025yolov12}. We adopted an SGD optimizer for 600 epochs with momentum 0.937, weight decay $5 \times 10^{-4}$, and a learning rate that decays linearly from $10^{-2}$ to $10^{-4}$. The loss terms are weighted according to the default YOLOv12 configuration, and data augmentation is implemented using the Albumentations library~\citep{Buslaev2020} with Mosaic, Mixup, copy-paste, and additional color and geometric jitter.

\newpage

\begin{table}[ht!]
\floatconts
  {tab:yolo-hparams}%
  {\caption{Training configuration and data augmentation hyperparameters for the YOLOv12-N/S/M/L/X models used for CNO detection in SCN images.}}%
  {%
    \centering
    \resizebox{0.7\textwidth}{!}{%
      {\renewcommand{\arraystretch}{1.2}%
      \begin{tabular}{lc}
      \hline
      \rule{0pt}{2ex}\bfseries Hyperparameters
        & \bfseries YOLOv12-N/S/M/L/X \\
      \hline
      \multicolumn{2}{l}{\textit{Training Configuration}} \\
      \quad Epochs                      & 600 \\
      \quad Optimizer                   & SGD \\
      \quad Momentum                    & 0.937 \\
      \quad Batch size                  & 16 \\
      \quad Weight decay                & $5 \times 10^{-4}$ \\
      \quad Warm-up epochs              & 3 \\
      \quad Warm-up momentum            & 0.8 \\
      \quad Warm-up bias learning rate  & 0.0 \\
      \quad Initial learning rate       & $10^{-2}$ \\
      \quad Final learning rate         & $10^{-4}$ \\
      \quad Learning rate schedule      & Linear decay \\
      \hline
      \multicolumn{2}{l}{\textit{Loss Parameters}} \\
      \quad Box loss gain               & 7.5 \\
      \quad Class loss gain             & 0.5 \\
      \quad DFL loss gain               & 1.5 \\
      \hline
      \multicolumn{2}{l}{\textit{Augmentation Parameters}} \\
      \quad HSV saturation augmentation & 0.7 \\
      \quad HSV value augmentation      & 0.4 \\
      \quad HSV hue augmentation        & 0.015 \\
      \quad Translation augmentation    & 0.1 \\
      \quad Scale augmentation          & 0.5/0.9/0.9/0.9/0.9 \\
      \quad Mosaic augmentation         & 1.0 \\
      \quad Mixup augmentation          & 0.0/0.05/0.15/0.15/0.2 \\
      \quad Copy-paste augmentation     & 0.1/0.15/0.4/0.5/0.6 \\
      \quad Close mosaic epochs         & 10 \\
      \hline
      \end{tabular}%
      }%
    }%
  }%
\end{table}

\newpage

\section{Mean-based morphometric analysis}\label{app:mean}

To verify that the observed morphometric trends are robust and not artifacts of specific statistical descriptors, we performed a sensitivity analysis by employing mean-based aggregation for sample-level profiling. The alternative approach reproduced the same directional differences and statistical significance patterns as the median-based analysis across both cohorts, indicating that the study's conclusions are insensitive to the choice of aggregation method. \figureref{fig:a1} and \figureref{fig:a2} present the mean-aggregated morphometric results. \figureref{fig:a1} compares disease status across healthy control skin, clinically non-lesional AD skin, and lesional AD skin, while \figureref{fig:a2} shows severity-stratified comparisons at clinically non-lesional sites.


\begin{figure}[hb!]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:a1}
  {\caption{Mean-based aggregation of SCN morphometry by disease status across healthy control skin, clinically non-lesional AD skin, and lesional AD skin in (a) Taiwanese and (b) Danish cohorts. Boxplot notation: *p $<$ 0.05, **p $<$ 0.01, ***p $<$ 0.001, ****p $<$ 0.0001; ns, not significant.}}
  {\includegraphics[width=.95\linewidth]{figures/FigA1.pdf}}
\end{figure}

\begin{figure}[hb!]
 % Caption and label go in the first argument and the figure contents
 % go in the second argument
\floatconts
  {fig:a2}
  {\caption{Mean-based aggregation of SCN morphometry at clinically non-lesional sites in healthy controls and patients with mild, moderate, or severe AD in (a) Taiwanese and (b) Danish cohorts. Boxplot notation: *p $<$ 0.05, **p $<$ 0.01, ***p $<$ 0.001, ****p $<$ 0.0001; ns, not significant.}}
  {\includegraphics[width=.95\linewidth]{figures/FigA2.pdf}}
\end{figure}

\end{document}


