\documentclass{midl} % Include author names
\usepackage[dvipsnames]{xcolor}
\usepackage{comment}
\usepackage{mwe} % to get dummy images
\usepackage{multirow}
\usepackage{array}
\usepackage{caption}
\usepackage{arydshln} 
\usepackage{xcolor}
\usepackage{algorithm,algpseudocode}
\usepackage{algorithmicx}
\usepackage{algpseudocode}
\usepackage{minted}
\usepackage{float}
\usepackage{enumerate}
\usemintedstyle{colorful}
% Global minted defaults applied to every code listing in the document.
% Keys are grouped by purpose; each key keeps the value it had before.
\setminted{
  % -- font and syntax highlighting
  fontsize=\footnotesize,
  funcnamehighlighting=true,
  % -- line wrapping and page breaking
  breaklines=true,
  breakanywhere=true,
  samepage=false,
  % -- line numbering
  linenos,
  numberblanklines=true,
  numbersep=12pt,
  % -- frame and margin geometry
  framesep=2mm,
  xleftmargin=8pt,
  % -- tab and whitespace handling
  tabsize=4,
  obeytabs=true,
  showspaces=false,
  showtabs=false,
  % -- LaTeX escapes inside code are switched off
  mathescape=false,
  texcl=false,
  % -- optional grey background, currently disabled
  %bgcolor=lightgray,
}
% Float type `listing` for code listings: default placement `thp`, auxiliary
% list-file extension `.lop`, caption label "Code Listing".
% NOTE(review): minted normally provides its own `listing` float -- confirm that
% redeclaring it here is intended and does not clash with the loaded version.
\newfloat{listing}{thp}{lop}
\floatname{listing}{Code Listing}
% Named colour `darkorange` (HTML FA6800). No use is visible in this file;
% presumably it is referenced from the \input sections -- TODO confirm.
\definecolor{darkorange}{HTML}{FA6800}

\jmlrvolume{-- 256}
\jmlryear{2026}
\jmlrworkshop{Full Paper -- MIDL 2026 }
\editors{Accepted for publication at MIDL 2026}
\title[PIKACHU]{PIKACHU: Prototypical In-context Knowledge Adaptation for Clinical Heterogeneous Usage}
\midlauthor{\Name{Amar Kumar\nametag{$^{1,2}$}} \Email{amar.kumar@mail.mcgill.ca}\AND
\Name{Zahra TehraniNasab\nametag{$^{1,2}$}} \Email{zahra.tehraninasab@mail.mcgill.ca}\AND
\Name{Emily Kaczmarek\nametag{$^{1,2}$}} \Email{emily.kaczmarek@mail.mcgill.ca}\AND
\Name{Tal Arbel\nametag{$^{1,2}$}} \Email{tal.arbel@mcgill.ca}\\
\addr $^{1}$ Centre for Intelligent Machines, McGill University, Montreal, Canada. \\
\addr $^{2}$ Mila -- Quebec AI Institute, Montreal, Canada. }

% Typeset name of the proposed method (italic "PIKACHU").
% In running text follow it with `\ ` (as in `\ourmethod\ performs`) so the
% inter-word space is not swallowed after the control word.
\newcommand*{\ourmethod}{\textit{PIKACHU}}

\begin{document}

\maketitle

\begin{abstract}

Medical imaging systems increasingly rely on large vision-language foundation models (VLFMs) trained on diverse biomedical corpora, yet these models remain difficult to adapt to new clinical tasks without costly fine-tuning and large annotated datasets. We present \textbf{\ourmethod} (\textit{Prototypical In-Context Knowledge Adaptation for Clinical Heterogeneous Usage}), a lightweight and generalizable framework that enables rapid few-shot adaptation of frozen medical foundation models (FMs) using only a handful of labeled examples. Unlike prior approaches that modify backbone weights or introduce heavy attention-based adapters, \ourmethod\ performs all task adaptation directly in the FM feature space through \emph{in-context prototypical reasoning}. Given a small support set, the framework constructs class prototypes by averaging normalized embeddings from a frozen VLFM image encoder and performs prediction on query images using temperature-scaled cosine similarity. Only a single temperature parameter is learned. We evaluate \ourmethod\ across three heterogeneous medical imaging datasets---dermatological images (ISIC), Optical Coherence Tomography (OCT), and Diabetic Retinopathy (DR)---using established vision models (SigLIP, PubMedCLIP, DINOv2, and ViT) as backbones. The proposed in-context learning (ICL) strategy consistently outperforms the baseline (zero-shot) approaches across all datasets and architectures, achieving substantial improvements in both accuracy and AUC. Notably, with PubMedCLIP as the backbone, \ourmethod\ achieves 0.69 accuracy on the ISIC dataset, 0.72 on OCT, and 0.79 on DR, demonstrating robust generalization across diverse clinical imaging modalities. These results highlight the promise of feature-space in-context learning as an efficient and deployable paradigm for test-time adaptation of foundation models, without the need for extensive retraining. To facilitate broader adoption and research, we make
our code publicly available at \url{https://github.com/Amarkr1/pikachu}.
\end{abstract}

\begin{keywords}
Foundation Models, In-Context Learning, Large Language Models.
\end{keywords}

\input{sections/1_introduction}
\input{sections/2_methodology_edits}
\input{sections/3_experiments}

\section{Conclusion}
In this work, we presented \ourmethod, a lightweight and universal in-context learning framework for medical image classification. By operating entirely in the feature space of frozen foundation models and leveraging a simple yet effective prototypical reasoning mechanism, \ourmethod\ enables rapid adaptation to new clinical tasks using only a few labeled examples. Our method requires no fine-tuning, no retraining, and only a single learned temperature parameter, making it computationally efficient and easy to deploy across heterogeneous clinical environments. Experiments across multiple backbones, including PubMedCLIP, SigLIP, DINOv2, and ViT, demonstrate that incorporating a small support set of just five samples per class consistently improves performance over standard zero-shot or frozen-feature baselines. These findings highlight the potential of in-context learning as a practical and scalable pathway for adapting foundation models to the varied and evolving demands of real-world medical imaging workflows.

While \ourmethod\ offers a simple and effective solution for few-shot medical image classification, several promising directions remain for future research. First, extending in-context adaptation to more complex clinical tasks such as multi-label disease prediction, longitudinal progression modeling, or structured report generation may further expand the applicability of this framework. Second, exploring richer task representations, including textual or multimodal prompts, may enhance prototype quality and improve robustness under severe distribution shifts. Third, integrating uncertainty estimation or confidence calibration into the ICL pipeline could make the method more suitable for safety-critical scenarios. Finally, evaluating \ourmethod\ across larger-scale hospital datasets and under real-world operational constraints would provide deeper insight into its practical utility. Together, these directions offer a path toward more adaptive, reliable, and generalizable AI systems for clinical imaging.

\clearpage 
\section*{Acknowledgements}
The authors are grateful for funding provided by the Natural Sciences and Engineering Research Council of Canada, the Canadian Institute for Advanced Research (CIFAR) Artificial Intelligence Chairs program, Mila -- Quebec AI Institute, Google Research, Calcul Québec, Fonds de recherche du Québec (FRQNT), the Digital Research Alliance of Canada, and the Vadasz Scholar McGill Engineering Doctoral Award.

\bibliography{midl26_256}
% Flush pending floats and start the appendices on a fresh page.
% NOTE(review): the \newpage directly after \clearpage appears redundant -- confirm before removing.
\clearpage
\newpage
% Everything below is appendix material.
\appendix
\input{sections/algo_appendix}
% NOTE(review): `ablation_appendix` and `ablations_appendix` differ only by a
% trailing "s" -- confirm both files exist and that neither is a stale duplicate.
\input{sections/ablation_appendix}
\input{sections/ablations_appendix}
% Limitations appendix is currently excluded from the build.
%\input{sections/limitations_appendix}

\end{document}