\documentclass[
]{ceurart}

\sloppy

\usepackage{listings}
\lstset{breaklines=true}

\usepackage{booktabs}
\usepackage{float}
\begin{document}




\copyrightyear{2026}
\copyrightclause{Copyright for this paper by its authors.
  Use permitted under Creative Commons License Attribution 4.0
  International (CC BY 4.0).}

\conference{GenAIK-NORA 2026: Workshop on Generative AI and Knowledge
  Graphs \& Knowledge Graphs and Agentic Systems Interplay,
  co-located with IJCAI-ECAI 2026, August 15--17, 2026, Bremen, Germany}

\title{KoRe: Compact Knowledge Representations for Large Language Models}


\author[1]{Davide Cavicchini}[%
orcid=0009-0005-9662-8496,
email=davide.cavicchini@unitn.it,
]
\cormark[1]
\address[1]{University of Trento, Via Calepina, 14, 38122 Trento TN, Italy}

\author[1]{Fausto Giunchiglia}[%
orcid=0000-0002-5903-6150,
email=fausto.giunchiglia@unitn.it,
]

\author[1]{Jacopo Staiano}[%
orcid=0000-0002-1260-4640,
email=jacopo.staiano@unitn.it,
]

\cortext[1]{Corresponding author.}


\begin{abstract}
Modern Large Language Models (LLMs) have shown impressive performance in user-facing tasks such as question answering, as well as consistent improvements in reasoning capabilities.
Still, the way these models encode knowledge seems inherently flawed: by design, LLMs encode world-knowledge within their parameters. This way of representing knowledge is inherently opaque, difficult to debug and update, and prone to hallucinations.
On the other hand, Knowledge Graphs can provide human-readable and easily editable world knowledge representations, and their application in knowledge-intensive tasks has consistently proven beneficial to downstream performance.
Nonetheless, current integration techniques require extensive retraining or finetuning. 
To overcome this issue, we introduce KoRe, a methodology to encode 1-hop sub-graphs into compact discrete knowledge tokens and inject them into an LLM backbone. We test the proposed approach on three established benchmarks, and report competitive performance coupled with a significant reduction (up to 10$\times$) in token usage.
Our results show that compact discrete KG representations can efficiently and effectively be used to ground modern LLMs.

The code will be available at \url{https://github.com/DavidC001/KoRe}.
\end{abstract}

\begin{keywords}
  Large Language Models \sep
  Knowledge Graph \sep
  Token Embedding \sep
  Graph Neural Networks \sep
  Vector Quantization
\end{keywords}

\maketitle

\section{Introduction}
\label{sec:intro}
\input{sections/Introduction}


\input{sections/Related_work}

\input{sections/methodology}


\section{Results and Discussion}
\label{sec:results}
\input{sections/results}

\section{Conclusion}
\label{sec:conclusions}
\input{sections/conclusions}


\section{Limitations}
\label{sec:limitations}
\input{sections/limitations}










\begin{acknowledgments}
    We acknowledge ISCRA for awarding this project access to the LEONARDO supercomputer, owned by the EuroHPC Joint Undertaking, hosted by CINECA (Italy).

    The work described in this presentation has been conducted within the project TRUMAN. The research leading to these results has received funding from HORIZON-CL4-2024-HUMAN-03, under Grant Agreement No.~101214000.
  
    Thanks to the developers of ACM consolidated LaTeX styles \url{https://github.com/borisveytsman/acmart} and to the developers of Elsevier updated \LaTeX{} templates \url{https://www.ctan.org/tex-archive/macros/latex/contrib/els-cas-templates}.  
\end{acknowledgments}

\section*{Declaration on Generative AI}
 During the preparation of this work, the author(s) used Grammarly to: Grammar and spelling check. 
 Additionally, the OpenWebUI interface with Qwen3.6 and Gemma4 models was used for content enhancement and improving writing style.
 After using these tool(s)/service(s), the author(s) reviewed and edited the content as needed and take(s) full responsibility for the publication’s content. 

\bibliography{sample-ceur}


\appendix

\section{Ablation}
\label{app:ablations}
To define the best parameters for our architecture, we performed an ablation study on the parameters related to KG compression.
These models were trained exclusively on Tri-REx and tested on both the Tri-REx and SimpleQuestions validation splits.

\subsection{Codebook size}
We investigate codebook sizes of $\{64, 128, 256, 1024\}$ entries while fixing the quantizer depth at $Q{=}10$ to isolate the impact of vocabulary size.
Table~\ref{tab:tri_rex_valid_val_control_Codebook_q_10_0_ema_default_lora_kq} and Table~\ref{tab:simplequestions_valid_val_control_Codebook_q_10_0_ema_default_lora_kq} report these controlled comparisons.
On the original Tri-REx dataset, the model benefited from larger codebooks, but this came at the cost of both codebook utilization and performance on the out-of-distribution SimpleQuestions benchmark.
These observations guided our later experiments and led us to choose a codebook size of $128$.

\begin{table}[h]
\centering
\caption{Controlled comparison on Tri-REx (valid): vary Codebook; hold constant Q=10.0, $N_{dead}$=2.}
\label{tab:tri_rex_valid_val_control_Codebook_q_10_0_ema_default_lora_kq}
\begin{tabular}{l|cccc|c}
\hline
Codebook Size & Hit@1 & Hit@3 & Hit@5 & Hit@10 & Code Util \\
\hline
64 & 30.7 & 48.3 & 55.7 & 63.8 & \textit{99.7} \\
128 & 31.7 & 48.8 & 56.2 & 64.2 & 78.6 \\
256 & 31.9 & 49.8 & 57.0 & 65.0 & 49.8 \\
1024 & \textit{32.5} & \textit{50.1} & \textit{57.5} & \textit{65.4}  & 16.6\\
\hline
\end{tabular}
\end{table}

\begin{table}[h]
\centering
\caption{Controlled comparison on SimpleQuestions (valid): vary Codebook; hold constant Q=10.0, $N_{dead}$=2.}
\label{tab:simplequestions_valid_val_control_Codebook_q_10_0_ema_default_lora_kq}
\begin{tabular}{l|cccc}
\hline
Codebook Size & Hit@1 & Hit@3 & Hit@5 & Hit@10 \\
\hline
64 & 10.8 & 26.2 & 32.2 & 40.7  \\
128 & \textit{28.9} & \textit{43.8} & \textit{49.2} & \textit{56.1}  \\
256 & 18.2 & 35.8 & 41.6 & 47.5  \\
1024 & 14.3 & 28.1 & 33.0 & 39.7  \\
\hline
\end{tabular}
\end{table}


\subsection{EMA Variant}
We compare two EMA setups where $N_{dead}$ is set to either $2$ or $4$.
Table~\ref{tab:tri_rex_valid_val_control_EMA_q_10_0_codebook_256_0_lora_kq} and Table~\ref{tab:simplequestions_valid_val_control_EMA_q_10_0_codebook_256_0_lora_kq} report controlled comparisons.
The findings indicate that the less aggressive $N_{dead}=4$ variant achieves better performance and improves code utilization.
Relaxing the dead code replacement prevents the quantization process from entering a ``thrashing'' state, in which codes are repeatedly reassigned without receiving sufficient training signal to stabilize their usage.


\begin{table}[h]
\centering
\caption{Controlled comparison on Tri-REx (valid): vary $N_{dead}$; hold constant Q=10.0, Codebook=256.0.}
\label{tab:tri_rex_valid_val_control_EMA_q_10_0_codebook_256_0_lora_kq}
\begin{tabular}{l|cccc|c}
\hline
$N_{dead}$ & Hit@1 & Hit@3 & Hit@5 & Hit@10 & Code Util \\
\hline
 2 & 31.9 & 49.8 & 57.0 & 65.0 & 49.4  \\
 4 & \textit{32.3} & \textit{50.2} & \textit{57.5} & \textit{65.3} & \textit{56.7}  \\
\hline
\end{tabular}
\end{table}

\begin{table}[h]
\centering
\caption{Controlled comparison on SimpleQuestions (valid): vary $N_{dead}$; hold constant Q=10.0, Codebook=256.0.}
\label{tab:simplequestions_valid_val_control_EMA_q_10_0_codebook_256_0_lora_kq}
\begin{tabular}{l|cccc}
\hline
$N_{dead}$ & Hit@1 & Hit@3 & Hit@5 & Hit@10 \\
\hline
 2 & 18.2 & 35.8 & \textit{41.6} & 47.5 \\
 4 & \textit{24.0} & \textit{36.1} & 41.0 & \textit{48.7} \\
\hline
\end{tabular}
\end{table}



\subsection{Number of Quantizers}
The quantization depth directly affects both how expressive our knowledge graph encodings are and how many tokens are needed for knowledge injection. 
We experiment with $Q \in \{5, 10, 20\}$ quantizers.
Table~\ref{tab:tri_rex_valid_val_control_Q_codebook_256_0_ema_default_lora_kq} and Table~\ref{tab:simplequestions_valid_val_control_Q_codebook_256_0_ema_default_lora_kq} report the controlled comparisons.
We observe that increasing quantization depth enhances model performance. 
However, it also exhibits diminishing returns, suggesting that, under our current architecture, additional quantizers add little useful signal.
This is probably a consequence of our simple strategy for selecting the final graph representation; more sophisticated approaches might better leverage the finer-grained reconstructions provided by multiple quantizers.

\begin{table}[h]
\centering
\caption{Controlled comparison on Tri-REx (valid): vary Q; hold constant Codebook=256.0, $N_{dead}$=2.}
\label{tab:tri_rex_valid_val_control_Q_codebook_256_0_ema_default_lora_kq}
\begin{tabular}{l|cccc|c}
\hline
Q & Hit@1 & Hit@3 & Hit@5 & Hit@10 & AvgTokens \\
\hline
 5.0 & 30.9 & 48.4 & 55.7 & 63.7 & 56.30 \\
 10.0 & \textit{31.9} & \textit{49.8} & \textit{57.0} & \textit{65.0} & 61.30 \\
 20.0 & 31.2 & 48.7 & 55.8 & 63.9 & 71.30 \\
\hline
\end{tabular}
\end{table}

\begin{table}[h]
\centering
\caption{Controlled comparison on SimpleQuestions (valid): vary Q; hold constant Codebook=256.0, $N_{dead}$=2.}
\label{tab:simplequestions_valid_val_control_Q_codebook_256_0_ema_default_lora_kq}
\begin{tabular}{l|cccc|c}
\hline
Q & Hit@1 & Hit@3 & Hit@5 & Hit@10 & AvgTokens \\
\hline
 5.0 & 23.0 & 32.7 & 38.4 & 46.1 & 49.27 \\
 10.0 & 18.2 & 35.8 & 41.6 & 47.5 & 54.29 \\
 20.0 & \textit{28.2} & \textit{42.0} & \textit{47.4} & \textit{54.9} & 64.32 \\
\hline
\end{tabular}
\end{table}





















\end{document}
