<!DOCTYPE html>
<html lang="en-us">

  <head>
  <link href="http://gmpg.org/xfn/11" rel="profile">
  <meta http-equiv="content-type" content="text/html; charset=utf-8">

  <!-- Do not cap the zoom level: maximum-scale=1 prevents users from pinch-zooming the page. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>
    
      Classification or Generation? Understanding Structure Prediction for Knowledge-Intensive Tasks &middot; The ICLR Blog Track
    
  </title>

  
  <link rel="canonical" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/2021/12/01/Classification-or-Generation-Understanding-Structure-Prediction-for-Knowledge-Intensive-Tasks/">
  

  <link rel="stylesheet" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/css/poole.css">
  <link rel="stylesheet" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/css/syntax.css">
  <link rel="stylesheet" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/css/lanyon.css">
  <link rel="stylesheet" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/css/custom.css">
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=PT+Serif:400,400italic,700%7CPT+Sans:400">

  <link rel="apple-touch-icon-precomposed" sizes="144x144" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/apple-touch-icon-precomposed.png">
  <link rel="shortcut icon" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/favicon.ico">

  <link rel="alternate" type="application/rss+xml" title="RSS" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/atom.xml">

  

  <!-- MathJax v2 picks up text/x-mathjax-config blocks while it starts up, so the
       configuration is placed before the script that loads MathJax.js; otherwise
       the "$...$" inline delimiters may not be registered in time. -->
  <script type="text/x-mathjax-config">
    MathJax.Hub.Config({
      tex2jax: {
        inlineMath: [ ['$','$'], ["\\(","\\)"] ],
        processEscapes: false
      }
    });
  </script>
 <!-- <script type="text/x-mathjax-config"> MathJax.Hub.Config({ TeX: { equationNumbers: { autoNumber: "AMS" } } }); </script> -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML"></script>
</head>


  <body>

    <!-- Target for toggling the sidebar `.sidebar-checkbox` is for regular
     styles, `#sidebar-checkbox` for behavior. -->
<input type="checkbox" class="sidebar-checkbox" id="sidebar-checkbox">
<!-- <input type="checkbox" class="sidebar-checkbox" id="sidebar-checkbox" > -->

<!-- Toggleable sidebar -->
<div class="sidebar" id="sidebar">
  <div class="sidebar-item">
    <p>For short-term, peer-sourced tests of time, generalizations, specializations, reproductions, etc.!</p>
  </div>

  <nav class="sidebar-nav">

    

    
    
      
        
          <a class="sidebar-nav-item" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/">ICLR 2022 Blog Track</a>
        
      
    
      
        
      
    
      
        
          <a class="sidebar-nav-item" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/about/">About</a>
        
      
    
      
    
      
        
      
    
      
        
          <a class="sidebar-nav-item" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/submitting/">Submitting</a>
        
      
    
      
        
          <a class="sidebar-nav-item" href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/tags/">Tags</a>
        
      
    

    <a class="sidebar-nav-item" href="https://github.com/iclr-blog-track/iclr-blog-track.github.io">GitHub project</a>
    <span class="sidebar-nav-item">Currently vICLR Spring 2021</span>
  </nav>

  <div class="sidebar-item">
    <p>
      &copy; 2022. All rights reserved.
    </p>
  </div>
</div>


    <!-- Wrap is the content to shift when toggling the sidebar. We wrap the
         content to avoid any CSS collisions with our real content. -->
    <div class="wrap">
      <div class="masthead">
        <div class="container">
          <h3 class="masthead-title">
            <a href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/" title="Home">The ICLR Blog Track</a>
            <small></small>
          </h3>
        </div>
      </div>

      <div class="container content">
        <div class="post">
  <h1 id="iclr-post-title" class="post-title">Classification or Generation? Understanding Structure Prediction for Knowledge-Intensive Tasks</h1>
  <span class="post-date">01 Dec 2021 | 
    <a class="content-tag" href="/tags/#natural-language-processing"> Natural Language Processing </a>
  
    <a class="content-tag" href="/tags/#language-modeling"> Language Modeling </a>
  
    <a class="content-tag" href="/tags/#entity-retrieval"> Entity Retrieval </a>
  </span>

  <span id="iclr-post-authors" class="post-date">Anonymous</span>
  <h2 id="contents">Contents</h2>
<ul>
  <li><a href="#contents">Contents</a></li>
  <li><a href="#1-introduction-to-entity-retrieval">1. Introduction to Entity Retrieval</a>
    <ul>
      <li><a href="#11-problem-definition">1.1 Problem Definition</a></li>
    </ul>
  </li>
  <li><a href="#2-reformulation-of-the-problem">2. Reformulation of the Problem</a></li>
  <li><a href="#3-methodology">3. Methodology</a>
    <ul>
      <li><a href="#31-prefix-tree">3.1 Prefix Tree</a></li>
      <li><a href="#32-autoregressive-end-to-end-entity-linking">3.2 Autoregressive End-To-End Entity Linking</a></li>
    </ul>
  </li>
  <li><a href="#experiments-and-analyses">Experiments and Analyses</a></li>
  <li><a href="#classification-vs-generation">Classification vs. Generation</a></li>
  <li><a href="#conclusion">Conclusion</a></li>
  <li><a href="#references">References</a></li>
</ul>

<hr />
<div id="Section-1"></div>

<h2 id="1-introduction-to-entity-retrieval">1. Introduction to Entity Retrieval</h2>
<p>Search engines have become part of our daily lives. We use Google (Bing, Yandex, Baidu, etc.) as the main gateway to information on the Web. With a specific type of content in mind, we may search directly on a particular site or service, e.g., on Facebook or LinkedIn for people, organizations, and events; on Amazon or eBay for products; or YouTube or Spotify for music. Accustomed to a search box somewhere near the top of the screen, we have also increased our expectations of the quality and speed of the responses to our searches.</p>

<p><strong>Information retrieval</strong> (IR), at the top level of abstraction, is about matching <em>information needs</em> with <em>information objects</em>. When a user issues a <em>query</em>, i.e., an expression ranging from a few keywords (e.g., <em>Apple</em>) to a natural language question (e.g., <em>who is the CEO of Apple company</em>), the search engine responds with a ranked list of information objects, traditionally related documents.</p>

<p>With the support of the enormous development of large-scale structured knowledge bases, we have witnessed the transition from “documents” to “answers”, as search engines directly return related entities or facts instead of merely “ten blue links”. The knowledge bases organize information around specific things and objects referred to as entities. The need to make search engines respond to queries with related entities brings us to the field of entity retrieval (ER), which is also the main problem tackled by the paper presented here: <a href="https://arxiv.org/abs/2010.00904">“<strong><em>Autoregressive Entity Retrieval</em></strong>”</a> by Nicola De Cao, Gautier Izacard, Sebastian Riedel, and Fabio Petroni.</p>

<div id="Section-1.1"></div>

<h3 id="11-problem-definition">1.1 Problem Definition</h3>
<p>Formally, entities are uniquely identifiable objects or things (such as persons, organizations, and places), characterized by their types, attributes, and relationships to other entities. In an entity retrieval task, we have a collection of entities $\mathcal{E}$ (e.g., Wikipedia articles) where each entity is an entry in a Knowledge Base (KB) such as Wikipedia. Given a textual input source $x$ (e.g., a question), a model has to return the most relevant entities from $\mathcal{E}$ with respect to $x$. We assume that each $e \in \mathcal{E}$ is uniquely assigned to a textual representation (i.e., its name): a sequence of tokens $y$ (e.g., Wikipedia pages are identified by their titles).</p>

<p>Concretely, the following tasks are involved in this paper:</p>
<ul>
  <li><strong>Entity Disambiguation</strong> (ED), where an input $x$ is annotated with a mention and a system has to either select its corresponding entity from $\mathcal{E}$ or predict that there is no corresponding entry in the KB (see <a href="#Figure-1">Figure 1</a> for an example).</li>
  <li><strong>End-To-End Entity Linking</strong> (EL). This task is to jointly detect entity mentions $m$ from an input $x$ and link those mentions to their respective KB entities $e \in \mathcal{E}$.</li>
  <li><strong>Page-level Document Retrieval</strong> (DR). The input $x$ is intended as a query and $\mathcal{E}$ as a collection of documents identified by their unique titles (e.g., Wikipedia articles).</li>
</ul>

<!-- ![Figure1-entity-retrieval](../public/images/2022-12-01-Better-Generating-Than-Classifying/Figure1.png) -->

<p id="Figure-1"><img src="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/images/2022-12-01-Better-Generating-Than-Classifying/Figure1.png" alt="Figure1-entity-retrieval" /></p>

<div id="Section-2"></div>

<h2 id="2-reformulation-of-the-problem">2. Reformulation of the Problem</h2>
<p>In previous research, entity retrieval has been modeled as a multi-class classification problem where each entity is assigned a unique atomic label. A typical retrieval system consists of these parts:</p>
<ol>
  <li>An encoder model that converts input queries to hidden representations;</li>
  <li>A retrieval model that captures the affinity between context and entities, usually with vector dot products.</li>
</ol>

<p>The output of the retrieval model is sorted, and top-k similar candidates are chosen as matches. This process has several obvious drawbacks:</p>
<ul>
  <li>Training the system requires constructing negative samples where the mismatched entities and query pairs are fed into the model, and the choice of negative pairs has a strong influence on the final performance;</li>
  <li>When provided with large sets of entities, the storage of their dense representations requires a large memory footprint;</li>
  <li>The process of vector dot product might fail in modeling the fine-grained interactions between the context and the entities.</li>
</ul>

<p>Now, let’s return to the basics: by classifying or ranking the output of interactions between the queries and the entities, <em>what</em> are we supposed to achieve with this system? In a page-level Document Retrieval problem, we expect the model to output the most relevant documents (or sentences) in the KB given queries containing certain entity mentions; in an Entity Disambiguation problem, we want the model to output the mentioned entities in the given queries.</p>

<p>In other words, we can reformulate the retrieval problems as a generation task where the system gets an input sentence and outputs another - that’s exactly what a Seq2Seq model does!</p>

<p>But hold on, as we may get some unexpected answers from the model which do not appear in the given KB, we need to add some constraints. To ensure the outputs strictly follow the KB’s content, we may build and apply a <strong><em>trie</em></strong>, i.e., a prefix tree, to constrain the decoding process since the generation is performed from left to right. We will talk about the details in later sections. Now, with this Seq2Seq alternative, it is surprising to find that the problems mentioned above are alleviated:</p>
<ul>
  <li>In a Seq2Seq task, we don’t have to worry about the construction of negative samples as all the other sentences already serve as negative samples to a certain extent;</li>
  <li>The memory overhead of a Seq2Seq model relies mainly on the size of beam search and the average length of output sequence, much smaller than that of storing all entities’ representations;</li>
  <li>The Seq2Seq model, together with the prefix constraints, captures interactions at the token level, which is intuitively better than the dot product between representation vectors.</li>
</ul>

<div id="Section-3"></div>

<h2 id="3-methodology">3. Methodology</h2>
<p>So far we have covered the main idea behind the paradigm proposed in this paper, <strong>“GENRE”</strong> (for <em>Generative ENtity REtrieval</em>); here are some more details.</p>

<p>Concretely, the paper leverages a transformer-based architecture pre-trained with a language model objective (i.e., the BART model) and fine-tuned to generate entity names. GENRE ranks each entity $e \in \mathcal{E}$ by calculating a score with an autoregressive formulation: 
\(\operatorname{score}(e \mid x)=p_{\theta}(y \mid x)=\prod_{i=1}^{N} p_{\theta}\left(y_{i} \mid y_{&lt;i}, x\right),\)
where $y$ is the sequence of $N$ tokens in the identifier of $e$, and $\theta$ denotes the parameters of the model.</p>

<div id="Section-3.1"></div>

<h3 id="31-prefix-tree">3.1 Prefix Tree</h3>
<p>Now let’s take a closer look at the trie constraints applied to the decoding part.</p>
<blockquote>
  <p>In computer science, a trie, also called digital tree or prefix tree, is a type of search tree, a tree data structure used for locating specific keys from within a set. These keys are most often strings, with links between nodes defined not by the entire key, but by individual characters. In order to access a key (to recover its value, change it, or remove it), the trie is traversed depth-first, following the links between nodes, which represent each character in the key…</p>

  <p>All the children of a node have a common prefix of the string associated with that parent node, and the root is associated with the empty string. – Wikipedia</p>
</blockquote>

<p>In the prefix tree we mentioned here, each node is associated with a token instead of an individual character. For example, given the following phrases:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>English language
English literature
France
</code></pre></div></div>
<p>we can build a prefix tree as shown in <a href="#Figure-2">Figure 2</a>:</p>

<!-- ![Figure2-trie](https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/images/2022-12-01-Better-Generating-Than-Classifying/Figure2.png) -->

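<object id="Figure-2" type="application/pdf" data="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/images/2021-12-01-post/trie.pdf"><a href="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/images/2021-12-01-post/trie.pdf">Figure 2: the prefix tree built from the phrases above (PDF)</a></object>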

<p>The sentences are aggregated with the same prefix tokens, and each complete path (i.e., a path that begins with a <code class="language-plaintext highlighter-rouge">BOS</code> node and ends with an <code class="language-plaintext highlighter-rouge">EOS</code> node) represents a sentence. We can perform a sentence search efficiently by comparing an input sequence of tokens with the associated tokens in different nodes.</p>

<p>In the decoding process, with the tokens already output, we can set the probability of tokens that don’t appear in the children nodes of the current node to zero and make the model choose possible tokens till we meet an <code class="language-plaintext highlighter-rouge">EOS</code> node. In this way, we make sure the model only outputs “legal” sentences that appeared in our KB. The trie reduces the search space of beam search while performing sentence inference.</p>

<p>Another advantage of a trie is its low memory overhead (e.g., constraining on Wikipedia titles using the BART tokenizer produces a trie with ∼6M leaves, ∼17M internal nodes that occupied ∼600MB of disk space), since it is a compressed representation of a series of documents and can be pre-computed and stored in memory.</p>

<div id="Section-3.2"></div>

<h3 id="32-autoregressive-end-to-end-entity-linking">3.2 Autoregressive End-To-End Entity Linking</h3>
<p>To extend the autoregressive framework to the end-to-end Entity Linking (EL) problem, a markup annotation is used in which span boundaries are flagged with special tokens and accompanied by their corresponding entity identifiers. As an example, given an input sentence:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>In 1503, Leonardo began painting the Mona Lisa.
</code></pre></div></div>
<p>where the mention “Leonardo” refers to the entity “Leonardo da Vinci”, and the mention “Mona Lisa” refers to the entity “Mona Lisa” in the knowledge base, its corresponding output will be:</p>
<div class="language-plaintext highlighter-rouge"><div class="highlight"><pre class="highlight"><code>In 1503, [Leonardo](Leonardo da Vinci) began painting the [Mona Lisa](Mona Lisa).
</code></pre></div></div>
<p>Since the annotated output space is exponentially large, it becomes intractable to pre-compute a trie for decoding, and the search probability is computed dynamically instead. In such a dynamic decoding strategy, there are three different conditions at each generation step:</p>
<ol>
  <li>Outside any mention or entity link, where the decoder can either start a new mention with a special token (i.e., <code class="language-plaintext highlighter-rouge">[</code>) or continue by copying the next input token;</li>
  <li>Inside an entity mention, where the decoder can either continue with the next input token or end the mention with a special token (i.e., <code class="language-plaintext highlighter-rouge">]</code>);</li>
  <li>Inside an entity link, where the decoder follows an entity trie discussed above to generate valid entity identifiers.</li>
</ol>

<p>The model is constrained differently under these circumstances, as shown in Figure 3.</p>

<p><img src="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/images/2022-12-01-Better-Generating-Than-Classifying/Figure3.png" alt="Figure3-dynamical-constraints" /></p>

<div id="Section-4"></div>

<h2 id="experiments-and-analyses">Experiments and Analyses</h2>
<p>Extensive evaluations on more than 20 datasets across three tasks (Entity Disambiguation, end-to-end Entity Linking (EL), and page-level Document Retrieval) report the effectiveness of the GENRE paradigm.</p>

<p>Overall, GENRE achieves very competitive results in all three settings, being the best-performing system on average across all of them, especially on the page-level retrieval tasks of the KILT benchmark (<a href="#Table-1">Table 1</a>):</p>

<div id="Table-1">Table 1: R-Precision for page-level retrieval on KILT test data. Bold indicates the best model and underline indicates the second best.</div>

<table>
  <thead>
    <tr>
      <th> </th>
      <th>Fact Check.</th>
      <th colspan="3">Entity Disambiguation</th>
      <th colspan="2">Slot Filling</th>
      <th colspan="4">Open Domain QA</th>
      <th>Dial.</th>
      <th> </th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td><strong>Model</strong></td>
      <td><strong>FEV</strong></td>
      <td><strong>AY2</strong></td>
      <td><strong>WnWi</strong></td>
      <td><strong>WnCw</strong></td>
      <td><strong>T-REx</strong></td>
      <td><strong>zsRE</strong></td>
      <td><strong>NQ</strong></td>
      <td><strong>HoPo</strong></td>
      <td><strong>TQA</strong></td>
      <td><strong>ELI5</strong></td>
      <td><strong>WoW</strong></td>
      <td><strong>Avg.</strong></td>
    </tr>
    <tr>
      <td>DPR + BERT</td>
      <td><ins>72.9</ins></td>
      <td>-</td>
      <td>-</td>
      <td>-</td>
      <td>-</td>
      <td>40.1</td>
      <td><strong>60.7</strong></td>
      <td>25.0</td>
      <td>43.4</td>
      <td>-</td>
      <td>-</td>
      <td>-</td>
    </tr>
    <tr>
      <td>DPR</td>
      <td>55.3</td>
      <td>1.8</td>
      <td>0.3</td>
      <td>0.5</td>
      <td>13.3</td>
      <td>28.9</td>
      <td>54.3</td>
      <td>25.0</td>
      <td>44.5</td>
      <td>10.7</td>
      <td>25.5</td>
      <td>23.6</td>
    </tr>
    <tr>
      <td>tf-idf</td>
      <td>50.9</td>
      <td>3.7</td>
      <td>0.24</td>
      <td>2.1</td>
      <td>44.7</td>
      <td>60.8</td>
      <td>28.1</td>
      <td>34.1</td>
      <td>46.4</td>
      <td><ins>13.7</ins></td>
      <td>49.0</td>
      <td>30.5</td>
    </tr>
    <tr>
      <td>DPR + BART</td>
      <td>55.3</td>
      <td>75.5</td>
      <td>45.2</td>
      <td>46.9</td>
      <td>13.3</td>
      <td>28.9</td>
      <td>54.3</td>
      <td>25.0</td>
      <td>44.4</td>
      <td>10.7</td>
      <td>25.4</td>
      <td>38.6</td>
    </tr>
    <tr>
      <td>RAG</td>
      <td>61.9</td>
      <td>72.6</td>
      <td>48.1</td>
      <td>47.6</td>
      <td>28.7</td>
      <td>53.7</td>
      <td>59.5</td>
      <td>30.6</td>
      <td>48.7</td>
      <td>11.0</td>
      <td><ins>57.8</ins></td>
      <td>47.3</td>
    </tr>
    <tr>
      <td>BLINK + flair</td>
      <td>63.7</td>
      <td><ins>81.5</ins></td>
      <td><ins>80.2</ins></td>
      <td><ins>68.8</ins></td>
      <td><ins>59.6</ins></td>
      <td><ins>78.8</ins></td>
      <td>24.5</td>
      <td><ins>46.1</ins></td>
      <td><ins>65.6</ins></td>
      <td>9.3</td>
      <td>38.2</td>
      <td><ins>56.0</ins></td>
    </tr>
    <tr>
      <td><strong>GENRE</strong></td>
      <td><strong>83.6</strong></td>
      <td><strong>89.9</strong></td>
      <td><strong>87.4</strong></td>
      <td><strong>71.2</strong></td>
      <td><strong>79.4</strong></td>
      <td><strong>95.8</strong></td>
      <td><ins>60.3</ins></td>
      <td><strong>51.3</strong></td>
      <td><strong>69.2</strong></td>
      <td><strong>15.8</strong></td>
      <td><strong>62.9</strong></td>
      <td><strong>69.7</strong></td>
    </tr>
  </tbody>
</table>

<p>Besides outperforming other SotA models, GENRE significantly reduces the memory overhead, occupying 14 times less memory than BLINK and 34 times less memory than DPR. As the entity names are stored in the prefix tree in advance, the GENRE model also has an advantage under the cold-start setting, where only the names of entities are available in the KBs.</p>

<!-- # TODO part
- reduce experiment analysis
- add generative model analysis
- add hierachical classification analysis -->

<div id="Section-5"></div>

<!-- Structured Prediction as Translation between Augmented Natural Languages

Zero-Shot Information Extraction as a Unified Text-to-Triple Translation

Text2Event: Controllable Sequence-to-Structure Generation for End-to-end Event Extraction -->
<h2 id="classification-vs-generation">Classification vs. Generation</h2>
<p>To push forward the success of this paradigm shift and apply generative models to more classification problems, we need to find out the intrinsic reasons behind the superiority of generative models over classification models.</p>

<p>Generation is technically a hierarchical classification procedure: at each generation step, the decoder chooses one token to output based on the ranks of softmax logits - in other words, it performs token classification, and each step narrows the search space of remaining categories as whole sequences. The categories are clustered by their preceding tokens, i.e., the related categories (similar entities in the entity retrieval problem) with the same prefix tokens are grouped into the same search space.</p>

<!-- TODO: this section ends with an unfinished paragraph that still needs to be written. -->

<h2 id="conclusion">Conclusion</h2>
<p>Entity retrieval is the task of finding the exact entity that a piece of natural language refers to. Existing approaches treat it as a search problem, where one retrieves an entity from a KB given a piece of text.</p>

<p>This work proposes a straightforward paradigm: finding an entity identifier by autoregressively generating it with prefix constraints. Effectively, this means cross-encoding entities and their context, with the advantages that the memory footprint scales linearly with the vocabulary size and that there is no need to sample negative data. Surprisingly, without search or reranking, this plain and simple approach shatters some existing benchmarks.</p>

<h2 id="references">References</h2>
<ul>
  <li>[1] De Cao, N., Izacard, G., Riedel, S., &amp; Petroni, F. (2020). Autoregressive entity retrieval. arXiv preprint arXiv:2010.00904.</li>
  <li>[2] Wikipedia contributors. (2022, January 8). Trie. In Wikipedia, The Free Encyclopedia. Retrieved 03:30, January 14, 2022, from https://en.wikipedia.org/w/index.php?title=Trie&amp;oldid=1064464503</li>
  <li>[3]</li>
</ul>

</div>

<div id="bibtex-container" class="related">
  For attribution in academic contexts, please cite this work as
  <pre id="bibtex-academic-attribution">

  </pre>

  BibTeX citation
  <pre id="bibtex-box">

  </pre>
</div>
<script>
  // Builds the citation snippets shown under the post.
  // Reads the author list from #iclr-post-authors and the title from
  // #iclr-post-title, then fills #bibtex-box (BibTeX entry) and
  // #bibtex-academic-attribution (plain-text citation).
  // The author text is split on ";" per author and on "," per field; the
  // variable names suggest a "last name, first name, institution" order.
  let authorsSpan = document.getElementById("iclr-post-authors");
  let authorsText = authorsSpan.textContent;
  let lnameFnameInstitution = authorsText.split(";");
  let lfiList = lnameFnameInstitution
    .map(lfi => lfi.split(",").map(item => item.trim()))
    // Drop empty entries (e.g. produced by a trailing ";").
    .filter(lfi => lfi[0]);
  // Keep at least one (empty) author so the lookups below never hit undefined.
  if (lfiList.length === 0) lfiList = [[""]];
  // A single-field author such as "Anonymous" has no second field; emit the
  // name alone instead of "Anonymous, undefined".
  let bibtexLFI = lfiList.map(lfi => lfi[1] ? lfi[0] + ", " + lfi[1] : lfi[0]).join(" and ");
  let academicLFI = lfiList.map(lfi => lfi[0]);
  {
    // Three or more authors collapse to "First, et al."; two are joined by "&".
    if(academicLFI.length > 2) academicLFI = academicLFI[0] + ", et al.";
    else if(academicLFI.length == 2) academicLFI = academicLFI[0] + " & " + academicLFI[1];
    else academicLFI = academicLFI[0];
  }

  let titleSpan = document.getElementById("iclr-post-title");
  let titleText = titleSpan.textContent.trim();
  // Citation key: <author field><year><first three title words>, lower-cased,
  // with ALL whitespace and punctuation removed. Falls back to the first author
  // field when the second one is missing.
  let bibtexTitleShorthand = ((lfiList[0][1] || lfiList[0][0]) +
    "2022" +
    titleText.split(/\s+/).slice(0, 3).join("")
  ).replace(/\s+/g, "").replace(/[\p{P}$+<=>^`|~]/gu, '').toLowerCase();

  // Note: the citation key is followed by a comma only; an extra closing brace
  // here would terminate the BibTeX entry before its fields.
  let bibtexTemplate = `
@inproceedings{${bibtexTitleShorthand},
  author = {${bibtexLFI}},
  title = {${titleText}},
  booktitle = {ICLR Blog Track},
  year = {2022},
  note = {${window.location.href}},
  url  = {${window.location.href}}
}
  `.trim();
  document.getElementById("bibtex-box").innerText = bibtexTemplate;

  let academicTemplate = `
${academicLFI}, "${titleText}", ICLR Blog Track, 2022.
`.trim();
  document.getElementById("bibtex-academic-attribution").innerText = academicTemplate;

</script>


<!-- Related posts. The tag links sit next to the post link rather than inside it:
     an <a> element must not contain another <a>. -->
<div class="related">
  <h2>Related posts</h2>
  <ul class="related-posts">
    <li>
      <h3>
        <a href="/2021/09/01/sample-submission/">Sample Submission</a>
        <small>01 Sep 2021 |
          <a class="content-tag" href="/tags/#natural-language-processing"> Natural Language Processing </a>
          <a class="content-tag" href="/tags/#language-modeling"> Language Modeling </a>
          <a class="content-tag" href="/tags/#entity-retrieval"> Entity Retrieval </a>
        </small>
      </h3>
    </li>
    <li>
      <h3>
        <a href="/2020/04/02/example-content/">Example content (Basic Markdown)</a>
        <small>02 Apr 2020 |
          <a class="content-tag" href="/tags/#natural-language-processing"> Natural Language Processing </a>
          <a class="content-tag" href="/tags/#language-modeling"> Language Modeling </a>
          <a class="content-tag" href="/tags/#entity-retrieval"> Entity Retrieval </a>
        </small>
      </h3>
    </li>
  </ul>
</div>


<script src="https://utteranc.es/client.js"
        repo="iclr-blog-track/iclr-blog-track.github.io"
        issue-term="pathname"
        label="utterance"
        theme="boxy-light"
        crossorigin="anonymous"
        >
</script>


      </div>
    </div>

    <label for="sidebar-checkbox" class="sidebar-toggle"></label>

    <script src="https://iclr.iro.umontreal.ca/f7745c0a-ca79-4853-b5a4-628954353afd_1642159617/public/js/script.js"></script>
  </body>
</html>
