					<h1 id="firstHeading" class="firstHeading mw-first-heading"><span class="mw-page-title-main">Language model</span></h1>
				
							
<div id="p-lang-btn" class="vector-menu vector-dropdown vector-menu-dropdown mw-portlet mw-portlet-lang"  >
	<input type="checkbox"
		id="p-lang-btn-checkbox"
		role="button"
		aria-haspopup="true"
		data-event-name="ui.dropdown-p-lang-btn"
		class="vector-dropdown-checkbox vector-menu-checkbox mw-interlanguage-selector"
		aria-label="Go to an article in another language. Available in 24 languages"
		
		
	/>
	<label
		id="p-lang-btn-label"
		for="p-lang-btn-checkbox"
		class="vector-dropdown-label vector-menu-heading cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet cdx-button--action-progressive mw-portlet-lang-heading-24"
		aria-hidden="true"
		
	>
		<span class="vector-icon mw-ui-icon-language-progressive mw-ui-icon-wikimedia-language-progressive"></span>


		<span class="vector-dropdown-label-text vector-menu-heading-label">24 languages</span>
	</label>
	<div class="vector-menu-content vector-dropdown-content">

		<div class="vector-menu-content">
			
			<ul class="vector-menu-content-list"><li class="interlanguage-link interwiki-af mw-list-item"><a href="https://af.wikipedia.org/wiki/Taalmodel" title="Taalmodel – Afrikaans" lang="af" hreflang="af" class="interlanguage-link-target"><span>Afrikaans</span></a></li><li class="interlanguage-link interwiki-ar mw-list-item"><a href="https://ar.wikipedia.org/wiki/%D8%A3%D9%86%D9%85%D9%88%D8%B0%D8%AC_%D8%A7%D9%84%D9%84%D8%BA%D8%A9" title="أنموذج اللغة – Arabic" lang="ar" hreflang="ar" class="interlanguage-link-target"><span>العربية</span></a></li><li class="interlanguage-link interwiki-hyw mw-list-item"><a href="https://hyw.wikipedia.org/wiki/%D4%BC%D5%A5%D5%A6%D5%B8%D6%82%D5%AB_%D4%BF%D5%A1%D5%B2%D5%A1%D5%BA%D5%A1%D6%80" title="Լեզուի Կաղապար – Western Armenian" lang="hyw" hreflang="hyw" class="interlanguage-link-target"><span>Արեւմտահայերէն</span></a></li><li class="interlanguage-link interwiki-bg mw-list-item"><a href="https://bg.wikipedia.org/wiki/%D0%95%D0%B7%D0%B8%D0%BA%D0%BE%D0%B2_%D0%BC%D0%BE%D0%B4%D0%B5%D0%BB" title="Езиков модел – Bulgarian" lang="bg" hreflang="bg" class="interlanguage-link-target"><span>Български</span></a></li><li class="interlanguage-link interwiki-ca mw-list-item"><a href="https://ca.wikipedia.org/wiki/Model_de_llenguatge" title="Model de llenguatge – Catalan" lang="ca" hreflang="ca" class="interlanguage-link-target"><span>Català</span></a></li><li class="interlanguage-link interwiki-es mw-list-item"><a href="https://es.wikipedia.org/wiki/Modelaci%C3%B3n_del_lenguaje" title="Modelación del lenguaje – Spanish" lang="es" hreflang="es" class="interlanguage-link-target"><span>Español</span></a></li><li class="interlanguage-link interwiki-eu mw-list-item"><a href="https://eu.wikipedia.org/wiki/Hizkuntza_eredu" title="Hizkuntza eredu – Basque" lang="eu" hreflang="eu" class="interlanguage-link-target"><span>Euskara</span></a></li><li class="interlanguage-link interwiki-fa mw-list-item"><a href="https://fa.wikipedia.org/wiki/%D9%85%D8%AF%D9%84_%D8%B2%D8%A8%D8%A7%D9%86%DB%8C" title="مدل زبانی – Persian" lang="fa" hreflang="fa" class="interlanguage-link-target"><span>فارسی</span></a></li><li class="interlanguage-link interwiki-fr mw-list-item"><a href="https://fr.wikipedia.org/wiki/Mod%C3%A8le_de_langage" title="Modèle de langage – French" lang="fr" hreflang="fr" class="interlanguage-link-target"><span>Français</span></a></li><li class="interlanguage-link interwiki-ko mw-list-item"><a href="https://ko.wikipedia.org/wiki/%EC%96%B8%EC%96%B4_%EB%AA%A8%EB%8D%B8" title="언어 모델 – Korean" lang="ko" hreflang="ko" class="interlanguage-link-target"><span>한국어</span></a></li><li class="interlanguage-link interwiki-zu mw-list-item"><a href="https://zu.wikipedia.org/wiki/UNongo_lolimi" title="UNongo lolimi – Zulu" lang="zu" hreflang="zu" class="interlanguage-link-target"><span>IsiZulu</span></a></li><li class="interlanguage-link interwiki-he mw-list-item"><a href="https://he.wikipedia.org/wiki/%D7%9E%D7%95%D7%93%D7%9C_%D7%A9%D7%A4%D7%94" title="מודל שפה – Hebrew" lang="he" hreflang="he" class="interlanguage-link-target"><span>עברית</span></a></li><li class="interlanguage-link interwiki-lv mw-list-item"><a href="https://lv.wikipedia.org/wiki/Valodas_modelis" title="Valodas modelis – Latvian" lang="lv" hreflang="lv" class="interlanguage-link-target"><span>Latviešu</span></a></li><li class="interlanguage-link interwiki-nl mw-list-item"><a href="https://nl.wikipedia.org/wiki/Taalmodel" title="Taalmodel – Dutch" lang="nl" hreflang="nl" 
class="interlanguage-link-target"><span>Nederlands</span></a></li><li class="interlanguage-link interwiki-ja mw-list-item"><a href="https://ja.wikipedia.org/wiki/%E8%A8%80%E8%AA%9E%E3%83%A2%E3%83%87%E3%83%AB" title="言語モデル – Japanese" lang="ja" hreflang="ja" class="interlanguage-link-target"><span>日本語</span></a></li><li class="interlanguage-link interwiki-nn mw-list-item"><a href="https://nn.wikipedia.org/wiki/Spr%C3%A5kmodell" title="Språkmodell – Norwegian Nynorsk" lang="nn" hreflang="nn" class="interlanguage-link-target"><span>Norsk nynorsk</span></a></li><li class="interlanguage-link interwiki-pt mw-list-item"><a href="https://pt.wikipedia.org/wiki/Modelo_de_linguagem" title="Modelo de linguagem – Portuguese" lang="pt" hreflang="pt" class="interlanguage-link-target"><span>Português</span></a></li><li class="interlanguage-link interwiki-ru mw-list-item"><a href="https://ru.wikipedia.org/wiki/%D0%AF%D0%B7%D1%8B%D0%BA%D0%BE%D0%B2%D0%B0%D1%8F_%D0%BC%D0%BE%D0%B4%D0%B5%D0%BB%D1%8C" title="Языковая модель – Russian" lang="ru" hreflang="ru" class="interlanguage-link-target"><span>Русский</span></a></li><li class="interlanguage-link interwiki-fi mw-list-item"><a href="https://fi.wikipedia.org/wiki/Kielimalli" title="Kielimalli – Finnish" lang="fi" hreflang="fi" class="interlanguage-link-target"><span>Suomi</span></a></li><li class="interlanguage-link interwiki-sv mw-list-item"><a href="https://sv.wikipedia.org/wiki/Spr%C3%A5kmodell" title="Språkmodell – Swedish" lang="sv" hreflang="sv" class="interlanguage-link-target"><span>Svenska</span></a></li><li class="interlanguage-link interwiki-tr mw-list-item"><a href="https://tr.wikipedia.org/wiki/Dil_modeli" title="Dil modeli – Turkish" lang="tr" hreflang="tr" class="interlanguage-link-target"><span>Türkçe</span></a></li><li class="interlanguage-link interwiki-uk mw-list-item"><a href="https://uk.wikipedia.org/wiki/%D0%9C%D0%BE%D0%B4%D0%B5%D0%BB%D1%8C_%D0%BC%D0%BE%D0%B2%D0%B8" title="Модель мови – Ukrainian" lang="uk" hreflang="uk" class="interlanguage-link-target"><span>Українська</span></a></li><li class="interlanguage-link interwiki-zh-yue mw-list-item"><a href="https://zh-yue.wikipedia.org/wiki/%E8%AA%9E%E8%A8%80%E6%A8%A1%E5%9E%8B" title="語言模型 – Cantonese" lang="yue" hreflang="yue" class="interlanguage-link-target"><span>粵語</span></a></li><li class="interlanguage-link interwiki-zh mw-list-item"><a href="https://zh.wikipedia.org/wiki/%E8%AA%9E%E8%A8%80%E6%A8%A1%E5%9E%8B" title="語言模型 – Chinese" lang="zh" hreflang="zh" class="interlanguage-link-target"><span>中文</span></a></li></ul>
			<div class="after-portlet after-portlet-lang"><span class="wb-langlinks-edit wb-langlinks-link"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q3621696#sitelinks-wikipedia" title="Edit interlanguage links" class="wbc-editpage">Edit links</a></span></div>
		</div>

	</div>
</div>
				</header>
				<div class="vector-page-toolbar">
					<div class="vector-page-toolbar-container">
						<div id="left-navigation">
							<nav aria-label="Namespaces">
								
<div id="p-associated-pages" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-associated-pages"  >
	<div class="vector-menu-content">
		
		<ul class="vector-menu-content-list">
			<li id="ca-nstab-main" class="selected vector-tab-noicon mw-list-item"><a data-mw="interface" href="/wiki/Language_model" title="View the content page [c]" accesskey="c" class=""><span>Article</span></a>
</li>
<li id="ca-talk" class="vector-tab-noicon mw-list-item"><a data-mw="interface" href="/wiki/Talk:Language_model" rel="discussion" title="Discuss improvements to the content page [t]" accesskey="t" class=""><span>Talk</span></a>
</li>

		</ul>
		
	</div>
</div>

								

<div id="p-variants" class="vector-menu vector-dropdown vector-menu-dropdown mw-portlet mw-portlet-variants emptyPortlet"  >
	<input type="checkbox"
		id="p-variants-checkbox"
		role="button"
		aria-haspopup="true"
		data-event-name="ui.dropdown-p-variants"
		class="vector-dropdown-checkbox vector-menu-checkbox"
		aria-label="Change language variant"
		
		
	/>
	<label
		id="p-variants-label"
		for="p-variants-checkbox"
		class="vector-dropdown-label vector-menu-heading "
		aria-hidden="true"
		
	>
		
		<span class="vector-dropdown-label-text vector-menu-heading-label">English</span>
	</label>
	<div class="vector-menu-content vector-dropdown-content">

	<div class="vector-menu-content">
		
		<ul class="vector-menu-content-list"></ul>
		
	</div>

	</div>
</div>
							</nav>
						</div>
						<div id="right-navigation" class="vector-collapsible">
							<nav aria-label="Views">
								
<div id="p-views" class="vector-menu vector-menu-tabs mw-portlet mw-portlet-views"  >
	<div class="vector-menu-content">
		
		<ul class="vector-menu-content-list">
			<li id="ca-view" class="selected vector-tab-noicon mw-list-item"><a data-mw="interface" href="/wiki/Language_model" class=""><span>Read</span></a>
</li>
<li id="ca-edit" class="vector-tab-noicon mw-list-item"><a data-mw="interface" href="/w/index.php?title=Language_model&amp;action=edit" title="Edit this page [e]" accesskey="e" class=""><span>Edit</span></a>
</li>
<li id="ca-history" class="vector-tab-noicon mw-list-item"><a data-mw="interface" href="/w/index.php?title=Language_model&amp;action=history" title="Past revisions of this page [h]" accesskey="h" class=""><span>View history</span></a>
</li>

		</ul>
		
	</div>
</div>

							</nav>
				
							<nav class="vector-page-tools-landmark" aria-label="More options">
								
<div id="vector-page-tools-dropdown" class="vector-menu vector-dropdown vector-menu-dropdown vector-page-tools-dropdown"  >
	<input type="checkbox"
		id="vector-page-tools-dropdown-checkbox"
		role="button"
		aria-haspopup="true"
		data-event-name="ui.dropdown-vector-page-tools-dropdown"
		class="vector-dropdown-checkbox vector-menu-checkbox "
		
		aria-label="Tools"
		
	/>
	<label
		id="vector-page-tools-dropdown-label"
		for="vector-page-tools-dropdown-checkbox"
		class="vector-dropdown-label vector-menu-heading cdx-button cdx-button--fake-button cdx-button--fake-button--enabled cdx-button--weight-quiet"
		aria-hidden="true"
		
	>
		
		<span class="vector-dropdown-label-text vector-menu-heading-label">Tools</span>
	</label>
	<div class="vector-menu-content vector-dropdown-content">


									<div id="vector-page-tools-unpinned-container" class="vector-unpinned-container">
						
<div id="vector-page-tools" class="vector-page-tools vector-pinnable-element">
	<div
	class="vector-pinnable-header vector-page-tools-pinnable-header vector-pinnable-header-unpinned"
	data-feature-name="page-tools-pinned"
	data-pinnable-element-id="vector-page-tools"
	data-pinned-container-id="vector-page-tools-pinned-container"
	data-unpinned-container-id="vector-page-tools-unpinned-container"
>
	<div class="vector-pinnable-header-label">Tools</div>
	<button class="vector-pinnable-header-toggle-button vector-pinnable-header-pin-button" data-event-name="pinnable-header.vector-page-tools.pin">move to sidebar</button>
	<button class="vector-pinnable-header-toggle-button vector-pinnable-header-unpin-button" data-event-name="pinnable-header.vector-page-tools.unpin">hide</button>
</div>

	
<div id="p-cactions" class="vector-menu mw-portlet mw-portlet-cactions emptyPortlet vector-has-collapsible-items"  title="More options" >
	<div class="vector-menu-heading">
		Actions
	</div>
	<div class="vector-menu-content">
		
		<ul class="vector-menu-content-list"><li id="ca-more-view" class="selected vector-more-collapsible-item mw-list-item"><a href="/wiki/Language_model"><span>Read</span></a></li><li id="ca-more-edit" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Language_model&amp;action=edit"><span>Edit</span></a></li><li id="ca-more-history" class="vector-more-collapsible-item mw-list-item"><a href="/w/index.php?title=Language_model&amp;action=history"><span>View history</span></a></li></ul>
		
	</div>
</div>

<div id="p-tb" class="vector-menu mw-portlet mw-portlet-tb"  >
	<div class="vector-menu-heading">
		General
	</div>
	<div class="vector-menu-content">
		
		<ul class="vector-menu-content-list"><li id="t-whatlinkshere" class="mw-list-item"><a href="/wiki/Special:WhatLinksHere/Language_model" title="List of all English Wikipedia pages containing links to this page [j]" accesskey="j"><span>What links here</span></a></li><li id="t-recentchangeslinked" class="mw-list-item"><a href="/wiki/Special:RecentChangesLinked/Language_model" rel="nofollow" title="Recent changes in pages linked from this page [k]" accesskey="k"><span>Related changes</span></a></li><li id="t-upload" class="mw-list-item"><a href="/wiki/Wikipedia:File_Upload_Wizard" title="Upload files [u]" accesskey="u"><span>Upload file</span></a></li><li id="t-specialpages" class="mw-list-item"><a href="/wiki/Special:SpecialPages" title="A list of all special pages [q]" accesskey="q"><span>Special pages</span></a></li><li id="t-permalink" class="mw-list-item"><a href="/w/index.php?title=Language_model&amp;oldid=1161280651" title="Permanent link to this revision of this page"><span>Permanent link</span></a></li><li id="t-info" class="mw-list-item"><a href="/w/index.php?title=Language_model&amp;action=info" title="More information about this page"><span>Page information</span></a></li><li id="t-cite" class="mw-list-item"><a href="/w/index.php?title=Special:CiteThisPage&amp;page=Language_model&amp;id=1161280651&amp;wpFormIdentifier=titleform" title="Information on how to cite this page"><span>Cite this page</span></a></li><li id="t-wikibase" class="mw-list-item"><a href="https://www.wikidata.org/wiki/Special:EntityPage/Q3621696" title="Structured data on this page hosted by Wikidata [g]" accesskey="g"><span>Wikidata item</span></a></li></ul>
		
	</div>
</div>

<div id="p-coll-print_export" class="vector-menu mw-portlet mw-portlet-coll-print_export"  >
	<div class="vector-menu-heading">
		Print/export
	</div>
	<div class="vector-menu-content">
		
		<ul class="vector-menu-content-list"><li id="coll-download-as-rl" class="mw-list-item"><a href="/w/index.php?title=Special:DownloadAsPdf&amp;page=Language_model&amp;action=show-download-screen" title="Download this page as a PDF file"><span>Download as PDF</span></a></li><li id="t-print" class="mw-list-item"><a href="/w/index.php?title=Language_model&amp;printable=yes" title="Printable version of this page [p]" accesskey="p"><span>Printable version</span></a></li></ul>
		
	</div>
</div>

</div>

									</div>
				
	</div>
</div>
							</nav>
						</div>
					</div>
				</div>
				<div class="vector-column-end">
					<nav class="vector-page-tools-landmark vector-sticky-pinned-container" aria-label="More options">
						<div id="vector-page-tools-pinned-container" class="vector-pinned-container">
			
						</div>
	</nav>
				</div>
				<div id="bodyContent" class="vector-body" aria-labelledby="firstHeading" data-mw-ve-target-container>
					<div class="vector-body-before-content">
							<div class="mw-indicators">
		</div>

						<div id="siteSub" class="noprint">From Wikipedia, the free encyclopedia</div>
					</div>
					<div id="contentSub"><div id="mw-content-subtitle"></div></div>
					
					
					<div id="mw-content-text" class="mw-body-content mw-content-ltr" lang="en" dir="ltr"><div class="mw-parser-output"><div class="shortdescription nomobile noexcerpt noprint searchaux" style="display:none">Statistical model of structure of language</div>
<p class="mw-empty-elt">
A **language model** is a probability distribution over sequences of words.[1] Given any sequence of words of length *m*, a language model assigns a probability $P(w_1,\ldots,w_m)$ to the whole sequence. Language models generate probabilities by training on text corpora in one or many languages. Given that languages can be used to express an infinite variety of valid sentences (the property of digital infinity), language modeling faces the problem of assigning non-zero probabilities to linguistically valid sequences that may never be encountered in the training data. Several modelling approaches have been designed to surmount this problem, such as applying the Markov assumption or using neural architectures such as recurrent neural networks or transformers.
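By the chain rule of probability (a standard identity, not specific to any one model), this joint probability factors into per-word conditional probabilities; the modelling approaches described below differ mainly in how they approximate each conditional factor:

$$P(w_1,\ldots,w_m) \;=\; \prod_{i=1}^{m} P(w_i \mid w_1,\ldots,w_{i-1})$$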
Language models are useful for a variety of problems in computational linguistics: from initial applications in speech recognition[2] to ensure nonsensical (i.e. low-probability) word sequences are not predicted, to wider use in machine translation[3] (e.g. scoring candidate translations), natural language generation (generating more human-like text), part-of-speech tagging, parsing,[3] optical character recognition, handwriting recognition,[4] grammar induction,[5] information retrieval,[6][7] and other applications.
Language models are used in information retrieval in the query likelihood model. There, a separate language model is associated with each document in a collection. Documents are ranked based on the probability of the query $Q$ in the document's language model $M_d$: $P(Q \mid M_d)$. Commonly, the unigram language model is used for this purpose.
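A minimal sketch of query likelihood ranking with unigram document models. The whitespace tokenization, the add-one smoothing, and the toy collection are illustrative assumptions, not details given in the article:

```python
from collections import Counter

def unigram_model(doc_tokens, vocab_size, alpha=1.0):
    """Return a smoothed unigram probability function P(w | M_d)."""
    counts = Counter(doc_tokens)
    total = len(doc_tokens)
    # Add-one (Laplace) smoothing so unseen query words get non-zero probability.
    return lambda w: (counts[w] + alpha) / (total + alpha * vocab_size)

def query_likelihood(query_tokens, model):
    """P(Q | M_d), treating query words as independent (unigram assumption)."""
    p = 1.0
    for w in query_tokens:
        p *= model(w)
    return p

# Illustrative toy collection (assumed for the example).
docs = {
    "d1": "the red house is on the hill".split(),
    "d2": "I saw a boat by the river".split(),
}
vocab = {w for toks in docs.values() for w in toks}
query = "red house".split()

ranking = sorted(
    docs,
    key=lambda d: query_likelihood(query, unigram_model(docs[d], len(vocab))),
    reverse=True,
)
print(ranking)  # documents ordered by P(Q | M_d); "d1" ranks first here
```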
Since 2018, large language models (LLMs) consisting of deep neural networks with billions of trainable parameters, trained on massive datasets of unlabelled text, have demonstrated impressive results on a wide variety of natural language processing tasks. This development has led to a shift in research focus toward the use of general-purpose LLMs.[8]
## Model types
<h3><span class="mw-headline" id="n-gram"><i>n</i>-gram</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=2" title="Edit section: n-gram">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
<div class="excerpt-block"><style data-mw-deduplicate="TemplateStyles:r1066933788">.mw-parser-output .excerpt-hat .mw-editsection-like{font-style:normal}</style><style data-mw-deduplicate="TemplateStyles:r1033289096">.mw-parser-output .hatnote{font-style:italic}.mw-parser-output div.hatnote{padding-left:1.6em;margin-bottom:0.5em}.mw-parser-output .hatnote i{font-style:normal}.mw-parser-output .hatnote+link+.hatnote{margin-top:-0.5em}</style><div role="note" class="hatnote navigation-not-searchable dablink excerpt-hat selfref">This section is an excerpt from <a href="/wiki/N-gram_language_model" title="N-gram language model">N-gram language model</a>.<span class="mw-editsection-like plainlinks"><span class="mw-editsection-bracket">[</span><a class="external text" href="https://en.wikipedia.org/w/index.php?title=N-gram_language_model&amp;action=edit">edit</a><span class="mw-editsection-bracket">]</span></span></div><div class="excerpt">
An n-gram language model is a language model that models sequences of words as a Markov process. It makes use of the simplifying assumption that the probability of the next word in a sequence depends only on a fixed-size window of previous words. A bigram model considers one previous word, a trigram model considers two, and in general, an *n*-gram model considers *n*−1 words of previous context.[9]
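Written out, this Markov assumption truncates each conditional factor of the chain rule to the most recent $n-1$ words:

$$P(w_i \mid w_1,\ldots,w_{i-1}) \;\approx\; P(w_i \mid w_{i-n+1},\ldots,w_{i-1})$$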
For example, a bigram language model models the probability of the sentence *I saw the red house* as:

$$P(\text{I, saw, the, red, house}) \approx P(\text{I} \mid \langle s\rangle)\, P(\text{saw} \mid \text{I})\, P(\text{the} \mid \text{saw})\, P(\text{red} \mid \text{the})\, P(\text{house} \mid \text{red})\, P(\langle/s\rangle \mid \text{house})$$
where $\langle s\rangle$ and $\langle/s\rangle$ are special tokens denoting the start and end of a sentence.
</p><p>These conditional probabilities may be estimated based on frequency counts in some <a href="/wiki/Text_corpus" title="Text corpus">text corpus</a>. For example, $P({\text{saw}}\mid {\text{I}})$ can be naively estimated as the proportion of occurrences of the word <i>I</i> that are followed by <i>saw</i> in the corpus. The problem of sparsity (for example, if the bigram "red house" has zero occurrences in our corpus) may necessitate modifying the basic Markov model with <a href="/wiki/Smoothing" title="Smoothing">smoothing</a> techniques, particularly when using larger context windows, as sketched below.<sup id="cite_ref-N-gram_language_model_jm_9-1" class="reference"><a href="#cite_note-N-gram_language_model_jm-9">&#91;9&#93;</a></sup>
</p>
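<p>As a minimal illustration of this counting approach (the toy corpus and all names here are invented for the example, not taken from a real system), the following Python sketch estimates bigram probabilities from counts, with add-one (Laplace) smoothing as one simple instance of the smoothing techniques mentioned above:</p>
<pre>
from collections import Counter

# Toy corpus; the tokens ⟨s⟩ and ⟨/s⟩ mark sentence boundaries as above.
corpus = [["⟨s⟩", "I", "saw", "the", "red", "house", "⟨/s⟩"],
          ["⟨s⟩", "I", "saw", "a", "dog", "⟨/s⟩"]]

unigrams = Counter(w for sent in corpus for w in sent)
bigrams = Counter(pair for sent in corpus for pair in zip(sent, sent[1:]))
vocab_size = len(unigrams)

def p_mle(w2, w1):
    # Naive estimate: proportion of occurrences of w1 followed by w2.
    return bigrams[(w1, w2)] / unigrams[w1]

def p_laplace(w2, w1):
    # Add-one smoothing gives unseen bigrams a small non-zero probability.
    return (bigrams[(w1, w2)] + 1) / (unigrams[w1] + vocab_size)

print(p_mle("saw", "I"))        # 1.0 in this toy corpus
print(p_laplace("red", "dog"))  # small but non-zero despite zero count
</pre>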
<p>n-gram models are no longer commonly used in <a href="/wiki/Natural_language_processing" title="Natural language processing">natural language processing</a> research and applications, as they have been supplanted by state-of-the-art <a href="/wiki/Neural_language_model" class="mw-redirect" title="Neural language model">deep learning methods</a>, most recently <a href="/wiki/Large_language_model" title="Large language model">large language models</a>.</p></div></div>
<h3><span class="mw-headline" id="Exponential">Exponential</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=3" title="Edit section: Exponential">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
<p><a href="/wiki/Principle_of_maximum_entropy" title="Principle of maximum entropy">Maximum entropy</a> language models encode the relationship between a word and the n-gram history using feature functions. The equation is
</p><p><div class="mwe-math-element"><div class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle P(w_{m}\mid w_{1},\ldots ,w_{m-1})={\frac {1}{Z(w_{1},\ldots ,w_{m-1})}}\exp(a^{T}f(w_{1},\ldots ,w_{m}))}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>P</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>m</mi>
          </mrow>
        </msub>
        <mo>&#x2223;<!-- ∣ --></mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>m</mi>
            <mo>&#x2212;<!-- − --></mo>
            <mn>1</mn>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
        <mo>=</mo>
        <mrow class="MJX-TeXAtom-ORD">
          <mfrac>
            <mn>1</mn>
            <mrow>
              <mi>Z</mi>
              <mo stretchy="false">(</mo>
              <msub>
                <mi>w</mi>
                <mrow class="MJX-TeXAtom-ORD">
                  <mn>1</mn>
                </mrow>
              </msub>
              <mo>,</mo>
              <mo>&#x2026;<!-- … --></mo>
              <mo>,</mo>
              <msub>
                <mi>w</mi>
                <mrow class="MJX-TeXAtom-ORD">
                  <mi>m</mi>
                  <mo>&#x2212;<!-- − --></mo>
                  <mn>1</mn>
                </mrow>
              </msub>
              <mo stretchy="false">)</mo>
            </mrow>
          </mfrac>
        </mrow>
        <mi>exp</mi>
        <mo>&#x2061;<!-- ⁡ --></mo>
        <mo stretchy="false">(</mo>
        <msup>
          <mi>a</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>T</mi>
          </mrow>
        </msup>
        <mi>f</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>m</mi>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
        <mo stretchy="false">)</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle P(w_{m}\mid w_{1},\ldots ,w_{m-1})={\frac {1}{Z(w_{1},\ldots ,w_{m-1})}}\exp(a^{T}f(w_{1},\ldots ,w_{m}))}</annotation>
  </semantics>
</math></div><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/99beafb1eed251682c4037f19a4d80da3003cd4e" class="mwe-math-fallback-image-display" aria-hidden="true" style="vertical-align: -2.671ex; width:65.619ex; height:6.009ex;" alt="{\displaystyle P(w_{m}\mid w_{1},\ldots ,w_{m-1})={\frac {1}{Z(w_{1},\ldots ,w_{m-1})}}\exp(a^{T}f(w_{1},\ldots ,w_{m}))}"></div>
</p><p>where <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle Z(w_{1},\ldots ,w_{m-1})}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>Z</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>m</mi>
            <mo>&#x2212;<!-- − --></mo>
            <mn>1</mn>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle Z(w_{1},\ldots ,w_{m-1})}</annotation>
  </semantics>
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/db345c7489ff361147f1363116ee351dc88b5719" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.838ex; width:16.826ex; height:2.843ex;" alt="{\displaystyle Z(w_{1},\ldots ,w_{m-1})}"></span> is the <a href="/wiki/Partition_function_(mathematics)" title="Partition function (mathematics)">partition function</a>, <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle a}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>a</mi>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle a}</annotation>
  </semantics>
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ffd2487510aa438433a2579450ab2b3d557e5edc" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.338ex; width:1.23ex; height:1.676ex;" alt="a"></span> is the parameter vector, and <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle f(w_{1},\ldots ,w_{m})}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>f</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>m</mi>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle f(w_{1},\ldots ,w_{m})}</annotation>
  </semantics>
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/899a4aa9d383560f5fe3446a9ac9693c225bb184" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.838ex; width:14.324ex; height:2.843ex;" alt="{\displaystyle f(w_{1},\ldots ,w_{m})}"></span> is the feature function. In the simplest case, the feature function is just an indicator of the presence of a certain n-gram. It is helpful to use a prior on <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle a}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>a</mi>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle a}</annotation>
  </semantics>
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ffd2487510aa438433a2579450ab2b3d557e5edc" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.338ex; width:1.23ex; height:1.676ex;" alt="a"></span> or some form of regularization.
</p><p>The log-bilinear model is another example of an exponential language model.
</p>
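<p>As an illustration of the formula above, here is a minimal Python sketch of how an exponential model turns feature scores into a conditional probability; the feature functions, the weights in the parameter vector, and the tiny vocabulary are all invented for the example. The partition function $Z$ simply normalizes the exponentiated scores over the vocabulary:</p>
<pre>
import math

vocab = ["the", "red", "house", "saw"]

def features(history, w):
    # Simplest case from the text: indicator features for certain n-grams.
    return [1.0 if (history[-1], w) == ("red", "house") else 0.0,
            1.0 if w == "the" else 0.0]

a = [2.0, 0.5]  # parameter vector (hypothetical values)

def p(w, history):
    # exp(a^T f(...)) for a candidate word, normalized by Z(history).
    score = lambda u: math.exp(sum(ai * fi
                                   for ai, fi in zip(a, features(history, u))))
    z = sum(score(u) for u in vocab)  # partition function Z
    return score(w) / z

print(p("house", ["the", "red"]))  # boosted by the ("red", "house") feature
</pre>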
<h3><span class="mw-headline" id="Neural_network">Neural network</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=4" title="Edit section: Neural network">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
<p>Neural language models (or <i>continuous space language models</i>) use continuous representations or <a href="/wiki/Word_embedding" title="Word embedding">embeddings of words</a> to make their predictions.<sup id="cite_ref-10" class="reference"><a href="#cite_note-10">&#91;10&#93;</a></sup> These models make use of <a href="/wiki/Artificial_neural_network" title="Artificial neural network">neural networks</a>.
</p><p>Continuous space embeddings help to alleviate the <a href="/wiki/Curse_of_dimensionality" title="Curse of dimensionality">curse of dimensionality</a> in language modeling: as language models are trained on larger and larger texts, the number of unique words (the vocabulary) increases.<sup id="cite_ref-11" class="reference"><a href="#cite_note-11">&#91;a&#93;</a></sup> The number of possible sequences of words increases <a href="/wiki/Exponential_growth" title="Exponential growth">exponentially</a> with the size of the vocabulary, causing a data sparsity problem: almost all of the possible sequences are never observed in training, so their probabilities cannot be estimated reliably from counts alone. Neural networks avoid this problem by representing words in a <a href="/wiki/Distributed_representation" class="mw-redirect" title="Distributed representation">distributed</a> way, as non-linear combinations of weights in a neural net.<sup id="cite_ref-bengio_12-0" class="reference"><a href="#cite_note-bengio-12">&#91;11&#93;</a></sup> An alternative description is that a neural net approximates the language function. The neural net architecture might be <a href="/wiki/Feedforward_neural_network" title="Feedforward neural network">feed-forward</a> or <a href="/wiki/Recurrent_neural_network" title="Recurrent neural network">recurrent</a>; while the former is simpler, the latter is more common.
</p><p>Typically, neural net language models are constructed and trained as <a href="/wiki/Probabilistic_classifier" class="mw-redirect" title="Probabilistic classifier">probabilistic classifiers</a> that learn to predict a probability distribution
</p><p><div class="mwe-math-element"><div class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle P(w_{t}\mid \mathrm {context} )\,\forall t\in V.}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>P</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
          </mrow>
        </msub>
        <mo>&#x2223;<!-- ∣ --></mo>
        <mrow class="MJX-TeXAtom-ORD">
          <mi mathvariant="normal">c</mi>
          <mi mathvariant="normal">o</mi>
          <mi mathvariant="normal">n</mi>
          <mi mathvariant="normal">t</mi>
          <mi mathvariant="normal">e</mi>
          <mi mathvariant="normal">x</mi>
          <mi mathvariant="normal">t</mi>
        </mrow>
        <mo stretchy="false">)</mo>
        <mspace width="thinmathspace" />
        <mi mathvariant="normal">&#x2200;<!-- ∀ --></mi>
        <mi>t</mi>
        <mo>&#x2208;<!-- ∈ --></mo>
        <mi>V</mi>
        <mo>.</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle P(w_{t}\mid \mathrm {context} )\,\forall t\in V.}</annotation>
  </semantics>
</math></div><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/2ff4b49403457fa140cbaa2a3e0cbad8de34c5a4" class="mwe-math-fallback-image-display" aria-hidden="true" style="vertical-align: -0.838ex; width:23.332ex; height:2.843ex;" alt="{\displaystyle P(w_{t}\mid \mathrm {context} )\,\forall t\in V.}"></div>
</p><p>That is, the network is trained to predict a probability distribution over the vocabulary, given some linguistic context. This is done using standard neural net training algorithms such as <a href="/wiki/Stochastic_gradient_descent" title="Stochastic gradient descent">stochastic gradient descent</a> with <a href="/wiki/Backpropagation" title="Backpropagation">backpropagation</a>.<sup id="cite_ref-bengio_12-1" class="reference"><a href="#cite_note-bengio-12">&#91;11&#93;</a></sup> The context might be a fixed-size window of previous words, so that the network predicts
</p><p><div class="mwe-math-element"><div class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle P(w_{t}\mid w_{t-k},\dots ,w_{t-1})}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>P</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
          </mrow>
        </msub>
        <mo>&#x2223;<!-- ∣ --></mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>&#x2212;<!-- − --></mo>
            <mi>k</mi>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>&#x2212;<!-- − --></mo>
            <mn>1</mn>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle P(w_{t}\mid w_{t-k},\dots ,w_{t-1})}</annotation>
  </semantics>
</math></div><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/552c89bd04dd161873451c887987b771f28b85f7" class="mwe-math-fallback-image-display" aria-hidden="true" style="vertical-align: -0.838ex; width:22.376ex; height:2.843ex;" alt="{\displaystyle P(w_{t}\mid w_{t-k},\dots ,w_{t-1})}"></div>
</p><p>from a <a href="/wiki/Feature_vector" class="mw-redirect" title="Feature vector">feature vector</a> representing the previous <span class="texhtml mvar" style="font-style:italic;">k</span> words.<sup id="cite_ref-bengio_12-2" class="reference"><a href="#cite_note-bengio-12">&#91;11&#93;</a></sup> Another option is to use "future" words as well as "past" words as features,<sup id="cite_ref-:0_13-0" class="reference"><a href="#cite_note-:0-13">&#91;12&#93;</a></sup> so that the estimated probability is
</p><p><div class="mwe-math-element"><div class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle P(w_{t}\mid w_{t-k},\dots ,w_{t-1},w_{t+1},\dots ,w_{t+k}).}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>P</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
          </mrow>
        </msub>
        <mo>&#x2223;<!-- ∣ --></mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>&#x2212;<!-- − --></mo>
            <mi>k</mi>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>&#x2212;<!-- − --></mo>
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>+</mo>
            <mn>1</mn>
          </mrow>
        </msub>
        <mo>,</mo>
        <mo>&#x2026;<!-- … --></mo>
        <mo>,</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>+</mo>
            <mi>k</mi>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
        <mo>.</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle P(w_{t}\mid w_{t-k},\dots ,w_{t-1},w_{t+1},\dots ,w_{t+k}).}</annotation>
  </semantics>
</math></div><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/79647f9068edd4486281b0afc1d57b7239366d2e" class="mwe-math-fallback-image-display" aria-hidden="true" style="vertical-align: -0.838ex; width:38.451ex; height:2.843ex;" alt="{\displaystyle P(w_{t}\mid w_{t-k},\dots ,w_{t-1},w_{t+1},\dots ,w_{t+k}).}"></div>
</p><p>This is called a <a href="/wiki/Bag-of-words" class="mw-redirect" title="Bag-of-words">bag-of-words</a> model. When the <a href="/wiki/Feature_vector" class="mw-redirect" title="Feature vector">feature vectors</a> for the words in the context are combined by a continuous operation, this model is referred to as the continuous bag-of-words architecture (CBOW).<sup id="cite_ref-mikolov_14-0" class="reference"><a href="#cite_note-mikolov-14">&#91;13&#93;</a></sup>
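<p>A minimal sketch of the CBOW prediction step, with invented toy data; a real model would learn the embedding and output matrices by gradient descent rather than leave them random:</p>
<pre>
import numpy as np

rng = np.random.default_rng(0)
vocab = ["I", "saw", "the", "red", "house"]
idx = {w: i for i, w in enumerate(vocab)}
dim = 8

E = rng.normal(size=(len(vocab), dim))  # word feature vectors (untrained here)
W = rng.normal(size=(dim, len(vocab)))  # output layer

def predict(context):
    # CBOW: combine the context word vectors by a continuous operation
    # (here, their mean), then score every word in the vocabulary.
    h = np.mean([E[idx[w]] for w in context], axis=0)
    scores = h @ W
    e = np.exp(scores - scores.max())
    return e / e.sum()  # P(w_t | context) over the whole vocabulary

probs = predict(["saw", "the", "house"])  # e.g. context around "red"
print(dict(zip(vocab, probs.round(3))))
</pre>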
<p>A third option, which trains more slowly than CBOW but performs slightly better, is to invert the previous problem and make a neural network learn the context given a word.<sup id="cite_ref-mikolov_14-1" class="reference"><a href="#cite_note-mikolov-14">&#91;13&#93;</a></sup> More formally, given a sequence of training words $w_{1},w_{2},w_{3},\dots ,w_{T}$, one maximizes the average log-probability
</p><p><div class="mwe-math-element"><div class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle {\frac {1}{T}}\sum _{t=1}^{T}\sum _{-k\leq j\leq k,j\neq 0}\log P(w_{t+j}\mid w_{t})}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mrow class="MJX-TeXAtom-ORD">
          <mfrac>
            <mn>1</mn>
            <mi>T</mi>
          </mfrac>
        </mrow>
        <munderover>
          <mo>&#x2211;<!-- ∑ --></mo>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>=</mo>
            <mn>1</mn>
          </mrow>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>T</mi>
          </mrow>
        </munderover>
        <munder>
          <mo>&#x2211;<!-- ∑ --></mo>
          <mrow class="MJX-TeXAtom-ORD">
            <mo>&#x2212;<!-- − --></mo>
            <mi>k</mi>
            <mo>&#x2264;<!-- ≤ --></mo>
            <mi>j</mi>
            <mo>&#x2264;<!-- ≤ --></mo>
            <mi>k</mi>
            <mo>,</mo>
            <mi>j</mi>
            <mo>&#x2260;<!-- ≠ --></mo>
            <mn>0</mn>
          </mrow>
        </munder>
        <mi>log</mi>
        <mo>&#x2061;<!-- ⁡ --></mo>
        <mi>P</mi>
        <mo stretchy="false">(</mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
            <mo>+</mo>
            <mi>j</mi>
          </mrow>
        </msub>
        <mo>&#x2223;<!-- ∣ --></mo>
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
          </mrow>
        </msub>
        <mo stretchy="false">)</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle {\frac {1}{T}}\sum _{t=1}^{T}\sum _{-k\leq j\leq k,j\neq 0}\log P(w_{t+j}\mid w_{t})}</annotation>
  </semantics>
</math></div><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/ce4e660d09404e518ae9449efcc094a1bc63f11f" class="mwe-math-fallback-image-display" aria-hidden="true" style="vertical-align: -3.505ex; width:32.237ex; height:7.843ex;" alt="{\displaystyle {\frac {1}{T}}\sum _{t=1}^{T}\sum _{-k\leq j\leq k,j\neq 0}\log P(w_{t+j}\mid w_{t})}"></div>
</p><p>where <span class="texhtml mvar" style="font-style:italic;">k</span>, the size of the training context, can be a function of the center word <span class="mwe-math-element"><span class="mwe-math-mathml-inline mwe-math-mathml-a11y" style="display: none;"><math xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle w_{t}}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <msub>
          <mi>w</mi>
          <mrow class="MJX-TeXAtom-ORD">
            <mi>t</mi>
          </mrow>
        </msub>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle w_{t}}</annotation>
  </semantics>
</math></span><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/6cc7b029066adbf68868f39f3dceb58eab2d1a12" class="mwe-math-fallback-image-inline" aria-hidden="true" style="vertical-align: -0.671ex; width:2.49ex; height:2.009ex;" alt="w_{t}"></span>. This is called a <a href="/wiki/Skip-gram" class="mw-redirect" title="Skip-gram">skip-gram</a> language model.<sup id="cite_ref-compositionality_15-0" class="reference"><a href="#cite_note-compositionality-15">&#91;14&#93;</a></sup> Bag-of-words and skip-gram models are the basis of the <a href="/wiki/Word2vec" title="Word2vec">word2vec</a> program.<sup id="cite_ref-16" class="reference"><a href="#cite_note-16">&#91;15&#93;</a></sup>
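<p>A sketch of the skip-gram objective above in Python. The uniform "model" passed in is a stand-in for a trained one (word2vec itself learns embeddings and uses a softmax or sampling-based approximation); the point here is only the structure of the double sum:</p>
<pre>
import math

def avg_log_prob(words, k, p):
    # (1/T) * sum over t of sum over -k <= j <= k, j != 0
    # of log P(w_{t+j} | w_t), skipping positions outside the sequence.
    T = len(words)
    total = 0.0
    for t in range(T):
        for j in range(-k, k + 1):
            if j != 0 and 0 <= t + j < T:
                total += math.log(p(words[t + j], words[t]))
    return total / T

# Stand-in model: uniform over a 5-word vocabulary.
uniform = lambda w_context, w_center: 1.0 / 5
print(avg_log_prob(["I", "saw", "the", "red", "house"], 2, uniform))
</pre>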
<p>Instead of using neural net language models to produce actual probabilities, it is common to use the distributed representation encoded in the networks' "hidden" layers as representations of words; each word is then mapped onto an <i>n</i>-dimensional real vector called the <a href="/wiki/Word_embedding" title="Word embedding">word embedding</a>, where <i>n</i> is the size of the layer just before the output layer. The representations in skip-gram models have the distinct characteristic that they model semantic relations between words as <a href="/wiki/Linear_combination" title="Linear combination">linear combinations</a>, capturing a form of <a href="/wiki/Compositionality" class="mw-redirect" title="Compositionality">compositionality</a>. For example, in some such models, if <i>v</i> is the function that maps a word <i>w</i> to its <i>n</i>-dimensional vector representation, then
</p><p><div class="mwe-math-element"><div class="mwe-math-mathml-display mwe-math-mathml-a11y" style="display: none;"><math display="block" xmlns="http://www.w3.org/1998/Math/MathML"  alttext="{\displaystyle v(\mathrm {king} )-v(\mathrm {male} )+v(\mathrm {female} )\approx v(\mathrm {queen} )}">
  <semantics>
    <mrow class="MJX-TeXAtom-ORD">
      <mstyle displaystyle="true" scriptlevel="0">
        <mi>v</mi>
        <mo stretchy="false">(</mo>
        <mrow class="MJX-TeXAtom-ORD">
          <mi mathvariant="normal">k</mi>
          <mi mathvariant="normal">i</mi>
          <mi mathvariant="normal">n</mi>
          <mi mathvariant="normal">g</mi>
        </mrow>
        <mo stretchy="false">)</mo>
        <mo>&#x2212;<!-- − --></mo>
        <mi>v</mi>
        <mo stretchy="false">(</mo>
        <mrow class="MJX-TeXAtom-ORD">
          <mi mathvariant="normal">m</mi>
          <mi mathvariant="normal">a</mi>
          <mi mathvariant="normal">l</mi>
          <mi mathvariant="normal">e</mi>
        </mrow>
        <mo stretchy="false">)</mo>
        <mo>+</mo>
        <mi>v</mi>
        <mo stretchy="false">(</mo>
        <mrow class="MJX-TeXAtom-ORD">
          <mi mathvariant="normal">f</mi>
          <mi mathvariant="normal">e</mi>
          <mi mathvariant="normal">m</mi>
          <mi mathvariant="normal">a</mi>
          <mi mathvariant="normal">l</mi>
          <mi mathvariant="normal">e</mi>
        </mrow>
        <mo stretchy="false">)</mo>
        <mo>&#x2248;<!-- ≈ --></mo>
        <mi>v</mi>
        <mo stretchy="false">(</mo>
        <mrow class="MJX-TeXAtom-ORD">
          <mi mathvariant="normal">q</mi>
          <mi mathvariant="normal">u</mi>
          <mi mathvariant="normal">e</mi>
          <mi mathvariant="normal">e</mi>
          <mi mathvariant="normal">n</mi>
        </mrow>
        <mo stretchy="false">)</mo>
      </mstyle>
    </mrow>
    <annotation encoding="application/x-tex">{\displaystyle v(\mathrm {king} )-v(\mathrm {male} )+v(\mathrm {female} )\approx v(\mathrm {queen} )}</annotation>
  </semantics>
</math></div><img src="https://wikimedia.org/api/rest_v1/media/math/render/svg/8b2186a8c7c082260c363b8c16677f39fedaba00" class="mwe-math-fallback-image-display" aria-hidden="true" style="vertical-align: -0.838ex; width:42.202ex; height:2.843ex;" alt="{\displaystyle v(\mathrm {king} )-v(\mathrm {male} )+v(\mathrm {female} )\approx v(\mathrm {queen} )}"></div>
</p><p>where ≈ is made precise by stipulating that its right-hand side must be the <a href="/wiki/Nearest_neighbor_search" title="Nearest neighbor search">nearest neighbor</a> of the value of the left-hand side.<sup id="cite_ref-mikolov_14-2" class="reference"><a href="#cite_note-mikolov-14">&#91;13&#93;</a></sup><sup id="cite_ref-compositionality_15-1" class="reference"><a href="#cite_note-compositionality-15">&#91;14&#93;</a></sup>
</p>
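<p>As an illustration with invented toy vectors (real embeddings come from a trained model), the analogy above is resolved by exactly this nearest-neighbor search:</p>
<pre>
import numpy as np

emb = {"king":   np.array([0.9, 0.8, 0.1]),
       "queen":  np.array([0.9, 0.1, 0.8]),
       "male":   np.array([0.1, 0.9, 0.1]),
       "female": np.array([0.1, 0.1, 0.9]),
       "house":  np.array([0.5, 0.5, 0.5])}

def nearest(v, exclude):
    # Nearest neighbor by cosine similarity, excluding the query words.
    cos = lambda a, b: (a @ b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return max((w for w in emb if w not in exclude),
               key=lambda w: cos(emb[w], v))

target = emb["king"] - emb["male"] + emb["female"]
print(nearest(target, {"king", "male", "female"}))  # "queen" with these vectors
</pre>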
<h3><span class="mw-headline" id="Other">Other</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=5" title="Edit section: Other">edit</a><span class="mw-editsection-bracket">]</span></span></h3>
<p>A positional language model<sup id="cite_ref-17" class="reference"><a href="#cite_note-17">&#91;16&#93;</a></sup> assesses the probability of given words occurring close to one another in a text, not necessarily immediately adjacent. Similarly, bag-of-concepts models<sup id="cite_ref-18" class="reference"><a href="#cite_note-18">&#91;17&#93;</a></sup> leverage the semantics associated with multi-word expressions such as <i>buy_christmas_present</i>, even when they are used in information-rich sentences like "today I bought a lot of very nice Christmas presents".
</p><p>Despite the limited success of neural networks in this area,<sup id="cite_ref-19" class="reference"><a href="#cite_note-19">&#91;18&#93;</a></sup> authors acknowledge the need for other techniques when modelling sign languages.
</p>
<h2><span class="mw-headline" id="Evaluation_and_benchmarks">Evaluation and benchmarks</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=6" title="Edit section: Evaluation and benchmarks">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<p>Evaluation of the quality of language models is mostly done by comparison to human-created sample benchmarks derived from typical language-oriented tasks. Other, less established, quality tests examine the intrinsic character of a language model or compare two such models; one such intrinsic test is sketched below. Since language models are typically intended to be dynamic and to learn from the data they see, some proposed evaluations investigate the rate of learning, e.g., through inspection of learning curves.<sup id="cite_ref-20" class="reference"><a href="#cite_note-20">&#91;19&#93;</a></sup></p>
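<p>As one concrete example of an intrinsic test (the specific metric is our illustration; this section does not prescribe one), the following sketch computes a model's cross-entropy on held-out text, i.e. the average number of bits it needs per word; 2 raised to this value is the model's perplexity, and lower is better:</p>
<pre>
import math

def cross_entropy(model_p, words):
    # Average negative log2-probability the model assigns to each held-out
    # word given its preceding history; 2 ** cross_entropy is the perplexity.
    total = sum(-math.log2(model_p(words[:i], w))
                for i, w in enumerate(words))
    return total / len(words)

# Stand-in model: uniform over a 1000-word vocabulary.
uniform = lambda history, w: 1.0 / 1000
held_out = ["I", "saw", "the", "red", "house"]
h = cross_entropy(uniform, held_out)
print(h, 2 ** h)  # about 9.97 bits per word, perplexity about 1000
</pre>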
<p>Various data sets have been developed for use in evaluating language processing systems.<sup id="cite_ref-:0_13-1" class="reference"><a href="#cite_note-:0-13">&#91;12&#93;</a></sup> These include:
</p>
<ul><li>Corpus of Linguistic Acceptability<sup id="cite_ref-21" class="reference"><a href="#cite_note-21">&#91;20&#93;</a></sup></li>
<li>GLUE benchmark<sup id="cite_ref-22" class="reference"><a href="#cite_note-22">&#91;21&#93;</a></sup></li>
<li>Microsoft Research Paraphrase Corpus<sup id="cite_ref-23" class="reference"><a href="#cite_note-23">&#91;22&#93;</a></sup></li>
<li>Multi-Genre Natural Language Inference</li>
<li>Question Natural Language Inference</li>
<li>Quora Question Pairs<sup id="cite_ref-24" class="reference"><a href="#cite_note-24">&#91;23&#93;</a></sup></li>
<li>Recognizing Textual Entailment<sup id="cite_ref-25" class="reference"><a href="#cite_note-25">&#91;24&#93;</a></sup></li>
<li>Semantic Textual Similarity Benchmark</li>
<li>SQuAD question answering Test<sup id="cite_ref-26" class="reference"><a href="#cite_note-26">&#91;25&#93;</a></sup></li>
<li>Stanford Sentiment <a href="/wiki/Treebank" title="Treebank">Treebank</a><sup id="cite_ref-27" class="reference"><a href="#cite_note-27">&#91;26&#93;</a></sup></li>
<li>Winograd NLI</li>
<li>BoolQ, PIQA, SIQA, HellaSwag, WinoGrande, ARC, OpenBookQA, NaturalQuestions, TriviaQA, RACE, MMLU (Measuring Massive Multitask Language Understanding), BIG-bench hard, GSM8k, RealToxicityPrompts, WinoGender, CrowS-Pairs.<sup id="cite_ref-28" class="reference"><a href="#cite_note-28">&#91;27&#93;</a></sup> (<a rel="nofollow" class="external text" href="https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md">LLaMa Benchmark</a>)</li></ul>
<h2><span class="mw-headline" id="Criticism">Criticism</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=7" title="Edit section: Criticism">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<p>Although contemporary language models, such as <a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">GPTs</a>, can be shown to match human performance on some tasks, it is not clear they are plausible <a href="/wiki/Cognitive_model" title="Cognitive model">cognitive models</a>. For instance, recurrent neural networks have been shown to learn patterns humans do not learn and fail to learn patterns that humans do learn.<sup id="cite_ref-29" class="reference"><a href="#cite_note-29">&#91;28&#93;</a></sup>
</p>
<h2><span class="mw-headline" id="See_also">See also</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=8" title="Edit section: See also">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<style data-mw-deduplicate="TemplateStyles:r1147244281">.mw-parser-output .div-col{margin-top:0.3em;column-width:30em}.mw-parser-output .div-col-small{font-size:90%}.mw-parser-output .div-col-rules{column-rule:1px solid #aaa}.mw-parser-output .div-col dl,.mw-parser-output .div-col ol,.mw-parser-output .div-col ul{margin-top:0}.mw-parser-output .div-col li,.mw-parser-output .div-col dd{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .plainlist ol,.mw-parser-output .plainlist ul{line-height:inherit;list-style:none;margin:0}.mw-parser-output .plainlist ol li,.mw-parser-output .plainlist ul li{margin-bottom:0}</style><div class="div-col" style="column-width: 18em;">
<ul><li><a href="/wiki/Cache_language_model" title="Cache language model">Cache language model</a></li>
<li><a href="/wiki/Deep_linguistic_processing" title="Deep linguistic processing">Deep linguistic processing</a></li>
<li><a href="/wiki/Factored_language_model" title="Factored language model">Factored language model</a></li>
<li><a href="/wiki/Generative_pre-trained_transformer" title="Generative pre-trained transformer">Generative pre-trained transformer</a></li>
<li><a href="/wiki/Katz%27s_back-off_model" title="Katz&#39;s back-off model">Katz's back-off model</a></li>
<li><a href="/wiki/Language_technology" title="Language technology">Language technology</a></li>
<li><a href="/wiki/Statistical_model" title="Statistical model">Statistical model</a></li>
<li><a href="/wiki/Ethics_of_artificial_intelligence" title="Ethics of artificial intelligence">Ethics of artificial intelligence</a></li></ul>
</div>
<h2><span class="mw-headline" id="Notes">Notes</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=9" title="Edit section: Notes">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<style data-mw-deduplicate="TemplateStyles:r1011085734">.mw-parser-output .reflist{font-size:90%;margin-bottom:0.5em;list-style-type:decimal}.mw-parser-output .reflist .references{font-size:100%;margin-bottom:0;list-style-type:inherit}.mw-parser-output .reflist-columns-2{column-width:30em}.mw-parser-output .reflist-columns-3{column-width:25em}.mw-parser-output .reflist-columns{margin-top:0.3em}.mw-parser-output .reflist-columns ol{margin-top:0}.mw-parser-output .reflist-columns li{page-break-inside:avoid;break-inside:avoid-column}.mw-parser-output .reflist-upper-alpha{list-style-type:upper-alpha}.mw-parser-output .reflist-upper-roman{list-style-type:upper-roman}.mw-parser-output .reflist-lower-alpha{list-style-type:lower-alpha}.mw-parser-output .reflist-lower-greek{list-style-type:lower-greek}.mw-parser-output .reflist-lower-roman{list-style-type:lower-roman}</style><div class="reflist reflist-lower-alpha">
<div class="mw-references-wrap"><ol class="references">
<li id="cite_note-11"><span class="mw-cite-backlink"><b><a href="#cite_ref-11">^</a></b></span> <span class="reference-text">See <a href="/wiki/Heaps%27_law" title="Heaps&#39; law">Heaps' law</a>.</span>
</li>
</ol></div></div>
<h2><span class="mw-headline" id="References">References</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=10" title="Edit section: References">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1011085734"><div class="reflist">
<div class="mw-references-wrap mw-references-columns"><ol class="references">
<li id="cite_note-1"><span class="mw-cite-backlink"><b><a href="#cite_ref-1">^</a></b></span> <span class="reference-text"><style data-mw-deduplicate="TemplateStyles:r1133582631">.mw-parser-output cite.citation{font-style:inherit;word-wrap:break-word}.mw-parser-output .citation q{quotes:"\"""\"""'""'"}.mw-parser-output .citation:target{background-color:rgba(0,127,255,0.133)}.mw-parser-output .id-lock-free a,.mw-parser-output .citation .cs1-lock-free a{background:url("//upload.wikimedia.org/wikipedia/commons/6/65/Lock-green.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-limited a,.mw-parser-output .id-lock-registration a,.mw-parser-output .citation .cs1-lock-limited a,.mw-parser-output .citation .cs1-lock-registration a{background:url("//upload.wikimedia.org/wikipedia/commons/d/d6/Lock-gray-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .id-lock-subscription a,.mw-parser-output .citation .cs1-lock-subscription a{background:url("//upload.wikimedia.org/wikipedia/commons/a/aa/Lock-red-alt-2.svg")right 0.1em center/9px no-repeat}.mw-parser-output .cs1-ws-icon a{background:url("//upload.wikimedia.org/wikipedia/commons/4/4c/Wikisource-logo.svg")right 0.1em center/12px no-repeat}.mw-parser-output .cs1-code{color:inherit;background:inherit;border:none;padding:inherit}.mw-parser-output .cs1-hidden-error{display:none;color:#d33}.mw-parser-output .cs1-visible-error{color:#d33}.mw-parser-output .cs1-maint{display:none;color:#3a3;margin-left:0.3em}.mw-parser-output .cs1-format{font-size:95%}.mw-parser-output .cs1-kern-left{padding-left:0.2em}.mw-parser-output .cs1-kern-right{padding-right:0.2em}.mw-parser-output .citation .mw-selflink{font-weight:inherit}</style><cite id="CITEREFJurafskyMartin2021" class="citation book cs1">Jurafsky, Dan; Martin, James H. (2021). "N-gram Language Models". <a rel="nofollow" class="external text" href="https://web.stanford.edu/~jurafsky/slp3/"><i>Speech and Language Processing</i></a> (3rd&#160;ed.). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20220522005855/https://web.stanford.edu/~jurafsky/slp3/">Archived</a> from the original on 22 May 2022<span class="reference-accessdate">. Retrieved <span class="nowrap">24 May</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=N-gram+Language+Models&amp;rft.btitle=Speech+and+Language+Processing&amp;rft.edition=3rd&amp;rft.date=2021&amp;rft.aulast=Jurafsky&amp;rft.aufirst=Dan&amp;rft.au=Martin%2C+James+H.&amp;rft_id=https%3A%2F%2Fweb.stanford.edu%2F~jurafsky%2Fslp3%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-2"><span class="mw-cite-backlink"><b><a href="#cite_ref-2">^</a></b></span> <span class="reference-text">Kuhn, Roland, and Renato De Mori (1990). <a rel="nofollow" class="external text" href="https://www.researchgate.net/profile/Roland_Kuhn2/publication/3191800_Cache-based_natural_language_model_for_speech_recognition/links/004635184ee5b2c24f000000.pdf">"A cache-based natural language model for speech recognition"</a>. <i>IEEE transactions on pattern analysis and machine intelligence</i> 12.6: 570–583.</span>
</li>
<li id="cite_note-Semantic_parsing_as_machine_translation-3"><span class="mw-cite-backlink">^ <a href="#cite_ref-Semantic_parsing_as_machine_translation_3-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-Semantic_parsing_as_machine_translation_3-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text">Andreas, Jacob, Andreas Vlachos, and Stephen Clark (2013). <a rel="nofollow" class="external text" href="https://www.aclweb.org/anthology/P13-2009">"Semantic parsing as machine translation"</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20200815080932/https://www.aclweb.org/anthology/P13-2009/">Archived</a> 15 August 2020 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a>. Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers).</span>
</li>
<li id="cite_note-4"><span class="mw-cite-backlink"><b><a href="#cite_ref-4">^</a></b></span> <span class="reference-text">Pham, Vu, et al (2014). <a rel="nofollow" class="external text" href="https://arxiv.org/abs/1312.4569">"Dropout improves recurrent neural networks for handwriting recognition"</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201111170554/https://arxiv.org/abs/1312.4569">Archived</a> 11 November 2020 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a>. 14th International Conference on Frontiers in Handwriting Recognition. IEEE.</span>
</li>
<li id="cite_note-5"><span class="mw-cite-backlink"><b><a href="#cite_ref-5">^</a></b></span> <span class="reference-text">Htut, Phu Mon, Kyunghyun Cho, and Samuel R. Bowman (2018). <a rel="nofollow" class="external text" href="https://arxiv.org/pdf/1808.10000.pdf?source=post_page---------------------------">"Grammar induction with neural language models: An unusual replication"</a> <a rel="nofollow" class="external text" href="https://web.archive.org/web/20220814010528/https://arxiv.org/pdf/1808.10000.pdf?source=post_page---------------------------">Archived</a> 14 August 2022 at the <a href="/wiki/Wayback_Machine" title="Wayback Machine">Wayback Machine</a>. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<a rel="nofollow" class="external text" href="https://arxiv.org/abs/1808.10000">1808.10000</a>.</span>
</li>
<li id="cite_note-ponte1998-6"><span class="mw-cite-backlink"><b><a href="#cite_ref-ponte1998_6-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFPonteCroft1998" class="citation conference cs1">Ponte, Jay M.; Croft, W. Bruce (1998). <i>A language modeling approach to information retrieval</i>. Proceedings of the 21st ACM SIGIR Conference. Melbourne, Australia: ACM. pp.&#160;275–281. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1145%2F290941.291008">10.1145/290941.291008</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=A+language+modeling+approach+to+information+retrieval&amp;rft.place=Melbourne%2C+Australia&amp;rft.pages=275-281&amp;rft.pub=ACM&amp;rft.date=1998&amp;rft_id=info%3Adoi%2F10.1145%2F290941.291008&amp;rft.aulast=Ponte&amp;rft.aufirst=Jay+M.&amp;rft.au=Croft%2C+W.+Bruce&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-hiemstra1998-7"><span class="mw-cite-backlink"><b><a href="#cite_ref-hiemstra1998_7-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFHiemstra1998" class="citation conference cs1">Hiemstra, Djoerd (1998). <i>A linguistically motivated probabilistically model of information retrieval</i>. Proceedings of the 2nd European conference on Research and Advanced Technology for Digital Libraries. LNCS, Springer. pp.&#160;569–584. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F3-540-49653-X_34">10.1007/3-540-49653-X_34</a>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=A+linguistically+motivated+probabilistically+model+of+information+retrieval&amp;rft.pages=569-584&amp;rft.pub=LNCS%2C+Springer&amp;rft.date=1998&amp;rft_id=info%3Adoi%2F10.1007%2F3-540-49653-X_34&amp;rft.aulast=Hiemstra&amp;rft.aufirst=Djoerd&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-Manning-2022-8"><span class="mw-cite-backlink"><b><a href="#cite_ref-Manning-2022_8-0">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFManning2022" class="citation journal cs1"><a href="/wiki/Christopher_D._Manning" title="Christopher D. Manning">Manning, Christopher D.</a> (2022). <a rel="nofollow" class="external text" href="https://www.amacad.org/publication/human-language-understanding-reasoning">"Human Language Understanding &amp; Reasoning"</a>. <i>Daedalus</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230309154322/https://www.amacad.org/publication/human-language-understanding-reasoning">Archived</a> from the original on 9 March 2023<span class="reference-accessdate">. Retrieved <span class="nowrap">10 March</span> 2023</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Daedalus&amp;rft.atitle=Human+Language+Understanding+%26+Reasoning&amp;rft.date=2022&amp;rft.aulast=Manning&amp;rft.aufirst=Christopher+D.&amp;rft_id=https%3A%2F%2Fwww.amacad.org%2Fpublication%2Fhuman-language-understanding-reasoning&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-N-gram_language_model_jm-9"><span class="mw-cite-backlink">^ <a href="#cite_ref-N-gram_language_model_jm_9-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-N-gram_language_model_jm_9-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFJurafskyMartin2023" class="citation book cs1">Jurafsky, Dan; Martin, James H. (7 January 2023). "N-gram Language Models". <a rel="nofollow" class="external text" href="https://web.stanford.edu/~jurafsky/slp3/ed3book_jan72023.pdf"><i>Speech and Language Processing</i></a> <span class="cs1-format">(PDF)</span> (3rd edition draft&#160;ed.)<span class="reference-accessdate">. Retrieved <span class="nowrap">24 May</span> 2022</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=N-gram+Language+Models&amp;rft.btitle=Speech+and+Language+Processing&amp;rft.edition=3rd+edition+draft&amp;rft.date=2023-01-07&amp;rft.aulast=Jurafsky&amp;rft.aufirst=Dan&amp;rft.au=Martin%2C+James+H.&amp;rft_id=https%3A%2F%2Fweb.stanford.edu%2F~jurafsky%2Fslp3%2Fed3book_jan72023.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-10"><span class="mw-cite-backlink"><b><a href="#cite_ref-10">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFKarpathy" class="citation web cs1">Karpathy, Andrej. <a rel="nofollow" class="external text" href="https://karpathy.github.io/2015/05/21/rnn-effectiveness/">"The Unreasonable Effectiveness of Recurrent Neural Networks"</a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201101215448/http://karpathy.github.io/2015/05/21/rnn-effectiveness/">Archived</a> from the original on 1 November 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">27 January</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=The+Unreasonable+Effectiveness+of+Recurrent+Neural+Networks&amp;rft.aulast=Karpathy&amp;rft.aufirst=Andrej&amp;rft_id=https%3A%2F%2Fkarpathy.github.io%2F2015%2F05%2F21%2Frnn-effectiveness%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-bengio-12"><span class="mw-cite-backlink">^ <a href="#cite_ref-bengio_12-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-bengio_12-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-bengio_12-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFBengio2008" class="citation encyclopaedia cs1">Bengio, Yoshua (2008). <a rel="nofollow" class="external text" href="http://www.scholarpedia.org/article/Neural_net_language_models">"Neural net language models"</a>. <i><a href="/wiki/Scholarpedia" title="Scholarpedia">Scholarpedia</a></i>. Vol.&#160;3. p.&#160;3881. <a href="/wiki/Bibcode_(identifier)" class="mw-redirect" title="Bibcode (identifier)">Bibcode</a>:<a rel="nofollow" class="external text" href="https://ui.adsabs.harvard.edu/abs/2008SchpJ...3.3881B">2008SchpJ...3.3881B</a>. <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<span class="cs1-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://doi.org/10.4249%2Fscholarpedia.3881">10.4249/scholarpedia.3881</a></span>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201026161505/http://www.scholarpedia.org/article/Neural_net_language_models">Archived</a> from the original on 26 October 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">28 August</span> 2015</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Neural+net+language+models&amp;rft.btitle=Scholarpedia&amp;rft.pages=3881&amp;rft.date=2008&amp;rft_id=info%3Adoi%2F10.4249%2Fscholarpedia.3881&amp;rft_id=info%3Abibcode%2F2008SchpJ...3.3881B&amp;rft.aulast=Bengio&amp;rft.aufirst=Yoshua&amp;rft_id=http%3A%2F%2Fwww.scholarpedia.org%2Farticle%2FNeural_net_language_models&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-:0-13"><span class="mw-cite-backlink">^ <a href="#cite_ref-:0_13-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-:0_13-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFDevlinChangLeeToutanova2018" class="citation arxiv cs1">Devlin, Jacob; Chang, Ming-Wei; Lee, Kenton; Toutanova, Kristina (10 October 2018). "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="cs1-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1810.04805">1810.04805</a></span> [<a rel="nofollow" class="external text" href="//arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=BERT%3A+Pre-training+of+Deep+Bidirectional+Transformers+for+Language+Understanding&amp;rft.date=2018-10-10&amp;rft_id=info%3Aarxiv%2F1810.04805&amp;rft.aulast=Devlin&amp;rft.aufirst=Jacob&amp;rft.au=Chang%2C+Ming-Wei&amp;rft.au=Lee%2C+Kenton&amp;rft.au=Toutanova%2C+Kristina&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-mikolov-14"><span class="mw-cite-backlink">^ <a href="#cite_ref-mikolov_14-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-mikolov_14-1"><sup><i><b>b</b></i></sup></a> <a href="#cite_ref-mikolov_14-2"><sup><i><b>c</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFMikolovChenCorradoDean2013" class="citation arxiv cs1">Mikolov, Tomas; Chen, Kai; Corrado, Greg; Dean, Jeffrey (2013). "Efficient estimation of word representations in vector space". <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="cs1-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/1301.3781">1301.3781</a></span> [<a rel="nofollow" class="external text" href="//arxiv.org/archive/cs.CL">cs.CL</a>].</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=preprint&amp;rft.jtitle=arXiv&amp;rft.atitle=Efficient+estimation+of+word+representations+in+vector+space&amp;rft.date=2013&amp;rft_id=info%3Aarxiv%2F1301.3781&amp;rft.aulast=Mikolov&amp;rft.aufirst=Tomas&amp;rft.au=Chen%2C+Kai&amp;rft.au=Corrado%2C+Greg&amp;rft.au=Dean%2C+Jeffrey&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-compositionality-15"><span class="mw-cite-backlink">^ <a href="#cite_ref-compositionality_15-0"><sup><i><b>a</b></i></sup></a> <a href="#cite_ref-compositionality_15-1"><sup><i><b>b</b></i></sup></a></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFMikolovSutskeverChenCorrado_irst4=Greg_S.2013" class="citation conference cs1">Mikolov, Tomas; Sutskever, Ilya; Chen, Kai; Corrado irst4=Greg S.; Dean, Jeff (2013). <a rel="nofollow" class="external text" href="http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf"><i>Distributed Representations of Words and Phrases and their Compositionality</i></a> <span class="cs1-format">(PDF)</span>. <a href="/wiki/Advances_in_Neural_Information_Processing_Systems" class="mw-redirect" title="Advances in Neural Information Processing Systems">Advances in Neural Information Processing Systems</a>. pp.&#160;3111–3119. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201029083132/https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf">Archived</a> <span class="cs1-format">(PDF)</span> from the original on 29 October 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">22 June</span> 2015</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.btitle=Distributed+Representations+of+Words+and+Phrases+and+their+Compositionality&amp;rft.pages=3111-3119&amp;rft.date=2013&amp;rft.aulast=Mikolov&amp;rft.aufirst=Tomas&amp;rft.au=Sutskever%2C+Ilya&amp;rft.au=Chen%2C+Kai&amp;rft.au=Corrado+irst4%3DGreg+S.&amp;rft.au=Dean%2C+Jeff&amp;rft_id=http%3A%2F%2Fpapers.nips.cc%2Fpaper%2F5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-16"><span class="mw-cite-backlink"><b><a href="#cite_ref-16">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFHarris2013" class="citation web cs1">Harris, Derrick (16 August 2013). <a rel="nofollow" class="external text" href="https://gigaom.com/2013/08/16/were-on-the-cusp-of-deep-learning-for-the-masses-you-can-thank-google-later/">"We're on the cusp of deep learning for the masses. You can thank Google later"</a>. <i>Gigaom</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201111211737/https://gigaom.com/2013/08/16/were-on-the-cusp-of-deep-learning-for-the-masses-you-can-thank-google-later/">Archived</a> from the original on 11 November 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">22 June</span> 2015</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Gigaom&amp;rft.atitle=We%27re+on+the+cusp+of+deep+learning+for+the+masses.+You+can+thank+Google+later&amp;rft.date=2013-08-16&amp;rft.aulast=Harris&amp;rft.aufirst=Derrick&amp;rft_id=https%3A%2F%2Fgigaom.com%2F2013%2F08%2F16%2Fwere-on-the-cusp-of-deep-learning-for-the-masses-you-can-thank-google-later%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-17"><span class="mw-cite-backlink"><b><a href="#cite_ref-17">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFLvZhai2009" class="citation conference cs1">Lv, Yuanhua; Zhai, ChengXiang (2009). <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201124011432/http://times.cs.uiuc.edu/czhai/pub/sigir09-PLM.pdf">"Positional Language Models for Information Retrieval in"</a> <span class="cs1-format">(PDF)</span>. <i>Proceedings</i>. 32nd international ACM SIGIR conference on Research and development in information retrieval (SIGIR). Archived from <a rel="nofollow" class="external text" href="http://times.cs.uiuc.edu/czhai/pub/sigir09-PLM.pdf">the original</a> <span class="cs1-format">(PDF)</span> on 24 November 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">7 April</span> 2012</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=conference&amp;rft.atitle=Positional+Language+Models+for+Information+Retrieval+in&amp;rft.btitle=Proceedings&amp;rft.date=2009&amp;rft.aulast=Lv&amp;rft.aufirst=Yuanhua&amp;rft.au=Zhai%2C+ChengXiang&amp;rft_id=http%3A%2F%2Ftimes.cs.uiuc.edu%2Fczhai%2Fpub%2Fsigir09-PLM.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-18"><span class="mw-cite-backlink"><b><a href="#cite_ref-18">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFCambriaHussain2012" class="citation book cs1">Cambria, Erik; Hussain, Amir (28 July 2012). <a rel="nofollow" class="external text" href="https://books.google.com/books?id=NrtcLwEACAAJ"><i>Sentic Computing: Techniques, Tools, and Applications</i></a>. Springer Netherlands. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-94-007-5069-2" title="Special:BookSources/978-94-007-5069-2"><bdi>978-94-007-5069-2</bdi></a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230416160337/https://books.google.com/books?id=NrtcLwEACAAJ">Archived</a> from the original on 16 April 2023<span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Sentic+Computing%3A+Techniques%2C+Tools%2C+and+Applications&amp;rft.pub=Springer+Netherlands&amp;rft.date=2012-07-28&amp;rft.isbn=978-94-007-5069-2&amp;rft.aulast=Cambria&amp;rft.aufirst=Erik&amp;rft.au=Hussain%2C+Amir&amp;rft_id=https%3A%2F%2Fbooks.google.com%2Fbooks%3Fid%3DNrtcLwEACAAJ&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-19"><span class="mw-cite-backlink"><b><a href="#cite_ref-19">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFMocialovHastieTurner2018" class="citation journal cs1">Mocialov, Boris; Hastie, Helen; Turner, Graham (August 2018). <a rel="nofollow" class="external text" href="https://www.aclweb.org/anthology/W18-3911/">"Transfer Learning for British Sign Language Modelling"</a>. <i>Proceedings of the Fifth Workshop on NLP for Similar Languages, Varieties and Dialects (VarDial 2018)</i>: 101–110. <a href="/wiki/ArXiv_(identifier)" class="mw-redirect" title="ArXiv (identifier)">arXiv</a>:<span class="cs1-lock-free" title="Freely accessible"><a rel="nofollow" class="external text" href="https://arxiv.org/abs/2006.02144">2006.02144</a></span>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201205140143/https://www.aclweb.org/anthology/W18-3911/">Archived</a> from the original on 5 December 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">14 March</span> 2020</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=article&amp;rft.jtitle=Proceedings+of+the+Fifth+Workshop+on+NLP+for+Similar+Languages%2C+Varieties+and+Dialects+%28VarDial+2018%29&amp;rft.atitle=Transfer+Learning+for+British+Sign+Language+Modelling&amp;rft.pages=101-110&amp;rft.date=2018-08&amp;rft_id=info%3Aarxiv%2F2006.02144&amp;rft.aulast=Mocialov&amp;rft.aufirst=Boris&amp;rft.au=Hastie%2C+Helen&amp;rft.au=Turner%2C+Graham&amp;rft_id=https%3A%2F%2Fwww.aclweb.org%2Fanthology%2FW18-3911%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-20"><span class="mw-cite-backlink"><b><a href="#cite_ref-20">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFKarlgrenSchutze2015" class="citation cs2">Karlgren, Jussi; Schutze, Hinrich (2015), "Evaluating Learning Language Representations", <i>International Conference of the Cross-Language Evaluation Forum</i>, Lecture Notes in Computer Science, Springer International Publishing, pp.&#160;254–260, <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F978-3-319-64206-2_8">10.1007/978-3-319-64206-2_8</a>, <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/9783319642055" title="Special:BookSources/9783319642055"><bdi>9783319642055</bdi></a></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Evaluating+Learning+Language+Representations&amp;rft.btitle=International+Conference+of+the+Cross-Language+Evaluation+Forum&amp;rft.series=Lecture+Notes+in+Computer+Science&amp;rft.pages=254-260&amp;rft.pub=Springer+International+Publishing&amp;rft.date=2015&amp;rft_id=info%3Adoi%2F10.1007%2F978-3-319-64206-2_8&amp;rft.isbn=9783319642055&amp;rft.aulast=Karlgren&amp;rft.aufirst=Jussi&amp;rft.au=Schutze%2C+Hinrich&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-21"><span class="mw-cite-backlink"><b><a href="#cite_ref-21">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nyu-mll.github.io/CoLA/">"The Corpus of Linguistic Acceptability (CoLA)"</a>. <i>nyu-mll.github.io</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201207081834/https://nyu-mll.github.io/CoLA/">Archived</a> from the original on 7 December 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nyu-mll.github.io&amp;rft.atitle=The+Corpus+of+Linguistic+Acceptability+%28CoLA%29&amp;rft_id=https%3A%2F%2Fnyu-mll.github.io%2FCoLA%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-22"><span class="mw-cite-backlink"><b><a href="#cite_ref-22">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://gluebenchmark.com/">"GLUE Benchmark"</a>. <i>gluebenchmark.com</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201104161928/https://gluebenchmark.com/">Archived</a> from the original on 4 November 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=gluebenchmark.com&amp;rft.atitle=GLUE+Benchmark&amp;rft_id=https%3A%2F%2Fgluebenchmark.com%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-23"><span class="mw-cite-backlink"><b><a href="#cite_ref-23">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://www.microsoft.com/en-us/download/details.aspx?id=52398">"Microsoft Research Paraphrase Corpus"</a>. <i>Microsoft Download Center</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201025121243/https://www.microsoft.com/en-us/download/details.aspx?id=52398">Archived</a> from the original on 25 October 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=Microsoft+Download+Center&amp;rft.atitle=Microsoft+Research+Paraphrase+Corpus&amp;rft_id=https%3A%2F%2Fwww.microsoft.com%2Fen-us%2Fdownload%2Fdetails.aspx%3Fid%3D52398&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-24"><span class="mw-cite-backlink"><b><a href="#cite_ref-24">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFAghaebrahimian2017" class="citation cs2">Aghaebrahimian, Ahmad (2017), "Quora Question Answer Dataset", <i>Text, Speech, and Dialogue</i>, Lecture Notes in Computer Science, vol.&#160;10415, Springer International Publishing, pp.&#160;66–73, <a href="/wiki/Doi_(identifier)" class="mw-redirect" title="Doi (identifier)">doi</a>:<a rel="nofollow" class="external text" href="https://doi.org/10.1007%2F978-3-319-64206-2_8">10.1007/978-3-319-64206-2_8</a>, <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/9783319642055" title="Special:BookSources/9783319642055"><bdi>9783319642055</bdi></a></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=bookitem&amp;rft.atitle=Quora+Question+Answer+Dataset&amp;rft.btitle=Text%2C+Speech%2C+and+Dialogue&amp;rft.series=Lecture+Notes+in+Computer+Science&amp;rft.pages=66-73&amp;rft.pub=Springer+International+Publishing&amp;rft.date=2017&amp;rft_id=info%3Adoi%2F10.1007%2F978-3-319-64206-2_8&amp;rft.isbn=9783319642055&amp;rft.aulast=Aghaebrahimian&amp;rft.aufirst=Ahmad&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-25"><span class="mw-cite-backlink"><b><a href="#cite_ref-25">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFSammons,_V.G.Vinod_Vydiswaran,_Dan_RothVydiswaranRoth" class="citation web cs1">Sammons, V.G.Vinod Vydiswaran, Dan Roth, Mark; Vydiswaran, V.G.; Roth, Dan. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20170809113438/http://l2r.cs.uiuc.edu/~danr/Teaching/CS546-12/TeChapter.pdf">"Recognizing Textual Entailment"</a> <span class="cs1-format">(PDF)</span>. Archived from <a rel="nofollow" class="external text" href="http://l2r.cs.uiuc.edu/~danr/Teaching/CS546-12/TeChapter.pdf">the original</a> <span class="cs1-format">(PDF)</span> on 9 August 2017<span class="reference-accessdate">. Retrieved <span class="nowrap">24 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=unknown&amp;rft.btitle=Recognizing+Textual+Entailment&amp;rft.aulast=Sammons%2C+V.G.Vinod+Vydiswaran%2C+Dan+Roth&amp;rft.aufirst=Mark&amp;rft.au=Vydiswaran%2C+V.G.&amp;rft.au=Roth%2C+Dan&amp;rft_id=http%3A%2F%2Fl2r.cs.uiuc.edu%2F~danr%2FTeaching%2FCS546-12%2FTeChapter.pdf&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span><span class="cs1-maint citation-comment"><code class="cs1-code">{{<a href="/wiki/Template:Cite_web" title="Template:Cite web">cite web</a>}}</code>:  CS1 maint: multiple names: authors list (<a href="/wiki/Category:CS1_maint:_multiple_names:_authors_list" title="Category:CS1 maint: multiple names: authors list">link</a>)</span></span>
</li>
<li id="cite_note-26"><span class="mw-cite-backlink"><b><a href="#cite_ref-26">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://rajpurkar.github.io/SQuAD-explorer/">"The Stanford Question Answering Dataset"</a>. <i>rajpurkar.github.io</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201030072130/https://rajpurkar.github.io/SQuAD-explorer/">Archived</a> from the original on 30 October 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=rajpurkar.github.io&amp;rft.atitle=The+Stanford+Question+Answering+Dataset&amp;rft_id=https%3A%2F%2Frajpurkar.github.io%2FSQuAD-explorer%2F&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-27"><span class="mw-cite-backlink"><b><a href="#cite_ref-27">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite class="citation web cs1"><a rel="nofollow" class="external text" href="https://nlp.stanford.edu/sentiment/treebank.html">"Recursive Deep Models for Semantic Compositionality Over a Sentiment Treebank"</a>. <i>nlp.stanford.edu</i>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20201027125825/https://nlp.stanford.edu/sentiment/treebank.html">Archived</a> from the original on 27 October 2020<span class="reference-accessdate">. Retrieved <span class="nowrap">25 February</span> 2019</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&amp;rft.genre=unknown&amp;rft.jtitle=nlp.stanford.edu&amp;rft.atitle=Recursive+Deep+Models+for+Semantic+Compositionality+Over+a+Sentiment+Treebank&amp;rft_id=https%3A%2F%2Fnlp.stanford.edu%2Fsentiment%2Ftreebank.html&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-28"><span class="mw-cite-backlink"><b><a href="#cite_ref-28">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFHendrycks2023" class="citation cs2">Hendrycks, Dan (14 March 2023), <a rel="nofollow" class="external text" href="https://github.com/hendrycks/test"><i>Measuring Massive Multitask Language Understanding</i></a>, <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230315011614/https://github.com/hendrycks/test">archived</a> from the original on 15 March 2023<span class="reference-accessdate">, retrieved <span class="nowrap">15 March</span> 2023</span></cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Measuring+Massive+Multitask+Language+Understanding&amp;rft.date=2023-03-14&amp;rft.aulast=Hendrycks&amp;rft.aufirst=Dan&amp;rft_id=https%3A%2F%2Fgithub.com%2Fhendrycks%2Ftest&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
<li id="cite_note-29"><span class="mw-cite-backlink"><b><a href="#cite_ref-29">^</a></b></span> <span class="reference-text"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1133582631"><cite id="CITEREFHornsteinLasnikPatel-GroszYang2018" class="citation book cs1">Hornstein, Norbert; Lasnik, Howard; Patel-Grosz, Pritty; Yang, Charles (9 January 2018). <a rel="nofollow" class="external text" href="https://books.google.com/books?id=XoxsDwAAQBAJ&amp;dq=adger+%22goldilocks%22&amp;pg=PA153"><i>Syntactic Structures after 60 Years: The Impact of the Chomskyan Revolution in Linguistics</i></a>. Walter de Gruyter GmbH &amp; Co KG. <a href="/wiki/ISBN_(identifier)" class="mw-redirect" title="ISBN (identifier)">ISBN</a>&#160;<a href="/wiki/Special:BookSources/978-1-5015-0692-5" title="Special:BookSources/978-1-5015-0692-5"><bdi>978-1-5015-0692-5</bdi></a>. <a rel="nofollow" class="external text" href="https://web.archive.org/web/20230416160343/https://books.google.com/books?id=XoxsDwAAQBAJ&amp;dq=adger+%22goldilocks%22&amp;pg=PA153">Archived</a> from the original on 16 April 2023<span class="reference-accessdate">. Retrieved <span class="nowrap">11 December</span> 2021</span>.</cite><span title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&amp;rft.genre=book&amp;rft.btitle=Syntactic+Structures+after+60+Years%3A+The+Impact+of+the+Chomskyan+Revolution+in+Linguistics&amp;rft.pub=Walter+de+Gruyter+GmbH+%26+Co+KG&amp;rft.date=2018-01-09&amp;rft.isbn=978-1-5015-0692-5&amp;rft.aulast=Hornstein&amp;rft.aufirst=Norbert&amp;rft.au=Lasnik%2C+Howard&amp;rft.au=Patel-Grosz%2C+Pritty&amp;rft.au=Yang%2C+Charles&amp;rft_id=https%3A%2F%2Fbooks.google.com%2Fbooks%3Fid%3DXoxsDwAAQBAJ%26dq%3Dadger%2B%2522goldilocks%2522%26pg%3DPA153&amp;rfr_id=info%3Asid%2Fen.wikipedia.org%3ALanguage+model" class="Z3988"></span></span>
</li>
</ol></div></div>
<h2><span class="mw-headline" id="Further_reading">Further reading</span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/w/index.php?title=Language_model&amp;action=edit&amp;section=11" title="Edit section: Further reading">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
<style data-mw-deduplicate="TemplateStyles:r1054258005">.mw-parser-output .refbegin{font-size:90%;margin-bottom:0.5em}.mw-parser-output .refbegin-hanging-indents>ul{margin-left:0}.mw-parser-output .refbegin-hanging-indents>ul>li{margin-left:0;padding-left:3.2em;text-indent:-3.2em}.mw-parser-output .refbegin-hanging-indents ul,.mw-parser-output .refbegin-hanging-indents ul li{list-style:none}@media(max-width:720px){.mw-parser-output .refbegin-hanging-indents>ul>li{padding-left:1.6em;text-indent:-1.6em}}.mw-parser-output .refbegin-columns{margin-top:0.3em}.mw-parser-output .refbegin-columns ul{margin-top:0}.mw-parser-output .refbegin-columns li{page-break-inside:avoid;break-inside:avoid-column}</style><div class="refbegin" style="">
<ul><li>Ponte, J. M.; Croft, W. B. (1998). "A Language Modeling Approach to Information Retrieval". <i>Research and Development in Information Retrieval</i>. pp. 275–281. <a href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.117.4237">CiteSeerX 10.1.1.117.4237</a>.</li>
<li>Song, F.; Croft, W. B. (1999). "A General Language Model for Information Retrieval". <i>Research and Development in Information Retrieval</i>. pp. 279–280. <a href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.6467">CiteSeerX 10.1.1.21.6467</a>.</li>
<li>Chen, Stanley; Goodman, Joshua (1998). <i>An Empirical Study of Smoothing Techniques for Language Modeling</i> (Technical report). Harvard University. <a href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.131.5458">CiteSeerX 10.1.1.131.5458</a>.</li></ul>
</div>
<div class="navbox-styles"><style data-mw-deduplicate="TemplateStyles:r1129693374">.mw-parser-output .hlist dl,.mw-parser-output .hlist ol,.mw-parser-output .hlist ul{margin:0;padding:0}.mw-parser-output .hlist dd,.mw-parser-output .hlist dt,.mw-parser-output .hlist li{margin:0;display:inline}.mw-parser-output .hlist.inline,.mw-parser-output .hlist.inline dl,.mw-parser-output .hlist.inline ol,.mw-parser-output .hlist.inline ul,.mw-parser-output .hlist dl dl,.mw-parser-output .hlist dl ol,.mw-parser-output .hlist dl ul,.mw-parser-output .hlist ol dl,.mw-parser-output .hlist ol ol,.mw-parser-output .hlist ol ul,.mw-parser-output .hlist ul dl,.mw-parser-output .hlist ul ol,.mw-parser-output .hlist ul ul{display:inline}.mw-parser-output .hlist .mw-empty-li{display:none}.mw-parser-output .hlist dt::after{content:": "}.mw-parser-output .hlist dd::after,.mw-parser-output .hlist li::after{content:" · ";font-weight:bold}.mw-parser-output .hlist dd:last-child::after,.mw-parser-output .hlist dt:last-child::after,.mw-parser-output .hlist li:last-child::after{content:none}.mw-parser-output .hlist dd dd:first-child::before,.mw-parser-output .hlist dd dt:first-child::before,.mw-parser-output .hlist dd li:first-child::before,.mw-parser-output .hlist dt dd:first-child::before,.mw-parser-output .hlist dt dt:first-child::before,.mw-parser-output .hlist dt li:first-child::before,.mw-parser-output .hlist li dd:first-child::before,.mw-parser-output .hlist li dt:first-child::before,.mw-parser-output .hlist li li:first-child::before{content:" (";font-weight:normal}.mw-parser-output .hlist dd dd:last-child::after,.mw-parser-output .hlist dd dt:last-child::after,.mw-parser-output .hlist dd li:last-child::after,.mw-parser-output .hlist dt dd:last-child::after,.mw-parser-output .hlist dt dt:last-child::after,.mw-parser-output .hlist dt li:last-child::after,.mw-parser-output .hlist li dd:last-child::after,.mw-parser-output .hlist li dt:last-child::after,.mw-parser-output .hlist li li:last-child::after{content:")";font-weight:normal}.mw-parser-output .hlist ol{counter-reset:listitem}.mw-parser-output .hlist ol>li{counter-increment:listitem}.mw-parser-output .hlist ol>li::before{content:" "counter(listitem)"\a0 "}.mw-parser-output .hlist dd ol>li:first-child::before,.mw-parser-output .hlist dt ol>li:first-child::before,.mw-parser-output .hlist li ol>li:first-child::before{content:" ("counter(listitem)"\a0 "}</style><style data-mw-deduplicate="TemplateStyles:r1061467846">.mw-parser-output .navbox{box-sizing:border-box;border:1px solid #a2a9b1;width:100%;clear:both;font-size:88%;text-align:center;padding:1px;margin:1em auto 0}.mw-parser-output .navbox .navbox{margin-top:0}.mw-parser-output .navbox+.navbox,.mw-parser-output .navbox+.navbox-styles+.navbox{margin-top:-1px}.mw-parser-output .navbox-inner,.mw-parser-output .navbox-subgroup{width:100%}.mw-parser-output .navbox-group,.mw-parser-output .navbox-title,.mw-parser-output .navbox-abovebelow{padding:0.25em 1em;line-height:1.5em;text-align:center}.mw-parser-output .navbox-group{white-space:nowrap;text-align:right}.mw-parser-output .navbox,.mw-parser-output .navbox-subgroup{background-color:#fdfdfd}.mw-parser-output .navbox-list{line-height:1.5em;border-color:#fdfdfd}.mw-parser-output .navbox-list-with-group{text-align:left;border-left-width:2px;border-left-style:solid}.mw-parser-output tr+tr>.navbox-abovebelow,.mw-parser-output tr+tr>.navbox-group,.mw-parser-output tr+tr>.navbox-image,.mw-parser-output tr+tr>.navbox-list{border-top:2px solid #fdfdfd}.mw-parser-output 
.navbox-title{background-color:#ccf}.mw-parser-output .navbox-abovebelow,.mw-parser-output .navbox-group,.mw-parser-output .navbox-subgroup .navbox-title{background-color:#ddf}.mw-parser-output .navbox-subgroup .navbox-group,.mw-parser-output .navbox-subgroup .navbox-abovebelow{background-color:#e6e6ff}.mw-parser-output .navbox-even{background-color:#f7f7f7}.mw-parser-output .navbox-odd{background-color:transparent}.mw-parser-output .navbox .hlist td dl,.mw-parser-output .navbox .hlist td ol,.mw-parser-output .navbox .hlist td ul,.mw-parser-output .navbox td.hlist dl,.mw-parser-output .navbox td.hlist ol,.mw-parser-output .navbox td.hlist ul{padding:0.125em 0}.mw-parser-output .navbox .navbar{display:block;font-size:100%}.mw-parser-output .navbox-title .navbar{float:left;text-align:left;margin-right:0.5em}</style></div><div role="navigation" class="navbox" aria-labelledby="Natural_language_processing" style="padding:3px"><table class="nowraplinks hlist mw-collapsible mw-collapsed navbox-inner" style="border-spacing:0;background:transparent;color:inherit"><tbody><tr><th scope="col" class="navbox-title" colspan="2"><link rel="mw-deduplicated-inline-style" href="mw-data:TemplateStyles:r1129693374"><style data-mw-deduplicate="TemplateStyles:r1063604349">.mw-parser-output .navbar{display:inline;font-size:88%;font-weight:normal}.mw-parser-output .navbar-collapse{float:left;text-align:left}.mw-parser-output .navbar-boxtext{word-spacing:0}.mw-parser-output .navbar ul{display:inline-block;white-space:nowrap;line-height:inherit}.mw-parser-output .navbar-brackets::before{margin-right:-0.125em;content:"[ "}.mw-parser-output .navbar-brackets::after{margin-left:-0.125em;content:" ]"}.mw-parser-output .navbar li{word-spacing:-0.125em}.mw-parser-output .navbar a>span,.mw-parser-output .navbar a>abbr{text-decoration:inherit}.mw-parser-output .navbar-mini abbr{font-variant:small-caps;border-bottom:none;text-decoration:none;cursor:inherit}.mw-parser-output .navbar-ct-full{font-size:114%;margin:0 7em}.mw-parser-output .navbar-ct-mini{font-size:114%;margin:0 4em}</style><div class="navbar plainlinks hlist navbar-mini"><ul><li class="nv-view"><a href="/wiki/Template:Natural_language_processing" title="Template:Natural language processing"><abbr title="View this template" style=";;background:none transparent;border:none;box-shadow:none;padding:0;">v</abbr></a></li><li class="nv-talk"><a href="/wiki/Template_talk:Natural_language_processing" title="Template talk:Natural language processing"><abbr title="Discuss this template" style=";;background:none transparent;border:none;box-shadow:none;padding:0;">t</abbr></a></li><li class="nv-edit"><a class="external text" href="https://en.wikipedia.org/w/index.php?title=Template:Natural_language_processing&amp;action=edit"><abbr title="Edit this template" style=";;background:none transparent;border:none;box-shadow:none;padding:0;">e</abbr></a></li></ul></div><div id="Natural_language_processing" style="font-size:114%;margin:0 4em"><a href="/wiki/Natural_language_processing" title="Natural language processing">Natural language processing</a></div></th></tr><tr><th scope="row" class="navbox-group" style="width:1%">General terms</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/AI-complete" title="AI-complete">AI-complete</a></li>
<li><a href="/wiki/Bag-of-words_model" title="Bag-of-words model">Bag-of-words</a></li>
<li><a href="/wiki/N-gram" title="N-gram">n-gram</a>
<ul><li><a href="/wiki/Bigram" title="Bigram">Bigram</a></li>
<li><a href="/wiki/Trigram" title="Trigram">Trigram</a></li></ul></li>
<li><a href="/wiki/Computational_linguistics" title="Computational linguistics">Computational linguistics</a></li>
<li><a href="/wiki/Natural-language_understanding" title="Natural-language understanding">Natural-language understanding</a></li>
<li><a href="/wiki/Stop_word" title="Stop word">Stop words</a></li>
<li><a href="/wiki/Text_processing" title="Text processing">Text processing</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text_mining" title="Text mining">Text analysis</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Collocation_extraction" title="Collocation extraction">Collocation extraction</a></li>
<li><a href="/wiki/Concept_mining" title="Concept mining">Concept mining</a></li>
<li><a href="/wiki/Coreference#Coreference_resolution" title="Coreference">Coreference resolution</a></li>
<li><a href="/wiki/Deep_linguistic_processing" title="Deep linguistic processing">Deep linguistic processing</a></li>
<li><a href="/wiki/Distant_reading" title="Distant reading">Distant reading</a></li>
<li><a href="/wiki/Information_extraction" title="Information extraction">Information extraction</a></li>
<li><a href="/wiki/Named-entity_recognition" title="Named-entity recognition">Named-entity recognition</a></li>
<li><a href="/wiki/Ontology_learning" title="Ontology learning">Ontology learning</a></li>
<li><a href="/wiki/Parsing" title="Parsing">Parsing</a></li>
<li><a href="/wiki/Part-of-speech_tagging" title="Part-of-speech tagging">Part-of-speech tagging</a></li>
<li><a href="/wiki/Semantic_role_labeling" title="Semantic role labeling">Semantic role labeling</a></li>
<li><a href="/wiki/Semantic_similarity" title="Semantic similarity">Semantic similarity</a></li>
<li><a href="/wiki/Sentiment_analysis" title="Sentiment analysis">Sentiment analysis</a></li>
<li><a href="/wiki/Terminology_extraction" title="Terminology extraction">Terminology extraction</a></li>
<li><a href="/wiki/Text_mining" title="Text mining">Text mining</a></li>
<li><a href="/wiki/Textual_entailment" title="Textual entailment">Textual entailment</a></li>
<li><a href="/wiki/Truecasing" title="Truecasing">Truecasing</a></li>
<li><a href="/wiki/Word-sense_disambiguation" title="Word-sense disambiguation">Word-sense disambiguation</a></li>
<li><a href="/wiki/Word-sense_induction" title="Word-sense induction">Word-sense induction</a></li></ul>
</div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th id="Text_segmentation" scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Text_segmentation" title="Text segmentation">Text segmentation</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Compound-term_processing" title="Compound-term processing">Compound-term processing</a></li>
<li><a href="/wiki/Lemmatisation" title="Lemmatisation">Lemmatisation</a></li>
<li><a href="/wiki/Lexical_analysis" title="Lexical analysis">Lexical analysis</a></li>
<li><a href="/wiki/Shallow_parsing" title="Shallow parsing">Text chunking</a></li>
<li><a href="/wiki/Stemming" title="Stemming">Stemming</a></li>
<li><a href="/wiki/Sentence_boundary_disambiguation" title="Sentence boundary disambiguation">Sentence segmentation</a></li>
<li><a href="/wiki/Word#Word_boundaries" title="Word">Word segmentation</a></li></ul>
</div></td></tr></tbody></table><div>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Automatic_summarization" title="Automatic summarization">Automatic summarization</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Multi-document_summarization" title="Multi-document summarization">Multi-document summarization</a></li>
<li><a href="/wiki/Sentence_extraction" title="Sentence extraction">Sentence extraction</a></li>
<li><a href="/wiki/Text_simplification" title="Text simplification">Text simplification</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Machine_translation" title="Machine translation">Machine translation</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Computer-assisted_translation" title="Computer-assisted translation">Computer-assisted</a></li>
<li><a href="/wiki/Example-based_machine_translation" title="Example-based machine translation">Example-based</a></li>
<li><a href="/wiki/Rule-based_machine_translation" title="Rule-based machine translation">Rule-based</a></li>
<li><a href="/wiki/Statistical_machine_translation" title="Statistical machine translation">Statistical</a></li>
<li><a href="/wiki/Transfer-based_machine_translation" title="Transfer-based machine translation">Transfer-based</a></li>
<li><a href="/wiki/Neural_machine_translation" title="Neural machine translation">Neural</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Distributional_semantics" title="Distributional semantics">Distributional semantics</a> models</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/BERT_(language_model)" title="BERT (language model)">BERT</a></li>
<li><a href="/wiki/Document-term_matrix" title="Document-term matrix">Document-term matrix</a></li>
<li><a href="/wiki/Explicit_semantic_analysis" title="Explicit semantic analysis">Explicit semantic analysis</a></li>
<li><a href="/wiki/FastText" title="FastText">fastText</a></li>
<li><a href="/wiki/GloVe" title="GloVe">GloVe</a></li>
<li><a class="mw-selflink selflink">Language model</a> (<a href="/wiki/Large_language_model" title="Large language model">large</a>)</li>
<li><a href="/wiki/Latent_semantic_analysis" title="Latent semantic analysis">Latent semantic analysis</a></li>
<li><a href="/wiki/Seq2seq" title="Seq2seq">Seq2seq</a></li>
<li><a href="/wiki/Word_embedding" title="Word embedding">Word embedding</a></li>
<li><a href="/wiki/Word2vec" title="Word2vec">Word2vec</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Language_resource" title="Language resource">Language resources</a>,<br />datasets and corpora</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em"></div><table class="nowraplinks navbox-subgroup" style="border-spacing:0"><tbody><tr><th scope="row" class="navbox-group" style="width:1%">Types and<br />standards</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Corpus_linguistics" title="Corpus linguistics">Corpus linguistics</a></li>
<li><a href="/wiki/Lexical_resource" title="Lexical resource">Lexical resource</a></li>
<li><a href="/wiki/Linguistic_Linked_Open_Data" title="Linguistic Linked Open Data">Linguistic Linked Open Data</a></li>
<li><a href="/wiki/Machine-readable_dictionary" title="Machine-readable dictionary">Machine-readable dictionary</a></li>
<li><a href="/wiki/Parallel_text" title="Parallel text">Parallel text</a></li>
<li><a href="/wiki/PropBank" title="PropBank">PropBank</a></li>
<li><a href="/wiki/Semantic_network" title="Semantic network">Semantic network</a></li>
<li><a href="/wiki/Simple_Knowledge_Organization_System" title="Simple Knowledge Organization System">Simple Knowledge Organization System</a></li>
<li><a href="/wiki/Speech_corpus" title="Speech corpus">Speech corpus</a></li>
<li><a href="/wiki/Text_corpus" title="Text corpus">Text corpus</a></li>
<li><a href="/wiki/Thesaurus_(information_retrieval)" title="Thesaurus (information retrieval)">Thesaurus (information retrieval)</a></li>
<li><a href="/wiki/Treebank" title="Treebank">Treebank</a></li>
<li><a href="/wiki/Universal_Dependencies" title="Universal Dependencies">Universal Dependencies</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Data</th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/BabelNet" title="BabelNet">BabelNet</a></li>
<li><a href="/wiki/Bank_of_English" title="Bank of English">Bank of English</a></li>
<li><a href="/wiki/DBpedia" title="DBpedia">DBpedia</a></li>
<li><a href="/wiki/FrameNet" title="FrameNet">FrameNet</a></li>
<li><a href="/wiki/Google_Ngram_Viewer" title="Google Ngram Viewer">Google Ngram Viewer</a></li>
<li><a href="/wiki/UBY" title="UBY">UBY</a></li>
<li><a href="/wiki/WordNet" title="WordNet">WordNet</a></li></ul>
</div></td></tr></tbody></table><div></div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Automatic_identification_and_data_capture" title="Automatic identification and data capture">Automatic identification<br />and data capture</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Speech_recognition" title="Speech recognition">Speech recognition</a></li>
<li><a href="/wiki/Speech_segmentation" title="Speech segmentation">Speech segmentation</a></li>
<li><a href="/wiki/Speech_synthesis" title="Speech synthesis">Speech synthesis</a></li>
<li><a href="/wiki/Natural_language_generation" title="Natural language generation">Natural language generation</a></li>
<li><a href="/wiki/Optical_character_recognition" title="Optical character recognition">Optical character recognition</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Topic_model" title="Topic model">Topic model</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Document_classification" title="Document classification">Document classification</a></li>
<li><a href="/wiki/Latent_Dirichlet_allocation" title="Latent Dirichlet allocation">Latent Dirichlet allocation</a></li>
<li><a href="/wiki/Pachinko_allocation" title="Pachinko allocation">Pachinko allocation</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Computer-assisted_reviewing" title="Computer-assisted reviewing">Computer-assisted<br />reviewing</a></th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Automated_essay_scoring" title="Automated essay scoring">Automated essay scoring</a></li>
<li><a href="/wiki/Concordancer" title="Concordancer">Concordancer</a></li>
<li><a href="/wiki/Grammar_checker" title="Grammar checker">Grammar checker</a></li>
<li><a href="/wiki/Predictive_text" title="Predictive text">Predictive text</a></li>
<li><a href="/wiki/Pronunciation_assessment" title="Pronunciation assessment">Pronunciation assessment</a></li>
<li><a href="/wiki/Spell_checker" title="Spell checker">Spell checker</a></li>
<li><a href="/wiki/Syntax_guessing" class="mw-redirect" title="Syntax guessing">Syntax guessing</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%"><a href="/wiki/Natural_language_user_interface" class="mw-redirect" title="Natural language user interface">Natural language<br />user interface</a></th><td class="navbox-list-with-group navbox-list navbox-even" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Chatbot" title="Chatbot">Chatbot</a></li>
<li><a href="/wiki/Interactive_fiction" title="Interactive fiction">Interactive fiction</a></li>
<li><a href="/wiki/Question_answering" title="Question answering">Question answering</a></li>
<li><a href="/wiki/Virtual_assistant" title="Virtual assistant">Virtual assistant</a></li>
<li><a href="/wiki/Voice_user_interface" title="Voice user interface">Voice user interface</a></li></ul>
</div></td></tr><tr><th scope="row" class="navbox-group" style="width:1%">Related</th><td class="navbox-list-with-group navbox-list navbox-odd" style="width:100%;padding:0"><div style="padding:0 0.25em">
<ul><li><a href="/wiki/Hallucination_(artificial_intelligence)" title="Hallucination (artificial intelligence)">Hallucination</a></li>
<li><a href="/wiki/Natural_Language_Toolkit" title="Natural Language Toolkit">Natural Language Toolkit</a></li>
<li><a href="/wiki/SpaCy" title="SpaCy">spaCy</a></li></ul>
</div></td></tr></tbody></table></div>
</body>
</html>