static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long haddr, bool freeze)
{
	struct mm_struct *mm = vma->vm_mm;
	struct folio *folio;
	struct page *page;
	pgtable_t pgtable;
	pmd_t old_pmd, _pmd;
	bool young, write, soft_dirty, pmd_migration = false, uffd_wp = false;
	bool anon_exclusive = false, dirty = false;
	unsigned long addr;
	pte_t *pte;
	int i;

	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
	VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
	VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
	VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)
				&& !pmd_devmap(*pmd));

	count_vm_event(THP_SPLIT_PMD);

	if (!vma_is_anonymous(vma)) {
		old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
		/*
		 * We are going to unmap this huge page. So
		 * just go ahead and zap it
		 */
		if (arch_needs_pgtable_deposit())
			zap_deposited_table(mm, pmd);
		if (vma_is_special_huge(vma))
			return;
		if (unlikely(is_pmd_migration_entry(old_pmd))) {
			swp_entry_t entry;

			entry = pmd_to_swp_entry(old_pmd);
			folio = pfn_swap_entry_folio(entry);
		} else {
			page = pmd_page(old_pmd);
			folio = page_folio(page);
			if (!folio_test_dirty(folio) && pmd_dirty(old_pmd))
				folio_mark_dirty(folio);
			if (!folio_test_referenced(folio) && pmd_young(old_pmd))
				folio_set_referenced(folio);
			add_reliable_page_counter(page, mm, -HPAGE_PMD_NR);
			folio_remove_rmap_pmd(folio, page, vma);
			folio_put(folio);
		}
		add_mm_counter(mm, mm_counter_file(folio), -HPAGE_PMD_NR);
		return;
	}

	if (is_huge_zero_pmd(*pmd)) {
		/*
		 * FIXME: Do we want to invalidate secondary mmu by calling
		 * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below
		 * inside __split_huge_pmd() ?
		 *
		 * We are going from a zero huge page write protected to zero
		 * small page also write protected so it does not seems useful
		 * to invalidate secondary mmu at this time.
		 */
		return __split_huge_zero_page_pmd(vma, haddr, pmd);
	}

	/*
	 * Up to this point the pmd is present and huge and userland has the
	 * whole access to the hugepage during the split (which happens in
	 * place). If we overwrite the pmd with the not-huge version pointing
	 * to the pte here (which of course we could if all CPUs were bug
	 * free), userland could trigger a small page size TLB miss on the
	 * small sized TLB while the hugepage TLB entry is still established in
	 * the huge TLB. Some CPU doesn't like that.
	 * See http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum
	 * 383 on page 105. Intel should be safe but is also warns that it's
	 * only safe if the permission and cache attributes of the two entries
	 * loaded in the two TLB is identical (which should be the case here).
	 * But it is generally safer to never allow small and huge TLB entries
	 * for the same virtual address to be loaded simultaneously. So instead
	 * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
	 * current pmd notpresent (atomically because here the pmd_trans_huge
	 * must remain set at all times on the pmd until the split is complete
	 * for this pmd), then we flush the SMP TLB and finally we write the
	 * non-huge version of the pmd entry with pmd_populate.
	 */
	old_pmd = pmdp_invalidate(vma, haddr, pmd);

	pmd_migration = is_pmd_migration_entry(old_pmd);
	if (unlikely(pmd_migration)) {
		swp_entry_t entry;

		entry = pmd_to_swp_entry(old_pmd);
		page = pfn_swap_entry_to_page(entry);
		write = is_writable_migration_entry(entry);
		if (PageAnon(page))
			anon_exclusive = is_readable_exclusive_migration_entry(entry);
		young = is_migration_entry_young(entry);
		dirty = is_migration_entry_dirty(entry);
		soft_dirty = pmd_swp_soft_dirty(old_pmd);
		uffd_wp = pmd_swp_uffd_wp(old_pmd);
	} else {
		page = pmd_page(old_pmd);
		folio = page_folio(page);
		if (pmd_dirty(old_pmd)) {
			dirty = true;
			folio_set_dirty(folio);
		}
		write = pmd_write(old_pmd);
		young = pmd_young(old_pmd);
		soft_dirty = pmd_soft_dirty(old_pmd);
		uffd_wp = pmd_uffd_wp(old_pmd);

		VM_WARN_ON_FOLIO(!folio_ref_count(folio), folio);
		VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

		/*
		 * Without "freeze", we'll simply split the PMD, propagating the
		 * PageAnonExclusive() flag for each PTE by setting it for
		 * each subpage -- no need to (temporarily) clear.
		 *
		 * With "freeze" we want to replace mapped pages by
		 * migration entries right away. This is only possible if we
		 * managed to clear PageAnonExclusive() -- see
		 * set_pmd_migration_entry().
		 *
		 * In case we cannot clear PageAnonExclusive(), split the PMD
		 * only and let try_to_migrate_one() fail later.
		 *
		 * See folio_try_share_anon_rmap_pmd(): invalidate PMD first.
		 */
		anon_exclusive = PageAnonExclusive(page);
		if (freeze && anon_exclusive &&
		    folio_try_share_anon_rmap_pmd(folio, page))
			freeze = false;
		if (!freeze) {
			rmap_t rmap_flags = RMAP_NONE;

			folio_ref_add(folio, HPAGE_PMD_NR - 1);
			if (anon_exclusive)
				rmap_flags |= RMAP_EXCLUSIVE;
			folio_add_anon_rmap_ptes(folio, page, HPAGE_PMD_NR,
						 vma, haddr, rmap_flags);
		}
	}

	/*
	 * Withdraw the table only after we mark the pmd entry invalid.
	 * This's critical for some architectures (Power).
	 */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	pmd_populate(mm, &_pmd, pgtable);

	pte = pte_offset_map(&_pmd, haddr);
	VM_BUG_ON(!pte);

	/*
	 * Note that NUMA hinting access restrictions are not transferred to
	 * avoid any possibility of altering permissions across VMAs.
	 */
	if (freeze || pmd_migration) {
		for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
			pte_t entry;
			swp_entry_t swp_entry;

			if (write)
				swp_entry = make_writable_migration_entry(
							page_to_pfn(page + i));
			else if (anon_exclusive)
				swp_entry = make_readable_exclusive_migration_entry(
							page_to_pfn(page + i));
			else
				swp_entry = make_readable_migration_entry(
							page_to_pfn(page + i));
			if (young)
				swp_entry = make_migration_entry_young(swp_entry);
			if (dirty)
				swp_entry = make_migration_entry_dirty(swp_entry);
			entry = swp_entry_to_pte(swp_entry);
			if (soft_dirty)
				entry = pte_swp_mksoft_dirty(entry);
			if (uffd_wp)
				entry = pte_swp_mkuffd_wp(entry);

			VM_WARN_ON(!pte_none(ptep_get(pte + i)));
			set_pte_at(mm, addr, pte + i, entry);
		}
	} else {
		pte_t entry;

		entry = mk_pte(page, READ_ONCE(vma->vm_page_prot));
		if (write)
			entry = pte_mkwrite(entry, vma);
		if (!young)
			entry = pte_mkold(entry);
		/* NOTE: this may set soft-dirty too on some archs */
		if (dirty)
			entry = pte_mkdirty(entry);
		if (soft_dirty)
			entry = pte_mksoft_dirty(entry);
		if (uffd_wp)
			entry = pte_mkuffd_wp(entry);

		for (i = 0; i < HPAGE_PMD_NR; i++)
			VM_WARN_ON(!pte_none(ptep_get(pte + i)));

		set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR);
	}
	pte_unmap(pte);

	if (!pmd_migration)
		folio_remove_rmap_pmd(folio, page, vma);
	if (freeze)
		put_page(page);

	smp_wmb(); /* make pte visible before pmd */
	pmd_populate(mm, pmd, pgtable);
}