{
    "version": 1,
    "title": "possible deadlock in userfaultfd_release",
    "display-title": "possible deadlock in userfaultfd_release",
    "id": "6408a8ba0fa0e3940c5c2dfa40e808cbf4228689",
    "status": "fixed",
    "fix-commits": [
        {
            "title": "fs/userfaultfd.c: disable irqs for fault_pending and event locks",
            "link": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cbcfa130a911c613a1d9d921af2eea171c414172",
            "hash": "cbcfa130a911c613a1d9d921af2eea171c414172",
            "repo": "git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git",
            "branch": "master"
        }
    ],
    "cause-commit": {
        "title": "aio: implement IOCB_CMD_POLL",
        "link": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bfe4037e722ec672c9dafd5730d9132afeeb76e9",
        "hash": "bfe4037e722ec672c9dafd5730d9132afeeb76e9",
        "repo": "git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git",
        "branch": "master"
    },
    "discussions": [
        "https://lore.kernel.org/all/000000000000700cdc057935aa71@google.com/T/",
        "https://lore.kernel.org/all/20190627075004.21259-1-ebiggers@kernel.org/T/",
        "https://lore.kernel.org/all/20190708150521.829733162@linuxfoundation.org/T/",
        "https://lore.kernel.org/all/20190708150526.234572443@linuxfoundation.org/T/"
    ],
    "crashes": [
        {
            "title": "",
            "syz-reproducer": "/text?tag=ReproSyz&x=1111f2eb400000",
            "c-reproducer": "/text?tag=ReproC&x=13e874f5400000",
            "kernel-config": "/text?tag=KernelConfig&x=5c0a49d2b5210087",
            "kernel-source-git": "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/log/?id=345671ea0f9258f410eb057b9ced9cefbbe5dc78",
            "kernel-source-commit": "345671ea0f9258f410eb057b9ced9cefbbe5dc78",
            "syzkaller-git": "https://github.com/google/syzkaller/commits/a8292de95851bd79cba2006dd032e28142f06c25",
            "syzkaller-commit": "a8292de95851bd79cba2006dd032e28142f06c25",
            "compiler-description": "gcc (GCC) 8.0.1 20180413 (experimental)",
            "architecture": "amd64",
            "crash-report-link": "/text?tag=CrashReport&x=17a47233400000"
        }
    ],
    "subsystems": [],
    "parent_of_fix_commit": "b9705d8778e7adc97de38f405f835a2426e14d84",
    "patch": "diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c\nindex ae0b8b5f69e6..ccbdbd62f0d8 100644\n--- a/fs/userfaultfd.c\n+++ b/fs/userfaultfd.c\n@@ -40,6 +40,16 @@ enum userfaultfd_state {\n /*\n  * Start with fault_pending_wqh and fault_wqh so they're more likely\n  * to be in the same cacheline.\n+ *\n+ * Locking order:\n+ *\tfd_wqh.lock\n+ *\t\tfault_pending_wqh.lock\n+ *\t\t\tfault_wqh.lock\n+ *\t\tevent_wqh.lock\n+ *\n+ * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,\n+ * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's\n+ * also taken in IRQ context.\n  */\n struct userfaultfd_ctx {\n \t/* waitqueue head for the pending (i.e. not read) userfaults */\n@@ -458,7 +468,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)\n \tblocking_state = return_to_userland ? TASK_INTERRUPTIBLE :\n \t\t\t TASK_KILLABLE;\n \n-\tspin_lock(&ctx->fault_pending_wqh.lock);\n+\tspin_lock_irq(&ctx->fault_pending_wqh.lock);\n \t/*\n \t * After the __add_wait_queue the uwq is visible to userland\n \t * through poll/read().\n@@ -470,7 +480,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)\n \t * __add_wait_queue.\n \t */\n \tset_current_state(blocking_state);\n-\tspin_unlock(&ctx->fault_pending_wqh.lock);\n+\tspin_unlock_irq(&ctx->fault_pending_wqh.lock);\n \n \tif (!is_vm_hugetlb_page(vmf->vma))\n \t\tmust_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,\n@@ -552,13 +562,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)\n \t * kernel stack can be released after the list_del_init.\n \t */\n \tif (!list_empty_careful(&uwq.wq.entry)) {\n-\t\tspin_lock(&ctx->fault_pending_wqh.lock);\n+\t\tspin_lock_irq(&ctx->fault_pending_wqh.lock);\n \t\t/*\n \t\t * No need of list_del_init(), the uwq on the stack\n \t\t * will be freed shortly anyway.\n \t\t */\n \t\tlist_del(&uwq.wq.entry);\n-\t\tspin_unlock(&ctx->fault_pending_wqh.lock);\n+\t\tspin_unlock_irq(&ctx->fault_pending_wqh.lock);\n \t}\n \n \t/*\n@@ -583,7 +593,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,\n \tinit_waitqueue_entry(&ewq->wq, current);\n \trelease_new_ctx = NULL;\n \n-\tspin_lock(&ctx->event_wqh.lock);\n+\tspin_lock_irq(&ctx->event_wqh.lock);\n \t/*\n \t * After the __add_wait_queue the uwq is visible to userland\n \t * through poll/read().\n@@ -613,15 +623,15 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,\n \t\t\tbreak;\n \t\t}\n \n-\t\tspin_unlock(&ctx->event_wqh.lock);\n+\t\tspin_unlock_irq(&ctx->event_wqh.lock);\n \n \t\twake_up_poll(&ctx->fd_wqh, EPOLLIN);\n \t\tschedule();\n \n-\t\tspin_lock(&ctx->event_wqh.lock);\n+\t\tspin_lock_irq(&ctx->event_wqh.lock);\n \t}\n \t__set_current_state(TASK_RUNNING);\n-\tspin_unlock(&ctx->event_wqh.lock);\n+\tspin_unlock_irq(&ctx->event_wqh.lock);\n \n \tif (release_new_ctx) {\n \t\tstruct vm_area_struct *vma;\n@@ -918,10 +928,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file)\n \t * the last page faults that may have been already waiting on\n \t * the fault_*wqh.\n \t */\n-\tspin_lock(&ctx->fault_pending_wqh.lock);\n+\tspin_lock_irq(&ctx->fault_pending_wqh.lock);\n \t__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL, &range);\n \t__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, &range);\n-\tspin_unlock(&ctx->fault_pending_wqh.lock);\n+\tspin_unlock_irq(&ctx->fault_pending_wqh.lock);\n \n \t/* Flush pending events that may still wait on event_wqh */\n \twake_up_all(&ctx->event_wqh);\n@@ -1134,7 +1144,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,\n \n \tif (!ret && msg->event == UFFD_EVENT_FORK) {\n \t\tret = resolve_userfault_fork(ctx, fork_nctx, msg);\n-\t\tspin_lock(&ctx->event_wqh.lock);\n+\t\tspin_lock_irq(&ctx->event_wqh.lock);\n \t\tif (!list_empty(&fork_event)) {\n \t\t\t/*\n \t\t\t * The fork thread didn't abort, so we can\n@@ -1180,7 +1190,7 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,\n \t\t\tif (ret)\n \t\t\t\tuserfaultfd_ctx_put(fork_nctx);\n \t\t}\n-\t\tspin_unlock(&ctx->event_wqh.lock);\n+\t\tspin_unlock_irq(&ctx->event_wqh.lock);\n \t}\n \n \treturn ret;\n@@ -1219,14 +1229,14 @@ static ssize_t userfaultfd_read(struct file *file, char __user *buf,\n static void __wake_userfault(struct userfaultfd_ctx *ctx,\n \t\t\t     struct userfaultfd_wake_range *range)\n {\n-\tspin_lock(&ctx->fault_pending_wqh.lock);\n+\tspin_lock_irq(&ctx->fault_pending_wqh.lock);\n \t/* wake all in the range and autoremove */\n \tif (waitqueue_active(&ctx->fault_pending_wqh))\n \t\t__wake_up_locked_key(&ctx->fault_pending_wqh, TASK_NORMAL,\n \t\t\t\t     range);\n \tif (waitqueue_active(&ctx->fault_wqh))\n \t\t__wake_up(&ctx->fault_wqh, TASK_NORMAL, 1, range);\n-\tspin_unlock(&ctx->fault_pending_wqh.lock);\n+\tspin_unlock_irq(&ctx->fault_pending_wqh.lock);\n }\n \n static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx,\n@@ -1881,7 +1891,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)\n \twait_queue_entry_t *wq;\n \tunsigned long pending = 0, total = 0;\n \n-\tspin_lock(&ctx->fault_pending_wqh.lock);\n+\tspin_lock_irq(&ctx->fault_pending_wqh.lock);\n \tlist_for_each_entry(wq, &ctx->fault_pending_wqh.head, entry) {\n \t\tpending++;\n \t\ttotal++;\n@@ -1889,7 +1899,7 @@ static void userfaultfd_show_fdinfo(struct seq_file *m, struct file *f)\n \tlist_for_each_entry(wq, &ctx->fault_wqh.head, entry) {\n \t\ttotal++;\n \t}\n-\tspin_unlock(&ctx->fault_pending_wqh.lock);\n+\tspin_unlock_irq(&ctx->fault_pending_wqh.lock);\n \n \t/*\n \t * If more protocols will be added, there will be all shown\n",
    "patch_modified_files": [
        "fs/userfaultfd.c"
    ]
}