From: Tejun Heo Date: Tue, 4 Mar 2014 20:38:46 +0000 (-0500) Subject: kernfs: cache atomic_write_len in kernfs_open_file X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=b7ce40cff0b9f6597f8318fd761accd92727f61f;p=openwrt%2Fstaging%2Fblogic.git kernfs: cache atomic_write_len in kernfs_open_file While implementing atomic_write_len, 4d3773c4bb41 ("kernfs: implement kernfs_ops->atomic_write_len") moved data copy from userland inside kernfs_get_active() and kernfs_open_file->mutex so that kernfs_ops->atomic_write_len can be accessed before copying buffer from userland; unfortunately, this could lead to locking order inversion involving mmap_sem if copy_from_user() takes a page fault. ====================================================== [ INFO: possible circular locking dependency detected ] 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 Tainted: G W ------------------------------------------------------- trinity-c236/10658 is trying to acquire lock: (&of->mutex#2){+.+.+.}, at: [] kernfs_fop_mmap+0x54/0x120 but task is already holding lock: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x6e/0xe0 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #1 (&mm->mmap_sem){++++++}: [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] lock_acquire+0x182/0x1d0 [] might_fault+0x7e/0xb0 [] kernfs_fop_write+0xd8/0x190 [] vfs_write+0xe3/0x1d0 [] SyS_write+0x5d/0xa0 [] tracesys+0xdd/0xe2 -> #0 (&of->mutex#2){+.+.+.}: [] check_prev_add+0x13f/0x560 [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] lock_acquire+0x182/0x1d0 [] mutex_lock_nested+0x6a/0x510 [] kernfs_fop_mmap+0x54/0x120 [] mmap_region+0x310/0x5c0 [] do_mmap_pgoff+0x385/0x430 [] vm_mmap_pgoff+0x8f/0xe0 [] SyS_mmap_pgoff+0x1b0/0x210 [] SyS_mmap+0x1d/0x20 [] tracesys+0xdd/0xe2 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mm->mmap_sem); lock(&of->mutex#2); lock(&mm->mmap_sem); lock(&of->mutex#2); *** DEADLOCK *** 1 lock held by trinity-c236/10658: #0: (&mm->mmap_sem){++++++}, at: [] vm_mmap_pgoff+0x6e/0xe0 stack backtrace: CPU: 2 PID: 10658 Comm: trinity-c236 Tainted: G W 3.14.0-rc4-next-20140228-sasha-00011-g4077c67-dirty #26 0000000000000000 ffff88011911fa48 ffffffff8438e945 0000000000000000 0000000000000000 ffff88011911fa98 ffffffff811a0109 ffff88011911fab8 ffff88011911fab8 ffff88011911fa98 ffff880119128cc0 ffff880119128cf8 Call Trace: [] dump_stack+0x52/0x7f [] print_circular_bug+0x129/0x160 [] check_prev_add+0x13f/0x560 [] ? deactivate_slab+0x511/0x550 [] validate_chain+0x6c5/0x7b0 [] __lock_acquire+0x4cd/0x5a0 [] ? mmap_region+0x24a/0x5c0 [] lock_acquire+0x182/0x1d0 [] ? kernfs_fop_mmap+0x54/0x120 [] mutex_lock_nested+0x6a/0x510 [] ? kernfs_fop_mmap+0x54/0x120 [] ? get_parent_ip+0x11/0x50 [] ? kernfs_fop_mmap+0x54/0x120 [] kernfs_fop_mmap+0x54/0x120 [] mmap_region+0x310/0x5c0 [] do_mmap_pgoff+0x385/0x430 [] ? vm_mmap_pgoff+0x6e/0xe0 [] vm_mmap_pgoff+0x8f/0xe0 [] ? __rcu_read_unlock+0x44/0xb0 [] ? 
dup_fd+0x3c0/0x3c0 [] SyS_mmap_pgoff+0x1b0/0x210 [] SyS_mmap+0x1d/0x20 [] tracesys+0xdd/0xe2 Fix it by caching atomic_write_len in kernfs_open_file during open so that it can be determined without accessing kernfs_ops in kernfs_fop_write(). This restores the structure of kernfs_fop_write() before 4d3773c4bb41 with updated @len determination logic. Signed-off-by: Tejun Heo Reported-by: Sasha Levin References: http://lkml.kernel.org/g/53113485.2090407@oracle.com Signed-off-by: Greg Kroah-Hartman --- diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index ddcb471b9cc9..8034706a7af8 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -253,55 +253,50 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, { struct kernfs_open_file *of = kernfs_of(file); const struct kernfs_ops *ops; - char *buf = NULL; - ssize_t len; - - /* - * @of->mutex nests outside active ref and is just to ensure that - * the ops aren't called concurrently for the same open file. - */ - mutex_lock(&of->mutex); - if (!kernfs_get_active(of->kn)) { - mutex_unlock(&of->mutex); - return -ENODEV; - } - - ops = kernfs_ops(of->kn); - if (!ops->write) { - len = -EINVAL; - goto out_unlock; - } + size_t len; + char *buf; - if (ops->atomic_write_len) { + if (of->atomic_write_len) { len = count; - if (len > ops->atomic_write_len) { - len = -E2BIG; - goto out_unlock; - } + if (len > of->atomic_write_len) + return -E2BIG; } else { len = min_t(size_t, count, PAGE_SIZE); } buf = kmalloc(len + 1, GFP_KERNEL); - if (!buf) { - len = -ENOMEM; - goto out_unlock; - } + if (!buf) + return -ENOMEM; if (copy_from_user(buf, user_buf, len)) { len = -EFAULT; - goto out_unlock; + goto out_free; } buf[len] = '\0'; /* guarantee string termination */ - len = ops->write(of, buf, len, *ppos); -out_unlock: + /* + * @of->mutex nests outside active ref and is just to ensure that + * the ops aren't called concurrently for the same open file. 
+ */ + mutex_lock(&of->mutex); + if (!kernfs_get_active(of->kn)) { + mutex_unlock(&of->mutex); + len = -ENODEV; + goto out_free; + } + + ops = kernfs_ops(of->kn); + if (ops->write) + len = ops->write(of, buf, len, *ppos); + else + len = -EINVAL; + kernfs_put_active(of->kn); mutex_unlock(&of->mutex); if (len > 0) *ppos += len; - +out_free: kfree(buf); return len; } @@ -665,6 +660,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) of->kn = kn; of->file = file; + /* + * Write path needs to access atomic_write_len outside active reference. + * Cache it in open_file. See kernfs_fop_write() for details. + */ + of->atomic_write_len = ops->atomic_write_len; + /* * Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 09669d092748..b0122dc6f96a 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -158,6 +158,7 @@ struct kernfs_open_file { int event; struct list_head list; + size_t atomic_write_len; bool mmapped; const struct vm_operations_struct *vm_ops; };