nfs: per-name sillyunlink exclusion
authorAl Viro <viro@zeniv.linux.org.uk>
Fri, 29 Apr 2016 03:56:31 +0000 (23:56 -0400)
committerAl Viro <viro@zeniv.linux.org.uk>
Mon, 9 May 2016 15:39:45 +0000 (11:39 -0400)
use d_alloc_parallel() for sillyunlink/lookup exclusion and
explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() -
a reader) for rmdir/sillyunlink one.

That ought to make lookup/readdir/!O_CREAT atomic_open really
parallel on NFS.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/nfs/dir.c
fs/nfs/inode.c
fs/nfs/nfs4proc.c
fs/nfs/nfstrace.h
fs/nfs/unlink.c
include/linux/nfs_fs.h
include/linux/nfs_xdr.h

index cd3c754e3f89fb56f78dfd330cb64ad29aab565c..aaf7bd0cbae20216333042c5a082a7560f6af61f 100644 (file)
@@ -909,7 +909,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
        desc->decode = NFS_PROTO(inode)->decode_dirent;
        desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
 
-       nfs_block_sillyrename(dentry);
        if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
                res = nfs_revalidate_mapping(inode, file->f_mapping);
        if (res < 0)
@@ -945,7 +944,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
                        break;
        } while (!desc->eof);
 out:
-       nfs_unblock_sillyrename(dentry);
        if (res > 0)
                res = 0;
        dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
@@ -1398,7 +1396,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
        parent = dentry->d_parent;
        /* Protect against concurrent sillydeletes */
        trace_nfs_lookup_enter(dir, dentry, flags);
-       nfs_block_sillyrename(parent);
        error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
        if (error == -ENOENT)
                goto no_entry;
@@ -1423,7 +1420,6 @@ no_entry:
        }
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 out_unblock_sillyrename:
-       nfs_unblock_sillyrename(parent);
        trace_nfs_lookup_exit(dir, dentry, flags, error);
        nfs4_label_free(label);
 out:
@@ -1535,9 +1531,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
                goto out;
 
        trace_nfs_atomic_open_enter(dir, ctx, open_flags);
-       nfs_block_sillyrename(dentry->d_parent);
        inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened);
-       nfs_unblock_sillyrename(dentry->d_parent);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
@@ -1781,7 +1775,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 
        trace_nfs_rmdir_enter(dir, dentry);
        if (d_really_is_positive(dentry)) {
-               nfs_wait_on_sillyrename(dentry);
+               down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
                error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
                /* Ensure the VFS deletes this inode */
                switch (error) {
@@ -1791,6 +1785,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
                case -ENOENT:
                        nfs_dentry_handle_enoent(dentry);
                }
+               up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
        } else
                error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
        trace_nfs_rmdir_exit(dir, dentry, error);
index 738c84a42eb0217eae22b264eea27930f29f9118..52e7d6869e3b2445dc6417875b3c753edb681ecc 100644 (file)
@@ -1958,9 +1958,7 @@ static void init_once(void *foo)
        nfsi->nrequests = 0;
        nfsi->commit_info.ncommit = 0;
        atomic_set(&nfsi->commit_info.rpcs_out, 0);
-       atomic_set(&nfsi->silly_count, 1);
-       INIT_HLIST_HEAD(&nfsi->silly_list);
-       init_waitqueue_head(&nfsi->waitqueue);
+       init_rwsem(&nfsi->rmdir_sem);
        nfs4_init_once(nfsi);
 }
 
index 7a7ac1dafa020c630edd72529be5fb3898728dd4..084e8570da18bbdac42416a353fa1e4786403f51 100644 (file)
@@ -3777,7 +3777,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
 
 static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
 {
-       nfs4_setup_sequence(NFS_SERVER(data->dir),
+       nfs4_setup_sequence(NFS_SB(data->dentry->d_sb),
                        &data->args.seq_args,
                        &data->res.seq_res,
                        task);
index 9f80a086b612a622b68ef0f21a51aa0117cf5371..0b9e5cc9a74705f3cade3c431b479eaa6e01e08c 100644 (file)
@@ -702,7 +702,7 @@ TRACE_EVENT(nfs_sillyrename_unlink,
                ),
 
                TP_fast_assign(
-                       struct inode *dir = data->dir;
+                       struct inode *dir = d_inode(data->dentry->d_parent);
                        size_t len = data->args.name.len;
                        __entry->dev = dir->i_sb->s_dev;
                        __entry->dir = NFS_FILEID(dir);
index 7d6deaccf89bdb27c67ddd9173595c2b6013f782..1868246f56e6f0602140c29322d9cf7dc2234501 100644 (file)
 static void
 nfs_free_unlinkdata(struct nfs_unlinkdata *data)
 {
-       iput(data->dir);
        put_rpccred(data->cred);
        kfree(data->args.name.name);
        kfree(data);
 }
 
-#define NAME_ALLOC_LEN(len)    ((len+16) & ~15)
-/**
- * nfs_copy_dname - copy dentry name to data structure
- * @dentry: pointer to dentry
- * @data: nfs_unlinkdata
- */
-static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
-{
-       char            *str;
-       int             len = dentry->d_name.len;
-
-       str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL);
-       if (!str)
-               return -ENOMEM;
-       data->args.name.len = len;
-       data->args.name.name = str;
-       return 0;
-}
-
-static void nfs_free_dname(struct nfs_unlinkdata *data)
-{
-       kfree(data->args.name.name);
-       data->args.name.name = NULL;
-       data->args.name.len = 0;
-}
-
-static void nfs_dec_sillycount(struct inode *dir)
-{
-       struct nfs_inode *nfsi = NFS_I(dir);
-       if (atomic_dec_return(&nfsi->silly_count) == 1)
-               wake_up(&nfsi->waitqueue);
-}
-
 /**
  * nfs_async_unlink_done - Sillydelete post-processing
  * @task: rpc_task of the sillydelete
@@ -78,7 +44,7 @@ static void nfs_dec_sillycount(struct inode *dir)
 static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 {
        struct nfs_unlinkdata *data = calldata;
-       struct inode *dir = data->dir;
+       struct inode *dir = d_inode(data->dentry->d_parent);
 
        trace_nfs_sillyrename_unlink(data, task->tk_status);
        if (!NFS_PROTO(dir)->unlink_done(task, dir))
@@ -95,17 +61,21 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 static void nfs_async_unlink_release(void *calldata)
 {
        struct nfs_unlinkdata   *data = calldata;
-       struct super_block *sb = data->dir->i_sb;
+       struct dentry *dentry = data->dentry;
+       struct super_block *sb = dentry->d_sb;
 
-       nfs_dec_sillycount(data->dir);
+       up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem);
+       d_lookup_done(dentry);
        nfs_free_unlinkdata(data);
+       dput(dentry);
        nfs_sb_deactive(sb);
 }
 
 static void nfs_unlink_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfs_unlinkdata *data = calldata;
-       NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data);
+       struct inode *dir = d_inode(data->dentry->d_parent);
+       NFS_PROTO(dir)->unlink_rpc_prepare(task, data);
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {
@@ -114,7 +84,7 @@ static const struct rpc_call_ops nfs_unlink_ops = {
        .rpc_call_prepare = nfs_unlink_prepare,
 };
 
-static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
+static void nfs_do_call_unlink(struct nfs_unlinkdata *data)
 {
        struct rpc_message msg = {
                .rpc_argp = &data->args,
@@ -129,10 +99,31 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                .flags = RPC_TASK_ASYNC,
        };
        struct rpc_task *task;
+       struct inode *dir = d_inode(data->dentry->d_parent);
+       nfs_sb_active(dir->i_sb);
+       data->args.fh = NFS_FH(dir);
+       nfs_fattr_init(data->res.dir_attr);
+
+       NFS_PROTO(dir)->unlink_setup(&msg, dir);
+
+       task_setup_data.rpc_client = NFS_CLIENT(dir);
+       task = rpc_run_task(&task_setup_data);
+       if (!IS_ERR(task))
+               rpc_put_task_async(task);
+}
+
+static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
+{
+       struct inode *dir = d_inode(dentry->d_parent);
        struct dentry *alias;
 
-       alias = d_lookup(parent, &data->args.name);
-       if (alias != NULL) {
+       down_read_non_owner(&NFS_I(dir)->rmdir_sem);
+       alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq);
+       if (IS_ERR(alias)) {
+               up_read_non_owner(&NFS_I(dir)->rmdir_sem);
+               return 0;
+       }
+       if (!d_in_lookup(alias)) {
                int ret;
                void *devname_garbage = NULL;
 
@@ -140,10 +131,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                 * Hey, we raced with lookup... See if we need to transfer
                 * the sillyrename information to the aliased dentry.
                 */
-               nfs_free_dname(data);
-               ret = nfs_copy_dname(alias, data);
                spin_lock(&alias->d_lock);
-               if (ret == 0 && d_really_is_positive(alias) &&
+               if (d_really_is_positive(alias) &&
                    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
                        devname_garbage = alias->d_fsdata;
                        alias->d_fsdata = data;
@@ -152,8 +141,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                } else
                        ret = 0;
                spin_unlock(&alias->d_lock);
-               nfs_dec_sillycount(dir);
                dput(alias);
+               up_read_non_owner(&NFS_I(dir)->rmdir_sem);
                /*
                 * If we'd displaced old cached devname, free it.  At that
                 * point dentry is definitely not a root, so we won't need
@@ -162,95 +151,18 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                kfree(devname_garbage);
                return ret;
        }
-       data->dir = igrab(dir);
-       if (!data->dir) {
-               nfs_dec_sillycount(dir);
-               return 0;
-       }
-       nfs_sb_active(dir->i_sb);
-       data->args.fh = NFS_FH(dir);
-       nfs_fattr_init(data->res.dir_attr);
-
-       NFS_PROTO(dir)->unlink_setup(&msg, dir);
-
-       task_setup_data.rpc_client = NFS_CLIENT(dir);
-       task = rpc_run_task(&task_setup_data);
-       if (!IS_ERR(task))
-               rpc_put_task_async(task);
+       data->dentry = alias;
+       nfs_do_call_unlink(data);
        return 1;
 }
 
-static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
-{
-       struct dentry *parent;
-       struct inode *dir;
-       int ret = 0;
-
-
-       parent = dget_parent(dentry);
-       if (parent == NULL)
-               goto out_free;
-       dir = d_inode(parent);
-       /* Non-exclusive lock protects against concurrent lookup() calls */
-       spin_lock(&dir->i_lock);
-       if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
-               /* Deferred delete */
-               hlist_add_head(&data->list, &NFS_I(dir)->silly_list);
-               spin_unlock(&dir->i_lock);
-               ret = 1;
-               goto out_dput;
-       }
-       spin_unlock(&dir->i_lock);
-       ret = nfs_do_call_unlink(parent, dir, data);
-out_dput:
-       dput(parent);
-out_free:
-       return ret;
-}
-
-void nfs_wait_on_sillyrename(struct dentry *dentry)
-{
-       struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
-
-       wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
-}
-
-void nfs_block_sillyrename(struct dentry *dentry)
-{
-       struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
-
-       wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
-}
-
-void nfs_unblock_sillyrename(struct dentry *dentry)
-{
-       struct inode *dir = d_inode(dentry);
-       struct nfs_inode *nfsi = NFS_I(dir);
-       struct nfs_unlinkdata *data;
-
-       atomic_inc(&nfsi->silly_count);
-       wake_up(&nfsi->waitqueue);
-       spin_lock(&dir->i_lock);
-       while (!hlist_empty(&nfsi->silly_list)) {
-               if (!atomic_inc_not_zero(&nfsi->silly_count))
-                       break;
-               data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list);
-               hlist_del(&data->list);
-               spin_unlock(&dir->i_lock);
-               if (nfs_do_call_unlink(dentry, dir, data) == 0)
-                       nfs_free_unlinkdata(data);
-               spin_lock(&dir->i_lock);
-       }
-       spin_unlock(&dir->i_lock);
-}
-
 /**
  * nfs_async_unlink - asynchronous unlinking of a file
  * @dir: parent directory of dentry
  * @dentry: dentry to unlink
  */
 static int
-nfs_async_unlink(struct inode *dir, struct dentry *dentry)
+nfs_async_unlink(struct dentry *dentry, struct qstr *name)
 {
        struct nfs_unlinkdata *data;
        int status = -ENOMEM;
@@ -259,13 +171,18 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
        data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (data == NULL)
                goto out;
+       data->args.name.name = kstrdup(name->name, GFP_KERNEL);
+       if (!data->args.name.name)
+               goto out_free;
+       data->args.name.len = name->len;
 
        data->cred = rpc_lookup_cred();
        if (IS_ERR(data->cred)) {
                status = PTR_ERR(data->cred);
-               goto out_free;
+               goto out_free_name;
        }
        data->res.dir_attr = &data->dir_attr;
+       init_waitqueue_head(&data->wq);
 
        status = -EBUSY;
        spin_lock(&dentry->d_lock);
@@ -285,6 +202,8 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 out_unlock:
        spin_unlock(&dentry->d_lock);
        put_rpccred(data->cred);
+out_free_name:
+       kfree(data->args.name.name);
 out_free:
        kfree(data);
 out:
@@ -303,17 +222,15 @@ out:
 void
 nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
 {
-       struct nfs_unlinkdata   *data = NULL;
+       struct nfs_unlinkdata   *data;
 
        spin_lock(&dentry->d_lock);
-       if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
-               dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
-               data = dentry->d_fsdata;
-               dentry->d_fsdata = NULL;
-       }
+       dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+       data = dentry->d_fsdata;
+       dentry->d_fsdata = NULL;
        spin_unlock(&dentry->d_lock);
 
-       if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
+       if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))
                nfs_free_unlinkdata(data);
 }
 
@@ -560,18 +477,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
        /* queue unlink first. Can't do this from rpc_release as it
         * has to allocate memory
         */
-       error = nfs_async_unlink(dir, dentry);
+       error = nfs_async_unlink(dentry, &sdentry->d_name);
        if (error)
                goto out_dput;
 
-       /* populate unlinkdata with the right dname */
-       error = nfs_copy_dname(sdentry,
-                               (struct nfs_unlinkdata *)dentry->d_fsdata);
-       if (error) {
-               nfs_cancel_async_unlink(dentry);
-               goto out_dput;
-       }
-
        /* run the rename task, undo unlink if it fails */
        task = nfs_async_rename(dir, dir, dentry, sdentry,
                                        nfs_complete_sillyrename);
index 67300f8e5f2f0248e8651cbe17e5c14e1110ca2c..fa167f25465d3ba2a76524e631757c8881f2684a 100644 (file)
@@ -163,11 +163,9 @@ struct nfs_inode {
        /* Open contexts for shared mmap writes */
        struct list_head        open_files;
 
-       /* Number of in-flight sillydelete RPC calls */
-       atomic_t                silly_count;
-       /* List of deferred sillydelete requests */
-       struct hlist_head       silly_list;
-       wait_queue_head_t       waitqueue;
+       /* Readers: in-flight sillydelete RPC calls */
+       /* Writers: rmdir */
+       struct rw_semaphore     rmdir_sem;
 
 #if IS_ENABLED(CONFIG_NFS_V4)
        struct nfs4_cached_acl  *nfs4_acl;
@@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void);
  * linux/fs/nfs/unlink.c
  */
 extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
-extern void nfs_wait_on_sillyrename(struct dentry *dentry);
-extern void nfs_block_sillyrename(struct dentry *dentry);
-extern void nfs_unblock_sillyrename(struct dentry *dentry);
 
 /*
  * linux/fs/nfs/write.c
index d320906cf13e5d7d3d443a6249992202eb171f59..ee8491dadbf385aa53b689efb1912d685790fbc9 100644 (file)
@@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops {
 };
 
 struct nfs_unlinkdata {
-       struct hlist_node list;
        struct nfs_removeargs args;
        struct nfs_removeres res;
-       struct inode *dir;
+       struct dentry *dentry;
+       wait_queue_head_t wq;
        struct rpc_cred *cred;
        struct nfs_fattr dir_attr;
        long timeout;