From: Fred Isaman Date: Thu, 3 Feb 2011 18:28:52 +0000 (+0000) Subject: pnfs: fix pnfs lock inversion of i_lock and cl_lock X-Git-Url: http://git.cdn.openwrt.org/?a=commitdiff_plain;h=f49f9baac8f63de9cbc17a0a84e04060496e8e76;p=openwrt%2Fstaging%2Fblogic.git pnfs: fix pnfs lock inversion of i_lock and cl_lock Throughout, the pnfs code was using the lock order i_lock, then cl_lock. This conflicts with the nfs delegation code. Rework the pnfs code to avoid taking both locks simultaneously. Currently the code takes both locks to add the layout to, or remove it from, an nfs_client list while atomically checking that the list of lsegs is empty. To avoid this, we rely on existing serializations. When a layout is initialized with an lseg count equal to zero, LAYOUTGET's openstateid serialization is in effect, making it safe to assume the count stays zero unless we change it. And once a layout's lseg count drops to zero, the layout is marked DESTROYED, so the count will stay at zero. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 89587573fe50..2f41dccea18e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, rv = NFS4ERR_DELAY; list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&free_me_list); put_layout_hdr(lo); iput(ino); } - pnfs_free_lseg_list(&free_me_list); return rv; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c17edfbbaebf..0f5b66f90d17 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -247,13 +247,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del(&lseg->pls_list); if (list_empty(&lseg->pls_layout->plh_segs)) { - struct nfs_client *clp; - - clp = NFS_SERVER(ino)->nfs_client; - spin_lock(&clp->cl_lock); - /* List does not take a reference, so no need for put here */ - list_del_init(&lseg->pls_layout->plh_layouts); - 
spin_unlock(&clp->cl_lock); set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); /* Matched by initial refcount set in alloc_init_layout_hdr */ put_layout_hdr_locked(lseg->pls_layout); @@ -319,11 +312,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return invalid - removed; } +/* note free_me must contain lsegs from a single layout_hdr */ void pnfs_free_lseg_list(struct list_head *free_me) { struct pnfs_layout_segment *lseg, *tmp; + struct pnfs_layout_hdr *lo; + + if (list_empty(free_me)) + return; + lo = list_first_entry(free_me, struct pnfs_layout_segment, + pls_list)->pls_layout; + + if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { + struct nfs_client *clp; + + clp = NFS_SERVER(lo->plh_inode)->nfs_client; + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + } list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); free_lseg(lseg); @@ -705,6 +714,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; + bool first = false; if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; @@ -735,7 +745,10 @@ pnfs_update_layout(struct inode *ino, atomic_inc(&lo->plh_outstanding); get_layout_hdr(lo); - if (list_empty(&lo->plh_segs)) { + if (list_empty(&lo->plh_segs)) + first = true; + spin_unlock(&ino->i_lock); + if (first) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. 
*/ @@ -744,17 +757,12 @@ pnfs_update_layout(struct inode *ino, list_add_tail(&lo->plh_layouts, &clp->cl_layouts); spin_unlock(&clp->cl_lock); } - spin_unlock(&ino->i_lock); lseg = send_layoutget(lo, ctx, iomode); - if (!lseg) { - spin_lock(&ino->i_lock); - if (list_empty(&lo->plh_segs)) { - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } - spin_unlock(&ino->i_lock); + if (!lseg && first) { + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); } atomic_dec(&lo->plh_outstanding); put_layout_hdr(lo);