NFS: Fix spurious EIO read errors
authorTrond Myklebust <trond.myklebust@hammerspace.com>
Thu, 15 Aug 2019 16:26:05 +0000 (12:26 -0400)
committerTrond Myklebust <trond.myklebust@hammerspace.com>
Mon, 26 Aug 2019 19:31:29 +0000 (15:31 -0400)
If the client attempts to read a page, but the read fails due to some
spurious error (e.g. an ACCESS error or a timeout, ...) then we need
to allow other processes to retry.
Also try to report errors correctly when doing a synchronous readpage.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
fs/nfs/internal.h
fs/nfs/read.c
fs/nfs/write.c

index a2346a2f8361d56c0608af517adf5c74a7d52404..e64f810223be6b961c865e92b45afa40800a45c2 100644 (file)
@@ -775,3 +775,13 @@ static inline bool nfs_error_is_fatal(int err)
        }
 }
 
+static inline bool nfs_error_is_fatal_on_server(int err)
+{
+       switch (err) {
+       case 0:
+       case -ERESTARTSYS:
+       case -EINTR:
+               return false;
+       }
+       return nfs_error_is_fatal(err);
+}
index c19841c82b6a3213203d1e964e5fcd85e8c96e1a..cfe0b586eadd4d0ed5bae36a00bfabf85ea90fe9 100644 (file)
@@ -91,19 +91,25 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 
-static void nfs_readpage_release(struct nfs_page *req)
+static void nfs_readpage_release(struct nfs_page *req, int error)
 {
        struct inode *inode = d_inode(nfs_req_openctx(req)->dentry);
+       struct page *page = req->wb_page;
 
        dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
                (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
                (long long)req_offset(req));
 
+       if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
+               SetPageError(page);
        if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
-               if (PageUptodate(req->wb_page))
-                       nfs_readpage_to_fscache(inode, req->wb_page, 0);
+               struct address_space *mapping = page_file_mapping(page);
 
-               unlock_page(req->wb_page);
+               if (PageUptodate(page))
+                       nfs_readpage_to_fscache(inode, page, 0);
+               else if (!PageError(page) && !PagePrivate(page))
+                       generic_error_remove_page(mapping, page);
+               unlock_page(page);
        }
        nfs_release_request(req);
 }
@@ -131,7 +137,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                             &nfs_async_read_completion_ops);
        if (!nfs_pageio_add_request(&pgio, new)) {
                nfs_list_remove_request(new);
-               nfs_readpage_release(new);
+               nfs_readpage_release(new, pgio.pg_error);
        }
        nfs_pageio_complete(&pgio);
 
@@ -153,6 +159,7 @@ static void nfs_page_group_set_uptodate(struct nfs_page *req)
 static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
        unsigned long bytes = 0;
+       int error;
 
        if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
                goto out;
@@ -179,14 +186,19 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
                                zero_user_segment(page, start, end);
                        }
                }
+               error = 0;
                bytes += req->wb_bytes;
                if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
                        if (bytes <= hdr->good_bytes)
                                nfs_page_group_set_uptodate(req);
+                       else {
+                               error = hdr->error;
+                               xchg(&nfs_req_openctx(req)->error, error);
+                       }
                } else
                        nfs_page_group_set_uptodate(req);
                nfs_list_remove_request(req);
-               nfs_readpage_release(req);
+               nfs_readpage_release(req, error);
        }
 out:
        hdr->release(hdr);
@@ -213,7 +225,7 @@ nfs_async_read_error(struct list_head *head, int error)
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
-               nfs_readpage_release(req);
+               nfs_readpage_release(req, error);
        }
 }
 
@@ -337,8 +349,13 @@ int nfs_readpage(struct file *file, struct page *page)
                        goto out;
        }
 
+       xchg(&ctx->error, 0);
        error = nfs_readpage_async(ctx, inode, page);
-
+       if (!error) {
+               error = wait_on_page_locked_killable(page);
+               if (!PageUptodate(page) && !error)
+                       error = xchg(&ctx->error, 0);
+       }
 out:
        put_nfs_open_context(ctx);
        return error;
@@ -372,8 +389,8 @@ readpage_async_filler(void *data, struct page *page)
                zero_user_segment(page, len, PAGE_SIZE);
        if (!nfs_pageio_add_request(desc->pgio, new)) {
                nfs_list_remove_request(new);
-               nfs_readpage_release(new);
                error = desc->pgio->pg_error;
+               nfs_readpage_release(new, error);
                goto out;
        }
        return 0;
index 3399149435ce0e1c0cb8b13ae87a32818f05f7a3..cee9905e419c9543a2066f89a96c5a369e63ff6b 100644 (file)
@@ -599,18 +599,6 @@ static void nfs_write_error(struct nfs_page *req, int error)
        nfs_release_request(req);
 }
 
-static bool
-nfs_error_is_fatal_on_server(int err)
-{
-       switch (err) {
-       case 0:
-       case -ERESTARTSYS:
-       case -EINTR:
-               return false;
-       }
-       return nfs_error_is_fatal(err);
-}
-
 /*
  * Find an associated nfs write request, and prepare to flush it out
  * May return an error if the user signalled nfs_wait_on_request().