For ITER_BVEC, if we're holding on to kernel pages, the caller
doesn't need to grab a reference to the bvec pages, and drop that
same reference on IO completion. This is essentially safe for any
ITER_BVEC, but some use cases end up reusing pages and uncondtionally
dropping a page reference on completion. And example of that is
sendfile(2), that ends up being a splice_in + splice_out on the
pipe pages.
Add a flag that tells us it's fine to not grab a page reference
to the bvec pages, since that caller knows not to drop a reference
when it's done with the pages.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
if (offset)
iov_iter_advance(iter, offset);
+
+ /* don't drop a reference to these pages */
+ iter->type |= ITER_BVEC_FLAG_NO_REF;
return 0;
}
};
enum iter_type {
- ITER_IOVEC = 0,
- ITER_KVEC = 2,
- ITER_BVEC = 4,
- ITER_PIPE = 8,
- ITER_DISCARD = 16,
+ /* set if ITER_BVEC doesn't hold a bv_page ref */
+ ITER_BVEC_FLAG_NO_REF = 2,
+
+ /* iter types */
+ ITER_IOVEC = 4,
+ ITER_KVEC = 8,
+ ITER_BVEC = 16,
+ ITER_PIPE = 32,
+ ITER_DISCARD = 64,
};
struct iov_iter {
+ /*
+ * Bit 0 is the read/write bit, set if we're writing.
+ * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
+ * the caller isn't expecting to drop a page reference when done.
+ */
unsigned int type;
size_t iov_offset;
size_t count;
return i->type & (READ | WRITE);
}
+static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
+{
+ return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
+}
+
/*
* Total number of bytes covered by an iovec.
*