block: Added in stricter no merge semantics for block I/O

author Alan D. Brunelle <Alan.Brunelle@hp.com>

Fri, 29 Jan 2010 08:04:08 +0000 (09:04 +0100)

committer Jens Axboe <jens.axboe@oracle.com>

Fri, 29 Jan 2010 08:04:08 +0000 (09:04 +0100)
author Alan D. Brunelle <Alan.Brunelle@hp.com>
Fri, 29 Jan 2010 08:04:08 +0000 (09:04 +0100)
committer Jens Axboe <jens.axboe@oracle.com>
Fri, 29 Jan 2010 08:04:08 +0000 (09:04 +0100)
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block

index d2f90334bb93f90af2986e96d3cfd9710180eca7..4873c759d535a7549d5eecf62a7125b29d6c2dea 100644 (file)
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,17 @@ Description:
                 preferred request size for workloads where sustained
                 throughput is desired.  If no optimal I/O size is
                 reported this file contains 0.
+
+What:          /sys/block/<disk>/queue/nomerges
+Date:          January 2010
+Contact:
+Description:
+               Standard I/O elevator operations include attempts to
+               merge contiguous I/Os. For known random I/O loads these
+               attempts will always fail and result in extra cycles
+               being spent in the kernel. This allows one to turn off
+               this behavior in one of two ways: When set to 1, complex
+               merge checks are disabled, but the simple one-shot merges
+               with the previous I/O request are enabled. When set to 2,
+               all merge tries are disabled. The default value is 0 -
+               which enables all types of merge tries.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt

index e164403f60e19b92e01050b3800826acbe27448a..f65274081c8d19a1c2f6f8b53712a1cf9cbee54d 100644 (file)
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -25,11 +25,11 @@ size allowed by the hardware.
  
  nomerges (RW)
  -------------
-This enables the user to disable the lookup logic involved with IO merging
-requests in the block layer. Merging may still occur through a direct
-1-hit cache, since that comes for (almost) free. The IO scheduler will not
-waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults
-to 0, enabling all merges.
+This enables the user to disable the lookup logic involved with IO
+merging requests in the block layer. By default (0) all merges are
+enabled. When set to 1 only simple one-hit merges will be tried. When
+set to 2 no merge algorithms will be tried (including one-hit or more
+complex tree/hash lookups).
  
  nr_requests (RW)
  ----------------
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c

index 8606c9543fdda0368c6bdde8b4af2d00584e674d..e85442415db34174f5ea4efce83cc0912813adcc 100644 (file)
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -189,7 +189,8 @@ static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
  
  static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
  {
-       return queue_var_show(blk_queue_nomerges(q), page);
+       return queue_var_show((blk_queue_nomerges(q) << 1) |
+                              blk_queue_noxmerges(q), page);
  }
  
  static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
@@ -199,10 +200,12 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
         ssize_t ret = queue_var_store(&nm, page, count);
  
         spin_lock_irq(q->queue_lock);
-       if (nm)
+       queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+       queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+       if (nm == 2)
                 queue_flag_set(QUEUE_FLAG_NOMERGES, q);
-       else
-               queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+       else if (nm)
+               queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
         spin_unlock_irq(q->queue_lock);
  
         return ret;
diff --git a/block/elevator.c b/block/elevator.c

index 9ad5ccc4c5eeca51408eef4bbfc1da33a7c8d532..ee3a883840f268b1343300ad40368005e2d317dc 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -473,6 +473,15 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
         struct request *__rq;
         int ret;
  
+       /*
+        * Levels of merges:
+        *      nomerges:  No merges at all attempted
+        *      noxmerges: Only simple one-hit cache try
+        *      merges:    All merge tries attempted
+        */
+       if (blk_queue_nomerges(q))
+               return ELEVATOR_NO_MERGE;
+
         /*
          * First try one-hit cache.
          */
@@ -484,7 +493,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
                 }
         }
  
-       if (blk_queue_nomerges(q))
+       if (blk_queue_noxmerges(q))
                 return ELEVATOR_NO_MERGE;
  
         /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index ffb13ad35716d124644afee9efc618e86ed3237e..f71f5c58620c39b6d540321dc2e14e9efd392ef6 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -463,6 +463,7 @@ struct request_queue
  #define QUEUE_FLAG_IO_STAT     15      /* do IO stats */
  #define QUEUE_FLAG_CQ         16       /* hardware does queuing */
  #define QUEUE_FLAG_DISCARD     17      /* supports DISCARD */
+#define QUEUE_FLAG_NOXMERGES   18      /* No extended merges */
  
  #define QUEUE_FLAG_DEFAULT     ((1 << QUEUE_FLAG_IO_STAT) |            \
                                  (1 << QUEUE_FLAG_CLUSTER) |            \
@@ -589,6 +590,8 @@ enum {
  #define blk_queue_queuing(q)   test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
  #define blk_queue_stopped(q)   test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
  #define blk_queue_nomerges(q)  test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
+#define blk_queue_noxmerges(q) \
+       test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
  #define blk_queue_nonrot(q)    test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
  #define blk_queue_io_stat(q)   test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
  #define blk_queue_flushing(q)  ((q)->ordseq)
author	Alan D. Brunelle <Alan.Brunelle@hp.com>
	Fri, 29 Jan 2010 08:04:08 +0000 (09:04 +0100)
committer	Jens Axboe <jens.axboe@oracle.com>
	Fri, 29 Jan 2010 08:04:08 +0000 (09:04 +0100)
Documentation/ABI/testing/sysfs-block		patch \| blob \| history
Documentation/block/queue-sysfs.txt		patch \| blob \| history
block/blk-sysfs.c		patch \| blob \| history
block/elevator.c		patch \| blob \| history
include/linux/blkdev.h		patch \| blob \| history