From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:16 +0200
Subject: block: kill QUEUE_ORDERED_BY_TAG

Nobody is making meaningful use of ORDERED_BY_TAG now and queue
draining for barrier requests will be removed soon which will render
the advantage of tag ordering moot.  Kill ORDERED_BY_TAG.  The
following users are affected.

* brd: converted to ORDERED_DRAIN.
* virtio_blk: ORDERED_TAG path was already marked deprecated.  Removed.
* xen-blkfront: ORDERED_TAG case dropped.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-barrier.c | 35 +++++++----------------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

(limited to 'block')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f0faefca032f..c807e9ca3a68 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -26,10 +26,7 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered)
 	if (ordered != QUEUE_ORDERED_NONE &&
 	    ordered != QUEUE_ORDERED_DRAIN &&
 	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
-	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
-	    ordered != QUEUE_ORDERED_TAG &&
-	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
-	    ordered != QUEUE_ORDERED_TAG_FUA) {
+	    ordered != QUEUE_ORDERED_DRAIN_FUA) {
 		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
 		return -EINVAL;
 	}
@@ -155,21 +152,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	 * For an empty barrier, there's no actual BAR request, which
 	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
 	 */
-	if (!blk_rq_sectors(rq)) {
+	if (!blk_rq_sectors(rq))
 		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
 				QUEUE_ORDERED_DO_POSTFLUSH);
-		/*
-		 * Empty barrier on a write-through device w/ ordered
-		 * tag has no command to issue and without any command
-		 * to issue, ordering by tag can't be used.  Drain
-		 * instead.
-		 */
-		if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
-		    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
-			q->ordered &= ~QUEUE_ORDERED_BY_TAG;
-			q->ordered |= QUEUE_ORDERED_BY_DRAIN;
-		}
-	}
 
 	/* stash away the original request */
 	blk_dequeue_request(rq);
@@ -210,7 +195,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	} else
 		skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-	if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q))
+	if (queue_in_flight(q))
 		rq = NULL;
 	else
 		skip |= QUEUE_ORDSEQ_DRAIN;
@@ -257,16 +242,10 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
 		return true;
 
-	if (q->ordered & QUEUE_ORDERED_BY_TAG) {
-		/* Ordered by tag.  Blocking the next barrier is enough. */
-		if (is_barrier && rq != &q->bar_rq)
-			*rqp = NULL;
-	} else {
-		/* Ordered by draining.  Wait for turn. */
-		WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
-		if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
-			*rqp = NULL;
-	}
+	/* Ordered by draining.  Wait for turn. */
+	WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
+	if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
+		*rqp = NULL;
 
 	return true;
 }
-- 
cgit v1.2.3


From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:16 +0200
Subject: block: deprecate barrier and replace blk_queue_ordered() with
 blk_queue_flush()

Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA
requests.  Deprecate barrier.  All REQ_HARDBARRIERs are failed with
-EOPNOTSUPP and blk_queue_ordered() is replaced with simpler
blk_queue_flush().

blk_queue_flush() takes combinations of REQ_FLUSH and FUA.  If a
device has write cache and can flush it, it should set REQ_FLUSH.  If
the device can handle FUA writes, it should also set REQ_FUA.

All blk_queue_ordered() users are converted.

* ORDERED_DRAIN is mapped to 0 which is the default value.
* ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH.
* ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Geert Uytterhoeven <Geert.Uytterhoeven@sonycom.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alasdair G Kergon <agk@redhat.com>
Cc: Pierre Ossman <drzeus@drzeus.cx>
Cc: Stefan Weinhuber <wein@de.ibm.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-barrier.c  | 29 -----------------------------
 block/blk-core.c     |  6 ++++--
 block/blk-settings.c | 20 ++++++++++++++++++++
 3 files changed, 24 insertions(+), 31 deletions(-)

(limited to 'block')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index c807e9ca3a68..ed0aba5463ab 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -9,35 +9,6 @@
 
 #include "blk.h"
 
-/**
- * blk_queue_ordered - does this queue support ordered writes
- * @q:        the request queue
- * @ordered:  one of QUEUE_ORDERED_*
- *
- * Description:
- *   For journalled file systems, doing ordered writes on a commit
- *   block instead of explicitly doing wait_on_buffer (which is bad
- *   for performance) can be a big win. Block drivers supporting this
- *   feature should call this function and indicate so.
- *
- **/
-int blk_queue_ordered(struct request_queue *q, unsigned ordered)
-{
-	if (ordered != QUEUE_ORDERED_NONE &&
-	    ordered != QUEUE_ORDERED_DRAIN &&
-	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
-	    ordered != QUEUE_ORDERED_DRAIN_FUA) {
-		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
-		return -EINVAL;
-	}
-
-	q->ordered = ordered;
-	q->next_ordered = ordered;
-
-	return 0;
-}
-EXPORT_SYMBOL(blk_queue_ordered);
-
 /*
  * Cache flushing for ordered writes handling
  */
diff --git a/block/blk-core.c b/block/blk-core.c
index ee1a1e7e63cc..f06354183b29 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1203,11 +1203,13 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	int rw_flags;
 
-	if ((bio->bi_rw & REQ_HARDBARRIER) &&
-	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+	/* REQ_HARDBARRIER is no more */
+	if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER,
+		"block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	}
+
 	/*
 	 * low level driver can indicate that it wants pages above a
 	 * certain limit bounced to low memory (ie for highmem, or even
diff --git a/block/blk-settings.c b/block/blk-settings.c
index a234f4bf1d6f..9b18afcfe925 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -794,6 +794,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask)
 }
 EXPORT_SYMBOL(blk_queue_update_dma_alignment);
 
+/**
+ * blk_queue_flush - configure queue's cache flush capability
+ * @q:		the request queue for the device
+ * @flush:	0, REQ_FLUSH or REQ_FLUSH | REQ_FUA
+ *
+ * Tell block layer cache flush capability of @q.  If it supports
+ * flushing, REQ_FLUSH should be set.  If it supports bypassing
+ * write cache for individual writes, REQ_FUA should be set.
+ */
+void blk_queue_flush(struct request_queue *q, unsigned int flush)
+{
+	WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA));
+
+	if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA)))
+		flush &= ~REQ_FUA;
+
+	q->flush_flags = flush & (REQ_FLUSH | REQ_FUA);
+}
+EXPORT_SYMBOL_GPL(blk_queue_flush);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
-- 
cgit v1.2.3


From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:16 +0200
Subject: block: misc cleanups in barrier code

Make the following cleanups in preparation of barrier/flush update.

* blk_do_ordered() declaration is moved from include/linux/blkdev.h to
  block/blk.h.

* blk_do_ordered() now returns pointer to struct request, with %NULL
  meaning "try the next request" and ERR_PTR(-EAGAIN) "try again
  later".  The third case will be dropped with further changes.

* In the initialization of proxy barrier request, data direction is
  already set by init_request_from_bio().  Drop unnecessary explicit
  REQ_WRITE setting and move init_request_from_bio() above REQ_FUA
  flag setting.

* add_request() is collapsed into __make_request().

These changes don't make any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-barrier.c | 32 ++++++++++++++------------------
 block/blk-core.c    | 21 ++++-----------------
 block/blk.h         |  7 +++++--
 3 files changed, 23 insertions(+), 37 deletions(-)

(limited to 'block')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index ed0aba5463ab..f1be85ba2bb5 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -110,9 +110,9 @@ static void queue_flush(struct request_queue *q, unsigned which)
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline bool start_ordered(struct request_queue *q, struct request **rqp)
+static inline struct request *start_ordered(struct request_queue *q,
+					    struct request *rq)
 {
-	struct request *rq = *rqp;
 	unsigned skip = 0;
 
 	q->orderr = 0;
@@ -149,11 +149,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 
 		/* initialize proxy request and queue it */
 		blk_rq_init(q, rq);
-		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-			rq->cmd_flags |= REQ_WRITE;
+		init_request_from_bio(rq, q->orig_bar_rq->bio);
 		if (q->ordered & QUEUE_ORDERED_DO_FUA)
 			rq->cmd_flags |= REQ_FUA;
-		init_request_from_bio(rq, q->orig_bar_rq->bio);
 		rq->end_io = bar_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
@@ -171,27 +169,26 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 	else
 		skip |= QUEUE_ORDSEQ_DRAIN;
 
-	*rqp = rq;
-
 	/*
 	 * Complete skipped sequences.  If whole sequence is complete,
-	 * return false to tell elevator that this request is gone.
+	 * return %NULL to tell elevator that this request is gone.
 	 */
-	return !blk_ordered_complete_seq(q, skip, 0);
+	if (blk_ordered_complete_seq(q, skip, 0))
+		rq = NULL;
+	return rq;
 }
 
-bool blk_do_ordered(struct request_queue *q, struct request **rqp)
+struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
 {
-	struct request *rq = *rqp;
 	const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
 				(rq->cmd_flags & REQ_HARDBARRIER);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
-			return true;
+			return rq;
 
 		if (q->next_ordered != QUEUE_ORDERED_NONE)
-			return start_ordered(q, rqp);
+			return start_ordered(q, rq);
 		else {
 			/*
 			 * Queue ordering not supported.  Terminate
@@ -199,8 +196,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 			 */
 			blk_dequeue_request(rq);
 			__blk_end_request_all(rq, -EOPNOTSUPP);
-			*rqp = NULL;
-			return false;
+			return NULL;
 		}
 	}
 
@@ -211,14 +207,14 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 	/* Special requests are not subject to ordering rules. */
 	if (rq->cmd_type != REQ_TYPE_FS &&
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return true;
+		return rq;
 
 	/* Ordered by draining.  Wait for turn. */
 	WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
 	if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
-		*rqp = NULL;
+		rq = ERR_PTR(-EAGAIN);
 
-	return true;
+	return rq;
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index f06354183b29..f8d37a8e2c55 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1037,22 +1037,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq,
 }
 EXPORT_SYMBOL(blk_insert_request);
 
-/*
- * add-request adds a request to the linked list.
- * queue lock is held and interrupts disabled, as we muck with the
- * request queue list.
- */
-static inline void add_request(struct request_queue *q, struct request *req)
-{
-	drive_stat_acct(req, 1);
-
-	/*
-	 * elevator indicated where it wants this request to be
-	 * inserted at elevator_merge time
-	 */
-	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
-}
-
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
@@ -1316,7 +1300,10 @@ get_rq:
 		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (queue_should_plug(q) && elv_queue_empty(q))
 		blk_plug_device(q);
-	add_request(q, req);
+
+	/* insert the request into the elevator */
+	drive_stat_acct(req, 1);
+	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
 out:
 	if (unplug || !queue_should_plug(q))
 		__generic_unplug_device(q);
diff --git a/block/blk.h b/block/blk.h
index 6e7dc87141e4..874eb4ea8093 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq)
  */
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
+struct request *blk_do_ordered(struct request_queue *q, struct request *rq);
+
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
@@ -58,8 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 	while (1) {
 		while (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
-			if (blk_do_ordered(q, &rq))
-				return rq;
+			rq = blk_do_ordered(q, rq);
+			if (rq)
+				return !IS_ERR(rq) ? rq : NULL;
 		}
 
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
-- 
cgit v1.2.3


From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:16 +0200
Subject: block: drop barrier ordering by queue draining

Filesystems will take all the responsibilities for ordering requests
around commit writes and will only indicate how the commit writes
themselves should be handled by block layers.  This patch drops
barrier ordering by queue draining from block layer.  Ordering by
draining implementation was somewhat invasive to request handling.
A list of notable changes follows.

* Each queue has 1 bit color which is flipped on each barrier issue.
  This is used to track whether a given request is issued before the
  current barrier or not.  REQ_ORDERED_COLOR flag and coloring
  implementation in __elv_add_request() are removed.

* Requests which shouldn't be processed yet for draining were stalled
  by returning -EAGAIN from blk_do_ordered() according to the test
  result between blk_ordered_req_seq() and blk_ordered_cur_seq().
  This logic is removed.

* Draining completion logic in elv_completed_request() removed.

* All barrier sequence requests were queued to request queue and then
  trickled to lower layer according to progress and thus maintaining
  request orders during requeue was necessary.  This is replaced by
  queueing the next request in the barrier sequence only after the
  current one is complete from blk_ordered_complete_seq(), which
  removes the need for multiple proxy requests in struct request_queue
  and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of
  elv_insert().

* As barriers no longer have ordering constraints, there's no need to
  dump the whole elevator onto the dispatch queue on each barrier.
  Insert barriers at the front instead.

* If other barrier requests come to the front of the dispatch queue
  while one is already in progress, they are stored in
  q->pending_barriers and restored to dispatch queue one-by-one after
  each barrier completion from blk_ordered_complete_seq().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-barrier.c | 220 ++++++++++++++++++++--------------------------------
 block/blk-core.c    |  11 ++-
 block/blk.h         |   2 +-
 block/elevator.c    |  79 +++----------------
 4 files changed, 105 insertions(+), 207 deletions(-)

(limited to 'block')

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f1be85ba2bb5..e8b2e5c091b1 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -9,6 +9,8 @@
 
 #include "blk.h"
 
+static struct request *queue_next_ordseq(struct request_queue *q);
+
 /*
  * Cache flushing for ordered writes handling
  */
@@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q)
 	return 1 << ffz(q->ordseq);
 }
 
-unsigned blk_ordered_req_seq(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-
-	BUG_ON(q->ordseq == 0);
-
-	if (rq == &q->pre_flush_rq)
-		return QUEUE_ORDSEQ_PREFLUSH;
-	if (rq == &q->bar_rq)
-		return QUEUE_ORDSEQ_BAR;
-	if (rq == &q->post_flush_rq)
-		return QUEUE_ORDSEQ_POSTFLUSH;
-
-	/*
-	 * !fs requests don't need to follow barrier ordering.  Always
-	 * put them at the front.  This fixes the following deadlock.
-	 *
-	 * http://thread.gmane.org/gmane.linux.kernel/537473
-	 */
-	if (rq->cmd_type != REQ_TYPE_FS)
-		return QUEUE_ORDSEQ_DRAIN;
-
-	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
-	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
-		return QUEUE_ORDSEQ_DRAIN;
-	else
-		return QUEUE_ORDSEQ_DONE;
-}
-
-bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+static struct request *blk_ordered_complete_seq(struct request_queue *q,
+						unsigned seq, int error)
 {
-	struct request *rq;
+	struct request *next_rq = NULL;
 
 	if (error && !q->orderr)
 		q->orderr = error;
@@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	BUG_ON(q->ordseq & seq);
 	q->ordseq |= seq;
 
-	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-		return false;
-
-	/*
-	 * Okay, sequence complete.
-	 */
-	q->ordseq = 0;
-	rq = q->orig_bar_rq;
-	__blk_end_request_all(rq, q->orderr);
-	return true;
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
+		/* not complete yet, queue the next ordered sequence */
+		next_rq = queue_next_ordseq(q);
+	} else {
+		/* complete this barrier request */
+		__blk_end_request_all(q->orig_bar_rq, q->orderr);
+		q->orig_bar_rq = NULL;
+		q->ordseq = 0;
+
+		/* dispatch the next barrier if there's one */
+		if (!list_empty(&q->pending_barriers)) {
+			next_rq = list_entry_rq(q->pending_barriers.next);
+			list_move(&next_rq->queuelist, &q->queue_head);
+		}
+	}
+	return next_rq;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error)
 	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
 }
 
-static void queue_flush(struct request_queue *q, unsigned which)
+static void queue_flush(struct request_queue *q, struct request *rq,
+			rq_end_io_fn *end_io)
 {
-	struct request *rq;
-	rq_end_io_fn *end_io;
-
-	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
-		rq = &q->pre_flush_rq;
-		end_io = pre_flush_end_io;
-	} else {
-		rq = &q->post_flush_rq;
-		end_io = post_flush_end_io;
-	}
-
 	blk_rq_init(q, rq);
 	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
+	rq->cmd_flags = REQ_FLUSH;
 	rq->rq_disk = q->orig_bar_rq->rq_disk;
 	rq->end_io = end_io;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-					    struct request *rq)
+static struct request *queue_next_ordseq(struct request_queue *q)
 {
-	unsigned skip = 0;
-
-	q->orderr = 0;
-	q->ordered = q->next_ordered;
-	q->ordseq |= QUEUE_ORDSEQ_STARTED;
-
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
-	 */
-	if (!blk_rq_sectors(rq))
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-
-	/* stash away the original request */
-	blk_dequeue_request(rq);
-	q->orig_bar_rq = rq;
-	rq = NULL;
-
-	/*
-	 * Queue ordered sequence.  As we stack them at the head, we
-	 * need to queue in reverse order.  Note that we rely on that
-	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence.
-	 */
-	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
-		rq = &q->post_flush_rq;
-	} else
-		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+	struct request *rq = &q->bar_rq;
 
-	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
-		rq = &q->bar_rq;
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		queue_flush(q, rq, pre_flush_end_io);
+		break;
 
+	case QUEUE_ORDSEQ_BAR:
 		/* initialize proxy request and queue it */
 		blk_rq_init(q, rq);
 		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->cmd_flags &= ~REQ_HARDBARRIER;
 		if (q->ordered & QUEUE_ORDERED_DO_FUA)
 			rq->cmd_flags |= REQ_FUA;
 		rq->end_io = bar_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-	} else
-		skip |= QUEUE_ORDSEQ_BAR;
+		break;
 
-	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
-		rq = &q->pre_flush_rq;
-	} else
-		skip |= QUEUE_ORDSEQ_PREFLUSH;
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		queue_flush(q, rq, post_flush_end_io);
+		break;
 
-	if (queue_in_flight(q))
-		rq = NULL;
-	else
-		skip |= QUEUE_ORDSEQ_DRAIN;
-
-	/*
-	 * Complete skipped sequences.  If whole sequence is complete,
-	 * return %NULL to tell elevator that this request is gone.
-	 */
-	if (blk_ordered_complete_seq(q, skip, 0))
-		rq = NULL;
+	default:
+		BUG();
+	}
 	return rq;
 }
 
 struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
 {
-	const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
-				(rq->cmd_flags & REQ_HARDBARRIER);
-
-	if (!q->ordseq) {
-		if (!is_barrier)
-			return rq;
-
-		if (q->next_ordered != QUEUE_ORDERED_NONE)
-			return start_ordered(q, rq);
-		else {
-			/*
-			 * Queue ordering not supported.  Terminate
-			 * with prejudice.
-			 */
-			blk_dequeue_request(rq);
-			__blk_end_request_all(rq, -EOPNOTSUPP);
-			return NULL;
-		}
+	unsigned skip = 0;
+
+	if (!(rq->cmd_flags & REQ_HARDBARRIER))
+		return rq;
+
+	if (q->ordseq) {
+		/*
+		 * Barrier is already in progress and they can't be
+		 * processed in parallel.  Queue for later processing.
+		 */
+		list_move_tail(&rq->queuelist, &q->pending_barriers);
+		return NULL;
+	}
+
+	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
+		/*
+		 * Queue ordering not supported.  Terminate
+		 * with prejudice.
+		 */
+		blk_dequeue_request(rq);
+		__blk_end_request_all(rq, -EOPNOTSUPP);
+		return NULL;
 	}
 
 	/*
-	 * Ordered sequence in progress
+	 * Start a new ordered sequence
 	 */
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
-	/* Special requests are not subject to ordering rules. */
-	if (rq->cmd_type != REQ_TYPE_FS &&
-	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return rq;
+	/*
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
+	 */
+	if (!blk_rq_sectors(rq))
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
 
-	/* Ordered by draining.  Wait for turn. */
-	WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
-	if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
-		rq = ERR_PTR(-EAGAIN);
+	/* stash away the original request */
+	blk_dequeue_request(rq);
+	q->orig_bar_rq = rq;
 
-	return rq;
+	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+		skip |= QUEUE_ORDSEQ_BAR;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	/* complete skipped sequences and return the first sequence */
+	return blk_ordered_complete_seq(q, skip, 0);
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index f8d37a8e2c55..d316662682c8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	init_timer(&q->unplug_timer);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->pending_barriers);
 	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
@@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	const bool sync = (bio->bi_rw & REQ_SYNC);
 	const bool unplug = (bio->bi_rw & REQ_UNPLUG);
 	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+	int where = ELEVATOR_INSERT_SORT;
 	int rw_flags;
 
 	/* REQ_HARDBARRIER is no more */
@@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
+	if (bio->bi_rw & REQ_HARDBARRIER) {
+		where = ELEVATOR_INSERT_FRONT;
+		goto get_rq;
+	}
+
+	if (elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
@@ -1303,7 +1310,7 @@ get_rq:
 
 	/* insert the request into the elevator */
 	drive_stat_acct(req, 1);
-	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
+	__elv_add_request(q, req, where, 0);
 out:
 	if (unplug || !queue_should_plug(q))
 		__generic_unplug_device(q);
diff --git a/block/blk.h b/block/blk.h
index 874eb4ea8093..08081e4b294e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 			rq = list_entry_rq(q->queue_head.next);
 			rq = blk_do_ordered(q, rq);
 			if (rq)
-				return !IS_ERR(rq) ? rq : NULL;
+				return rq;
 		}
 
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
diff --git a/block/elevator.c b/block/elevator.c
index ec585c9554d3..241c69c45c5f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q)
 
 void elv_insert(struct request_queue *q, struct request *rq, int where)
 {
-	struct list_head *pos;
-	unsigned ordseq;
 	int unplug_it = 1;
 
 	trace_block_rq_insert(q, rq);
@@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 	rq->q = q;
 
 	switch (where) {
+	case ELEVATOR_INSERT_REQUEUE:
+		/*
+		 * Most requeues happen because of a busy condition,
+		 * don't force unplug of the queue for that case.
+		 * Clear unplug_it and fall through.
+		 */
+		unplug_it = 0;
+
 	case ELEVATOR_INSERT_FRONT:
 		rq->cmd_flags |= REQ_SOFTBARRIER;
-
 		list_add(&rq->queuelist, &q->queue_head);
 		break;
 
@@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
-	case ELEVATOR_INSERT_REQUEUE:
-		/*
-		 * If ordered flush isn't in progress, we do front
-		 * insertion; otherwise, requests should be requeued
-		 * in ordseq order.
-		 */
-		rq->cmd_flags |= REQ_SOFTBARRIER;
-
-		/*
-		 * Most requeues happen because of a busy condition,
-		 * don't force unplug of the queue for that case.
-		 */
-		unplug_it = 0;
-
-		if (q->ordseq == 0) {
-			list_add(&rq->queuelist, &q->queue_head);
-			break;
-		}
-
-		ordseq = blk_ordered_req_seq(rq);
-
-		list_for_each(pos, &q->queue_head) {
-			struct request *pos_rq = list_entry_rq(pos);
-			if (ordseq <= blk_ordered_req_seq(pos_rq))
-				break;
-		}
-
-		list_add_tail(&rq->queuelist, pos);
-		break;
-
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __func__, where);
@@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 void __elv_add_request(struct request_queue *q, struct request *rq, int where,
 		       int plug)
 {
-	if (q->ordcolor)
-		rq->cmd_flags |= REQ_ORDERED_COLOR;
-
 	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
-		/*
-		 * toggle ordered color
-		 */
-		if (rq->cmd_flags & REQ_HARDBARRIER)
-			q->ordcolor ^= 1;
-
-		/*
-		 * barriers implicitly indicate back insertion
-		 */
-		if (where == ELEVATOR_INSERT_SORT)
-			where = ELEVATOR_INSERT_BACK;
-
-		/*
-		 * this request is scheduling boundary, update
-		 * end_sector
-		 */
+		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
 		    (rq->cmd_flags & REQ_DISCARD)) {
 			q->end_sector = rq_end_sector(rq);
@@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		    e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
-
-	/*
-	 * Check if the queue is waiting for fs requests to be
-	 * drained for flush sequence.
-	 */
-	if (unlikely(q->ordseq)) {
-		struct request *next = NULL;
-
-		if (!list_empty(&q->queue_head))
-			next = list_entry_rq(q->queue_head.next);
-
-		if (!queue_in_flight(q) &&
-		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
-		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
-			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-			__blk_run_queue(q);
-		}
-	}
 }
 
 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
-- 
cgit v1.2.3


From 8839a0e055d9abd6c011d533373a8dd266cad011 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:16 +0200
Subject: block: rename blk-barrier.c to blk-flush.c

Without ordering requirements, barrier and ordering are misnomers.
Rename block/blk-barrier.c to block/blk-flush.c.  Rename of symbols
will follow.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/Makefile      |   2 +-
 block/blk-barrier.c | 248 ----------------------------------------------------
 block/blk-flush.c   | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 249 insertions(+), 249 deletions(-)
 delete mode 100644 block/blk-barrier.c
 create mode 100644 block/blk-flush.c

(limited to 'block')

diff --git a/block/Makefile b/block/Makefile
index 0bb499a739cd..f627e4b1a9da 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -3,7 +3,7 @@
 #
 
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
-			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
+			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o
 
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
deleted file mode 100644
index e8b2e5c091b1..000000000000
--- a/block/blk-barrier.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- * Functions related to barrier IO handling
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/bio.h>
-#include <linux/blkdev.h>
-#include <linux/gfp.h>
-
-#include "blk.h"
-
-static struct request *queue_next_ordseq(struct request_queue *q);
-
-/*
- * Cache flushing for ordered writes handling
- */
-unsigned blk_ordered_cur_seq(struct request_queue *q)
-{
-	if (!q->ordseq)
-		return 0;
-	return 1 << ffz(q->ordseq);
-}
-
-static struct request *blk_ordered_complete_seq(struct request_queue *q,
-						unsigned seq, int error)
-{
-	struct request *next_rq = NULL;
-
-	if (error && !q->orderr)
-		q->orderr = error;
-
-	BUG_ON(q->ordseq & seq);
-	q->ordseq |= seq;
-
-	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
-		/* not complete yet, queue the next ordered sequence */
-		next_rq = queue_next_ordseq(q);
-	} else {
-		/* complete this barrier request */
-		__blk_end_request_all(q->orig_bar_rq, q->orderr);
-		q->orig_bar_rq = NULL;
-		q->ordseq = 0;
-
-		/* dispatch the next barrier if there's one */
-		if (!list_empty(&q->pending_barriers)) {
-			next_rq = list_entry_rq(q->pending_barriers.next);
-			list_move(&next_rq->queuelist, &q->queue_head);
-		}
-	}
-	return next_rq;
-}
-
-static void pre_flush_end_io(struct request *rq, int error)
-{
-	elv_completed_request(rq->q, rq);
-	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
-}
-
-static void bar_end_io(struct request *rq, int error)
-{
-	elv_completed_request(rq->q, rq);
-	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
-}
-
-static void post_flush_end_io(struct request *rq, int error)
-{
-	elv_completed_request(rq->q, rq);
-	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
-}
-
-static void queue_flush(struct request_queue *q, struct request *rq,
-			rq_end_io_fn *end_io)
-{
-	blk_rq_init(q, rq);
-	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = REQ_FLUSH;
-	rq->rq_disk = q->orig_bar_rq->rq_disk;
-	rq->end_io = end_io;
-
-	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-}
-
-static struct request *queue_next_ordseq(struct request_queue *q)
-{
-	struct request *rq = &q->bar_rq;
-
-	switch (blk_ordered_cur_seq(q)) {
-	case QUEUE_ORDSEQ_PREFLUSH:
-		queue_flush(q, rq, pre_flush_end_io);
-		break;
-
-	case QUEUE_ORDSEQ_BAR:
-		/* initialize proxy request and queue it */
-		blk_rq_init(q, rq);
-		init_request_from_bio(rq, q->orig_bar_rq->bio);
-		rq->cmd_flags &= ~REQ_HARDBARRIER;
-		if (q->ordered & QUEUE_ORDERED_DO_FUA)
-			rq->cmd_flags |= REQ_FUA;
-		rq->end_io = bar_end_io;
-
-		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-		break;
-
-	case QUEUE_ORDSEQ_POSTFLUSH:
-		queue_flush(q, rq, post_flush_end_io);
-		break;
-
-	default:
-		BUG();
-	}
-	return rq;
-}
-
-struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
-{
-	unsigned skip = 0;
-
-	if (!(rq->cmd_flags & REQ_HARDBARRIER))
-		return rq;
-
-	if (q->ordseq) {
-		/*
-		 * Barrier is already in progress and they can't be
-		 * processed in parallel.  Queue for later processing.
-		 */
-		list_move_tail(&rq->queuelist, &q->pending_barriers);
-		return NULL;
-	}
-
-	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
-		/*
-		 * Queue ordering not supported.  Terminate
-		 * with prejudice.
-		 */
-		blk_dequeue_request(rq);
-		__blk_end_request_all(rq, -EOPNOTSUPP);
-		return NULL;
-	}
-
-	/*
-	 * Start a new ordered sequence
-	 */
-	q->orderr = 0;
-	q->ordered = q->next_ordered;
-	q->ordseq |= QUEUE_ORDSEQ_STARTED;
-
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
-	 */
-	if (!blk_rq_sectors(rq))
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-
-	/* stash away the original request */
-	blk_dequeue_request(rq);
-	q->orig_bar_rq = rq;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
-		skip |= QUEUE_ORDSEQ_PREFLUSH;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
-		skip |= QUEUE_ORDSEQ_BAR;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
-		skip |= QUEUE_ORDSEQ_POSTFLUSH;
-
-	/* complete skipped sequences and return the first sequence */
-	return blk_ordered_complete_seq(q, skip, 0);
-}
-
-static void bio_end_empty_barrier(struct bio *bio, int err)
-{
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
-	if (bio->bi_private)
-		complete(bio->bi_private);
-	bio_put(bio);
-}
-
-/**
- * blkdev_issue_flush - queue a flush
- * @bdev:	blockdev to issue flush for
- * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @error_sector:	error sector
- * @flags:	BLKDEV_IFL_* flags to control behaviour
- *
- * Description:
- *    Issue a flush for the block device in question. Caller can supply
- *    room for storing the error offset in case of a flush error, if they
- *    wish to. If WAIT flag is not passed then caller may check only what
- *    request was pushed in some internal queue for later handling.
- */
-int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
-		sector_t *error_sector, unsigned long flags)
-{
-	DECLARE_COMPLETION_ONSTACK(wait);
-	struct request_queue *q;
-	struct bio *bio;
-	int ret = 0;
-
-	if (bdev->bd_disk == NULL)
-		return -ENXIO;
-
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
-	/*
-	 * some block devices may not have their queue correctly set up here
-	 * (e.g. loop device without a backing file) and so issuing a flush
-	 * here will panic. Ensure there is a request function before issuing
-	 * the barrier.
-	 */
-	if (!q->make_request_fn)
-		return -ENXIO;
-
-	bio = bio_alloc(gfp_mask, 0);
-	bio->bi_end_io = bio_end_empty_barrier;
-	bio->bi_bdev = bdev;
-	if (test_bit(BLKDEV_WAIT, &flags))
-		bio->bi_private = &wait;
-
-	bio_get(bio);
-	submit_bio(WRITE_BARRIER, bio);
-	if (test_bit(BLKDEV_WAIT, &flags)) {
-		wait_for_completion(&wait);
-		/*
-		 * The driver must store the error location in ->bi_sector, if
-		 * it supports it. For non-stacked drivers, this should be
-		 * copied from blk_rq_pos(rq).
-		 */
-		if (error_sector)
-			*error_sector = bio->bi_sector;
-	}
-
-	if (bio_flagged(bio, BIO_EOPNOTSUPP))
-		ret = -EOPNOTSUPP;
-	else if (!bio_flagged(bio, BIO_UPTODATE))
-		ret = -EIO;
-
-	bio_put(bio);
-	return ret;
-}
-EXPORT_SYMBOL(blkdev_issue_flush);
diff --git a/block/blk-flush.c b/block/blk-flush.c
new file mode 100644
index 000000000000..e8b2e5c091b1
--- /dev/null
+++ b/block/blk-flush.c
@@ -0,0 +1,248 @@
+/*
+ * Functions related to barrier IO handling
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/gfp.h>
+
+#include "blk.h"
+
+static struct request *queue_next_ordseq(struct request_queue *q);
+
+/*
+ * Cache flushing for ordered writes handling
+ */
+unsigned blk_ordered_cur_seq(struct request_queue *q)
+{
+	if (!q->ordseq)
+		return 0;
+	return 1 << ffz(q->ordseq);
+}
+
+static struct request *blk_ordered_complete_seq(struct request_queue *q,
+						unsigned seq, int error)
+{
+	struct request *next_rq = NULL;
+
+	if (error && !q->orderr)
+		q->orderr = error;
+
+	BUG_ON(q->ordseq & seq);
+	q->ordseq |= seq;
+
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
+		/* not complete yet, queue the next ordered sequence */
+		next_rq = queue_next_ordseq(q);
+	} else {
+		/* complete this barrier request */
+		__blk_end_request_all(q->orig_bar_rq, q->orderr);
+		q->orig_bar_rq = NULL;
+		q->ordseq = 0;
+
+		/* dispatch the next barrier if there's one */
+		if (!list_empty(&q->pending_barriers)) {
+			next_rq = list_entry_rq(q->pending_barriers.next);
+			list_move(&next_rq->queuelist, &q->queue_head);
+		}
+	}
+	return next_rq;
+}
+
+static void pre_flush_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
+}
+
+static void bar_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
+}
+
+static void post_flush_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
+}
+
+static void queue_flush(struct request_queue *q, struct request *rq,
+			rq_end_io_fn *end_io)
+{
+	blk_rq_init(q, rq);
+	rq->cmd_type = REQ_TYPE_FS;
+	rq->cmd_flags = REQ_FLUSH;
+	rq->rq_disk = q->orig_bar_rq->rq_disk;
+	rq->end_io = end_io;
+
+	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+}
+
+static struct request *queue_next_ordseq(struct request_queue *q)
+{
+	struct request *rq = &q->bar_rq;
+
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		queue_flush(q, rq, pre_flush_end_io);
+		break;
+
+	case QUEUE_ORDSEQ_BAR:
+		/* initialize proxy request and queue it */
+		blk_rq_init(q, rq);
+		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->cmd_flags &= ~REQ_HARDBARRIER;
+		if (q->ordered & QUEUE_ORDERED_DO_FUA)
+			rq->cmd_flags |= REQ_FUA;
+		rq->end_io = bar_end_io;
+
+		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+		break;
+
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		queue_flush(q, rq, post_flush_end_io);
+		break;
+
+	default:
+		BUG();
+	}
+	return rq;
+}
+
+struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
+{
+	unsigned skip = 0;
+
+	if (!(rq->cmd_flags & REQ_HARDBARRIER))
+		return rq;
+
+	if (q->ordseq) {
+		/*
+		 * Barrier is already in progress and they can't be
+		 * processed in parallel.  Queue for later processing.
+		 */
+		list_move_tail(&rq->queuelist, &q->pending_barriers);
+		return NULL;
+	}
+
+	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
+		/*
+		 * Queue ordering not supported.  Terminate
+		 * with prejudice.
+		 */
+		blk_dequeue_request(rq);
+		__blk_end_request_all(rq, -EOPNOTSUPP);
+		return NULL;
+	}
+
+	/*
+	 * Start a new ordered sequence
+	 */
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
+
+	/*
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
+	 */
+	if (!blk_rq_sectors(rq))
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
+
+	/* stash away the original request */
+	blk_dequeue_request(rq);
+	q->orig_bar_rq = rq;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+		skip |= QUEUE_ORDSEQ_BAR;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	/* complete skipped sequences and return the first sequence */
+	return blk_ordered_complete_seq(q, skip, 0);
+}
+
+static void bio_end_empty_barrier(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	bio_put(bio);
+}
+
+/**
+ * blkdev_issue_flush - queue a flush
+ * @bdev:	blockdev to issue flush for
+ * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @error_sector:	error sector
+ * @flags:	BLKDEV_IFL_* flags to control behaviour
+ *
+ * Description:
+ *    Issue a flush for the block device in question. Caller can supply
+ *    room for storing the error offset in case of a flush error, if they
+ *    wish to. If WAIT flag is not passed then caller may check only what
+ *    request was pushed in some internal queue for later handling.
+ */
+int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+		sector_t *error_sector, unsigned long flags)
+{
+	DECLARE_COMPLETION_ONSTACK(wait);
+	struct request_queue *q;
+	struct bio *bio;
+	int ret = 0;
+
+	if (bdev->bd_disk == NULL)
+		return -ENXIO;
+
+	q = bdev_get_queue(bdev);
+	if (!q)
+		return -ENXIO;
+
+	/*
+	 * some block devices may not have their queue correctly set up here
+	 * (e.g. loop device without a backing file) and so issuing a flush
+	 * here will panic. Ensure there is a request function before issuing
+	 * the barrier.
+	 */
+	if (!q->make_request_fn)
+		return -ENXIO;
+
+	bio = bio_alloc(gfp_mask, 0);
+	bio->bi_end_io = bio_end_empty_barrier;
+	bio->bi_bdev = bdev;
+	if (test_bit(BLKDEV_WAIT, &flags))
+		bio->bi_private = &wait;
+
+	bio_get(bio);
+	submit_bio(WRITE_BARRIER, bio);
+	if (test_bit(BLKDEV_WAIT, &flags)) {
+		wait_for_completion(&wait);
+		/*
+		 * The driver must store the error location in ->bi_sector, if
+		 * it supports it. For non-stacked drivers, this should be
+		 * copied from blk_rq_pos(rq).
+		 */
+		if (error_sector)
+			*error_sector = bio->bi_sector;
+	}
+
+	if (bio_flagged(bio, BIO_EOPNOTSUPP))
+		ret = -EOPNOTSUPP;
+	else if (!bio_flagged(bio, BIO_UPTODATE))
+		ret = -EIO;
+
+	bio_put(bio);
+	return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_flush);
-- 
cgit v1.2.3


From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:16 +0200
Subject: block: rename barrier/ordered to flush

With ordering requirements dropped, barrier and ordered are misnomers.
Now all the block layer does is sequence FLUSH and FUA.  Rename them to
flush.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c  | 21 ++++++------
 block/blk-flush.c | 98 +++++++++++++++++++++++++++----------------------------
 block/blk.h       |  4 +--
 3 files changed, 60 insertions(+), 63 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index d316662682c8..8870ae40179d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 {
 	struct request_queue *q = rq->q;
 
-	if (&q->bar_rq != rq) {
+	if (&q->flush_rq != rq) {
 		if (error)
 			clear_bit(BIO_UPTODATE, &bio->bi_flags);
 		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
@@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 		if (bio->bi_size == 0)
 			bio_endio(bio, error);
 	} else {
-
 		/*
-		 * Okay, this is the barrier request in progress, just
-		 * record the error;
+		 * Okay, this is the sequenced flush request in
+		 * progress, just record the error;
 		 */
-		if (error && !q->orderr)
-			q->orderr = error;
+		if (error && !q->flush_err)
+			q->flush_err = error;
 	}
 }
 
@@ -520,7 +519,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	init_timer(&q->unplug_timer);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
-	INIT_LIST_HEAD(&q->pending_barriers);
+	INIT_LIST_HEAD(&q->pending_flushes);
 	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
@@ -1764,11 +1763,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes)
 static void blk_account_io_done(struct request *req)
 {
 	/*
-	 * Account IO completion.  bar_rq isn't accounted as a normal
-	 * IO on queueing nor completion.  Accounting the containing
-	 * request is enough.
+	 * Account IO completion.  flush_rq isn't accounted as a
+	 * normal IO on queueing nor completion.  Accounting the
+	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && req != &req->q->bar_rq) {
+	if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index e8b2e5c091b1..dd873225da97 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -9,41 +9,38 @@
 
 #include "blk.h"
 
-static struct request *queue_next_ordseq(struct request_queue *q);
+static struct request *queue_next_fseq(struct request_queue *q);
 
-/*
- * Cache flushing for ordered writes handling
- */
-unsigned blk_ordered_cur_seq(struct request_queue *q)
+unsigned blk_flush_cur_seq(struct request_queue *q)
 {
-	if (!q->ordseq)
+	if (!q->flush_seq)
 		return 0;
-	return 1 << ffz(q->ordseq);
+	return 1 << ffz(q->flush_seq);
 }
 
-static struct request *blk_ordered_complete_seq(struct request_queue *q,
-						unsigned seq, int error)
+static struct request *blk_flush_complete_seq(struct request_queue *q,
+					      unsigned seq, int error)
 {
 	struct request *next_rq = NULL;
 
-	if (error && !q->orderr)
-		q->orderr = error;
+	if (error && !q->flush_err)
+		q->flush_err = error;
 
-	BUG_ON(q->ordseq & seq);
-	q->ordseq |= seq;
+	BUG_ON(q->flush_seq & seq);
+	q->flush_seq |= seq;
 
-	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
-		/* not complete yet, queue the next ordered sequence */
-		next_rq = queue_next_ordseq(q);
+	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
+		/* not complete yet, queue the next flush sequence */
+		next_rq = queue_next_fseq(q);
 	} else {
-		/* complete this barrier request */
-		__blk_end_request_all(q->orig_bar_rq, q->orderr);
-		q->orig_bar_rq = NULL;
-		q->ordseq = 0;
-
-		/* dispatch the next barrier if there's one */
-		if (!list_empty(&q->pending_barriers)) {
-			next_rq = list_entry_rq(q->pending_barriers.next);
+		/* complete this flush request */
+		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
+		q->orig_flush_rq = NULL;
+		q->flush_seq = 0;
+
+		/* dispatch the next flush if there's one */
+		if (!list_empty(&q->pending_flushes)) {
+			next_rq = list_entry_rq(q->pending_flushes.next);
 			list_move(&next_rq->queuelist, &q->queue_head);
 		}
 	}
@@ -53,19 +50,19 @@ static struct request *blk_ordered_complete_seq(struct request_queue *q,
 static void pre_flush_end_io(struct request *rq, int error)
 {
 	elv_completed_request(rq->q, rq);
-	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
+	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error);
 }
 
-static void bar_end_io(struct request *rq, int error)
+static void flush_data_end_io(struct request *rq, int error)
 {
 	elv_completed_request(rq->q, rq);
-	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
+	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error);
 }
 
 static void post_flush_end_io(struct request *rq, int error)
 {
 	elv_completed_request(rq->q, rq);
-	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
+	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
 }
 
 static void queue_flush(struct request_queue *q, struct request *rq,
@@ -74,34 +71,34 @@ static void queue_flush(struct request_queue *q, struct request *rq,
 	blk_rq_init(q, rq);
 	rq->cmd_type = REQ_TYPE_FS;
 	rq->cmd_flags = REQ_FLUSH;
-	rq->rq_disk = q->orig_bar_rq->rq_disk;
+	rq->rq_disk = q->orig_flush_rq->rq_disk;
 	rq->end_io = end_io;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static struct request *queue_next_ordseq(struct request_queue *q)
+static struct request *queue_next_fseq(struct request_queue *q)
 {
-	struct request *rq = &q->bar_rq;
+	struct request *rq = &q->flush_rq;
 
-	switch (blk_ordered_cur_seq(q)) {
-	case QUEUE_ORDSEQ_PREFLUSH:
+	switch (blk_flush_cur_seq(q)) {
+	case QUEUE_FSEQ_PREFLUSH:
 		queue_flush(q, rq, pre_flush_end_io);
 		break;
 
-	case QUEUE_ORDSEQ_BAR:
+	case QUEUE_FSEQ_DATA:
 		/* initialize proxy request and queue it */
 		blk_rq_init(q, rq);
-		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		init_request_from_bio(rq, q->orig_flush_rq->bio);
 		rq->cmd_flags &= ~REQ_HARDBARRIER;
 		if (q->ordered & QUEUE_ORDERED_DO_FUA)
 			rq->cmd_flags |= REQ_FUA;
-		rq->end_io = bar_end_io;
+		rq->end_io = flush_data_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 		break;
 
-	case QUEUE_ORDSEQ_POSTFLUSH:
+	case QUEUE_FSEQ_POSTFLUSH:
 		queue_flush(q, rq, post_flush_end_io);
 		break;
 
@@ -111,19 +108,20 @@ static struct request *queue_next_ordseq(struct request_queue *q)
 	return rq;
 }
 
-struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
+struct request *blk_do_flush(struct request_queue *q, struct request *rq)
 {
 	unsigned skip = 0;
 
 	if (!(rq->cmd_flags & REQ_HARDBARRIER))
 		return rq;
 
-	if (q->ordseq) {
+	if (q->flush_seq) {
 		/*
-		 * Barrier is already in progress and they can't be
-		 * processed in parallel.  Queue for later processing.
+		 * Sequenced flush is already in progress and they
+		 * can't be processed in parallel.  Queue for later
+		 * processing.
 		 */
-		list_move_tail(&rq->queuelist, &q->pending_barriers);
+		list_move_tail(&rq->queuelist, &q->pending_flushes);
 		return NULL;
 	}
 
@@ -138,11 +136,11 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
 	}
 
 	/*
-	 * Start a new ordered sequence
+	 * Start a new flush sequence
 	 */
-	q->orderr = 0;
+	q->flush_err = 0;
 	q->ordered = q->next_ordered;
-	q->ordseq |= QUEUE_ORDSEQ_STARTED;
+	q->flush_seq |= QUEUE_FSEQ_STARTED;
 
 	/*
 	 * For an empty barrier, there's no actual BAR request, which
@@ -154,19 +152,19 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
 
 	/* stash away the original request */
 	blk_dequeue_request(rq);
-	q->orig_bar_rq = rq;
+	q->orig_flush_rq = rq;
 
 	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
-		skip |= QUEUE_ORDSEQ_PREFLUSH;
+		skip |= QUEUE_FSEQ_PREFLUSH;
 
 	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
-		skip |= QUEUE_ORDSEQ_BAR;
+		skip |= QUEUE_FSEQ_DATA;
 
 	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
-		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+		skip |= QUEUE_FSEQ_POSTFLUSH;
 
 	/* complete skipped sequences and return the first sequence */
-	return blk_ordered_complete_seq(q, skip, 0);
+	return blk_flush_complete_seq(q, skip, 0);
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk.h b/block/blk.h
index 08081e4b294e..24b92bd78f37 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,7 +51,7 @@ static inline void blk_clear_rq_complete(struct request *rq)
  */
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-struct request *blk_do_ordered(struct request_queue *q, struct request *rq);
+struct request *blk_do_flush(struct request_queue *q, struct request *rq);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
@@ -60,7 +60,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 	while (1) {
 		while (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
-			rq = blk_do_ordered(q, rq);
+			rq = blk_do_flush(q, rq);
 			if (rq)
 				return rq;
 		}
-- 
cgit v1.2.3


From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests

Now that the backend conversion is complete, export sequenced
FLUSH/FUA capability through REQ_FLUSH/FUA flags.  REQ_FLUSH means the
device cache should be flushed before executing the request.  REQ_FUA
means that the data in the request should be on non-volatile media on
completion.

Block layer will choose the correct way of implementing the semantics
and execute it.  The request may be passed to the device directly if
the device can handle it; otherwise, it will be sequenced using one or
more proxy requests.  Devices will never see REQ_FLUSH and/or FUA
which they don't support.

Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are
never failed with -EOPNOTSUPP.  If the underlying device doesn't
support FLUSH/FUA, the block layer simply makes those noops.  IOW, it no
longer distinguishes between writeback cache which doesn't support
cache flush and writethrough/no cache.  Devices which have WB cache
w/o flush are very difficult to come by these days and there's nothing
much we can do anyway, so it doesn't make sense to require everyone to
implement -EOPNOTSUPP handling.  This will simplify filesystems and
block drivers as they can drop -EOPNOTSUPP retry logic for barriers.

* QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into
  blk-flush.c.

* REQ_FLUSH w/o data can also be directly passed to drivers without
  sequencing but some drivers assume that zero length requests don't
  have rq->bio which isn't true for these requests requiring the use
  of proxy requests.

* REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are
  copied from bio to request.

* WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and
  WRITE_FLUSH_FUA are added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c  |  2 +-
 block/blk-flush.c | 85 ++++++++++++++++++++++++++++++-------------------------
 block/blk.h       |  3 ++
 3 files changed, 50 insertions(+), 40 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 8870ae40179d..18455c4f618a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	spin_lock_irq(q->queue_lock);
 
-	if (bio->bi_rw & REQ_HARDBARRIER) {
+	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
 		where = ELEVATOR_INSERT_FRONT;
 		goto get_rq;
 	}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index dd873225da97..452c552e9ead 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,5 +1,5 @@
 /*
- * Functions related to barrier IO handling
+ * Functions to sequence FLUSH and FUA writes.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -9,6 +9,15 @@
 
 #include "blk.h"
 
+/* FLUSH/FUA sequences */
+enum {
+	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
+	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
+	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
+	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
+	QUEUE_FSEQ_DONE		= (1 << 4),
+};
+
 static struct request *queue_next_fseq(struct request_queue *q);
 
 unsigned blk_flush_cur_seq(struct request_queue *q)
@@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq,
 
 static struct request *queue_next_fseq(struct request_queue *q)
 {
+	struct request *orig_rq = q->orig_flush_rq;
 	struct request *rq = &q->flush_rq;
 
 	switch (blk_flush_cur_seq(q)) {
@@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q)
 		break;
 
 	case QUEUE_FSEQ_DATA:
-		/* initialize proxy request and queue it */
+		/* initialize proxy request, inherit FLUSH/FUA and queue it */
 		blk_rq_init(q, rq);
-		init_request_from_bio(rq, q->orig_flush_rq->bio);
-		rq->cmd_flags &= ~REQ_HARDBARRIER;
-		if (q->ordered & QUEUE_ORDERED_DO_FUA)
-			rq->cmd_flags |= REQ_FUA;
+		init_request_from_bio(rq, orig_rq->bio);
+		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
+		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
 		rq->end_io = flush_data_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
@@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q)
 
 struct request *blk_do_flush(struct request_queue *q, struct request *rq)
 {
+	unsigned int fflags = q->flush_flags; /* may change, cache it */
+	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
+	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
+	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
 	unsigned skip = 0;
 
-	if (!(rq->cmd_flags & REQ_HARDBARRIER))
+	/*
+	 * Special case.  If there's data but flush is not necessary,
+	 * the request can be issued directly.
+	 *
+	 * Flush w/o data should be able to be issued directly too but
+	 * currently some drivers assume that rq->bio contains
+	 * non-zero data if it isn't NULL and empty FLUSH requests
+	 * getting here usually have bio's without data.
+	 */
+	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
+		rq->cmd_flags &= ~REQ_FLUSH;
+		if (!has_fua)
+			rq->cmd_flags &= ~REQ_FUA;
 		return rq;
+	}
 
+	/*
+	 * Sequenced flushes can't be processed in parallel.  If
+	 * another one is already in progress, queue for later
+	 * processing.
+	 */
 	if (q->flush_seq) {
-		/*
-		 * Sequenced flush is already in progress and they
-		 * can't be processed in parallel.  Queue for later
-		 * processing.
-		 */
 		list_move_tail(&rq->queuelist, &q->pending_flushes);
 		return NULL;
 	}
 
-	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
-		/*
-		 * Queue ordering not supported.  Terminate
-		 * with prejudice.
-		 */
-		blk_dequeue_request(rq);
-		__blk_end_request_all(rq, -EOPNOTSUPP);
-		return NULL;
-	}
-
 	/*
 	 * Start a new flush sequence
 	 */
 	q->flush_err = 0;
-	q->ordered = q->next_ordered;
 	q->flush_seq |= QUEUE_FSEQ_STARTED;
 
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
-	 */
-	if (!blk_rq_sectors(rq))
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-
-	/* stash away the original request */
+	/* adjust FLUSH/FUA of the original request and stash it away */
+	rq->cmd_flags &= ~REQ_FLUSH;
+	if (!has_fua)
+		rq->cmd_flags &= ~REQ_FUA;
 	blk_dequeue_request(rq);
 	q->orig_flush_rq = rq;
 
-	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+	/* skip unneded sequences and return the first one */
+	if (!do_preflush)
 		skip |= QUEUE_FSEQ_PREFLUSH;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+	if (!blk_rq_sectors(rq))
 		skip |= QUEUE_FSEQ_DATA;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+	if (!do_postflush)
 		skip |= QUEUE_FSEQ_POSTFLUSH;
-
-	/* complete skipped sequences and return the first sequence */
 	return blk_flush_complete_seq(q, skip, 0);
 }
 
diff --git a/block/blk.h b/block/blk.h
index 24b92bd78f37..a09c18b19116 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 	while (1) {
 		while (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
+			if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
+			    rq == &q->flush_rq)
+				return rq;
 			rq = blk_do_flush(q, rq);
 			if (rq)
 				return rq;
-- 
cgit v1.2.3


From 1e87901e189c8f01750d67485009fe3827c691bf Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: filter flush bio's in __generic_make_request()

There are a number of make_request based drivers which don't support
cache flushes.  Filter out flush bio's in __generic_make_request() so
that they don't have to worry about them.  All FLUSH/FUA requests with
data are converted to regular IO requests and empty ones are completed
immediately.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 18455c4f618a..495bdc4a23da 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1509,6 +1509,19 @@ static inline void __generic_make_request(struct bio *bio)
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
 
+		/*
+		 * Filter flush bio's early so that make_request based
+		 * drivers without flush support don't have to worry
+		 * about them.
+		 */
+		if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) {
+			bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+			if (!nr_sectors) {
+				err = 0;
+				goto end_io;
+			}
+		}
+
 		if ((bio->bi_rw & REQ_DISCARD) &&
 		    (!blk_queue_discard(q) ||
 		     ((bio->bi_rw & REQ_SECURE) &&
-- 
cgit v1.2.3


From cde4c406d8fb051c5aafc917643adbb9dbd0abc2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: simplify queue_next_fseq

We need to call blk_rq_init and elv_insert for all cases in queue_next_fseq,
so take these calls into common code.  Also move the end_io initialization
from queue_flush into queue_next_fseq and rename queue_flush to
init_flush_request now that its old name doesn't apply anymore.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

(limited to 'block')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 452c552e9ead..72905f862d31 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -74,16 +74,11 @@ static void post_flush_end_io(struct request *rq, int error)
 	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
 }
 
-static void queue_flush(struct request_queue *q, struct request *rq,
-			rq_end_io_fn *end_io)
+static void init_flush_request(struct request *rq, struct gendisk *disk)
 {
-	blk_rq_init(q, rq);
 	rq->cmd_type = REQ_TYPE_FS;
 	rq->cmd_flags = REQ_FLUSH;
-	rq->rq_disk = q->orig_flush_rq->rq_disk;
-	rq->end_io = end_io;
-
-	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+	rq->rq_disk = disk;
 }
 
 static struct request *queue_next_fseq(struct request_queue *q)
@@ -91,29 +86,28 @@ static struct request *queue_next_fseq(struct request_queue *q)
 	struct request *orig_rq = q->orig_flush_rq;
 	struct request *rq = &q->flush_rq;
 
+	blk_rq_init(q, rq);
+
 	switch (blk_flush_cur_seq(q)) {
 	case QUEUE_FSEQ_PREFLUSH:
-		queue_flush(q, rq, pre_flush_end_io);
+		init_flush_request(rq, orig_rq->rq_disk);
+		rq->end_io = pre_flush_end_io;
 		break;
-
 	case QUEUE_FSEQ_DATA:
-		/* initialize proxy request, inherit FLUSH/FUA and queue it */
-		blk_rq_init(q, rq);
 		init_request_from_bio(rq, orig_rq->bio);
 		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
 		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
 		rq->end_io = flush_data_end_io;
-
-		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 		break;
-
 	case QUEUE_FSEQ_POSTFLUSH:
-		queue_flush(q, rq, post_flush_end_io);
+		init_flush_request(rq, orig_rq->rq_disk);
+		rq->end_io = post_flush_end_io;
 		break;
-
 	default:
 		BUG();
 	}
+
+	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 	return rq;
 }
 
-- 
cgit v1.2.3


From 337238be1bf52e1242f940fc6fe83fb395e55057 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: initialize flush request with WRITE_FLUSH instead of REQ_FLUSH

init_flush_request() only set REQ_FLUSH when initializing flush
requests, making them READ requests.  Use WRITE_FLUSH instead.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 72905f862d31..f357f1fc411c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -77,7 +77,7 @@ static void post_flush_end_io(struct request *rq, int error)
 static void init_flush_request(struct request *rq, struct gendisk *disk)
 {
 	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = REQ_FLUSH;
+	rq->cmd_flags = WRITE_FLUSH;
 	rq->rq_disk = disk;
 }
 
-- 
cgit v1.2.3


From 47f70d5a6ca78c40a1c799d43506efbfed914f7b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: kick queue after sequencing REQ_FLUSH/FUA

While completing a request from a REQ_FLUSH/FUA sequence, another
request can be pushed to the request queue.  If a driver tests
elv_queue_empty() before completing a request and runs the queue again
only if the queue wasn't empty, this may lead to a hang.  Please note
that most drivers either kick the queue unconditionally or test queue
emptiness after completing the current request and don't have this
problem.

This patch removes this possibility by making REQ_FLUSH/FUA sequence
code kick the queue if the queue was empty before completing a request
from REQ_FLUSH/FUA sequence.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index f357f1fc411c..cb4c8440a1fc 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -56,22 +56,38 @@ static struct request *blk_flush_complete_seq(struct request_queue *q,
 	return next_rq;
 }
 
+static void blk_flush_complete_seq_end_io(struct request_queue *q,
+					  unsigned seq, int error)
+{
+	bool was_empty = elv_queue_empty(q);
+	struct request *next_rq;
+
+	next_rq = blk_flush_complete_seq(q, seq, error);
+
+	/*
+	 * Moving a request silently to empty queue_head may stall the
+	 * queue.  Kick the queue in those cases.
+	 */
+	if (was_empty && next_rq)
+		__blk_run_queue(q);
+}
+
 static void pre_flush_end_io(struct request *rq, int error)
 {
 	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error);
+	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
 }
 
 static void flush_data_end_io(struct request *rq, int error)
 {
 	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error);
+	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
 }
 
 static void post_flush_end_io(struct request *rq, int error)
 {
 	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
+	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
 }
 
 static void init_flush_request(struct request *rq, struct gendisk *disk)
-- 
cgit v1.2.3


From 09d60c701b64b509f328cac72970eb894f485b9e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: make sure FSEQ_DATA request has the same rq_disk as the
 original

rq->rq_disk and bio->bi_bdev->bd_disk may differ if a request has
passed through remapping drivers.  FSEQ_DATA request incorrectly
followed bio->bi_bdev->bd_disk ending up being issued w/ mismatching
rq_disk.  Make it follow orig_rq->rq_disk.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Tested-by: Kiyoshi Ueda <k-ueda@ct.jp.nec.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'block')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index cb4c8440a1fc..7d1fc982e78f 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -111,6 +111,13 @@ static struct request *queue_next_fseq(struct request_queue *q)
 		break;
 	case QUEUE_FSEQ_DATA:
 		init_request_from_bio(rq, orig_rq->bio);
+		/*
+		 * orig_rq->rq_disk may be different from
+		 * bio->bi_bdev->bd_disk if orig_rq got here through
+		 * remapping drivers.  Make sure rq->rq_disk points
+		 * to the same one as orig_rq.
+		 */
+		rq->rq_disk = orig_rq->rq_disk;
 		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
 		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
 		rq->end_io = flush_data_end_io;
-- 
cgit v1.2.3


From d391a2dda2f1c993f094bdb3a8a342c5e0546553 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:17 +0200
Subject: block: use REQ_FLUSH in blkdev_issue_flush()

Update blkdev_issue_flush() to use new REQ_FLUSH interface.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'block')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 7d1fc982e78f..62b7df9bca9d 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -191,13 +191,10 @@ struct request *blk_do_flush(struct request_queue *q, struct request *rq)
 	return blk_flush_complete_seq(q, skip, 0);
 }
 
-static void bio_end_empty_barrier(struct bio *bio, int err)
+static void bio_end_flush(struct bio *bio, int err)
 {
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+	if (err)
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
 	if (bio->bi_private)
 		complete(bio->bi_private);
 	bio_put(bio);
@@ -235,19 +232,19 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 	 * some block devices may not have their queue correctly set up here
 	 * (e.g. loop device without a backing file) and so issuing a flush
 	 * here will panic. Ensure there is a request function before issuing
-	 * the barrier.
+	 * the flush.
 	 */
 	if (!q->make_request_fn)
 		return -ENXIO;
 
 	bio = bio_alloc(gfp_mask, 0);
-	bio->bi_end_io = bio_end_empty_barrier;
+	bio->bi_end_io = bio_end_flush;
 	bio->bi_bdev = bdev;
 	if (test_bit(BLKDEV_WAIT, &flags))
 		bio->bi_private = &wait;
 
 	bio_get(bio);
-	submit_bio(WRITE_BARRIER, bio);
+	submit_bio(WRITE_FLUSH, bio);
 	if (test_bit(BLKDEV_WAIT, &flags)) {
 		wait_for_completion(&wait);
 		/*
@@ -259,9 +256,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 			*error_sector = bio->bi_sector;
 	}
 
-	if (bio_flagged(bio, BIO_EOPNOTSUPP))
-		ret = -EOPNOTSUPP;
-	else if (!bio_flagged(bio, BIO_UPTODATE))
+	if (!bio_flagged(bio, BIO_UPTODATE))
 		ret = -EIO;
 
 	bio_put(bio);
-- 
cgit v1.2.3


From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 3 Sep 2010 11:56:18 +0200
Subject: block: make __blk_rq_prep_clone() copy most command flags

Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD.
There's no reason to omit other command flags and REQ_FUA needs to be
copied to implement FUA support in request-based dm.

REQ_COMMON_MASK which specifies flags to be copied from bio to request
already identifies all the command flags.  Define REQ_CLONE_MASK to be
the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone()
copy all flags in the mask.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'block')

diff --git a/block/blk-core.c b/block/blk-core.c
index 495bdc4a23da..2a5b19204546 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2505,9 +2505,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
 	dst->cpu = src->cpu;
-	dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
-	if (src->cmd_flags & REQ_DISCARD)
-		dst->cmd_flags |= REQ_DISCARD;
+	dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
 	dst->cmd_type = src->cmd_type;
 	dst->__sector = blk_rq_pos(src);
 	dst->__data_len = blk_rq_bytes(src);
-- 
cgit v1.2.3


From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 18 Aug 2010 05:29:22 -0400
Subject: block: remove the BLKDEV_IFL_BARRIER flag

Remove support for barriers on discards, which is unused now.  Also
remove the DISCARD_NOBARRIER I/O type in favour of just setting the
rw flags up locally in blkdev_issue_discard.

tj: Also remove DISCARD_SECURE and use REQ_SECURE directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-lib.c | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

(limited to 'block')

diff --git a/block/blk-lib.c b/block/blk-lib.c
index c392029a104e..fe2e6ed0f510 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
-	int type = flags & BLKDEV_IFL_BARRIER ?
-		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int max_discard_sectors;
 	struct bio *bio;
 	int ret = 0;
@@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 	if (flags & BLKDEV_IFL_SECURE) {
 		if (!blk_queue_secdiscard(q))
 			return -EOPNOTSUPP;
-		type |= DISCARD_SECURE;
+		type |= REQ_SECURE;
 	}
 
 	while (nr_sects && !ret) {
@@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 	bb.wait = &wait;
 	bb.end_io = NULL;
 
-	if (flags & BLKDEV_IFL_BARRIER) {
-		/* issue async barrier before the data */
-		ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0);
-		if (ret)
-			return ret;
-	}
 submit:
 	ret = 0;
 	while (nr_sects != 0) {
@@ -199,13 +192,6 @@ submit:
 		issued++;
 		submit_bio(WRITE, bio);
 	}
-	/*
-	 * When all data bios are in flight. Send final barrier if requeted.
-	 */
-	if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER)
-		ret = blkdev_issue_flush(bdev, gfp_mask, NULL,
-					flags & BLKDEV_IFL_WAIT);
-
 
 	if (flags & BLKDEV_IFL_WAIT)
 		/* Wait for bios in-flight */
-- 
cgit v1.2.3


From dd3932eddf428571762596e17b65f5dc92ca361b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 16 Sep 2010 20:51:46 +0200
Subject: block: remove BLKDEV_IFL_WAIT

All the blkdev_issue_* helpers can only sanely be used for synchronous
callers.  To issue cache flushes or barriers asynchronously the caller needs
to set up a bio by itself with a completion callback to move the asynchronous
state machine ahead.  So drop the BLKDEV_IFL_WAIT flag that is always
specified when calling blkdev_issue_* and also remove the now unused flags
argument to blkdev_issue_flush and blkdev_issue_zeroout.  For
blkdev_issue_discard we need to keep it for the secure discard flag, which
gains a more descriptive name and loses the bitops vs flag confusion.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
---
 block/blk-flush.c | 25 +++++++++++--------------
 block/blk-lib.c   | 21 ++++++++-------------
 block/ioctl.c     |  4 ++--
 3 files changed, 21 insertions(+), 29 deletions(-)

(limited to 'block')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 62b7df9bca9d..54b123d6563e 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -205,7 +205,6 @@ static void bio_end_flush(struct bio *bio, int err)
  * @bdev:	blockdev to issue flush for
  * @gfp_mask:	memory allocation flags (for bio_alloc)
  * @error_sector:	error sector
- * @flags:	BLKDEV_IFL_* flags to control behaviour
  *
  * Description:
  *    Issue a flush for the block device in question. Caller can supply
@@ -214,7 +213,7 @@ static void bio_end_flush(struct bio *bio, int err)
  *    request was pushed in some internal queue for later handling.
  */
 int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
-		sector_t *error_sector, unsigned long flags)
+		sector_t *error_sector)
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q;
@@ -240,21 +239,19 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 	bio = bio_alloc(gfp_mask, 0);
 	bio->bi_end_io = bio_end_flush;
 	bio->bi_bdev = bdev;
-	if (test_bit(BLKDEV_WAIT, &flags))
-		bio->bi_private = &wait;
+	bio->bi_private = &wait;
 
 	bio_get(bio);
 	submit_bio(WRITE_FLUSH, bio);
-	if (test_bit(BLKDEV_WAIT, &flags)) {
-		wait_for_completion(&wait);
-		/*
-		 * The driver must store the error location in ->bi_sector, if
-		 * it supports it. For non-stacked drivers, this should be
-		 * copied from blk_rq_pos(rq).
-		 */
-		if (error_sector)
-			*error_sector = bio->bi_sector;
-	}
+	wait_for_completion(&wait);
+
+	/*
+	 * The driver must store the error location in ->bi_sector, if
+	 * it supports it. For non-stacked drivers, this should be
+	 * copied from blk_rq_pos(rq).
+	 */
+	if (error_sector)
+               *error_sector = bio->bi_sector;
 
 	if (!bio_flagged(bio, BIO_UPTODATE))
 		ret = -EIO;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index fe2e6ed0f510..1a320d2406b0 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -61,7 +61,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		max_discard_sectors &= ~(disc_sects - 1);
 	}
 
-	if (flags & BLKDEV_IFL_SECURE) {
+	if (flags & BLKDEV_DISCARD_SECURE) {
 		if (!blk_queue_secdiscard(q))
 			return -EOPNOTSUPP;
 		type |= REQ_SECURE;
@@ -77,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
-		if (flags & BLKDEV_IFL_WAIT)
-			bio->bi_private = &wait;
+		bio->bi_private = &wait;
 
 		if (nr_sects > max_discard_sectors) {
 			bio->bi_size = max_discard_sectors << 9;
@@ -92,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		bio_get(bio);
 		submit_bio(type, bio);
 
-		if (flags & BLKDEV_IFL_WAIT)
-			wait_for_completion(&wait);
+		wait_for_completion(&wait);
 
 		if (bio_flagged(bio, BIO_EOPNOTSUPP))
 			ret = -EOPNOTSUPP;
@@ -139,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err)
  * @sector:	start sector
  * @nr_sects:	number of sectors to write
  * @gfp_mask:	memory allocation flags (for bio_alloc)
- * @flags:	BLKDEV_IFL_* flags to control behaviour
  *
  * Description:
  *  Generate and issue number of bios with zerofiled pages.
@@ -148,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err)
  */
 
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
+			sector_t nr_sects, gfp_t gfp_mask)
 {
 	int ret;
 	struct bio *bio;
@@ -174,8 +171,7 @@ submit:
 		bio->bi_sector = sector;
 		bio->bi_bdev   = bdev;
 		bio->bi_end_io = bio_batch_end_io;
-		if (flags & BLKDEV_IFL_WAIT)
-			bio->bi_private = &bb;
+		bio->bi_private = &bb;
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -193,10 +189,9 @@ submit:
 		submit_bio(WRITE, bio);
 	}
 
-	if (flags & BLKDEV_IFL_WAIT)
-		/* Wait for bios in-flight */
-		while ( issued != atomic_read(&bb.done))
-			wait_for_completion(&wait);
+	/* Wait for bios in-flight */
+	while (issued != atomic_read(&bb.done))
+		wait_for_completion(&wait);
 
 	if (!test_bit(BIO_UPTODATE, &bb.flags))
 		/* One of bios in the batch was completed with error.*/
diff --git a/block/ioctl.c b/block/ioctl.c
index d8052f0dabd3..cb2b9099862b 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev)
 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 			     uint64_t len, int secure)
 {
-	unsigned long flags = BLKDEV_IFL_WAIT;
+	unsigned long flags = 0;
 
 	if (start & 511)
 		return -EINVAL;
@@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 	if (start + len > (bdev->bd_inode->i_size >> 9))
 		return -EINVAL;
 	if (secure)
-		flags |= BLKDEV_IFL_SECURE;
+		flags |= BLKDEV_DISCARD_SECURE;
 	return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
 }
 
-- 
cgit v1.2.3