From b4d7124b2f2e29541e5c8815bd84ea55158dd730 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Feb 2014 13:09:34 -0500 Subject: bio: don't write "bio: create slab" messages to syslog When using device mapper, there are many "bio: create slab" messages in the log. Device mapper targets have different front_pad values, so each time we load a target that wasn't loaded before, we allocate a slab with the appropriate front_pad, and there is an associated "bio: create slab" message. This patch removes these messages; there is no need for them. Signed-off-by: Mikulas Patocka Signed-off-by: Jens Axboe --- fs/bio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/bio.c b/fs/bio.c index 8754e7b6eb49..b2dd42ed9edd 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -116,7 +116,6 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (!slab) goto out_unlock; - printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry); bslab->slab = slab; bslab->slab_ref = 1; bslab->slab_size = sz; -- cgit v1.2.3 From e3ebf0d457039c857dfeb45434e3be9780dea499 Mon Sep 17 00:00:00 2001 From: Gideon Israel Dsouza Date: Mon, 17 Feb 2014 21:17:16 +0530 Subject: block: Use macros from compiler.h instead of __attribute__((...)) To increase compiler portability, there are several convenience macros defined in <linux/compiler.h> for the various gcc __attribute__((...)) constructs. I've made sure these gcc-specific attributes were replaced with the right macro and that an #include <linux/compiler.h> was placed where needed. Signed-off-by: Gideon Israel Dsouza Signed-off-by: Jens Axboe --- block/partitions/atari.h | 4 +++- block/partitions/efi.h | 9 +++++---- block/partitions/karma.c | 3 ++- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/block/partitions/atari.h b/block/partitions/atari.h index fe2d32a89f36..f2ec43bfeec1 100644 --- a/block/partitions/atari.h +++ b/block/partitions/atari.h @@ -11,6 +11,8 @@ * by Guenther Kelleter (guenther@pool.informatik.rwth-aachen.de) */ +#include <linux/compiler.h> struct partition_info { u8 flg; /* bit 0: active; bit 7: bootable */ @@ -29,6 +31,6 @@ struct rootsector u32 bsl_st; /* start of bad sector list */ u32 bsl_cnt; /* length of bad sector list */ u16 checksum; /* checksum for bootable disks */ -} __attribute__((__packed__)); +} __packed; int atari_partition(struct parsed_partitions *state); diff --git a/block/partitions/efi.h b/block/partitions/efi.h index 4efcafba7e64..abd0b19288a6 100644 --- a/block/partitions/efi.h +++ b/block/partitions/efi.h @@ -32,6 +32,7 @@ #include #include #include +#include <linux/compiler.h> #define MSDOS_MBR_SIGNATURE 0xaa55 #define EFI_PMBR_OSTYPE_EFI 0xEF @@ -87,13 +88,13 @@ typedef struct _gpt_header { * * uint8_t reserved2[ BlockSize - 92 ]; */ -} __attribute__ ((packed)) gpt_header; +} __packed gpt_header; typedef struct _gpt_entry_attributes { u64 required_to_function:1; u64 reserved:47; u64 type_guid_specific:16; -} __attribute__ ((packed)) gpt_entry_attributes; +} __packed gpt_entry_attributes; typedef struct _gpt_entry { efi_guid_t partition_type_guid; @@ -102,7 +103,7 @@ typedef struct _gpt_entry { __le64 ending_lba; gpt_entry_attributes attributes; efi_char16_t partition_name[72 / sizeof (efi_char16_t)]; -} __attribute__ ((packed)) gpt_entry; +} __packed gpt_entry; typedef struct _gpt_mbr_record { u8 boot_indicator; /* unused by EFI, set to 0x80 for bootable */ @@ -124,7 +125,7 @@ typedef struct _legacy_mbr { __le16 unknown; gpt_mbr_record partition_record[4]; __le16 signature; -} __attribute__ ((packed)) legacy_mbr; +} __packed legacy_mbr; /* Functions */ extern int efi_partition(struct parsed_partitions
*state); diff --git a/block/partitions/karma.c b/block/partitions/karma.c index 0ea19312706b..9721fa589bb1 100644 --- a/block/partitions/karma.c +++ b/block/partitions/karma.c @@ -8,6 +8,7 @@ #include "check.h" #include "karma.h" +#include <linux/compiler.h> int karma_partition(struct parsed_partitions *state) { @@ -26,7 +27,7 @@ int karma_partition(struct parsed_partitions *state) } d_partitions[2]; u8 d_blank[208]; __le16 d_magic; - } __attribute__((packed)) *label; + } __packed *label; struct d_partition *p; data = read_part_sector(state, 0, &sect); -- cgit v1.2.3 From ec6c676a08b0779a32c9cec9fae7532add32fe15 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 17 Feb 2014 13:35:57 -0800 Subject: block: Substitute rcu_access_pointer() for rcu_dereference_raw() (Trivial patch.) If the code is looking at the RCU-protected pointer itself, but not dereferencing it, the rcu_dereference() functions can be downgraded to rcu_access_pointer(). This commit makes this downgrade in blkg_destroy() and ioc_destroy_icq(), both of which simply compare the RCU-protected pointer against another pointer with no dereferencing. Signed-off-by: Paul E. McKenney Cc: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 2 +- block/blk-ioc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4e491d9b5292..b6e95b5e262f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -336,7 +336,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * under queue_lock. If it's not pointing to @blkg now, it never * will. Hint assignment itself can race safely. */ - if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) + if (rcu_access_pointer(blkcg->blkg_hint) == blkg) rcu_assign_pointer(blkcg->blkg_hint, NULL); /* diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 242df01413f6..1a27f45ec776 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -68,7 +68,7 @@ static void ioc_destroy_icq(struct io_cq *icq) * under queue_lock. If it's not pointing to @icq now, it never * will. Hint assignment itself can race safely. */ - if (rcu_dereference_raw(ioc->icq_hint) == icq) + if (rcu_access_pointer(ioc->icq_hint) == icq) rcu_assign_pointer(ioc->icq_hint, NULL); ioc_exit_icq(icq); -- cgit v1.2.3 From bf36f9cfa6d3d129387b9d62f495d978ed4ade7c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 21 Feb 2014 18:10:59 +0800 Subject: fs/bio-integrity: remove duplicate code Most of the code in bio_integrity_verify() and bio_integrity_generate() is the same, so introduce a helper function, bio_integrity_generate_verify(), to remove the duplicate code. Signed-off-by: Gu Zheng Signed-off-by: Jens Axboe --- fs/bio-integrity.c | 86 +++++++++++++++++++++++------------------------------- 1 file changed, 37 insertions(+), 49 deletions(-) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0129b78a6908..413312f2b295 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -301,25 +301,26 @@ int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) EXPORT_SYMBOL(bio_integrity_get_tag); /** - * bio_integrity_generate - Generate integrity metadata for a bio - * @bio: bio to generate integrity metadata for - * - * Description: Generates integrity metadata for a bio by calling the - * block device's generation callback function. The bio must have a - * bip attached with enough room to accommodate the generated - * integrity metadata.
+ * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio + * @bio: bio to generate/verify integrity metadata for + * @operate: operate number, 1 for generate, 0 for verify */ -static void bio_integrity_generate(struct bio *bio) +static int bio_integrity_generate_verify(struct bio *bio, int operate) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_exchg bix; struct bio_vec bv; struct bvec_iter iter; - sector_t sector = bio->bi_iter.bi_sector; - unsigned int sectors, total; + sector_t sector; + unsigned int sectors, total, ret; void *prot_buf = bio->bi_integrity->bip_buf; - total = 0; + if (operate) + sector = bio->bi_iter.bi_sector; + else + sector = bio->bi_integrity->bip_iter.bi_sector; + + total = ret = 0; bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; @@ -330,7 +331,15 @@ static void bio_integrity_generate(struct bio *bio) bix.prot_buf = prot_buf; bix.sector = sector; - bi->generate_fn(&bix); + if (operate) { + bi->generate_fn(&bix); + } else { + ret = bi->verify_fn(&bix); + if (ret) { + kunmap_atomic(kaddr); + return ret; + } + } sectors = bv.bv_len / bi->sector_size; sector += sectors; @@ -340,6 +349,21 @@ static void bio_integrity_generate(struct bio *bio) kunmap_atomic(kaddr); } + return ret; +} + +/** + * bio_integrity_generate - Generate integrity metadata for a bio + * @bio: bio to generate integrity metadata for + * + * Description: Generates integrity metadata for a bio by calling the + * block device's generation callback function. The bio must have a + * bip attached with enough room to accommodate the generated + * integrity metadata. + */ +static void bio_integrity_generate(struct bio *bio) +{ + bio_integrity_generate_verify(bio, 1); } static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) @@ -454,43 +478,7 @@ EXPORT_SYMBOL(bio_integrity_prep); */ static int bio_integrity_verify(struct bio *bio) { - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - struct blk_integrity_exchg bix; - struct bio_vec *bv; - sector_t sector = bio->bi_integrity->bip_iter.bi_sector; - unsigned int sectors, total, ret; - void *prot_buf = bio->bi_integrity->bip_buf; - int i; - - ret = total = 0; - bix.disk_name = bio->bi_bdev->bd_disk->disk_name; - bix.sector_size = bi->sector_size; - - bio_for_each_segment_all(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page); - - bix.data_buf = kaddr + bv->bv_offset; - bix.data_size = bv->bv_len; - bix.prot_buf = prot_buf; - bix.sector = sector; - - ret = bi->verify_fn(&bix); - - if (ret) { - kunmap_atomic(kaddr); - return ret; - } - - sectors = bv->bv_len / bi->sector_size; - sector += sectors; - prot_buf += sectors * bi->tuple_size; - total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); - - kunmap_atomic(kaddr); - } - - return ret; + return bio_integrity_generate_verify(bio, 0); } /** -- cgit v1.2.3 From 8b4922d3173d2eee7b43be8e5caec3ab7d30feff Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:52 +0100 Subject: block: Stop abusing csd.list for fifo_time Block layer currently abuses rq->csd.list.next for storing fifo_time. That is a terrible hack and completely unnecessary as well. Union achieves the same space saving in a cleaner way. 
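For illustration, a minimal sketch of the before/after layout described above, using a simplified stand-in for struct call_single_data rather than the real kernel definitions:

	struct csd { struct list_head list; };	/* stand-in for call_single_data */

	struct request_old {
		struct csd csd;
	};
	/* old hack: smuggle the expiry time through a pointer field */
	#define rq_fifo_time(rq)  ((unsigned long)(rq)->csd.list.next)

	struct request_new {
		union {				/* same space saving, no casts */
			struct csd csd;		 /* live while an IPI is in flight */
			unsigned long fifo_time; /* live while queued in the elevator */
		};
	};

The overlay is safe because the two members are never needed at the same time: fifo_time only matters while the request sits in the I/O scheduler, and the csd only once the request is being completed.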
Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 8 ++++---- block/deadline-iosched.c | 8 ++++---- include/linux/blkdev.h | 1 + include/linux/elevator.h | 6 ------ 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 744833b630c6..5873e4ada9eb 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2367,10 +2367,10 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, * reposition in fifo if next is older than rq */ if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && - time_before(rq_fifo_time(next), rq_fifo_time(rq)) && + time_before(next->fifo_time, rq->fifo_time) && cfqq == RQ_CFQQ(next)) { list_move(&rq->queuelist, &next->queuelist); - rq_set_fifo_time(rq, rq_fifo_time(next)); + rq->fifo_time = next->fifo_time; } if (cfqq->next_rq == next) @@ -2814,7 +2814,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq) return NULL; rq = rq_entry_fifo(cfqq->fifo.next); - if (time_before(jiffies, rq_fifo_time(rq))) + if (time_before(jiffies, rq->fifo_time)) rq = NULL; cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq); @@ -3927,7 +3927,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, cfqq, "insert_request"); cfq_init_prio_data(cfqq, RQ_CIC(rq)); - rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]); + rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]; list_add_tail(&rq->queuelist, &cfqq->fifo); cfq_add_rq_rb(rq); cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 9ef66406c625..a753df2b3fc2 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -106,7 +106,7 @@ deadline_add_request(struct request_queue *q, struct request *rq) /* * set expire time and add to fifo list */ - rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); + rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); } @@ -174,9 +174,9 @@ deadline_merged_requests(struct request_queue *q, struct request *req, * and move into next position (next will be deleted) in fifo */ if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { - if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { + if (time_before(next->fifo_time, req->fifo_time)) { list_move(&req->queuelist, &next->queuelist); - rq_set_fifo_time(req, rq_fifo_time(next)); + req->fifo_time = next->fifo_time; } } @@ -230,7 +230,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) /* * rq is expired! 
*/ - if (time_after_eq(jiffies, rq_fifo_time(rq))) + if (time_after_eq(jiffies, rq->fifo_time)) return 1; return 0; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..1e1fa3f93d5f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -99,6 +99,7 @@ struct request { union { struct call_single_data csd; struct work_struct mq_flush_work; + unsigned long fifo_time; }; struct request_queue *q; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 306dd8cd0b6f..0bdfd46f4735 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -202,12 +202,6 @@ enum { #define rq_end_sector(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) -/* - * Hack to reuse the csd.list list_head as the fifo time holder while - * the request is in the io scheduler. Saves an unsigned long in rq. - */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) -#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ -- cgit v1.2.3 From d9a74df512e44580a34bf6e70f5d08c126507354 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:39:53 +0100 Subject: block: Remove useless IPI struct initialization rq_fifo_clear() resets the csd.list through INIT_LIST_HEAD for no clear purpose. The csd.list doesn't need to be initialized as a list head because it's only ever used as a list node. Let's remove this useless initialization. Reviewed-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/elevator.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 0bdfd46f4735..df63bd3a8cf1 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -203,10 +203,7 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) -#define rq_fifo_clear(rq) do { \ - list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->csd.list); \ - } while (0) +#define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 6d113398dcf4dfcd9787a4ead738b186f7b7ff0f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:54 +0100 Subject: block: Stop abusing rq->csd.list in blk-softirq Abusing rq->csd.list for a list of requests to complete is rather ugly. We use rq->queuelist instead, which is much cleaner. It is safe because queuelist is used by the block layer only for requests waiting to be submitted to a device. Thus it is unused by the time the IRQ reports that the request I/O is finished.
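For illustration, the lifetime argument as a sketched timeline (the sched_fifo and per-CPU done-list names here are invented for the example):

	/* rq->queuelist over a request's life: */
	list_add_tail(&rq->queuelist, &sched_fifo);	/* waiting in the elevator */
	list_del_init(&rq->queuelist);			/* dispatched to the driver */
	/* ... hardware finishes the I/O; queuelist is now unused ... */
	list_add_tail(&rq->queuelist, this_cpu_done_list); /* reused for completion */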
Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-softirq.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 57790c1a97eb..b5c37d96cf0e 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -30,8 +30,8 @@ static void blk_done_softirq(struct softirq_action *h) while (!list_empty(&local_list)) { struct request *rq; - rq = list_entry(local_list.next, struct request, csd.list); - list_del_init(&rq->csd.list); + rq = list_entry(local_list.next, struct request, queuelist); + list_del_init(&rq->queuelist); rq->q->softirq_done_fn(rq); } } @@ -45,9 +45,14 @@ static void trigger_softirq(void *data) local_irq_save(flags); list = this_cpu_ptr(&blk_cpu_done); - list_add_tail(&rq->csd.list, list); + /* + * We reuse queuelist for a list of requests to process. Since the + * queuelist is used by the block layer only for requests waiting to be + * submitted to the device it is unused now. + */ + list_add_tail(&rq->queuelist, list); - if (list->next == &rq->csd.list) + if (list->next == &rq->queuelist) raise_softirq_irqoff(BLOCK_SOFTIRQ); local_irq_restore(flags); @@ -136,7 +141,7 @@ void __blk_complete_request(struct request *req) struct list_head *list; do_local: list = this_cpu_ptr(&blk_cpu_done); - list_add_tail(&req->csd.list, list); + list_add_tail(&req->queuelist, list); /* * if the list only contains our just added request, @@ -144,7 +149,7 @@ do_local: * entries there, someone already raised the irq but it * hasn't run yet. */ - if (list->next == &req->csd.list) + if (list->next == &req->queuelist) raise_softirq_irqoff(BLOCK_SOFTIRQ); } else if (raise_blk_irq(ccpu, req)) goto do_local; -- cgit v1.2.3 From 5fd77595ec62141fa71e575bdbf410e0192f87d0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:55 +0100 Subject: smp: Iterate functions through llist_for_each_entry_safe() The IPI function llist iteration is open coded. Lets simplify this with using an llist iterator. Also we want to keep the iteration safe against possible csd.llist->next value reuse from the IPI handler. At least the block subsystem used to do such things so lets stay careful and use llist_for_each_entry_safe(). Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- kernel/smp.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index ffee35bef179..e3852de042a6 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -151,7 +151,8 @@ static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) */ void generic_smp_call_function_single_interrupt(void) { - struct llist_node *entry, *next; + struct llist_node *entry; + struct call_single_data *csd, *csd_next; /* * Shouldn't receive this interrupt on a cpu that is not yet online. 
@@ -161,16 +162,9 @@ void generic_smp_call_function_single_interrupt(void) entry = llist_del_all(&__get_cpu_var(call_single_queue)); entry = llist_reverse_order(entry); - while (entry) { - struct call_single_data *csd; - - next = entry->next; - - csd = llist_entry(entry, struct call_single_data, llist); + llist_for_each_entry_safe(csd, csd_next, entry, llist) { csd->func(csd->info); csd_unlock(csd); - - entry = next; } } -- cgit v1.2.3 From 0ebeb79ce920bed3a4a55113534951d95a444fbb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:56 +0100 Subject: smp: Remove unused list_head from csd Now that we got rid of all the remaining code which fiddled with csd.list, lets remove it. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index 6ae004e437ea..4991c6b12831 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -17,10 +17,7 @@ extern void cpu_idle(void); typedef void (*smp_call_func_t)(void *info); struct call_single_data { - union { - struct list_head list; - struct llist_node llist; - }; + struct llist_node llist; smp_call_func_t func; void *info; u16 flags; -- cgit v1.2.3 From 08eed44c7249d381a099bc55577e55c6bb533160 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Feb 2014 16:39:57 +0100 Subject: smp: Teach __smp_call_function_single() to check for offline cpus Align __smp_call_function_single() with smp_call_function_single() so that it also checks whether requested cpu is still online. Signed-off-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- include/linux/smp.h | 3 +-- kernel/smp.c | 11 +++++++---- kernel/up.c | 5 +++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/include/linux/smp.h b/include/linux/smp.h index 4991c6b12831..c39074c794c5 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,8 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -void __smp_call_function_single(int cpuid, struct call_single_data *data, - int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); #ifdef CONFIG_SMP diff --git a/kernel/smp.c b/kernel/smp.c index e3852de042a6..5ff14e3739ca 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -276,18 +276,18 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); /** * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. - * @data: Pre-allocated and setup data structure + * @csd: Pre-allocated and setup data structure * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. 
*/ -void __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { unsigned int this_cpu; unsigned long flags; + int err = 0; this_cpu = get_cpu(); /* @@ -303,11 +303,14 @@ void __smp_call_function_single(int cpu, struct call_single_data *csd, local_irq_save(flags); csd->func(csd->info); local_irq_restore(flags); - } else { + } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { csd_lock(csd); generic_exec_single(cpu, csd, wait); + } else { + err = -ENXIO; /* CPU not online */ } put_cpu(); + return err; } EXPORT_SYMBOL_GPL(__smp_call_function_single); diff --git a/kernel/up.c b/kernel/up.c index 509403e3fbc6..cdf03d16840e 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,14 +22,15 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -void __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd, + int wait) { unsigned long flags; local_irq_save(flags); csd->func(csd->info); local_irq_restore(flags); + return 0; } EXPORT_SYMBOL(__smp_call_function_single); -- cgit v1.2.3 From 8b28499a71d3431c9128abc743e2d2bfbdae3ed4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:39:58 +0100 Subject: smp: Consolidate the various smp_call_function_single() declensions __smp_call_function_single() and smp_call_function_single() share some code that can be factorized: execute inline when the target is local, check if the target is online, lock the csd, call generic_exec_single(). Lets move the common parts to generic_exec_single(). Reviewed-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- kernel/smp.c | 80 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 5ff14e3739ca..64bb0d48e96f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -117,13 +117,43 @@ static void csd_unlock(struct call_single_data *csd) csd->flags &= ~CSD_FLAG_LOCK; } +static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); + /* * Insert a previously allocated call_single_data element * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. 
*/ -static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) +static int generic_exec_single(int cpu, struct call_single_data *csd, + smp_call_func_t func, void *info, int wait) { + struct call_single_data csd_stack = { .flags = 0 }; + unsigned long flags; + + + if (cpu == smp_processor_id()) { + local_irq_save(flags); + func(info); + local_irq_restore(flags); + return 0; + } + + + if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) + return -ENXIO; + + + if (!csd) { + csd = &csd_stack; + if (!wait) + csd = &__get_cpu_var(csd_data); + } + + csd_lock(csd); + + csd->func = func; + csd->info = info; + if (wait) csd->flags |= CSD_FLAG_WAIT; @@ -143,6 +173,8 @@ static void generic_exec_single(int cpu, struct call_single_data *csd, int wait) if (wait) csd_lock_wait(csd); + + return 0; } /* @@ -168,8 +200,6 @@ void generic_smp_call_function_single_interrupt(void) } } -static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); - /* * smp_call_function_single - Run a function on a specific CPU * @func: The function to run. This must be fast and non-blocking. @@ -181,12 +211,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_data, csd_data); int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait) { - struct call_single_data d = { - .flags = 0, - }; - unsigned long flags; int this_cpu; - int err = 0; + int err; /* * prevent preemption and reschedule on another processor, @@ -203,26 +229,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() && !oops_in_progress); - if (cpu == this_cpu) { - local_irq_save(flags); - func(info); - local_irq_restore(flags); - } else { - if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - struct call_single_data *csd = &d; - - if (!wait) - csd = &__get_cpu_var(csd_data); - - csd_lock(csd); - - csd->func = func; - csd->info = info; - generic_exec_single(cpu, csd, wait); - } else { - err = -ENXIO; /* CPU not online */ - } - } + err = generic_exec_single(cpu, NULL, func, info, wait); put_cpu(); @@ -285,9 +292,8 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); */ int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) { - unsigned int this_cpu; - unsigned long flags; int err = 0; + int this_cpu; this_cpu = get_cpu(); /* @@ -296,20 +302,12 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) * send smp call function interrupt to this cpu and as such deadlocks * can't happen. */ - WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() + WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled() && !oops_in_progress); - if (cpu == this_cpu) { - local_irq_save(flags); - csd->func(csd->info); - local_irq_restore(flags); - } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { - csd_lock(csd); - generic_exec_single(cpu, csd, wait); - } else { - err = -ENXIO; /* CPU not online */ - } + err = generic_exec_single(cpu, csd, csd->func, csd->info, wait); put_cpu(); + return err; } EXPORT_SYMBOL_GPL(__smp_call_function_single); -- cgit v1.2.3 From d7877c03f1b62de06f9c00417952f39f56c1ab00 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:39:59 +0100 Subject: smp: Move __smp_call_function_single() below its safe version Move this function closer to __smp_call_function_single(). These functions have very similar behavior and should be displayed in the same block for clarity. 
Reviewed-by: Jan Kara Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- kernel/smp.c | 64 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/kernel/smp.c b/kernel/smp.c index 64bb0d48e96f..fa04ab938e52 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -237,6 +237,38 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, } EXPORT_SYMBOL(smp_call_function_single); +/** + * __smp_call_function_single(): Run a function on a specific CPU + * @cpu: The CPU to run on. + * @csd: Pre-allocated and setup data structure + * @wait: If true, wait until function has completed on specified CPU. + * + * Like smp_call_function_single(), but allow caller to pass in a + * pre-allocated data structure. Useful for embedding @data inside + * other structures, for instance. + */ +int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) +{ + int err = 0; + int this_cpu; + + this_cpu = get_cpu(); + /* + * Can deadlock when called with interrupts disabled. + * We allow cpu's that are not yet online though, as no one else can + * send smp call function interrupt to this cpu and as such deadlocks + * can't happen. + */ + WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled() + && !oops_in_progress); + + err = generic_exec_single(cpu, csd, csd->func, csd->info, wait); + put_cpu(); + + return err; +} +EXPORT_SYMBOL_GPL(__smp_call_function_single); + /* * smp_call_function_any - Run a function on any of the given cpus * @mask: The mask of cpus it can run on. @@ -280,38 +312,6 @@ call: } EXPORT_SYMBOL_GPL(smp_call_function_any); -/** - * __smp_call_function_single(): Run a function on a specific CPU - * @cpu: The CPU to run on. - * @csd: Pre-allocated and setup data structure - * @wait: If true, wait until function has completed on specified CPU. - * - * Like smp_call_function_single(), but allow caller to pass in a - * pre-allocated data structure. Useful for embedding @data inside - * other structures, for instance. - */ -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) -{ - int err = 0; - int this_cpu; - - this_cpu = get_cpu(); - /* - * Can deadlock when called with interrupts disabled. - * We allow cpu's that are not yet online though, as no one else can - * send smp call function interrupt to this cpu and as such deadlocks - * can't happen. - */ - WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled() - && !oops_in_progress); - - err = generic_exec_single(cpu, csd, csd->func, csd->info, wait); - put_cpu(); - - return err; -} -EXPORT_SYMBOL_GPL(__smp_call_function_single); - /** * smp_call_function_many(): Run a function on a set of other CPUs. * @mask: The set of cpus to run on (only runs on online subset). -- cgit v1.2.3 From e0a23b0628b10d25f2c178be6fcfc17c1ab49fda Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:00 +0100 Subject: watchdog: Simplify a little the IPI call In order to remotely restart the watchdog hrtimer, update_timers() allocates a csd on the stack and passes it to __smp_call_function_single(). There is no particular need, however, for a specific csd here. Let's simplify that a little by calling smp_call_function_single(), which can already take care of the csd allocation by itself.
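Sketched in general form (my_handler stands in for any smp_call_func_t; this is not the watchdog code itself):

	/* before: a caller-managed csd, allocated only to be waited on */
	struct call_single_data data = { .func = my_handler };
	__smp_call_function_single(cpu, &data, 1);

	/* after: the helper keeps the csd on its own stack when waiting */
	smp_call_function_single(cpu, my_handler, NULL, 1);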
Acked-by: Don Zickus Reviewed-by: Michal Hocko Cc: Andrew Morton Cc: Christoph Hellwig Cc: Don Zickus Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Cc: Michal Hocko Cc: Srivatsa S. Bhat Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- kernel/watchdog.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4431610f049a..01c6f979486f 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -505,7 +505,6 @@ static void restart_watchdog_hrtimer(void *info) static void update_timers(int cpu) { - struct call_single_data data = {.func = restart_watchdog_hrtimer}; /* * Make sure that perf event counter will adopt to a new * sampling period. Updating the sampling period directly would @@ -515,7 +514,7 @@ static void update_timers(int cpu) * might be late already so we have to restart the timer as well. */ watchdog_nmi_disable(cpu); - __smp_call_function_single(cpu, &data, 1); + smp_call_function_single(cpu, restart_watchdog_hrtimer, NULL, 1); watchdog_nmi_enable(cpu); } -- cgit v1.2.3 From fce8ad1568c57e7f334018dec4fa1744c926c135 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:01 +0100 Subject: smp: Remove wait argument from __smp_call_function_single() The main point of calling __smp_call_function_single() is to send an IPI in a purely asynchronous way. By embedding a csd in an object, a caller can send the IPI without waiting for a previous one to complete, as is required by smp_call_function_single() for example. As such, sending this kind of IPI can be safe even when irqs are disabled. This flexibility comes at the expense of the caller, who then needs to synchronize the csd lifecycle by himself and make sure that IPIs on a single csd are serialized. This is how __smp_call_function_single() works when wait = 0, and this use case is relevant. Now there doesn't seem to be any use case with wait = 1 that can't be covered by smp_call_function_single() instead, which is safer. Let's look at the two possible scenarios: 1) The user calls __smp_call_function_single(wait = 1) on a csd embedded in an object. It looks like a nice and convenient pattern at first sight because we can then retrieve the object from the IPI handler easily. But actually it is a waste of memory space in the object, since the csd can be allocated from the stack by smp_call_function_single(wait = 1) and the object can be passed as the IPI argument. Besides that, embedding the csd in an object is more error-prone because the caller must take care of the serialization of the IPIs for this csd. 2) The user calls __smp_call_function_single(wait = 1) on a csd that is allocated on the stack. It's ok, but smp_call_function_single() can do it as well and it already takes care of the allocation on the stack. Again it's simpler and less error-prone. Therefore, using the underscore-prefixed API version with wait = 1 is a bad pattern and a sign that the caller can do something safer and simpler. There was a single user of that, which has just been converted. So let's remove this option to discourage further users.
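For illustration, scenario 1 converted as described (struct poller, poke() and my_poller are invented for the example):

	struct poller { int value; };		/* no embedded csd required */

	static void poke(void *info)
	{
		struct poller *p = info;	/* object arrives as the IPI argument */
		p->value++;
	}

	/* with wait == 1 the csd lives on smp_call_function_single()'s stack */
	smp_call_function_single(cpu, poke, &my_poller, 1);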
Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-softirq.c | 2 +- drivers/cpuidle/coupled.c | 2 +- include/linux/smp.h | 2 +- kernel/sched/core.c | 2 +- kernel/smp.c | 19 ++++--------------- kernel/up.c | 3 +-- net/core/dev.c | 2 +- 8 files changed, 11 insertions(+), 23 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 1fa9dd153fde..62154edf1489 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -353,7 +353,7 @@ void __blk_mq_complete_request(struct request *rq) rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; - __smp_call_function_single(ctx->cpu, &rq->csd, 0); + __smp_call_function_single(ctx->cpu, &rq->csd); } else { rq->q->softirq_done_fn(rq); } diff --git a/block/blk-softirq.c b/block/blk-softirq.c index b5c37d96cf0e..6345b7ebd0df 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -70,7 +70,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data, 0); + __smp_call_function_single(cpu, data); return 0; } diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index e952936418d0..04115947accc 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd, 0); + __smp_call_function_single(cpu, csd); } /** diff --git a/include/linux/smp.h b/include/linux/smp.h index c39074c794c5..b410a1f23281 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait); +int __smp_call_function_single(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b46131ef6aab..eba3d84765f3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) if (rq == this_rq()) { __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { - __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); + __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; } } diff --git a/kernel/smp.c b/kernel/smp.c index fa04ab938e52..b76763189752 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -241,29 +241,18 @@ EXPORT_SYMBOL(smp_call_function_single); * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure - * @wait: If true, wait until function has completed on specified CPU. * * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ -int __smp_call_function_single(int cpu, struct call_single_data *csd, int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd) { int err = 0; - int this_cpu; - this_cpu = get_cpu(); - /* - * Can deadlock when called with interrupts disabled. - * We allow cpu's that are not yet online though, as no one else can - * send smp call function interrupt to this cpu and as such deadlocks - * can't happen. 
- */ - WARN_ON_ONCE(cpu_online(this_cpu) && wait && irqs_disabled() && !oops_in_progress); - - err = generic_exec_single(cpu, csd, csd->func, csd->info, wait); - put_cpu(); + preempt_disable(); + err = generic_exec_single(cpu, csd, csd->func, csd->info, 0); + preempt_enable(); return err; } diff --git a/kernel/up.c b/kernel/up.c index cdf03d16840e..4e199d4cef8e 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,8 +22,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int __smp_call_function_single(int cpu, struct call_single_data *csd, - int wait) +int __smp_call_function_single(int cpu, struct call_single_data *csd) { unsigned long flags; diff --git a/net/core/dev.c b/net/core/dev.c index 4ad1b78c9c77..d1298128bff4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4129,7 +4129,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) if (cpu_online(remsd->cpu)) __smp_call_function_single(remsd->cpu, - &remsd->csd, 0); + &remsd->csd); remsd = next; } } else -- cgit v1.2.3 From c46fff2a3b29794b35d717b5680a27f31a6a6bc0 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 24 Feb 2014 16:40:02 +0100 Subject: smp: Rename __smp_call_function_single() to smp_call_function_single_async() The name __smp_call_function_single() doesn't tell much about the properties of this function, especially when compared to smp_call_function_single(). The comments above the implementation are also misleading. The main point of this function is actually not to be able to embed the csd in an object. This is actually a requirement that results from the purpose of this function, which is to raise an IPI asynchronously. As such it can be called with interrupts disabled. And this feature comes at the cost of the caller, who then needs to serialize the IPIs on this csd. Let's rename the function and enhance the comments so that they reflect these properties.
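A sketch of the serialization burden this places on callers (the busy bit, the names and the handler are this example's own, not kernel code):

	struct my_dev {
		struct call_single_data csd;
		unsigned long busy;		/* caller-side IPI serialization */
	};

	static void my_dev_kick(int cpu, struct my_dev *dev)
	{
		/* one IPI per csd at a time; my_dev_ipi() clears the bit when done */
		if (test_and_set_bit(0, &dev->busy))
			return;
		dev->csd.func = my_dev_ipi;
		dev->csd.info = dev;
		dev->csd.flags = 0;
		smp_call_function_single_async(cpu, &dev->csd);
	}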
Suggested-by: Christoph Hellwig Cc: Andrew Morton Cc: Christoph Hellwig Cc: Ingo Molnar Cc: Jan Kara Cc: Jens Axboe Signed-off-by: Frederic Weisbecker Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- block/blk-softirq.c | 2 +- drivers/cpuidle/coupled.c | 2 +- include/linux/smp.h | 2 +- kernel/sched/core.c | 2 +- kernel/smp.c | 19 +++++++++++++------ kernel/up.c | 4 ++-- net/core/dev.c | 2 +- 8 files changed, 21 insertions(+), 14 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 62154edf1489..6468a715a0e4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -353,7 +353,7 @@ void __blk_mq_complete_request(struct request *rq) rq->csd.func = __blk_mq_complete_request_remote; rq->csd.info = rq; rq->csd.flags = 0; - __smp_call_function_single(ctx->cpu, &rq->csd); + smp_call_function_single_async(ctx->cpu, &rq->csd); } else { rq->q->softirq_done_fn(rq); } diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 6345b7ebd0df..ebd6b6f1bdeb 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -70,7 +70,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data); + smp_call_function_single_async(cpu, data); return 0; } diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 04115947accc..cb6654bfad77 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -323,7 +323,7 @@ static void cpuidle_coupled_poke(int cpu) struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) - __smp_call_function_single(cpu, csd); + smp_call_function_single_async(cpu, csd); } /** diff --git a/include/linux/smp.h b/include/linux/smp.h index b410a1f23281..633f5edd7470 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -50,7 +50,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info), smp_call_func_t func, void *info, bool wait, gfp_t gfp_flags); -int __smp_call_function_single(int cpu, struct call_single_data *csd); +int smp_call_function_single_async(int cpu, struct call_single_data *csd); #ifdef CONFIG_SMP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index eba3d84765f3..0cca04a53de0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -432,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay) if (rq == this_rq()) { __hrtick_restart(rq); } else if (!rq->hrtick_csd_pending) { - __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); + smp_call_function_single_async(cpu_of(rq), &rq->hrtick_csd); rq->hrtick_csd_pending = 1; } } diff --git a/kernel/smp.c b/kernel/smp.c index b76763189752..06d574e42c72 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -238,15 +238,22 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, EXPORT_SYMBOL(smp_call_function_single); /** - * __smp_call_function_single(): Run a function on a specific CPU + * smp_call_function_single_async(): Run an asynchronous function on a + * specific CPU. * @cpu: The CPU to run on. * @csd: Pre-allocated and setup data structure * - * Like smp_call_function_single(), but allow caller to pass in a - * pre-allocated data structure. Useful for embedding @data inside - * other structures, for instance. + * Like smp_call_function_single(), but the call is asynchonous and + * can thus be done from contexts with disabled interrupts. 
+ * + * The caller passes his own pre-allocated data structure + * (ie: embedded in an object) and is responsible for synchronizing it + * such that the IPIs performed on the @csd are strictly serialized. + * + * NOTE: Be careful, there is unfortunately no current debugging facility to + * validate the correctness of this serialization. */ -int __smp_call_function_single(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, struct call_single_data *csd) { int err = 0; @@ -256,7 +263,7 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd) return err; } -EXPORT_SYMBOL_GPL(__smp_call_function_single); +EXPORT_SYMBOL_GPL(smp_call_function_single_async); /* * smp_call_function_any - Run a function on any of the given cpus diff --git a/kernel/up.c b/kernel/up.c index 4e199d4cef8e..1760bf3d1463 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -22,7 +22,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, } EXPORT_SYMBOL(smp_call_function_single); -int __smp_call_function_single(int cpu, struct call_single_data *csd) +int smp_call_function_single_async(int cpu, struct call_single_data *csd) { unsigned long flags; @@ -31,7 +31,7 @@ int __smp_call_function_single(int cpu, struct call_single_data *csd) local_irq_restore(flags); return 0; } -EXPORT_SYMBOL(__smp_call_function_single); +EXPORT_SYMBOL(smp_call_function_single_async); int on_each_cpu(smp_call_func_t func, void *info, int wait) { diff --git a/net/core/dev.c b/net/core/dev.c index d1298128bff4..ac7a2abb7f1a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4128,7 +4128,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) struct softnet_data *next = remsd->rps_ipi_next; if (cpu_online(remsd->cpu)) - __smp_call_function_single(remsd->cpu, + smp_call_function_single_async(remsd->cpu, &remsd->csd); remsd = next; } -- cgit v1.2.3 From af5040da01ef980670b3741b3e10733ee3e33566 Mon Sep 17 00:00:00 2001 From: Roman Pen Date: Tue, 4 Mar 2014 23:13:10 +0900 Subject: blktrace: fix accounting of partially completed requests trace_block_rq_complete does not take into account that a request can be partially completed, so we can get the following incorrect output from blkparse: C R 232 + 240 [0] C R 240 + 232 [0] C R 248 + 224 [0] C R 256 + 216 [0] but it should be: C R 232 + 8 [0] C R 240 + 8 [0] C R 248 + 8 [0] C R 256 + 8 [0] Also, the summary statistics of completed requests and the final throughput in the output will be incorrect. This patch takes into account the real completion size of the request and fixes the wrong completion accounting.
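Worked through with the numbers above: the request starts at sector 232 and covers 240 sectors; each step completes 4096 bytes, i.e. 4096 >> 9 = 8 sectors, so the trace must derive its length from nr_bytes rather than from what is left of the request:

	trace_block_rq_complete(req->q, req, nr_bytes);
	/* in the tracepoint: */
	__entry->nr_sector = nr_bytes >> 9;	/* "232 + 8", not "232 + 240" */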
Signed-off-by: Roman Pen CC: Steven Rostedt CC: Frederic Weisbecker CC: Ingo Molnar CC: linux-kernel@vger.kernel.org Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-mq.c | 2 +- include/trace/events/block.h | 33 ++++++++++++++++++++++++++++++--- kernel/trace/blktrace.c | 20 +++++++++++--------- 4 files changed, 43 insertions(+), 14 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 853f92749202..99e20cca37e1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2354,7 +2354,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (!req->bio) return false; - trace_block_rq_complete(req->q, req); + trace_block_rq_complete(req->q, req, nr_bytes); /* * For fs requests, rq is just carrier of independent bio's diff --git a/block/blk-mq.c b/block/blk-mq.c index 6468a715a0e4..01d8735db8d3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -309,7 +309,7 @@ void blk_mq_end_io(struct request *rq, int error) struct bio *bio = rq->bio; unsigned int bytes = 0; - trace_block_rq_complete(rq->q, rq); + trace_block_rq_complete(rq->q, rq, blk_rq_bytes(rq)); while (bio) { struct bio *next = bio->bi_next; diff --git a/include/trace/events/block.h b/include/trace/events/block.h index e76ae19a8d6f..e8a5eca1dbe5 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -132,6 +132,7 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * block_rq_complete - block IO operation completed by device driver * @q: queue containing the block operation request * @rq: block operations request + * @nr_bytes: number of completed bytes * * The block_rq_complete tracepoint event indicates that some portion * of operation request has been completed by the device driver. If @@ -139,11 +140,37 @@ DEFINE_EVENT(block_rq_with_error, block_rq_requeue, * do for the request. If @rq->bio is non-NULL then there is * additional work required to complete the request. */ -DEFINE_EVENT(block_rq_with_error, block_rq_complete, +TRACE_EVENT(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), + TP_PROTO(struct request_queue *q, struct request *rq, + unsigned int nr_bytes), - TP_ARGS(q, rq) + TP_ARGS(q, rq, nr_bytes), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __field( int, errors ) + __array( char, rwbs, RWBS_LEN ) + __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) + ), + + TP_fast_assign( + __entry->dev = rq->rq_disk ? 
disk_devt(rq->rq_disk) : 0; + __entry->sector = blk_rq_pos(rq); + __entry->nr_sector = nr_bytes >> 9; + __entry->errors = rq->errors; + + blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes); + blk_dump_cmd(__get_str(cmd), rq); + ), + + TP_printk("%d,%d %s (%s) %llu + %u [%d]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->rwbs, __get_str(cmd), + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->errors) ); DECLARE_EVENT_CLASS(block_rq, diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b418cb0d7242..4f3a3c03eadb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -702,6 +702,7 @@ void blk_trace_shutdown(struct request_queue *q) * blk_add_trace_rq - Add a trace for a request oriented action * @q: queue the io is for * @rq: the source request + * @nr_bytes: number of completed bytes * @what: the action * * Description: * **/ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) + unsigned int nr_bytes, u32 what) { struct blk_trace *bt = q->blk_trace; @@ -718,11 +719,11 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags, + __blk_add_trace(bt, 0, nr_bytes, rq->cmd_flags, what, rq->errors, rq->cmd_len, rq->cmd); } else { what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + __blk_add_trace(bt, blk_rq_pos(rq), nr_bytes, rq->cmd_flags, what, rq->errors, 0, NULL); } } @@ -730,33 +731,34 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, static void blk_add_trace_rq_abort(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ABORT); } static void blk_add_trace_rq_insert(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_INSERT); } static void blk_add_trace_rq_issue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_ISSUE); } static void blk_add_trace_rq_requeue(void *ignore, struct request_queue *q, struct request *rq) { - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + blk_add_trace_rq(q, rq, blk_rq_bytes(rq), BLK_TA_REQUEUE); } static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, - struct request *rq) + struct request *rq, + unsigned int nr_bytes) { - blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); + blk_add_trace_rq(q, rq, nr_bytes, BLK_TA_COMPLETE); } /** -- cgit v1.2.3 From 89f8b33ca1ea881d1d84542282cb85d07d02e78d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Mar 2014 09:38:42 -0600 Subject: block: remove old blk_iopoll_enabled variable This was a debugging measure to toggle enabled/disabled when testing. But for real production setups, it's not safe to toggle this setting without either reloading drivers or quiescing I/O first, neither of which the toggle enforces. Additionally, it makes drivers deal with the conditional state. Remove it completely. It's up to the driver whether iopoll is enabled or not.
Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 3 - drivers/scsi/be2iscsi/be_main.c | 206 ++++++++++++---------------------------- drivers/scsi/ipr.c | 15 +-- include/linux/blk-iopoll.h | 2 - kernel/sysctl.c | 12 --- 5 files changed, 68 insertions(+), 170 deletions(-) diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index 1855bf51edb0..c11d24e379e2 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -14,9 +14,6 @@ #include "blk.h" -int blk_iopoll_enabled = 1; -EXPORT_SYMBOL(blk_iopoll_enabled); - static unsigned int blk_iopoll_budget __read_mostly = 256; static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 1f375051483a..a929c3c9aedc 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -873,7 +873,6 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) struct be_queue_info *cq; unsigned int num_eq_processed; struct be_eq_obj *pbe_eq; - unsigned long flags; pbe_eq = dev_id; eq = &pbe_eq->q; @@ -882,31 +881,15 @@ static irqreturn_t be_isr_msix(int irq, void *dev_id) phba = pbe_eq->phba; num_eq_processed = 0; - if (blk_iopoll_enabled) { - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); - - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); - num_eq_processed++; - } - } else { - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); - num_eq_processed++; - } + while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] + & EQE_VALID_MASK) { + if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) + blk_iopoll_sched(&pbe_eq->iopoll); - if (pbe_eq->todo_cq) - queue_work(phba->wq, &pbe_eq->work_cqs); + AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); + queue_tail_inc(eq); + eqe = queue_tail_node(eq); + num_eq_processed++; } if (num_eq_processed) @@ -927,7 +910,6 @@ static irqreturn_t be_isr(int irq, void *dev_id) struct hwi_context_memory *phwi_context; struct be_eq_entry *eqe = NULL; struct be_queue_info *eq; - struct be_queue_info *cq; struct be_queue_info *mcc; unsigned long flags, index; unsigned int num_mcceq_processed, num_ioeq_processed; @@ -953,72 +935,40 @@ static irqreturn_t be_isr(int irq, void *dev_id) num_ioeq_processed = 0; num_mcceq_processed = 0; - if (blk_iopoll_enabled) { - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - if (((eqe->dw[offsetof(struct amap_eq_entry, - resource_id) / 32] & - EQE_RESID_MASK) >> 16) == mcc->id) { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_mcc_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - num_mcceq_processed++; - } else { - if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) - blk_iopoll_sched(&pbe_eq->iopoll); - num_ioeq_processed++; - } - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); - } - if (num_ioeq_processed || num_mcceq_processed) { - if (pbe_eq->todo_mcc_cq) - queue_work(phba->wq, &pbe_eq->work_cqs); - - if ((num_mcceq_processed) && (!num_ioeq_processed)) - hwi_ring_eq_db(phba, eq->id, 0, - (num_ioeq_processed + - num_mcceq_processed) , 1, 1); - else - hwi_ring_eq_db(phba, eq->id, 0, 
- (num_ioeq_processed + - num_mcceq_processed), 0, 1); - - return IRQ_HANDLED; - } else - return IRQ_NONE; - } else { - cq = &phwi_context->be_cq[0]; - while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] - & EQE_VALID_MASK) { - - if (((eqe->dw[offsetof(struct amap_eq_entry, - resource_id) / 32] & - EQE_RESID_MASK) >> 16) != cq->id) { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_mcc_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - } else { - spin_lock_irqsave(&phba->isr_lock, flags); - pbe_eq->todo_cq = true; - spin_unlock_irqrestore(&phba->isr_lock, flags); - } - AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); - queue_tail_inc(eq); - eqe = queue_tail_node(eq); + while (eqe->dw[offsetof(struct amap_eq_entry, valid) / 32] + & EQE_VALID_MASK) { + if (((eqe->dw[offsetof(struct amap_eq_entry, + resource_id) / 32] & + EQE_RESID_MASK) >> 16) == mcc->id) { + spin_lock_irqsave(&phba->isr_lock, flags); + pbe_eq->todo_mcc_cq = true; + spin_unlock_irqrestore(&phba->isr_lock, flags); + num_mcceq_processed++; + } else { + if (!blk_iopoll_sched_prep(&pbe_eq->iopoll)) + blk_iopoll_sched(&pbe_eq->iopoll); num_ioeq_processed++; } - if (pbe_eq->todo_cq || pbe_eq->todo_mcc_cq) + AMAP_SET_BITS(struct amap_eq_entry, valid, eqe, 0); + queue_tail_inc(eq); + eqe = queue_tail_node(eq); + } + if (num_ioeq_processed || num_mcceq_processed) { + if (pbe_eq->todo_mcc_cq) queue_work(phba->wq, &pbe_eq->work_cqs); - if (num_ioeq_processed) { + if ((num_mcceq_processed) && (!num_ioeq_processed)) hwi_ring_eq_db(phba, eq->id, 0, - num_ioeq_processed, 1, 1); - return IRQ_HANDLED; - } else - return IRQ_NONE; - } + (num_ioeq_processed + + num_mcceq_processed) , 1, 1); + else + hwi_ring_eq_db(phba, eq->id, 0, + (num_ioeq_processed + + num_mcceq_processed), 0, 1); + + return IRQ_HANDLED; + } else + return IRQ_NONE; } static int beiscsi_init_irqs(struct beiscsi_hba *phba) @@ -5216,11 +5166,10 @@ static void beiscsi_quiesce(struct beiscsi_hba *phba, } pci_disable_msix(phba->pcidev); - if (blk_iopoll_enabled) - for (i = 0; i < phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_disable(&pbe_eq->iopoll); - } + for (i = 0; i < phba->num_cpus; i++) { + pbe_eq = &phwi_context->be_eq[i]; + blk_iopoll_disable(&pbe_eq->iopoll); + } if (unload_state == BEISCSI_CLEAN_UNLOAD) { destroy_workqueue(phba->wq); @@ -5429,32 +5378,18 @@ static void beiscsi_eeh_resume(struct pci_dev *pdev) phwi_ctrlr = phba->phwi_ctrlr; phwi_context = phwi_ctrlr->phwi_ctxt; - if (blk_iopoll_enabled) { - for (i = 0; i < phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, - be_iopoll); - blk_iopoll_enable(&pbe_eq->iopoll); - } - - i = (phba->msix_enabled) ? i : 0; - /* Work item for MCC handling */ + for (i = 0; i < phba->num_cpus; i++) { pbe_eq = &phwi_context->be_eq[i]; - INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs); - } else { - if (phba->msix_enabled) { - for (i = 0; i <= phba->num_cpus; i++) { - pbe_eq = &phwi_context->be_eq[i]; - INIT_WORK(&pbe_eq->work_cqs, - beiscsi_process_all_cqs); - } - } else { - pbe_eq = &phwi_context->be_eq[0]; - INIT_WORK(&pbe_eq->work_cqs, - beiscsi_process_all_cqs); - } + blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget, + be_iopoll); + blk_iopoll_enable(&pbe_eq->iopoll); } + i = (phba->msix_enabled) ? 
+    /* Work item for MCC handling */
+    pbe_eq = &phwi_context->be_eq[i];
+    INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs);
+
     ret = beiscsi_init_irqs(phba);
     if (ret < 0) {
         beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT,
@@ -5614,32 +5549,18 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
     phwi_ctrlr = phba->phwi_ctrlr;
     phwi_context = phwi_ctrlr->phwi_ctxt;
 
-    if (blk_iopoll_enabled) {
-        for (i = 0; i < phba->num_cpus; i++) {
-            pbe_eq = &phwi_context->be_eq[i];
-            blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
-                    be_iopoll);
-            blk_iopoll_enable(&pbe_eq->iopoll);
-        }
-
-        i = (phba->msix_enabled) ? i : 0;
-        /* Work item for MCC handling */
+    for (i = 0; i < phba->num_cpus; i++) {
         pbe_eq = &phwi_context->be_eq[i];
-        INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs);
-    } else {
-        if (phba->msix_enabled) {
-            for (i = 0; i <= phba->num_cpus; i++) {
-                pbe_eq = &phwi_context->be_eq[i];
-                INIT_WORK(&pbe_eq->work_cqs,
-                        beiscsi_process_all_cqs);
-            }
-        } else {
-            pbe_eq = &phwi_context->be_eq[0];
-            INIT_WORK(&pbe_eq->work_cqs,
-                    beiscsi_process_all_cqs);
-        }
+        blk_iopoll_init(&pbe_eq->iopoll, be_iopoll_budget,
+                be_iopoll);
+        blk_iopoll_enable(&pbe_eq->iopoll);
     }
 
+    i = (phba->msix_enabled) ? i : 0;
+    /* Work item for MCC handling */
+    pbe_eq = &phwi_context->be_eq[i];
+    INIT_WORK(&pbe_eq->work_cqs, beiscsi_process_all_cqs);
+
     ret = beiscsi_init_irqs(phba);
     if (ret < 0) {
         beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_INIT,
@@ -5668,11 +5589,10 @@ static int beiscsi_dev_probe(struct pci_dev *pcidev,
 
 free_blkenbld:
     destroy_workqueue(phba->wq);
-    if (blk_iopoll_enabled)
-        for (i = 0; i < phba->num_cpus; i++) {
-            pbe_eq = &phwi_context->be_eq[i];
-            blk_iopoll_disable(&pbe_eq->iopoll);
-        }
+    for (i = 0; i < phba->num_cpus; i++) {
+        pbe_eq = &phwi_context->be_eq[i];
+        blk_iopoll_disable(&pbe_eq->iopoll);
+    }
 free_twq:
     beiscsi_clean_port(phba);
     beiscsi_free_mem(phba);
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 3f5b56a99892..69470f5c0ac9 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3630,16 +3630,14 @@ static ssize_t ipr_store_iopoll_weight(struct device *dev,
         return strlen(buf);
     }
 
-    if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
-            ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+    if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
         for (i = 1; i < ioa_cfg->hrrq_num; i++)
             blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
     }
 
     spin_lock_irqsave(shost->host_lock, lock_flags);
     ioa_cfg->iopoll_weight = user_iopoll_weight;
-    if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
-            ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+    if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
         for (i = 1; i < ioa_cfg->hrrq_num; i++) {
             blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
                     ioa_cfg->iopoll_weight, ipr_iopoll);
@@ -5484,8 +5482,7 @@ static irqreturn_t ipr_isr_mhrrq(int irq, void *devp)
         return IRQ_NONE;
     }
 
-    if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
-            ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+    if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
         if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) ==
                 hrrq->toggle_bit) {
             if (!blk_iopoll_sched_prep(&hrrq->iopoll))
@@ -9859,8 +9856,7 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id)
         ioa_cfg->host->max_channel = IPR_VSET_BUS;
     ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
 
-    if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
-            ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+    if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
         for (i = 1; i < ioa_cfg->hrrq_num; i++) {
             blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll,
                     ioa_cfg->iopoll_weight, ipr_iopoll);
@@ -9889,8 +9885,7 @@ static void ipr_shutdown(struct pci_dev *pdev)
     int i;
 
     spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-    if (blk_iopoll_enabled && ioa_cfg->iopoll_weight &&
-            ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
+    if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {
         ioa_cfg->iopoll_weight = 0;
         for (i = 1; i < ioa_cfg->hrrq_num; i++)
             blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll);
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
index 308734d3d4a2..77ae77c0b704 100644
--- a/include/linux/blk-iopoll.h
+++ b/include/linux/blk-iopoll.h
@@ -43,6 +43,4 @@ extern void __blk_iopoll_complete(struct blk_iopoll *);
 extern void blk_iopoll_enable(struct blk_iopoll *);
 extern void blk_iopoll_disable(struct blk_iopoll *);
 
-extern int blk_iopoll_enabled;
-
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 49e13e1f8fe6..ef0bf04e8649 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -112,9 +112,6 @@ extern int sysctl_nr_open_min, sysctl_nr_open_max;
 #ifndef CONFIG_MMU
 extern int sysctl_nr_trim_pages;
 #endif
-#ifdef CONFIG_BLOCK
-extern int blk_iopoll_enabled;
-#endif
 
 /* Constants used for minimum and maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -1093,15 +1090,6 @@ static struct ctl_table kern_table[] = {
         .mode = 0644,
         .proc_handler = proc_dointvec,
     },
-#endif
-#ifdef CONFIG_BLOCK
-    {
-        .procname = "blk_iopoll",
-        .data = &blk_iopoll_enabled,
-        .maxlen = sizeof(int),
-        .mode = 0644,
-        .proc_handler = proc_dointvec,
-    },
 #endif
     { }
 };
-- cgit v1.2.3


From 95363efde193079541cb379eb47140e9c4d355d5 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Fri, 14 Mar 2014 10:43:15 -0600
Subject: blk-mq: allow blk_mq_init_commands() to return failure

If drivers do dynamic allocation in the hardware command init path,
then we need to be able to handle and return failures.

And if they do allocations or mappings in the init command path, then
we need a cleanup function to free up that space at exit time. So add
blk_mq_free_commands() as the cleanup function.

This is required for the mtip32xx driver conversion to blk-mq.
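As a hedged illustration of the contract this creates, a driver-side
callback pair might look like the sketch below; the mydrv names,
MYDRV_BUF_SIZE, and the per-command buffer are invented for the example,
and only the callback signatures come from this patch:

    static int mydrv_init_cmd(void *data, struct blk_mq_hw_ctx *hctx,
                              struct request *rq, unsigned int nr)
    {
        struct mydrv *drv = data;
        struct mydrv_cmd *cmd = rq->special;

        /* per-command allocation that can fail; propagate the error */
        cmd->buf = dma_alloc_coherent(&drv->pdev->dev, MYDRV_BUF_SIZE,
                                      &cmd->buf_dma, GFP_KERNEL);
        return cmd->buf ? 0 : -ENOMEM;
    }

    static void mydrv_free_cmd(void *data, struct blk_mq_hw_ctx *hctx,
                               struct request *rq, unsigned int nr)
    {
        struct mydrv *drv = data;
        struct mydrv_cmd *cmd = rq->special;

        /* undo mydrv_init_cmd() for this command */
        if (cmd->buf)
            dma_free_coherent(&drv->pdev->dev, MYDRV_BUF_SIZE,
                              cmd->buf, cmd->buf_dma);
    }

A probe path would then check blk_mq_init_commands(q, mydrv_init_cmd, drv)
for failure, and call blk_mq_free_commands(q, mydrv_free_cmd, drv) on
teardown.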
Signed-off-by: Jens Axboe
---
 block/blk-mq.c             | 52 +++++++++++++++++++++++++++++++++++++++-------
 drivers/block/virtio_blk.c |  3 ++-
 include/linux/blk-mq.h     |  3 ++-
 3 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 01d8735db8d3..92284af4e0df 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1058,8 +1058,46 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
     blk_mq_put_ctx(ctx);
 }
 
-static void blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
-        void (*init)(void *, struct blk_mq_hw_ctx *,
+static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
+        int (*init)(void *, struct blk_mq_hw_ctx *,
+            struct request *, unsigned int),
+        void *data)
+{
+    unsigned int i;
+    int ret = 0;
+
+    for (i = 0; i < hctx->queue_depth; i++) {
+        struct request *rq = hctx->rqs[i];
+
+        ret = init(data, hctx, rq, i);
+        if (ret)
+            break;
+    }
+
+    return ret;
+}
+
+int blk_mq_init_commands(struct request_queue *q,
+        int (*init)(void *, struct blk_mq_hw_ctx *,
+            struct request *, unsigned int),
+        void *data)
+{
+    struct blk_mq_hw_ctx *hctx;
+    unsigned int i;
+    int ret = 0;
+
+    queue_for_each_hw_ctx(q, hctx, i) {
+        ret = blk_mq_init_hw_commands(hctx, init, data);
+        if (ret)
+            break;
+    }
+
+    return ret;
+}
+EXPORT_SYMBOL(blk_mq_init_commands);
+
+static void blk_mq_free_hw_commands(struct blk_mq_hw_ctx *hctx,
+        void (*free)(void *, struct blk_mq_hw_ctx *,
             struct request *, unsigned int),
         void *data)
 {
@@ -1068,12 +1106,12 @@ static void blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
     for (i = 0; i < hctx->queue_depth; i++) {
         struct request *rq = hctx->rqs[i];
 
-        init(data, hctx, rq, i);
+        free(data, hctx, rq, i);
     }
 }
 
-void blk_mq_init_commands(struct request_queue *q,
-        void (*init)(void *, struct blk_mq_hw_ctx *,
+void blk_mq_free_commands(struct request_queue *q,
+        void (*free)(void *, struct blk_mq_hw_ctx *,
             struct request *, unsigned int),
         void *data)
 {
@@ -1081,9 +1119,9 @@ void blk_mq_init_commands(struct request_queue *q,
     unsigned int i;
 
     queue_for_each_hw_ctx(q, hctx, i)
-        blk_mq_init_hw_commands(hctx, init, data);
+        blk_mq_free_hw_commands(hctx, free, data);
 }
-EXPORT_SYMBOL(blk_mq_init_commands);
+EXPORT_SYMBOL(blk_mq_free_commands);
 
 static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx)
 {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index b1cb3f4c4db4..0eace43cea11 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -490,13 +490,14 @@ static struct blk_mq_reg virtio_mq_reg = {
     .flags = BLK_MQ_F_SHOULD_MERGE,
 };
 
-static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
+static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
         struct request *rq, unsigned int nr)
 {
     struct virtio_blk *vblk = data;
     struct virtblk_req *vbr = rq->special;
 
     sg_init_table(vbr->sg, vblk->sg_elems);
+    return 0;
 }
 
 static int virtblk_probe(struct virtio_device *vdev)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 18ba8a627f46..33ff10ebcabb 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -117,7 +117,8 @@ enum {
 struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
-void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
+int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
+void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
-- cgit v1.2.3


From 5d12f905cc50c0810628d0deedd478ec2db48659 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 19 Mar 2014 15:25:02 -0600
Subject: blk-mq: fix wrong usage of hctx->state vs hctx->flags

BLK_MQ_F_* flags are for hctx->flags, and are non-atomic and set at
registration time. BLK_MQ_S_* flags are dynamic and atomic, and are
accessed through hctx->state.

Some of the BLK_MQ_S_STOPPED uses were wrong. Additionally, the header
file should not use a bit shift for the _S_ flags, as they are done
through the set/test_bit functions.

Signed-off-by: Jens Axboe
---
 block/blk-mq.c         | 6 +++---
 include/linux/blk-mq.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 92284af4e0df..ed216f27e3b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -547,7 +547,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
     LIST_HEAD(rq_list);
     int bit, queued;
 
-    if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags)))
+    if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
         return;
 
     hctx->run++;
@@ -636,7 +636,7 @@
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
-    if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags)))
+    if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
         return;
 
     if (!async)
@@ -656,7 +656,7 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
     queue_for_each_hw_ctx(q, hctx, i) {
         if ((!blk_mq_hctx_has_pending(hctx) &&
             list_empty_careful(&hctx->dispatch)) ||
-            test_bit(BLK_MQ_S_STOPPED, &hctx->flags))
+            test_bit(BLK_MQ_S_STOPPED, &hctx->state))
             continue;
 
         blk_mq_run_hw_queue(hctx, async);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 33ff10ebcabb..bb68b03741be 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -109,7 +109,7 @@ enum {
     BLK_MQ_F_SHOULD_SORT = 1 << 1,
     BLK_MQ_F_SHOULD_IPI  = 1 << 2,
 
-    BLK_MQ_S_STOPPED = 1 << 0,
+    BLK_MQ_S_STOPPED = 0,
 
     BLK_MQ_MAX_DEPTH = 2048,
 };
-- cgit v1.2.3


From 676141e48af7463717896352e69c10f945ac22dd Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 20 Mar 2014 13:29:18 -0600
Subject: blk-mq: don't dump CPU -> hw queue map on driver load

Now that we are out of initial debug/bringup mode, remove the verbose
dump of the mapping table.

Provide the mapping table in sysfs, under the hardware queue directory,
in the cpu_list file.
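As a rough usage sketch (not part of the patch), a userspace program
could read the new file like this; the device name vda and the exact
path are assumptions based on the directory layout described above:

    #include <stdio.h>

    int main(void)
    {
        char line[256];
        /* hypothetical device; substitute the real disk name */
        FILE *f = fopen("/sys/block/vda/mq/0/cpu_list", "r");

        if (!f) {
            perror("fopen");
            return 1;
        }
        if (fgets(line, sizeof(line), f))
            printf("CPUs served by hw queue 0: %s", line);
        fclose(f);
        return 0;
    }

The file holds a comma-separated CPU list followed by a newline,
matching the sprintf calls in the diff below.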
Signed-off-by: Jens Axboe
---
 block/blk-mq-cpumap.c | 10 ----------
 block/blk-mq-sysfs.c  | 31 +++++++++++++++++++++++++++++++
 block/blk-mq.c        | 10 ++++++++++
 block/blk-mq.h        |  2 ++
 4 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index f8721278601c..097921329619 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -9,15 +9,6 @@
 #include "blk.h"
 #include "blk-mq.h"
 
-static void show_map(unsigned int *map, unsigned int nr)
-{
-    int i;
-
-    pr_info("blk-mq: CPU -> queue map\n");
-    for_each_online_cpu(i)
-        pr_info(" CPU%2u -> Queue %u\n", i, map[i]);
-}
-
 static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
         const int cpu)
 {
@@ -85,7 +76,6 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
             map[i] = map[first_sibling];
     }
 
-    show_map(map, nr_cpus);
     free_cpumask_var(cpus);
     return 0;
 }
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index b91ce75bd35d..b0ba264b0522 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -244,6 +244,32 @@ static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
     return blk_mq_tag_sysfs_show(hctx->tags, page);
 }
 
+static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+    unsigned int i, queue_num, first = 1;
+    ssize_t ret = 0;
+
+    blk_mq_disable_hotplug();
+
+    for_each_online_cpu(i) {
+        queue_num = hctx->queue->mq_map[i];
+        if (queue_num != hctx->queue_num)
+            continue;
+
+        if (first)
+            ret += sprintf(ret + page, "%u", i);
+        else
+            ret += sprintf(ret + page, ", %u", i);
+
+        first = 0;
+    }
+
+    blk_mq_enable_hotplug();
+
+    ret += sprintf(ret + page, "\n");
+    return ret;
+}
+
 static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
     .attr = {.name = "dispatched", .mode = S_IRUGO },
     .show = blk_mq_sysfs_dispatched_show,
@@ -294,6 +320,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
     .attr = {.name = "tags", .mode = S_IRUGO },
     .show = blk_mq_hw_sysfs_tags_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
+    .attr = {.name = "cpu_list", .mode = S_IRUGO },
+    .show = blk_mq_hw_sysfs_cpus_show,
+};
 
 static struct attribute *default_hw_ctx_attrs[] = {
     &blk_mq_hw_sysfs_queued.attr,
@@ -302,6 +332,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
     &blk_mq_hw_sysfs_pending.attr,
     &blk_mq_hw_sysfs_ipi.attr,
     &blk_mq_hw_sysfs_tags.attr,
+    &blk_mq_hw_sysfs_cpus.attr,
     NULL,
 };
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ed216f27e3b8..3b1c425a935e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1532,6 +1532,16 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
     return NOTIFY_OK;
 }
 
+void blk_mq_disable_hotplug(void)
+{
+    mutex_lock(&all_q_mutex);
+}
+
+void blk_mq_enable_hotplug(void)
+{
+    mutex_unlock(&all_q_mutex);
+}
+
 static int __init blk_mq_init(void)
 {
     blk_mq_cpu_init();
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ed0035cd458e..361f9343dab1 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -40,6 +40,8 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
 void blk_mq_cpu_init(void);
+void blk_mq_enable_hotplug(void);
+void blk_mq_disable_hotplug(void);
 
 /*
  * CPU -> queue mappings
-- cgit v1.2.3


From 081241e592c47f4ed2999a0b576ae85e765c6da4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 20 Feb 2014 15:32:36 -0800
Subject: blk-mq: remove blk_mq_alloc_rq
There's only one caller, which is a straight wrapper and fits the
naming scheme of the related functions a lot better.

Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3b1c425a935e..a56e77383738 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -73,8 +73,8 @@ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
     set_bit(ctx->index_hw, hctx->ctx_map);
 }
 
-static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp,
-        bool reserved)
+static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
+        gfp_t gfp, bool reserved)
 {
     struct request *rq;
     unsigned int tag;
@@ -193,12 +193,6 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
     ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
 }
 
-static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
-        gfp_t gfp, bool reserved)
-{
-    return blk_mq_alloc_rq(hctx, gfp, reserved);
-}
-
 static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
         int rw, gfp_t gfp, bool reserved)
-- cgit v1.2.3


From eeabc850b79336575da7be3dbe186a2da4de8293 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 21 Mar 2014 08:57:37 -0600
Subject: blk-mq: merge blk_mq_insert_request and blk_mq_run_request

blk_mq_run_request is almost identical to blk_mq_insert_request, so fold
the two into one slightly more generic function by making the flush
special case a bit smarter.

Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-exec.c       |  2 +-
 block/blk-flush.c      |  4 ++--
 block/blk-mq.c         | 53 ++++++++++----------------------------------
 block/blk-mq.h         |  1 -
 include/linux/blk-mq.h |  3 +--
 5 files changed, 14 insertions(+), 49 deletions(-)

diff --git a/block/blk-exec.c b/block/blk-exec.c
index c68613bb4c79..dbf4502b1d67 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
      * be resued after dying flag is set
      */
     if (q->mq_ops) {
-        blk_mq_insert_request(q, rq, at_head, true);
+        blk_mq_insert_request(rq, at_head, true, false);
         return;
     }
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 66e2b697f5db..f598f794c3c6 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -137,7 +137,7 @@ static void mq_flush_run(struct work_struct *work)
     rq = container_of(work, struct request, mq_flush_work);
 
     memset(&rq->csd, 0, sizeof(rq->csd));
-    blk_mq_run_request(rq, true, false);
+    blk_mq_insert_request(rq, false, true, false);
 }
 
 static bool blk_flush_queue_rq(struct request *rq)
@@ -411,7 +411,7 @@ void blk_insert_flush(struct request *rq)
     if ((policy & REQ_FSEQ_DATA) &&
         !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
         if (q->mq_ops) {
-            blk_mq_run_request(rq, false, true);
+            blk_mq_insert_request(rq, false, false, true);
         } else
             list_add_tail(&rq->queuelist, &q->queue_head);
         return;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a56e77383738..81ff7879bac8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -724,61 +724,28 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
     blk_mq_add_timer(rq);
 }
 
-void blk_mq_insert_request(struct request_queue *q, struct request *rq,
-        bool at_head, bool run_queue)
+void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
+        bool async)
 {
+    struct request_queue *q = rq->q;
     struct blk_mq_hw_ctx *hctx;
-    struct blk_mq_ctx *ctx, *current_ctx;
+    struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
+
+    current_ctx = blk_mq_get_ctx(q);
+    if (!cpu_online(ctx->cpu))
+        rq->mq_ctx = ctx = current_ctx;
 
-    ctx = rq->mq_ctx;
     hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-    if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+    if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
+        !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
         blk_insert_flush(rq);
     } else {
-        current_ctx = blk_mq_get_ctx(q);
-
-        if (!cpu_online(ctx->cpu)) {
-            ctx = current_ctx;
-            hctx = q->mq_ops->map_queue(q, ctx->cpu);
-            rq->mq_ctx = ctx;
-        }
         spin_lock(&ctx->lock);
         __blk_mq_insert_request(hctx, rq, at_head);
         spin_unlock(&ctx->lock);
-
-        blk_mq_put_ctx(current_ctx);
     }
 
-    if (run_queue)
-        __blk_mq_run_hw_queue(hctx);
-}
-EXPORT_SYMBOL(blk_mq_insert_request);
-
-/*
- * This is a special version of blk_mq_insert_request to bypass FLUSH request
- * check. Should only be used internally.
- */
-void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
-{
-    struct request_queue *q = rq->q;
-    struct blk_mq_hw_ctx *hctx;
-    struct blk_mq_ctx *ctx, *current_ctx;
-
-    current_ctx = blk_mq_get_ctx(q);
-
-    ctx = rq->mq_ctx;
-    if (!cpu_online(ctx->cpu)) {
-        ctx = current_ctx;
-        rq->mq_ctx = ctx;
-    }
-    hctx = q->mq_ops->map_queue(q, ctx->cpu);
-
-    /* ctx->cpu might be offline */
-    spin_lock(&ctx->lock);
-    __blk_mq_insert_request(hctx, rq, false);
-    spin_unlock(&ctx->lock);
-
     blk_mq_put_ctx(current_ctx);
 
     if (run_queue)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 361f9343dab1..ebbe6bac9d61 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -23,7 +23,6 @@ struct blk_mq_ctx {
 };
 
 void __blk_mq_complete_request(struct request *rq);
-void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_init_flush(struct request_queue *q);
 void blk_mq_drain_queue(struct request_queue *q);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index bb68b03741be..349c730a579e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -122,8 +122,7 @@ void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struc
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
-void blk_mq_insert_request(struct request_queue *, struct request *,
-        bool, bool);
+void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_run_queues(struct request_queue *q, bool async);
 void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
-- cgit v1.2.3


From 7237c740b04fd173cb24391d3e5be79ebd8d485f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 20 Feb 2014 15:32:38 -0800
Subject: blk-mq: support partial I/O completions

Add a new blk_mq_end_io_partial function to partially complete requests
as needed by the SCSI layer. We do this by reusing blk_update_request
to advance the bio instead of having a simplified version of it in the
blk-mq code.
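A hedged sketch of the intended call pattern; the driver context and
the requeue step are invented for illustration, and only
blk_mq_end_io_partial() and its return convention come from this patch:

    /*
     * Complete the first 'done' bytes of 'rq'. If the request still has
     * bytes left, blk_mq_end_io_partial() returns true and the caller
     * must deal with the remainder (e.g. requeue it); on false the
     * whole request has been finished and released.
     */
    static void mydrv_complete(struct request *rq, int error,
                               unsigned int done)
    {
        if (blk_mq_end_io_partial(rq, error, done))
            mydrv_requeue(rq);    /* hypothetical driver helper */
    }

Full completions keep working by passing blk_rq_bytes(rq) as the byte
count.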
Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 37 +++++--------------------------------
 1 file changed, 5 insertions(+), 32 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 81ff7879bac8..3c2804879a90 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -283,38 +283,10 @@ void blk_mq_free_request(struct request *rq)
     __blk_mq_free_request(hctx, ctx, rq);
 }
 
-static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
+bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes)
 {
-    if (error)
-        clear_bit(BIO_UPTODATE, &bio->bi_flags);
-    else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-        error = -EIO;
-
-    if (unlikely(rq->cmd_flags & REQ_QUIET))
-        set_bit(BIO_QUIET, &bio->bi_flags);
-
-    /* don't actually finish bio if it's part of flush sequence */
-    if (!(rq->cmd_flags & REQ_FLUSH_SEQ))
-        bio_endio(bio, error);
-}
-
-void blk_mq_end_io(struct request *rq, int error)
-{
-    struct bio *bio = rq->bio;
-    unsigned int bytes = 0;
-
-    trace_block_rq_complete(rq->q, rq, blk_rq_bytes(rq));
-
-    while (bio) {
-        struct bio *next = bio->bi_next;
-
-        bio->bi_next = NULL;
-        bytes += bio->bi_iter.bi_size;
-        blk_mq_bio_endio(rq, bio, error);
-        bio = next;
-    }
-
-    blk_account_io_completion(rq, bytes);
+    if (blk_update_request(rq, error, nr_bytes))
+        return true;
 
     blk_account_io_done(rq);
 
@@ -322,8 +294,9 @@ void blk_mq_end_io(struct request *rq, int error)
         rq->end_io(rq, error);
     else
         blk_mq_free_request(rq);
+    return false;
 }
-EXPORT_SYMBOL(blk_mq_end_io);
+EXPORT_SYMBOL(blk_mq_end_io_partial);
 
 static void __blk_mq_complete_request_remote(void *data)
 {
-- cgit v1.2.3


From 55c816e3df86cfebf7c8cdd44892800aaaa8fce0 Mon Sep 17 00:00:00 2001
From: Mike Galbraith
Date: Mon, 3 Mar 2014 05:57:26 +0100
Subject: rt,blk,mq: Make blk_mq_cpu_notify_lock a raw spinlock

[ 365.164040] BUG: sleeping function called from invalid context at kernel/rtmutex.c:674
[ 365.164041] in_atomic(): 1, irqs_disabled(): 1, pid: 26, name: migration/1
[ 365.164043] no locks held by migration/1/26.
[ 365.164044] irq event stamp: 6648
[ 365.164056] hardirqs last enabled at (6647): [] restore_args+0x0/0x30
[ 365.164062] hardirqs last disabled at (6648): [] multi_cpu_stop+0x9d/0x120
[ 365.164070] softirqs last enabled at (0): [] copy_process.part.28+0x6fc/0x1920
[ 365.164072] softirqs last disabled at (0): [< (null)>] (null)
[ 365.164076] CPU: 1 PID: 26 Comm: migration/1 Tainted: GF N 3.12.12-rt19-0.gcb6c4a2-rt #3
[ 365.164078] Hardware name: QCI QSSC-S4R/QSSC-S4R, BIOS QSSC-S4R.QCI.01.00.S013.032920111005 03/29/2011
[ 365.164091] 0000000000000001 ffff880a42ea7c30 ffffffff815367e6 ffffffff81a086c0
[ 365.164099] ffff880a42ea7c40 ffffffff8108919c ffff880a42ea7c60 ffffffff8153c24f
[ 365.164107] ffff880a42ea91f0 00000000ffffffe1 ffff880a42ea7c88 ffffffff81297ec0
[ 365.164108] Call Trace:
[ 365.164119] [] try_stack_unwind+0x191/0x1a0
[ 365.164127] [] dump_trace+0x92/0x360
[ 365.164133] [] show_trace_log_lvl+0x48/0x60
[ 365.164138] [] show_stack_log_lvl+0xd8/0x1d0
[ 365.164143] [] show_stack+0x20/0x50
[ 365.164153] [] dump_stack+0x54/0x9a
[ 365.164163] [] __might_sleep+0xfc/0x140
[ 365.164173] [] rt_spin_lock+0x1f/0x70
[ 365.164182] [] blk_mq_main_cpu_notify+0x20/0x70
[ 365.164191] [] notifier_call_chain+0x4c/0x70
[ 365.164201] [] __raw_notifier_call_chain+0x9/0x10
[ 365.164207] [] cpu_notify+0x1e/0x40
[ 365.164217] [] take_cpu_down+0x22/0x40
[ 365.164223] [] multi_cpu_stop+0xd6/0x120
[ 365.164229] [] cpu_stopper_thread+0xd7/0x1e0
[ 365.164235] [] smpboot_thread_fn+0x203/0x380
[ 365.164241] [] kthread+0xc8/0xd0
[ 365.164250] [] ret_from_fork+0x7c/0xb0
[ 365.164429] smpboot: CPU 1 is now offline

Signed-off-by: Mike Galbraith
Signed-off-by: Jens Axboe
---
 block/blk-mq-cpu.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 3146befb56aa..136ef8643bba 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -11,7 +11,7 @@
 #include "blk-mq.h"
 
 static LIST_HEAD(blk_mq_cpu_notify_list);
-static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock);
+static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
 
 static int blk_mq_main_cpu_notify(struct notifier_block *self,
         unsigned long action, void *hcpu)
@@ -19,12 +19,12 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
     unsigned int cpu = (unsigned long) hcpu;
     struct blk_mq_cpu_notifier *notify;
 
-    spin_lock(&blk_mq_cpu_notify_lock);
+    raw_spin_lock(&blk_mq_cpu_notify_lock);
 
     list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
         notify->notify(notify->data, action, cpu);
 
-    spin_unlock(&blk_mq_cpu_notify_lock);
+    raw_spin_unlock(&blk_mq_cpu_notify_lock);
     return NOTIFY_OK;
 }
 
@@ -32,16 +32,16 @@
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
     BUG_ON(!notifier->notify);
 
-    spin_lock(&blk_mq_cpu_notify_lock);
+    raw_spin_lock(&blk_mq_cpu_notify_lock);
     list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
-    spin_unlock(&blk_mq_cpu_notify_lock);
+    raw_spin_unlock(&blk_mq_cpu_notify_lock);
 }
 
 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
-    spin_lock(&blk_mq_cpu_notify_lock);
+    raw_spin_lock(&blk_mq_cpu_notify_lock);
     list_del(&notifier->list);
-    spin_unlock(&blk_mq_cpu_notify_lock);
+    raw_spin_unlock(&blk_mq_cpu_notify_lock);
 }
 
 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-- cgit v1.2.3


From 27fbf4e87c16bb3e40730890169a643a494b7c64 Mon Sep 17 00:00:00 2001
From: Shaohua Li
Date: Wed, 19 Feb 2014 20:20:21 +0800
Subject: blk-mq: add REQ_SYNC early

Add REQ_SYNC early, so rq_dispatched[] in blk_mq_rq_ctx_init is set
correctly.
Signed-off-by: Shaohua Li
Signed-off-by: Jens Axboe
---
 block/blk-mq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3c2804879a90..b1bcc619d0ea 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -860,6 +860,8 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
     ctx = blk_mq_get_ctx(q);
     hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
+    if (is_sync)
+        rw |= REQ_SYNC;
     trace_block_getrq(q, bio, rw);
     rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
     if (likely(rq))
-- cgit v1.2.3
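The ordering matters because rw_is_sync() samples the flags at init
time. A standalone sketch of the effect, in user-space C with made-up
flag values that mirror the logic rather than the kernel's actual bit
layout:

    #include <stdbool.h>
    #include <stdio.h>

    #define REQ_WRITE (1u << 0)    /* illustrative values only */
    #define REQ_SYNC  (1u << 1)

    /* approximates the kernel's rw_is_sync() of this era */
    static bool rw_is_sync(unsigned int rw_flags)
    {
        return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC);
    }

    int main(void)
    {
        unsigned long rq_dispatched[2] = { 0, 0 };
        unsigned int rw = REQ_WRITE;    /* a write that should count as sync */

        rw |= REQ_SYNC;                 /* the patch: set it before accounting */
        rq_dispatched[rw_is_sync(rw)]++;

        printf("async=%lu sync=%lu\n", rq_dispatched[0], rq_dispatched[1]);
        return 0;
    }

Without the rw |= REQ_SYNC line before the increment, the request would
land in the async bucket even though it dispatches as sync.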