From 797476b88bde2a6001f9552f383f147e58c1a330 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 18 Oct 2016 15:40:29 +0900 Subject: block: Add 'zoned' queue limit Add the zoned queue limit to indicate the zoning model of a block device. Defined values are 0 (BLK_ZONED_NONE) for regular block devices, 1 (BLK_ZONED_HA) for host-aware zoned block devices and 2 (BLK_ZONED_HM) for host-managed zoned block devices. The standard-defined drive-managed model is not defined here since these block devices do not provide any command for accessing zone information. Drive-managed devices will be reported as BLK_ZONED_NONE. The helper functions blk_queue_zoned_model and bdev_zoned_model return the zoned limit, and the functions blk_queue_is_zoned and bdev_is_zoned return a boolean for callers to test if a block device is zoned. The zoned attribute is also exported as a string to applications via sysfs. BLK_ZONED_NONE shows as "none", BLK_ZONED_HA as "host-aware" and BLK_ZONED_HM as "host-managed". Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Shaun Tancheff Tested-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c47c358ba052..f19e16bb43d1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -261,6 +261,15 @@ struct blk_queue_tag { #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) +/* + * Zoned block device models (zoned limit). + */ +enum blk_zoned_model { + BLK_ZONED_NONE, /* Regular block device */ + BLK_ZONED_HA, /* Host-aware zoned block device */ + BLK_ZONED_HM, /* Host-managed zoned block device */ +}; + struct queue_limits { unsigned long bounce_pfn; unsigned long seg_boundary_mask; @@ -290,6 +299,7 @@ struct queue_limits { unsigned char cluster; unsigned char discard_zeroes_data; unsigned char raid_partial_stripes_expensive; + enum blk_zoned_model zoned; }; struct request_queue { @@ -627,6 +637,23 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q) return q->limits.cluster; } +static inline enum blk_zoned_model +blk_queue_zoned_model(struct request_queue *q) +{ + return q->limits.zoned; +} + +static inline bool blk_queue_is_zoned(struct request_queue *q) +{ + switch (blk_queue_zoned_model(q)) { + case BLK_ZONED_HA: + case BLK_ZONED_HM: + return true; + default: + return false; + } +} + /* * We regard a request as sync, if either a read or a sync write */ @@ -1354,6 +1381,26 @@ static inline unsigned int bdev_write_same(struct block_device *bdev) return 0; } +static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_zoned_model(q); + + return BLK_ZONED_NONE; +} + +static inline bool bdev_is_zoned(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_is_zoned(q); + + return false; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ?
q->dma_alignment : 511; -- cgit v1.2.3
From 2d253440b5afb128d22ccdae812dde9ba77a2cca Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 18 Oct 2016 15:40:32 +0900 Subject: block: Define zoned block device operations Define REQ_OP_ZONE_REPORT and REQ_OP_ZONE_RESET for handling zones of host-managed and host-aware zoned block devices. With these two new operations, the total number of operations defined reaches 8 and still fits within the 3-bit definition of REQ_OP_BITS. Signed-off-by: Shaun Tancheff Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index cd395ecec99d..dd50dce89a80 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -243,6 +243,8 @@ enum req_op { REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ REQ_OP_WRITE_SAME, /* write same block many times */ REQ_OP_FLUSH, /* request for cache flush */ + REQ_OP_ZONE_REPORT, /* Get zone information */ + REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ }; #define REQ_OP_BITS 3 -- cgit v1.2.3
From 6a0cb1bc106fc07ce0443303bcdb7f7da5131e5c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 18 Oct 2016 15:40:33 +0900 Subject: block: Implement support for zoned block devices Implement zoned block device zone information reporting and reset. Zone information is reported as struct blk_zone. This implementation does not differentiate between host-aware and host-managed device models and is valid for both. Two functions are provided: blkdev_report_zones for discovering the zone configuration of a zoned block device, and blkdev_reset_zones for resetting the write pointer of sequential zones. The helper functions blk_queue_zone_size and bdev_zone_size are also provided for, as the names suggest, obtaining the zone size (in 512B sectors) of the zones of the device. Signed-off-by: Hannes Reinecke [Damien: * Removed the zone cache * Implement report zones operation based on earlier proposal by Shaun Tancheff ] Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Shaun Tancheff Tested-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f19e16bb43d1..252043f7cd2c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -24,6 +24,7 @@ #include #include #include +#include struct module; struct scsi_ioctl_command; @@ -302,6 +303,21 @@ struct queue_limits { enum blk_zoned_model zoned; }; +#ifdef CONFIG_BLK_DEV_ZONED + +struct blk_zone_report_hdr { + unsigned int nr_zones; + u8 padding[60]; +}; + +extern int blkdev_report_zones(struct block_device *bdev, + sector_t sector, struct blk_zone *zones, + unsigned int *nr_zones, gfp_t gfp_mask); +extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, + sector_t nr_sectors, gfp_t gfp_mask); + +#endif /* CONFIG_BLK_DEV_ZONED */ + struct request_queue { /* * Together with queue_head for cacheline sharing @@ -654,6 +670,11 @@ static inline bool blk_queue_is_zoned(struct request_queue *q) } } +static inline unsigned int blk_queue_zone_size(struct request_queue *q) +{ + return blk_queue_is_zoned(q) ?
q->limits.chunk_sectors : 0; +} + /* * We regard a request as sync, if either a read or a sync write */ @@ -1401,6 +1422,16 @@ static inline bool bdev_is_zoned(struct block_device *bdev) return false; } +static inline unsigned int bdev_zone_size(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return blk_queue_zone_size(q); + + return 0; +} + static inline int queue_dma_alignment(struct request_queue *q) { return q ? q->dma_alignment : 511; -- cgit v1.2.3
From 3ed05a987e0f63b21e634101e0b460d32f3581c3 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 18 Oct 2016 15:40:35 +0900 Subject: blk-zoned: implement ioctls Add the new BLKREPORTZONE and BLKRESETZONE ioctls for, respectively, obtaining the zone configuration of a zoned block device and resetting the write pointer of sequential zones of a zoned block device. The BLKREPORTZONE ioctl maps directly to a single call of the function blkdev_report_zones. The zone information result is passed as an array of struct blk_zone identical to the structure used internally for processing the REQ_OP_ZONE_REPORT operation. The BLKRESETZONE ioctl maps to a single call of the blkdev_reset_zones function. Signed-off-by: Shaun Tancheff Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 252043f7cd2c..90097dd8b8ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -316,6 +316,27 @@ extern int blkdev_report_zones(struct block_device *bdev, extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, sector_t nr_sectors, gfp_t gfp_mask); +extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); +extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); + +#else /* CONFIG_BLK_DEV_ZONED */ + +static inline int blkdev_report_zones_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} + +static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) +{ + return -ENOTTY; +} + #endif /* CONFIG_BLK_DEV_ZONED */ struct request_queue { -- cgit v1.2.3
From c4aebd0332da831a3403faf2035af45059ab6b7c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:09 +0200 Subject: block: remove bio_is_rw With the addition of the zoned operations the tests in this function became incorrect. But I think it's much better to just open code the allowed operations in the only caller anyway.
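For reference, a caller that still wants the old semantics can open-code the test with the two helpers that remain in bio.h; a minimal sketch (the function name is hypothetical, not the actual open-coded caller):

/*
 * Hypothetical open-coded equivalent of the removed bio_is_rw():
 * a bio is "rw" only if it carries data and its iterator actually advances.
 */
static inline bool my_bio_is_rw(struct bio *bio)
{
	return bio_has_data(bio) && !bio_no_advance_iter(bio);
}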
Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 97cb48f03dc7..87ce64dafb93 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -83,17 +83,6 @@ static inline bool bio_no_advance_iter(struct bio *bio) bio_op(bio) == REQ_OP_WRITE_SAME; } -static inline bool bio_is_rw(struct bio *bio) -{ - if (!bio_has_data(bio)) - return false; - - if (bio_no_advance_iter(bio)) - return false; - - return true; -} - static inline bool bio_mergeable(struct bio *bio) { if (bio->bi_opf & REQ_NOMERGE_FLAGS) -- cgit v1.2.3
From bd1c1c21741cbd6e894960bcbc8b36f719590064 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:10 +0200 Subject: block: REQ_NOMERGE is common to the bio and request So move it into the common section of the request flags. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dd50dce89a80..b54142534793 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -158,6 +158,7 @@ enum rq_flag_bits { __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ + __REQ_NOMERGE, /* don't touch this for merging */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ @@ -171,7 +172,6 @@ enum rq_flag_bits { /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ __REQ_DONTPREP, /* don't call prep for this one */ __REQ_QUEUED, /* uses queueing */ -- cgit v1.2.3
From 188bd2b16b3c6ea87a90df20f33db0adcdb75f0c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:11 +0200 Subject: block: move REQ_RAHEAD to common flags The information that an I/O is a read-ahead can be useful for drivers. In fact the NVMe driver already checks it, even if it won't ever be set at the moment. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index b54142534793..44f9bca332e5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -163,9 +163,9 @@ enum rq_flag_bits { __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ /* bio only flags */ - __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again.
*/ @@ -205,7 +205,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE) + REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_RAHEAD) #define REQ_CLONE_MASK REQ_COMMON_MASK /* This mask is used for both bio and request merge checking */ -- cgit v1.2.3
From 8d2bbd4c8236e9e38e6b36ac9e2c54fdcfe5b335 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:12 +0200 Subject: block: replace REQ_THROTTLED with a bio flag It's the last bio-only REQ_* flag, and we have space for it in the bio bi_flags field. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 44f9bca332e5..6df722de2e22 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -119,6 +119,8 @@ struct bio { #define BIO_QUIET 6 /* Make BIO Quiet */ #define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ #define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ +#define BIO_THROTTLED 9 /* This bio has already been subjected to + * throttling rules. Don't do it again. */ /* * Flags starting here get preserved by bio_reset() - this includes @@ -165,10 +167,6 @@ enum rq_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ - /* bio only flags */ - __REQ_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ - /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ @@ -213,8 +211,6 @@ enum rq_flag_bits { (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_THROTTLED (1ULL << __REQ_THROTTLED) - #define REQ_SORTED (1ULL << __REQ_SORTED) #define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER) #define REQ_FUA (1ULL << __REQ_FUA) -- cgit v1.2.3
From e806402130c9c494e22c73ae9ead4e79d2a5811c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:13 +0200 Subject: block: split out request-only flags into a new namespace A lot of the REQ_* flags are only used on struct requests, and only of use to the block layer and a few drivers that dig into struct request internals. This patch adds a new req_flags_t rq_flags field to struct request for them, and thus dramatically shrinks the number of common request flags. It also removes the unfortunate situation where we have to fit the fields from the same enum into 32 bits for struct bio and 64 bits for struct request.
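To make the split concrete, a hedged sketch of what a driver's prepare path might look like afterwards, using the RQF_* namespace introduced in the blkdev.h hunk below (my_driver_prep and my_driver_do_fua are hypothetical names):

/*
 * Sketch (hypothetical driver code): request-private state now lives in
 * rq->rq_flags (RQF_*), while I/O semantics such as FUA stay in the
 * shared cmd_flags (REQ_*).
 */
static void my_driver_prep(struct request *rq)
{
	if (rq->rq_flags & RQF_DONTPREP)	/* already prepared */
		return;
	if (rq->cmd_flags & REQ_FUA)		/* common flag, also valid on bios */
		my_driver_do_fua(rq);		/* hypothetical helper */
	rq->rq_flags |= RQF_DONTPREP;
}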
Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 39 +------------------------------------ include/linux/blkdev.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 49 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6df722de2e22..ec69a8fe3b29 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,26 +167,6 @@ enum rq_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ - /* request only flags */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests and also - for requests for which the SCSI "quiesce" - state must be ignored. */ - __REQ_ALLOCED, /* request came from our alloc pool */ - __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_FLUSH_SEQ, /* request for flush sequence */ - __REQ_IO_STAT, /* account I/O stat */ - __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_PM, /* runtime pm request */ - __REQ_HASHED, /* on IO scheduler merge hash */ - __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NR_BITS, /* stops here */ }; @@ -208,29 +188,12 @@ enum rq_flag_bits { /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ) + (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_SORTED (1ULL << __REQ_SORTED) -#define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_STARTED (1ULL << __REQ_STARTED) -#define REQ_DONTPREP (1ULL << __REQ_DONTPREP) -#define REQ_QUEUED (1ULL << __REQ_QUEUED) -#define REQ_ELVPRIV (1ULL << __REQ_ELVPRIV) -#define REQ_FAILED (1ULL << __REQ_FAILED) -#define REQ_QUIET (1ULL << __REQ_QUIET) -#define REQ_PREEMPT (1ULL << __REQ_PREEMPT) -#define REQ_ALLOCED (1ULL << __REQ_ALLOCED) -#define REQ_COPY_USER (1ULL << __REQ_COPY_USER) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) -#define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) -#define REQ_IO_STAT (1ULL << __REQ_IO_STAT) -#define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) -#define REQ_PM (1ULL << __REQ_PM) -#define REQ_HASHED (1ULL << __REQ_HASHED) -#define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) enum req_op { REQ_OP_READ, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90097dd8b8ed..b4415feac679 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -78,6 +78,50 @@ enum rq_cmd_type_bits { REQ_TYPE_DRV_PRIV, /* driver defined types from here */ }; +/* + * request flags */ +typedef __u32 __bitwise req_flags_t; + +/* elevator knows about this request */ +#define RQF_SORTED ((__force req_flags_t)(1 << 0)) +/* drive already may have started this one */ +#define RQF_STARTED ((__force req_flags_t)(1 << 1)) +/* uses tagged queueing */ +#define RQF_QUEUED ((__force req_flags_t)(1 << 2)) +/* may not be passed by ioscheduler */ +#define RQF_SOFTBARRIER ((__force 
req_flags_t)(1 << 3)) /* request for flush sequence */ #define RQF_FLUSH_SEQ ((__force req_flags_t)(1 << 4)) /* merge of different types, fail separately */ #define RQF_MIXED_MERGE ((__force req_flags_t)(1 << 5)) /* track inflight for MQ */ #define RQF_MQ_INFLIGHT ((__force req_flags_t)(1 << 6)) /* don't call prep for this one */ #define RQF_DONTPREP ((__force req_flags_t)(1 << 7)) /* set for "ide_preempt" requests and also for requests for which the SCSI "quiesce" state must be ignored. */ #define RQF_PREEMPT ((__force req_flags_t)(1 << 8)) /* contains copies of user pages */ #define RQF_COPY_USER ((__force req_flags_t)(1 << 9)) /* vaguely specified driver internal error. Ignored by the block layer */ #define RQF_FAILED ((__force req_flags_t)(1 << 10)) /* don't warn about errors */ #define RQF_QUIET ((__force req_flags_t)(1 << 11)) /* elevator private data attached */ #define RQF_ELVPRIV ((__force req_flags_t)(1 << 12)) /* account I/O stat */ #define RQF_IO_STAT ((__force req_flags_t)(1 << 13)) /* request came from our alloc pool */ #define RQF_ALLOCED ((__force req_flags_t)(1 << 14)) /* runtime pm request */ #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ (RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ) #define BLK_MAX_CDB 16 /* @@ -99,6 +143,7 @@ struct request { int cpu; unsigned cmd_type; u64 cmd_flags; + req_flags_t rq_flags; unsigned long atomic_flags; /* the following two fields are internal, NEVER access directly */ @@ -648,7 +693,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) REQ_FAILFAST_DRIVER)) #define blk_account_rq(rq) \ - (((rq)->cmd_flags & REQ_STARTED) && \ + (((rq)->rq_flags & RQF_STARTED) && \ ((rq)->cmd_type == REQ_TYPE_FS)) #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) @@ -740,6 +785,8 @@ static inline bool rq_mergeable(struct request *rq) if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; + if (rq->rq_flags & RQF_NOMERGE_FLAGS) + return false; return true; } -- cgit v1.2.3
From ef295ecf090d3e86e5b742fc6ab34f1122a43773 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Oct 2016 08:48:16 -0600 Subject: block: better op and flags encoding Now that we don't need the common flags to overflow outside the range of a 32-bit type we can encode them the same way for both the bio and request fields. In addition, this allows us to place the operation first (and make some room for more ops while we're at it) and to stop having to shift around the operation values. It also allows passing around only one value in the block layer instead of two (and eventually also in the file systems, but we can do that later) and thus clean up a lot of code. Last but not least this allows decreasing the size of the cmd_flags field in struct request to 32 bits. Various functions passing this value could also be updated, but I'd like to avoid the churn for now.
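With op and flags sharing one 32-bit value, decoding either half becomes a plain mask, as the bio_op()/req_op() macros in the hunks below show; a small hedged sketch of a consumer (the function name is hypothetical):

/* Sketch: reading both halves of the unified op+flags encoding. */
static void my_inspect(struct bio *bio)
{
	unsigned int op    = bio->bi_opf & REQ_OP_MASK;		/* low 8 bits: REQ_OP_* */
	unsigned int flags = bio->bi_opf & ~REQ_OP_MASK;	/* upper 24 bits: REQ_* */

	if (op == REQ_OP_WRITE && (flags & REQ_SYNC))
		pr_debug("synchronous write\n");
}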
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 11 +++--- include/linux/blk_types.h | 83 ++++++++++++++++++++------------------------ include/linux/blkdev.h | 26 ++------------ include/linux/blktrace_api.h | 2 +- include/linux/dm-io.h | 2 +- include/linux/elevator.h | 4 +-- 6 files changed, 48 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3bf5d33800ab..ddaf28d0988f 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -581,15 +581,14 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat - * @op: REQ_OP - * @op_flags: rq_flag_bits + * @op: REQ_OP and flags * @val: value to add * * Add @val to @rwstat. The counters are chosen according to @rw. The * caller is responsible for synchronizing calls to this function. */ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - int op, int op_flags, uint64_t val) + unsigned int op, uint64_t val) { struct percpu_counter *cnt; @@ -600,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); - if (op_flags & REQ_SYNC) + if (op & REQ_SYNC) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; @@ -705,9 +704,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_opf, + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_opf, 1); + blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } rcu_read_unlock(); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ec69a8fe3b29..dca972d67548 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -88,24 +88,6 @@ struct bio { struct bio_vec bi_inline_vecs[0]; }; -#define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) -#define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) -#define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) - -#define bio_set_op_attrs(bio, op, op_flags) do { \ - if (__builtin_constant_p(op)) \ - BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS)); \ - else \ - WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS)); \ - if (__builtin_constant_p(op_flags)) \ - BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ - else \ - WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ - (bio)->bi_opf = bio_flags(bio); \ - (bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT); \ - (bio)->bi_opf |= (op_flags); \ -} while (0) - #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) /* @@ -147,26 +129,40 @@ struct bio { #endif /* CONFIG_BLOCK */ /* - * Request flags. For use in the cmd_flags field of struct request, and in - * bi_opf of struct bio. Note that some flags are only valid in either one. + * Operations and flags common to the bio and request structures. + * We use 8 bits for encoding the operation, and the remaining 24 for flags. 
*/ -enum rq_flag_bits { - /* common flags */ - __REQ_FAILFAST_DEV, /* no driver retries of device errors */ +#define REQ_OP_BITS 8 +#define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) +#define REQ_FLAG_BITS 24 + +enum req_opf { + REQ_OP_READ, + REQ_OP_WRITE, + REQ_OP_DISCARD, /* request to discard sectors */ + REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ + REQ_OP_WRITE_SAME, /* write same block many times */ + REQ_OP_FLUSH, /* request for cache flush */ + REQ_OP_ZONE_REPORT, /* Get zone information */ + REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ + + REQ_OP_LAST, +}; + +enum req_flag_bits { + __REQ_FAILFAST_DEV = /* no driver retries of device errors */ + REQ_OP_BITS, __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ - __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ - __REQ_NOMERGE, /* don't touch this for merging */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ - __REQ_NR_BITS, /* stops here */ }; @@ -176,37 +172,32 @@ enum rq_flag_bits { #define REQ_SYNC (1ULL << __REQ_SYNC) #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) +#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) #define REQ_NOIDLE (1ULL << __REQ_NOIDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) +#define REQ_FUA (1ULL << __REQ_FUA) +#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) +#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -#define REQ_COMMON_MASK \ - (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_RAHEAD) -#define REQ_CLONE_MASK REQ_COMMON_MASK -/* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA) -#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) -#define REQ_FUA (1ULL << __REQ_FUA) -#define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) +#define bio_op(bio) \ + ((bio)->bi_opf & REQ_OP_MASK) +#define req_op(req) \ + ((req)->cmd_flags & REQ_OP_MASK) -enum req_op { - REQ_OP_READ, - REQ_OP_WRITE, - REQ_OP_DISCARD, /* request to discard sectors */ - REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ - REQ_OP_WRITE_SAME, /* write same block many times */ - REQ_OP_FLUSH, /* request for cache flush */ - REQ_OP_ZONE_REPORT, /* Get zone information */ - REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ -}; +/* obsolete, don't use in new code */ +#define bio_set_op_attrs(bio, op, op_flags) \ + ((bio)->bi_opf |= (op | op_flags)) -#define REQ_OP_BITS 3 +static inline bool op_is_sync(unsigned int op) +{ + return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); +} typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b4415feac679..8396da2bb698 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -142,7 +142,7 @@ struct request { int cpu; unsigned cmd_type; - u64 cmd_flags; + unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; unsigned long atomic_flags; @@ -244,20 +244,6 @@ struct request { struct request *next_rq; }; -#define 
REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS) -#define req_op(req) ((req)->cmd_flags >> REQ_OP_SHIFT) - -#define req_set_op(req, op) do { \ - WARN_ON(op >= (1 << REQ_OP_BITS)); \ - (req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1); \ - (req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT); \ -} while (0) - -#define req_set_op_attrs(req, op, flags) do { \ - req_set_op(req, op); \ - (req)->cmd_flags |= flags; \ -} while (0) - static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -741,17 +727,9 @@ static inline unsigned int blk_queue_zone_size(struct request_queue *q) return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0; } -/* - * We regard a request as sync, if either a read or a sync write - */ -static inline bool rw_is_sync(int op, unsigned int rw_flags) -{ - return op == REQ_OP_READ || (rw_flags & REQ_SYNC); -} - static inline bool rq_is_sync(struct request *rq) { - return rw_is_sync(req_op(rq), rq->cmd_flags); + return op_is_sync(rq->cmd_flags); } static inline bool blk_rl_full(struct request_list *rl, bool sync) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index cceb72f9e29f..e417f080219a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq) } extern void blk_dump_cmd(char *buf, struct request *rq); -extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes); +extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index b91b023deffb..a52c6580cc9a 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -58,7 +58,7 @@ struct dm_io_notify { struct dm_io_client; struct dm_io_request { int bi_op; /* REQ_OP */ - int bi_op_flags; /* rq_flag_bits */ + int bi_op_flags; /* req_flag_bits */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index e7f358d2e5fc..f219c9aed360 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -30,7 +30,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, int, int); +typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int); typedef void (elevator_init_icq_fn) (struct io_cq *); typedef void (elevator_exit_icq_fn) (struct io_cq *); @@ -139,7 +139,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); -extern int elv_may_queue(struct request_queue *, int, int); +extern int elv_may_queue(struct request_queue *, unsigned int); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *q, struct request *rq, struct bio *bio, gfp_t gfp_mask); -- cgit v1.2.3 From 87374179c535a98337569904727aa02f960fe79e Mon Sep 17 00:00:00 2001 From: 
Christoph Hellwig Date: Thu, 20 Oct 2016 15:12:15 +0200 Subject: block: add a proper block layer data direction encoding Currently the block layer op_is_write, bio_data_dir and rq_data_dir helpers treat every operation that is not a READ as a data out operation. This worked for a surprisingly long time, but the new REQ_OP_ZONE_REPORT operation actually adds a second operation that reads data from the device. Surprisingly nothing critical relied on this direction, but this might be a good opportunity to properly fix this issue up. We take a little inspiration and use the least significant bit of the operation number to encode the data direction, which just requires us to renumber the operations to fit this scheme. Signed-off-by: Christoph Hellwig Reviewed-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 38 ++++++++++++++++++++++++++++++-------- include/linux/fs.h | 5 ----- 2 files changed, 30 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dca972d67548..3fa62cabe8d2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -131,20 +131,37 @@ struct bio { /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. + * + * The least significant bit of the operation number indicates the data + * transfer direction: + * + * - if the least significant bit is set transfers are TO the device + * - if the least significant bit is not set transfers are FROM the device + * + * If an operation does not transfer data the least significant bit has no + * meaning. */ #define REQ_OP_BITS 8 #define REQ_OP_MASK ((1 << REQ_OP_BITS) - 1) #define REQ_FLAG_BITS 24 enum req_opf { - REQ_OP_READ, - REQ_OP_WRITE, - REQ_OP_DISCARD, /* request to discard sectors */ - REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ - REQ_OP_WRITE_SAME, /* write same block many times */ - REQ_OP_FLUSH, /* request for cache flush */ - REQ_OP_ZONE_REPORT, /* Get zone information */ - REQ_OP_ZONE_RESET, /* Reset a zone write pointer */ + /* read sectors from the device */ + REQ_OP_READ = 0, + /* write sectors to the device */ + REQ_OP_WRITE = 1, + /* flush the volatile write cache */ + REQ_OP_FLUSH = 2, + /* discard sectors */ + REQ_OP_DISCARD = 3, + /* get zone information */ + REQ_OP_ZONE_REPORT = 4, + /* securely erase sectors */ + REQ_OP_SECURE_ERASE = 5, + /* reset a zone write pointer */ + REQ_OP_ZONE_RESET = 6, + /* write the same sector many times */ + REQ_OP_WRITE_SAME = 7, REQ_OP_LAST, }; @@ -194,6 +211,11 @@ enum req_flag_bits { #define bio_set_op_attrs(bio, op, op_flags) \ ((bio)->bi_opf |= (op | op_flags)) +static inline bool op_is_write(unsigned int op) +{ + return (op & 1); +} + static inline bool op_is_sync(unsigned int op) { return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 16d2b6e874d6..e3e878f12b25 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2499,11 +2499,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -static inline bool op_is_write(unsigned int op) -{ - return op == REQ_OP_READ ?
false : true; -} - /* * return data direction, READ or WRITE */ -- cgit v1.2.3 From d71d9ae14a0942fae519d890a743b12679e3d153 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:03 -0600 Subject: blk-cgroup: use op_is_sync to check for synchronous requests Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index ddaf28d0988f..01b62e7bac74 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -599,7 +599,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); - if (op & REQ_SYNC) + if (op_is_sync(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; -- cgit v1.2.3 From 6f6b29171a192e84b666c816e49d2175afbbb09f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:07 -0600 Subject: block: don't use REQ_SYNC in the READ_SYNC definition Reads are synchronous per definition, don't add another flag for it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e3e878f12b25..5e0078fceed7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -196,7 +196,7 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC REQ_SYNC +#define READ_SYNC 0 #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC #define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) -- cgit v1.2.3 From b685d3d65ac791406e0dfd8779cc9b3707fea5a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:08 -0600 Subject: block: treat REQ_FUA and REQ_PREFLUSH as synchronous MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of requiring everyone to specify the REQ_SYNC flag aѕ well. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 +++++++- include/linux/fs.h | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3fa62cabe8d2..107d23d18096 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -216,9 +216,15 @@ static inline bool op_is_write(unsigned int op) return (op & 1); } +/* + * Reads are always treated as synchronous, as are requests with the FUA or + * PREFLUSH flag. Other operations may be marked as synchronous using the + * REQ_SYNC flag. 
+ */ static inline bool op_is_sync(unsigned int op) { - return (op & REQ_OP_MASK) == REQ_OP_READ || (op & REQ_SYNC); + return (op & REQ_OP_MASK) == REQ_OP_READ || + (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH)); } typedef unsigned int blk_qc_t; diff --git a/include/linux/fs.h b/include/linux/fs.h index 5e0078fceed7..ccedccb28ec8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -199,9 +199,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ_SYNC 0 #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) -#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_FLUSH (REQ_NOIDLE | REQ_PREFLUSH) +#define WRITE_FUA (REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what * has been changed! -- cgit v1.2.3
From a2b809672ee6fcb4d5756ea815725b3dbaea654e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:09 -0600 Subject: block: replace REQ_NOIDLE with REQ_IDLE Noidle should be the default for writes as seen by all the compound definitions in fs.h using it. In fact only direct I/O really should be using NOIDLE, so turn the whole flag around to get the defaults right, which will make our life much easier especially once the WRITE_* defines go away. This assumes all the existing "raw" users of REQ_SYNC for writes want noidle behavior, which seems to be spot on from a quick audit. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 ++-- include/linux/fs.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 107d23d18096..63b750a3b165 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -175,7 +175,7 @@ enum req_flag_bits { __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_IDLE, /* anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ @@ -190,7 +190,7 @@ enum req_flag_bits { #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) -#define REQ_NOIDLE (1ULL << __REQ_NOIDLE) +#define REQ_IDLE (1ULL << __REQ_IDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) diff --git a/include/linux/fs.h b/include/linux/fs.h index ccedccb28ec8..46a74209917f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -197,11 +197,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define WRITE REQ_OP_WRITE #define READ_SYNC 0 -#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) -#define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_NOIDLE | REQ_PREFLUSH) -#define WRITE_FUA (REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) +#define WRITE_SYNC REQ_SYNC +#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) +#define WRITE_FLUSH REQ_PREFLUSH +#define WRITE_FUA REQ_FUA +#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) /* * Attribute flags.
These should be or-ed together to figure out what -- cgit v1.2.3 From 70fd76140a6cb63262bd47b68d57b42e889c10ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:10 -0600 Subject: block,fs: use REQ_* flags directly Remove the WRITE_* and READ_SYNC wrappers, and just use the flags directly. Where applicable this also drops usage of the bio_set_op_attrs wrapper. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 46a74209917f..7a1b78ab7c15 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,58 +151,11 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -/* - * The below are the various read and write flags that we support. Some of - * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. They should be used along with a req_op. - * Terminology: - * - * The block layer uses device plugging to defer IO a little bit, in - * the hope that we will see more IO very shortly. This increases - * coalescing of adjacent IO and thus reduces the number of IOs we - * have to send to the device. It also allows for better queuing, - * if the IO isn't mergeable. If the caller is going to be waiting - * for the IO, then he must ensure that the device is unplugged so - * that the IO is dispatched to the driver. - * - * All IO is handled async in Linux. This is fine for background - * writes, but for reads or writes that someone waits for completion - * on, we want to notify the block layer and IO scheduler so that they - * know about it. That allows them to make better scheduling - * decisions. So when the below references 'sync' and 'async', it - * is referencing this priority hint. - * - * With that in mind, the available types are: - * - * READ A normal read operation. Device will be plugged. - * READ_SYNC A synchronous read. Device is not plugged, caller can - * immediately wait on this read without caring about - * unplugging. - * WRITE A normal async write. Device will be plugged. - * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down - * the hint that someone will be waiting on this IO - * shortly. The write equivalent of READ_SYNC. - * WRITE_ODIRECT Special case write for O_DIRECT only. - * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. - * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on - * non-volatile media on completion. - * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded - * by a cache flush and data is guaranteed to be on - * non-volatile media on completion. - * - */ #define RW_MASK REQ_OP_WRITE #define READ REQ_OP_READ #define WRITE REQ_OP_WRITE -#define READ_SYNC 0 -#define WRITE_SYNC REQ_SYNC -#define WRITE_ODIRECT (REQ_SYNC | REQ_IDLE) -#define WRITE_FLUSH REQ_PREFLUSH -#define WRITE_FUA REQ_FUA -#define WRITE_FLUSH_FUA (REQ_PREFLUSH | REQ_FUA) - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! 
-- cgit v1.2.3 From d38499530e5f170d30f32d3841fade204e63081d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:11 -0600 Subject: fs: decouple READ and WRITE from the block layer ops Move READ and WRITE to kernel.h and don't define them in terms of block layer ops; they are our generic data direction indicators these days and have no more resemblance with the block layer ops. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 ++++++ include/linux/fs.h | 13 ------------- include/linux/kernel.h | 4 ++++ include/linux/uio.h | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 87ce64dafb93..fe9a17017608 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -62,6 +62,12 @@ #define bio_sectors(bio) ((bio)->bi_iter.bi_size >> 9) #define bio_end_sector(bio) ((bio)->bi_iter.bi_sector + bio_sectors((bio))) +/* + * Return the data direction, READ or WRITE. + */ +#define bio_data_dir(bio) \ + (op_is_write(bio_op(bio)) ? WRITE : READ) + /* * Check whether this bio carries any data or not. A NULL bio is allowed. */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a1b78ab7c15..0ad36e0c7fa7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -151,11 +151,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, */ #define CHECK_IOVEC_ONLY -1 -#define RW_MASK REQ_OP_WRITE - -#define READ REQ_OP_READ -#define WRITE REQ_OP_WRITE - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -2452,14 +2447,6 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -/* - * return data direction, READ or WRITE - */ -static inline int bio_data_dir(struct bio *bio) -{ - return op_is_write(bio_op(bio)) ? WRITE : READ; -} - extern void check_disk_size_change(struct gendisk *disk, struct block_device *bdev); extern int revalidate_disk(struct gendisk *); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index bc6ed52a39b9..01b6b460c34d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -50,6 +50,10 @@ #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) +/* generic data direction definitions */ +#define READ 0 +#define WRITE 1 + #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) #define u64_to_user_ptr(x) ( \ diff --git a/include/linux/uio.h b/include/linux/uio.h index 6e22b544d039..d5aba1512b8b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -125,7 +125,7 @@ static inline bool iter_is_iovec(const struct iov_iter *i) * * The ?: is just for type safety. */ -#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & RW_MASK) +#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE)) /* * Cap the iov_iter by given limit; note that the second argument is -- cgit v1.2.3 From 1e3914d4cf4e14653b7917b0e965217465cb7a9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:12 -0600 Subject: block, fs: move submit_bio to bio.h This is where all the other bio operations live, so users must include bio.h anyway. 
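With the direction helpers and submit_bio() now both in bio.h, a block consumer needs only that one header; a minimal hedged sketch (my_submit is a hypothetical name):

#include <linux/bio.h>

/* Sketch: classify and submit a bio using bio.h alone. */
static blk_qc_t my_submit(struct bio *bio)
{
	if (bio_data_dir(bio) == WRITE)		/* READ/WRITE now come from kernel.h */
		bio->bi_opf |= REQ_SYNC;	/* e.g. treat our writes as synchronous */
	return submit_bio(bio);
}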
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/fs.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fe9a17017608..5c604b4914bf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -404,6 +404,8 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } +extern blk_qc_t submit_bio(struct bio *); + extern void bio_endio(struct bio *); static inline void bio_io_error(struct bio *bio) diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ad36e0c7fa7..5b0a9b77534d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2717,7 +2717,6 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern blk_qc_t submit_bio(struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); -- cgit v1.2.3
From 2f8b544477e627a42e66902e948d87f86554aeca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:13 -0600 Subject: block,fs: untangle fs.h and blk_types.h Nothing in fs.h should require blk_types.h to be included. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- include/linux/swap.h | 1 + include/linux/writeback.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5b0a9b77534d..8533e9d59c29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -38,6 +37,7 @@ struct backing_dev_info; struct bdi_writeback; +struct bio; struct export_operations; struct hd_geometry; struct iovec; diff --git a/include/linux/swap.h b/include/linux/swap.h index a56523cefb9b..3a6aebc23001 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,6 +11,7 @@ #include #include #include +#include #include struct notifier_block; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 797100e10010..e4c38703bf4e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,6 +10,8 @@ #include #include +struct bio; + DECLARE_PER_CPU(int, dirty_throttle_leaks); /* -- cgit v1.2.3
From 9f08217120568afdfb59973a89a675e649c0096d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:15 -0600 Subject: ceph: don't include blk_types.h in messenger.h The file only needs the struct bvec_iter declaration, which is available from bvec.h. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/ceph/messenger.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 8dbd7879fdc6..67bcef2ecddb 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -1,7 +1,7 @@ #ifndef __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H -#include +#include #include #include #include -- cgit v1.2.3
From be297968da22cf40c9c419df51e71ba8856a2ec2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:16 -0600 Subject: mm: only include blk_types in swap.h if CONFIG_SWAP is enabled It's only needed for the CONFIG_SWAP-only use of bio_end_io_t. Because CONFIG_SWAP implies CONFIG_BLOCK, this will allow us to drop some ifdefs in blk_types.h.
Instead we'll need to add a few explicit includes that were implicit before, though. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/swap.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 3a6aebc23001..bfee1af1f54f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -11,7 +11,6 @@ #include #include #include -#include #include struct notifier_block; @@ -352,6 +351,9 @@ extern int kswapd_run(int nid); extern void kswapd_stop(int nid); #ifdef CONFIG_SWAP + +#include /* for bio_end_io_t */ + /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -- cgit v1.2.3 From 7281b4526cefc898d180850b54d1369f38c6b202 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2016 07:40:17 -0600 Subject: block: remove the CONFIG_BLOCK ifdef in blk_types.h Now that we have a separate header for struct bio_vec there is absolutely no excuse for including this header from non-block I/O code. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 63b750a3b165..bb921028e7c5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,7 +17,6 @@ struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); -#ifdef CONFIG_BLOCK /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -126,8 +125,6 @@ struct bio { #define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) -#endif /* CONFIG_BLOCK */ - /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. -- cgit v1.2.3 From 1d796d6a9641fbfcd90fcfaf6fb4894a13d0304f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 09:52:57 -0600 Subject: block: add REQ_BACKGROUND This adds a new request flag, REQ_BACKGROUND, that callers can use to tell the block layer that this is background (non-urgent) IO. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bb921028e7c5..562ac46cb790 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -177,6 +177,7 @@ enum req_flag_bits { __REQ_FUA, /* forced unit access */ __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_BACKGROUND, /* background IO */ __REQ_NR_BITS, /* stops here */ }; @@ -192,6 +193,7 @@ enum req_flag_bits { #define REQ_FUA (1ULL << __REQ_FUA) #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) +#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From 7637241e651ec36e409412869f986dd5f097735f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 10:00:38 -0600 Subject: writeback: add wbc_to_write_flags() Add wbc_to_write_flags(), which returns the write modifier flags to use, based on a struct writeback_control. 
No functional changes in this patch, but it prepares us for factoring other wbc fields for write type. Signed-off-by: Jens Axboe Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig --- include/linux/writeback.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index e4c38703bf4e..50c96ee8108f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -9,6 +9,7 @@ #include #include #include +#include struct bio; @@ -102,6 +103,14 @@ struct writeback_control { #endif }; +static inline int wbc_to_write_flags(struct writeback_control *wbc) +{ + if (wbc->sync_mode == WB_SYNC_ALL) + return REQ_SYNC; + + return 0; +} + /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global -- cgit v1.2.3
From 13edd5e7315a26b448c5f7f33fc7721b1e0c17ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 1 Nov 2016 10:01:35 -0600 Subject: writeback: mark background writeback as such If we're doing background-type writes, then use the appropriate background write flags for that. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 50c96ee8108f..c78f9f0920b5 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -107,6 +107,8 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc) { if (wbc->sync_mode == WB_SYNC_ALL) return REQ_SYNC; + else if (wbc->for_kupdate || wbc->for_background) + return REQ_BACKGROUND; return 0; } -- cgit v1.2.3
From 2cefe4dbaadf83b236caab46705b4b5a4958e3b6 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 31 Oct 2016 11:59:24 -0600 Subject: block: add bio_iov_iter_get_pages() This is a helper that pins down a range from an iov_iter and adds it to a bio without requiring a separate memory allocation for the page array. It will be used for upcoming direct I/O implementations for block devices and iomap based file systems. Signed-off-by: Kent Overstreet [hch: ported to the iov_iter interface, renamed and added comments. All blame should be directed to me and all fame should go to Kent after this!] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5c604b4914bf..d367cd37a7f7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,6 +427,7 @@ void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, const struct iov_iter *, gfp_t); -- cgit v1.2.3
From fd00144301d64f1742541a3c5e64cd1c51f39c55 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:19:37 -0700 Subject: blk-mq: Introduce blk_mq_queue_stopped() The function blk_queue_stopped() allows one to test whether or not a traditional request queue has been stopped. Introduce a helper function that allows block drivers to easily query whether or not one or more hardware contexts of a blk-mq queue have been stopped.
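A hedged sketch of how a driver might use the new helper (the function name is hypothetical; blk_mq_queue_stopped() and blk_mq_start_hw_queues() are existing blk-mq.h declarations):

/*
 * Sketch (hypothetical driver code): restart dispatch only if at least
 * one hardware context of this queue is currently stopped.
 */
static void my_driver_resume(struct request_queue *q)
{
	if (blk_mq_queue_stopped(q))
		blk_mq_start_hw_queues(q);
}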
Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 535ab2e13d2e..aa930009fcd3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,6 +223,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq, int error); +bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -- cgit v1.2.3 From 9b7dd572cc439fa92e120290eb74d0295567c5a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:20:49 -0700 Subject: blk-mq: Remove blk_mq_cancel_requeue_work() Since blk_mq_requeue_work() no longer restarts stopped queues, canceling requeue work is no longer needed to prevent a stopped queue from being restarted. Hence remove this function. Signed-off-by: Bart Van Assche Cc: Mike Snitzer Cc: Keith Busch Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index aa930009fcd3..a85a20f80aaa 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -217,7 +217,6 @@ void __blk_mq_end_request(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); -void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); -- cgit v1.2.3 From 6a83e74d214a47a1371cd2e6a783264fcba7d428 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 2 Nov 2016 10:09:51 -0600 Subject: blk-mq: Introduce blk_mq_quiesce_queue() blk_mq_quiesce_queue() waits until ongoing .queue_rq() invocations have finished. This function does *not* wait until all outstanding requests have finished (this means invocation of request.end_io()). The algorithm used by blk_mq_quiesce_queue() is as follows: * Hold either an RCU read lock or an SRCU read lock around .queue_rq() calls. The former is used if .queue_rq() does not block and the latter if .queue_rq() may block. * blk_mq_quiesce_queue() first calls blk_mq_stop_hw_queues() followed by synchronize_srcu() or synchronize_rcu(). The latter call waits for .queue_rq() invocations that started before blk_mq_quiesce_queue() was called. * The blk_mq_hctx_stopped() calls that control whether or not .queue_rq() will be called are made with the (S)RCU read lock held. This is necessary to avoid race conditions against blk_mq_quiesce_queue().
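Reduced to a sketch (simplified from the blk-mq implementation this describes), the two sides of the scheme look like:

    /* Dispatch side: .queue_rq() runs under the (S)RCU read lock. */
    srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
    if (!blk_mq_hctx_stopped(hctx))
            ret = q->mq_ops->queue_rq(hctx, &bd);
    srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);

    /* Quiesce side: stop the queues, then wait out the readers. */
    blk_mq_stop_hw_queues(q);
    synchronize_srcu(&hctx->queue_rq_srcu);
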
Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Johannes Thumshirn Reviewed-by: Sagi Grimberg Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ include/linux/blkdev.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a85a20f80aaa..ed20ac74c62a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -3,6 +3,7 @@ #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -35,6 +36,8 @@ struct blk_mq_hw_ctx { struct blk_mq_tags *tags; + struct srcu_struct queue_rq_srcu; + unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8396da2bb698..13d893a69b46 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -918,6 +918,7 @@ extern void __blk_run_queue(struct request_queue *q); extern void __blk_run_queue_uncond(struct request_queue *q); extern void blk_run_queue(struct request_queue *); extern void blk_run_queue_async(struct request_queue *q); +extern void blk_mq_quiesce_queue(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3 From 2b053aca76b48e681be57b34ca3a8c2c10b275c5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 28 Oct 2016 17:21:41 -0700 Subject: blk-mq: Add a kick_requeue_list argument to blk_mq_requeue_request() Most blk_mq_requeue_request() and blk_mq_add_to_requeue_list() calls are followed by kicking the requeue list. Hence add an argument to these two functions that allows callers to kick the requeue list. This was proposed by Christoph Hellwig. Signed-off-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Cc: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ed20ac74c62a..35a0af5ede6d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -218,8 +218,9 @@ void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, int error); void __blk_mq_end_request(struct request *rq, int error); -void blk_mq_requeue_request(struct request *rq); -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); +void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, + bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); -- cgit v1.2.3 From 50d24c34403c62ad29e8b6db559d491bae20b4b7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 3 Nov 2016 17:03:53 -0700 Subject: block: immediately dispatch big size request Currently the block plug holds up to 16 non-mergeable requests. This makes sense if the request size is small, eg, to reduce lock contention. But if the request size is big enough, we don't need to worry about lock contention. Holding such a request makes no sense and it lowers disk utilization. In practice, this improves throughput by 10% for my raid5 sequential write workload. The size (128k) is arbitrary right now, but it makes sure lock contention is small.
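A simplified sketch of the resulting check at plug time, using the BLK_PLUG_FLUSH_SIZE definition added below ('last' being the most recently plugged request):

    /* Simplified: flush the plug once it is full, or once the last
     * queued request is big enough to be worth dispatching now.
     */
    if (request_count >= BLK_MAX_REQUEST_COUNT ||
        (last && blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE))
            blk_flush_plug_list(plug, false);
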
This probably could be more intelligent, eg, checking the average size of the requests held. Since this is mainly for sequential IO, it's probably not worth it. V2: check the last request instead of the first request, so as long as there is one big size request we flush the plug. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 13d893a69b46..9189a2d5c392 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1173,6 +1173,7 @@ struct blk_plug { struct list_head cb_list; /* md requires an unplug callback */ }; #define BLK_MAX_REQUEST_COUNT 16 +#define BLK_PLUG_FLUSH_SIZE (128 * 1024) struct blk_plug_cb; typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); -- cgit v1.2.3 From d278d4a8892f13b6a9eb6102b356402f0e062324 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Mar 2016 10:21:08 -0600 Subject: block: add code to track actual device queue depth For blk-mq, ->nr_requests does track queue depth, at least at init time. But for the older queue paths, it's simply a soft setting. On top of that, it's generally larger than the hardware setting on purpose, to allow backup of requests for merging. Fill a hole in struct request_queue with a 'queue_depth' member that drivers can set, via the new blk_set_queue_depth() helper, to more closely inform the block layer of the real queue depth. Signed-off-by: Jens Axboe Reviewed-by: Jan Kara --- include/linux/blkdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9189a2d5c392..d364be6e6959 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -405,6 +405,8 @@ struct request_queue { struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; + unsigned int queue_depth; + /* hw dispatch queues */ struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; @@ -777,6 +779,14 @@ static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) return false; } +static inline unsigned int blk_queue_depth(struct request_queue *q) +{ + if (q->queue_depth) + return q->queue_depth; + + return q->nr_requests; +} + /* * q->prep_rq_fn return values */ @@ -1094,6 +1104,7 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_default_limits(struct queue_limits *lim); extern void blk_set_stacking_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, -- cgit v1.2.3 From b57d74aff9ab92fbfb7c197c384d1adfa2827b2e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Sep 2016 10:20:33 -0600 Subject: writeback: track if we're sleeping on progress in balance_dirty_pages() Note in the bdi_writeback structure whenever a task ends up sleeping waiting for progress. We can use that information in the lower layers to increase the priority of writes.
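The bookkeeping itself is tiny; a sketch of the balance_dirty_pages() side (simplified, with the pause computation elided):

    /* Record that a dirtier is about to sleep waiting on writeback
     * progress, then actually sleep.
     */
    wb->dirty_sleep = jiffies;
    io_schedule_timeout(pause);
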
Signed-off-by: Jens Axboe Reviewed-by: Jan Kara --- include/linux/backing-dev-defs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index c357f27d5483..dc5f76d7f648 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + unsigned long dirty_sleep; /* last wait */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ #ifdef CONFIG_CGROUP_WRITEBACK -- cgit v1.2.3 From d49187e97e94e2eb613cb6fed810356972077cc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Nov 2016 07:32:33 -0800 Subject: nvme: introduce struct nvme_request This adds a shared per-request structure for all NVMe I/O. This structure is embedded as the first member in all NVMe transport drivers' request private data and allows implementing common functionality between the drivers. The first use is to replace the current abuse of the SCSI command passthrough fields in struct request for the NVMe command passthrough, but it will grow a few more fields to allow implementing things like common abort handlers in the future. The passthrough commands are handled by having a pointer to the SQE (struct nvme_command) in struct nvme_request, and the union of the possible result fields, which had to be turned from an anonymous into a named union for that purpose. This avoids having to pass a reference to a full CQE around and thus makes checking the result a lot more lightweight. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7676557ce357..18ce9f7cc881 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -949,11 +949,11 @@ struct nvme_completion { /* * Used by Admin and Fabrics commands to return data: */ - union { - __le16 result16; - __le32 result; - __le64 result64; - }; + union nvme_result { + __le16 u16; + __le32 u32; + __le64 u64; + } result; __le16 sq_head; /* how much of this queue may be reclaimed */ __le16 sq_id; /* submission queue that generated this entry */ __u16 command_id; /* of the command which completed */ -- cgit v1.2.3 From cf43e6be865a582ba66ee4747ae27a0513f6bba1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 7 Nov 2016 21:32:37 -0700 Subject: block: add scalable completion tracking of requests For legacy block, we simply track them in the request queue. For blk-mq, we track them on a per-sw queue basis, which we can then sum up through the hardware queues and finally to a per device state. The stats are tracked in, roughly, 0.1s interval windows. Add sysfs files to display the stats. The feature is off by default, to avoid any extra overhead. In-kernel users of it can turn it on by setting QUEUE_FLAG_STATS in the queue flags. We currently don't turn it on if someone just reads any of the stats files, that is something we could add as well.
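The per-window bookkeeping added below lends itself to batched accounting; one plausible way a completion sample could be folded into the window (illustrative, not the verbatim blk-stat code):

    /* Accumulate samples (a u64 'value') in 'batch' and only fold them
     * into the running mean every BLK_RQ_STAT_BATCH samples, keeping
     * the per-completion cost low.
     */
    stat->min = min(stat->min, value);
    stat->max = max(stat->max, value);
    stat->batch += value;
    if (++stat->nr_batch >= BLK_RQ_STAT_BATCH) {
            stat->mean = div64_s64(stat->mean * stat->nr_samples + stat->batch,
                                   stat->nr_samples + stat->nr_batch);
            stat->nr_samples += stat->nr_batch;
            stat->nr_batch = 0;
            stat->batch = 0;
    }
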
Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 16 ++++++++++++++++ include/linux/blkdev.h | 7 +++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 562ac46cb790..4d0044d09984 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -250,4 +250,20 @@ static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } +struct blk_issue_stat { + u64 time; +}; + +#define BLK_RQ_STAT_BATCH 64 + +struct blk_rq_stat { + s64 mean; + u64 min; + u64 max; + s32 nr_samples; + s32 nr_batch; + u64 batch; + s64 time; +}; + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d364be6e6959..303723a2e5b8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -117,6 +117,8 @@ typedef __u32 __bitwise req_flags_t; #define RQF_PM ((__force req_flags_t)(1 << 15)) /* on IO scheduler merge hash */ #define RQF_HASHED ((__force req_flags_t)(1 << 16)) +/* IO stats tracking on */ +#define RQF_STATS ((__force req_flags_t)(1 << 17)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ @@ -197,6 +199,7 @@ struct request { struct gendisk *rq_disk; struct hd_struct *part; unsigned long start_time; + struct blk_issue_stat issue_stat; #ifdef CONFIG_BLK_CGROUP struct request_list *rl; /* rl this rq is alloced from */ unsigned long long start_time_ns; @@ -492,6 +495,9 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; + + struct blk_rq_stat rq_stats[2]; + /* * Number of active block driver functions for which blk_drain_queue() * must wait. Must be incremented around functions that unlock the @@ -585,6 +591,7 @@ struct request_queue { #define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ +#define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From 87760e5eef359788047d6fd54fc12eec74ce0d27 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Nov 2016 12:38:14 -0700 Subject: block: hook up writeback throttling Enable throttling of buffered writeback to make it a lot smoother and have way less impact on other system activity. Background writeback should be, by definition, background activity. The fact that we flush huge bundles of it at a time means that it potentially has heavy impacts on foreground workloads, which isn't ideal. We can't easily limit the sizes of writes that we do, since that would impact file system layout in the presence of delayed allocation. So just throttle back buffered writeback, unless someone is waiting for it. The algorithm for when to throttle takes its inspiration from the CoDel network scheduling algorithm. Like CoDel, blk-wb monitors the minimum latencies of requests over a window of time. In that window of time, if the minimum latency of any request exceeds a given target, then a scale count is incremented and the queue depth is shrunk. The next monitoring window is shrunk accordingly. Unlike CoDel, if we hit a window that exhibits good behavior, then we simply increment the scale count and re-calculate the limits for that scale value. This prevents us from oscillating between a close-to-ideal value and max all the time, instead remaining in the windows where we get good behavior.
Unlike CoDel, blk-wb allows the scale count to go negative. This happens if we primarily have writes going on. Unlike positive scale counts, this doesn't change the size of the monitoring window. When the heavy writers finish, blk-wb quickly snaps back to its stable state of a zero scale count. The patch registers a sysfs entry, 'wb_lat_usec'. This sets the latency target to be met. It defaults to 2 msec for non-rotational storage, and 75 msec for rotational storage. Setting this value to '0' disables blk-wb. Generally, a user would not have to touch this setting. We don't enable WBT on devices that are managed with CFQ, and have a non-root block cgroup attached. If we have a proportional share setup on this particular disk, then the wbt throttling will interfere with that. We don't have a strong need for wbt for that case, since we will rely on CFQ doing that for us. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 303723a2e5b8..15da9e430f90 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -38,6 +38,7 @@ struct bsg_job; struct blkcg_gq; struct blk_flush_queue; struct pr_ops; +struct rq_wb; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -383,6 +384,8 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + struct rq_wb *rq_wb; + /* * If blkcg is not used, @q->root_rl serves all requests. If blkcg * is used, root blkg allocates from @q->root_rl and all other -- cgit v1.2.3 From bbd7bb7017d5c2b1e75f3818b4ce88fa58bb0eab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 4 Nov 2016 09:34:34 -0600 Subject: block: move poll code to blk-mq The poll code is blk-mq specific, let's move it to blk-mq.c. This is a prep patch for improving the polling code. Signed-off-by: Jens Axboe Reviewed-by: Christoph Hellwig --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15da9e430f90..bab18ee5810d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -952,7 +952,7 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -bool blk_poll(struct request_queue *q, blk_qc_t cookie); +bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 06426adf072bca62ac31ea396ff2159a34f276c2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Nov 2016 13:01:59 -0700 Subject: blk-mq: implement hybrid poll mode for sync O_DIRECT This patch enables a hybrid polling mode. Instead of polling after IO submission, we can induce an artificial delay, and then poll after that. For example, if the IO is presumed to complete in 8 usecs from now, we can sleep for 4 usecs, wake up, and then do our polling. This still puts a sleep/wakeup cycle in the IO path, but instead of the wakeup happening after the IO has completed, it'll happen before. With this hybrid scheme, we can achieve big latency reductions while still using the same (or less) amount of CPU.
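A simplified sketch of the flow, using the poll_nsec knob added below (the real code lives in blk-mq.c):

    /* Sleep for the estimated fraction of the completion time, then
     * fall through to the normal poll loop.
     */
    if (q->poll_nsec > 0) {
            ktime_t kt = ktime_set(0, q->poll_nsec);

            set_current_state(TASK_UNINTERRUPTIBLE);
            schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
    }
    /* ...now poll for the completion as before... */
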
Signed-off-by: Jens Axboe Tested-By: Stephen Bates Reviewed-By: Stephen Bates --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bab18ee5810d..37ed4ea705c8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -509,6 +509,7 @@ struct request_queue { unsigned int request_fn_active; unsigned int rq_timeout; + unsigned int poll_nsec; struct timer_list timeout; struct work_struct timeout_work; struct list_head timeout_list; -- cgit v1.2.3 From 64f1c21e86f7fe63337b5c23c129de3ec506431d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 14 Nov 2016 13:03:03 -0700 Subject: blk-mq: make the polling code adaptive The previous commit introduced the hybrid sleep/poll mode. Take that one step further, and use the completion latencies to automatically sleep for half the mean completion time. This is a good approximation. This changes the 'io_poll_delay' sysfs file a bit to expose the various options. Depending on the value, the polling code will behave differently: -1 Never enter hybrid sleep mode 0 Use half of the completion mean for the sleep delay >0 Use this specific value as the sleep delay Signed-off-by: Jens Axboe Tested-By: Stephen Bates Reviewed-By: Stephen Bates --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 37ed4ea705c8..85699bc90a51 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -509,7 +509,7 @@ struct request_queue { unsigned int request_fn_active; unsigned int rq_timeout; - unsigned int poll_nsec; + int poll_nsec; struct timer_list timeout; struct work_struct timeout_work; struct list_head timeout_list; -- cgit v1.2.3 From 9a05e7541c39680d28ecf91892338e074738d5fd Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 18 Nov 2016 15:16:06 +0100 Subject: block: Change extern inline to static inline With compilers which follow the C99 standard (like modern versions of gcc and clang), "extern inline" does the opposite thing from older versions of gcc (emits code for an externally linkable version of the inline function). "static inline" does the intended behavior in all cases instead. Description taken from commit 6d91857d4826 ("staging, rtl8192e, LLVMLinux: Change extern inline to static inline"). 
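Concretely, for a header stub like the one touched here (illustration only, not from the patch):

    extern inline void stub(void) {}        /* C99: every includer emits an
                                             * external definition of stub(),
                                             * inviting duplicate symbols and
                                             * -Wmissing-prototypes warnings
                                             */
    static inline void stub(void) {}        /* each includer gets a private
                                             * copy, which is what a header
                                             * helper wants
                                             */
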
This also fixes the following GCC warning when building with CONFIG_PM disabled: ./include/linux/blkdev.h:1143:20: warning: no previous prototype for 'blk_set_runtime_active' [-Wmissing-prototypes] Fixes: d07ab6d11477 ("block: Add blk_set_runtime_active()") Reviewed-by: Mika Westerberg Signed-off-by: Tobias Klauser Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 85699bc90a51..541fdd8787a5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1174,7 +1174,7 @@ static inline int blk_pre_runtime_suspend(struct request_queue *q) static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {} static inline void blk_pre_runtime_resume(struct request_queue *q) {} static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} -extern inline void blk_set_runtime_active(struct request_queue *q) {} +static inline void blk_set_runtime_active(struct request_queue *q) {} #endif /* -- cgit v1.2.3 From 93c5bdf7ab71bbdae27f8f51fa175e06f000d69d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 21 Nov 2016 16:18:18 +0100 Subject: block: clear all of bi_opf in bio_set_op_attrs Since commit 87374179 ("block: add a proper block layer data direction encoding") we only OR the new op and flags into bi_opf in bio_set_op_attrs instead of clearing the old value. I've not seen any breakage with the new behavior, but it seems dangerous. Also convert it to an inline function to make the argument passing safer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4d0044d09984..f57458a6a93b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -207,8 +207,11 @@ enum req_flag_bits { ((req)->cmd_flags & REQ_OP_MASK) /* obsolete, don't use in new code */ -#define bio_set_op_attrs(bio, op, op_flags) \ - ((bio)->bi_opf |= (op | op_flags)) +static inline void bio_set_op_attrs(struct bio *bio, unsigned op, + unsigned op_flags) +{ + bio->bi_opf = op | op_flags; +} static inline bool op_is_write(unsigned int op) { -- cgit v1.2.3 From 3a83f4677539bce8eaa2bca9ee9c20e172d7ab04 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 22 Nov 2016 08:57:21 -0700 Subject: block: bio: pass bvec table to bio_init() Some drivers use an external bvec table, so introduce this helper for that case. It is always safe to access bio->bi_io_vec in this way. After converting to this usage, it becomes a bit easier to evaluate the remaining direct accesses to bio->bi_io_vec, which helps prepare for the upcoming multipage bvec support. Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Fixed up the new O_DIRECT cases.
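With the new signature (shown in the hunk below), a converted user would look roughly like this ('bdev', 'sector' and 'page' are assumed to exist in the surrounding code):

    /* Sketch: an on-stack bio wired to an external single-entry
     * bvec table.
     */
    struct bio bio;
    struct bio_vec bvec;

    bio_init(&bio, &bvec, 1);
    bio.bi_bdev = bdev;
    bio.bi_iter.bi_sector = sector;
    bio_add_page(&bio, page, PAGE_SIZE, 0);
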
Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d367cd37a7f7..70a7244f08a7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -420,7 +420,8 @@ extern int bio_phys_segments(struct request_queue *, struct bio *); extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); -extern void bio_init(struct bio *); +extern void bio_init(struct bio *bio, struct bio_vec *table, + unsigned short max_vecs); extern void bio_reset(struct bio *); void bio_chain(struct bio *, struct bio *); -- cgit v1.2.3 From 3dc87dd048dc442bab633e85bfb96c893612d765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 28 Nov 2016 22:38:53 +0100 Subject: nvme: lightnvm: attach lightnvm sysfs to nvme block device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, LBA read and write were not supported in the lightnvm specification. Now that the specification supports them, let's use the traditional NVMe gendisk, and attach the lightnvm sysfs geometry export. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index d190786e4ad8..fb2e601d674e 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -352,8 +352,6 @@ struct nvm_dev { /* Backend device */ struct request_queue *q; - struct device dev; - struct device *parent_dev; char name[DISK_NAME_LEN]; void *private_data; -- cgit v1.2.3 From bb3149792e0ed52cf5f457dda4c9bf9c5bda1542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:38:54 +0100 Subject: lightnvm: enable to send hint to erase command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Erases might be subject to host hints. An example is multi-plane programming to erase blocks in parallel. Enable targets to specify this hint.
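A target-side caller would then pass the hint straight through; in this sketch the flags value is a stand-in for a real plane-programming hint:

    /* Illustrative target-side caller of the widened erase interface. */
    static int example_erase(struct nvm_dev *dev, struct nvm_block *blk)
    {
            int flags = 0;  /* e.g. encode a multi-plane erase hint here */

            return nvm_erase_blk(dev, blk, flags);
    }
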
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index fb2e601d674e..d87be02edc39 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -470,8 +470,7 @@ typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); -typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, - unsigned long); +typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); typedef int (nvmm_reserve_lun)(struct nvm_dev *, int); @@ -537,8 +536,8 @@ extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); -extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int); -extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); +extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); +extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *, int); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); -- cgit v1.2.3 From a24ba4644b7ae5af3cd2eb6992c237cb4548c45e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:38:56 +0100 Subject: lightnvm: export set bad block table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bad blocks should be managed by block owners. This would be either targets for data blocks or sysblk for system blocks. In order to support this, export two functions: one to mark a block as a specific type (e.g., bad block) and another to update the bad block table on the device. Move bad block management to rrpc.
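A sketch of how a block owner would use the two exported calls, taking the signatures from the hunk below (the type constants here are illustrative):

    /* Mark the block in the media manager, then persist the change in
     * the device's bad block table.
     */
    nvm_mark_blk(dev, ppa, NVM_BLK_ST_BAD);
    nvm_set_bb_tbl(dev, &ppa, 1, NVM_BLK_T_GRWN_BAD);
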
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index d87be02edc39..4480d1c6a1a5 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -423,6 +423,15 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, return ppa; } +static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) +{ + if (ppa_empty(ppa1) || ppa_empty(ppa2)) + return 0; + + return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) && + (ppa1.g.blk == ppa2.g.blk)); +} + static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) { return dev->lptbl[slc_pg]; @@ -528,7 +537,9 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); -void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); +extern void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); +extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, + int nr_ppas, int type); extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); -- cgit v1.2.3 From 402ab9a89d7b5bab08a5534027b39d80085ec19b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:38:57 +0100 Subject: lightnvm: add ECC error codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ECC error codes to enable the appropriate handling in the target. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 4480d1c6a1a5..6b26a3289bce 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -107,6 +107,8 @@ enum { NVM_RSP_NOT_CHANGEABLE = 0x1, NVM_RSP_ERR_FAILWRITE = 0x40ff, NVM_RSP_ERR_EMPTYPAGE = 0x42ff, + NVM_RSP_ERR_FAILECC = 0x4281, + NVM_RSP_WARN_HIGHECC = 0x4700, /* Device opcodes */ NVM_OP_HBREAD = 0x02, -- cgit v1.2.3 From 7e4f64a9b3004ce592f21653c3b7781628862232 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:00 +0100 Subject: lightnvm: cleanup unused target operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleanup definition leftovers from old gennvm interface Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 6b26a3289bce..e598308882aa 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -477,9 +477,6 @@ typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *, struct nvm_lun *, unsigned long); typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *); -typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *); -typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *); -typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); 
typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); @@ -504,9 +501,6 @@ struct nvmm_type { /* Block administration callbacks */ nvmm_get_blk_fn *get_blk; nvmm_put_blk_fn *put_blk; - nvmm_open_blk_fn *open_blk; - nvmm_close_blk_fn *close_blk; - nvmm_flush_blk_fn *flush_blk; nvmm_submit_io_fn *submit_io; nvmm_erase_blk_fn *erase_blk; -- cgit v1.2.3 From 0e5c3246dbb96b6870634e7d51b2490f05c976cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:01 +0100 Subject: lightnvm: make address conversion functions global MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Targets are assumed to use the same generic ppa format, where the address is partitioned on ch:lun:block:pg:pl:sec. Thus, make the function in charge of transforming the ppa address from a linear format to the generic one available to all targets. This function will be needed by the media manager in order to do target mapping translations when targets are divided across different physical partitions. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e598308882aa..98278a9fcb1f 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -361,6 +361,36 @@ struct nvm_dev { spinlock_t lock; }; +static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) +{ + struct ppa_addr l; + int secs, pgs, blks, luns; + sector_t ppa = r.ppa; + + l.ppa = 0; + + div_u64_rem(ppa, dev->sec_per_pg, &secs); + l.g.sec = secs; + + sector_div(ppa, dev->sec_per_pg); + div_u64_rem(ppa, dev->pgs_per_blk, &pgs); + l.g.pg = pgs; + + sector_div(ppa, dev->pgs_per_blk); + div_u64_rem(ppa, dev->blks_per_lun, &blks); + l.g.blk = blks; + + sector_div(ppa, dev->blks_per_lun); + div_u64_rem(ppa, dev->luns_per_chnl, &luns); + l.g.lun = luns; + + sector_div(ppa, dev->luns_per_chnl); + l.g.ch = ppa; + + return l; +} + static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, struct ppa_addr r) { -- cgit v1.2.3 From de93434fcf74d41754a48e45365a5914e00bc0be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:04 +0100 Subject: lightnvm: remove gen_lun abstraction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gen_lun abstraction in the generic media manager was conceived on the assumption that a single target would be instantiated on top of it. This has complicated the target design needed to implement multiple instances. Remove this abstraction and move its logic to nvm_lun, which manages physical lun geometry and operations. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 98278a9fcb1f..33940bdc18a9 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -275,7 +275,17 @@ struct nvm_lun { spinlock_t lock; + /* lun block lists */ + struct list_head used_list; /* In-use blocks */ + struct list_head free_list; /* Not used blocks i.e. released + * and ready for use + */ + struct list_head bb_list; /* Bad blocks.
Mutually exclusive with + * free_list and used_list + */ unsigned int nr_free_blocks; /* Number of unused blocks */ + int reserved_blocks; + struct nvm_block *blocks; }; -- cgit v1.2.3 From 8176117b82e49e043d045f214ba7a892fba6b827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:05 +0100 Subject: lightnvm: manage lun partitions internally in mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LUNs are exclusively owned by targets implementing a block device FTL. At the moment, doing this reservation requires a 2-way callback between gennvm and the target. The reason behind this is that LUNs were not assumed to always be exclusively owned by targets. However, this design decision goes against I/O determinism QoS (two targets would mix I/O on the same parallel unit in the device). This patch makes LUN reservation part of target creation in the media manager. This ensures that LUNs are always exclusively owned by the target instantiated on top of them. LUN striping and/or sharing should be implemented on the target itself or the layers on top. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 33940bdc18a9..89c695483d55 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -210,6 +210,7 @@ struct nvm_id { struct nvm_target { struct list_head list; + struct list_head lun_list; struct nvm_dev *dev; struct nvm_tgt_type *type; struct gendisk *disk; @@ -273,6 +274,7 @@ struct nvm_lun { int lun_id; int chnl_id; + struct list_head list; spinlock_t lock; /* lun block lists */ @@ -521,8 +523,6 @@ typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); -typedef int (nvmm_reserve_lun)(struct nvm_dev *, int); -typedef void (nvmm_release_lun)(struct nvm_dev *, int); typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); @@ -550,8 +550,6 @@ struct nvmm_type { /* Configuration management */ nvmm_get_lun_fn *get_lun; - nvmm_reserve_lun *reserve_lun; - nvmm_release_lun *release_lun; /* Statistics */ nvmm_lun_info_print_fn *lun_info_print; -- cgit v1.2.3 From 8e79b5cb1d3b8eceaf6862995952dd4de431dd99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:06 +0100 Subject: lightnvm: move block provisioning to targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to naturally support multi-target instances on an Open-Channel SSD, targets should own the LUNs they get blocks from and manage provisioning internally. This is done in several steps. This patch moves the block provisioning inside the target and removes the get/put block interface from the media manager.
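Once the free lists live in the target, provisioning reduces to list manipulation on the LUNs the target owns; a sketch (locking elided, names illustrative):

    static struct nvm_block *tgt_get_blk(struct nvm_lun *lun)
    {
            struct nvm_block *blk;

            blk = list_first_entry_or_null(&lun->free_list,
                                           struct nvm_block, list);
            if (blk) {
                    /* Move the block to the in-use list and account for it. */
                    list_move_tail(&blk->list, &lun->used_list);
                    lun->nr_free_blocks--;
            }
            return blk;
    }
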
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 133 +++++++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 89c695483d55..1f1588c2557e 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -211,7 +211,7 @@ struct nvm_id { struct nvm_target { struct list_head list; struct list_head lun_list; - struct nvm_dev *dev; + struct nvm_tgt_dev *dev; struct nvm_tgt_type *type; struct gendisk *disk; }; @@ -286,7 +286,6 @@ struct nvm_lun { * free_list and used_list */ unsigned int nr_free_blocks; /* Number of unused blocks */ - int reserved_blocks; struct nvm_block *blocks; }; @@ -315,22 +314,12 @@ struct nvm_sb_info { struct ppa_addr fs_ppa; }; -struct nvm_dev { - struct nvm_dev_ops *ops; - - struct list_head devices; - - /* Media manager */ - struct nvmm_type *mt; - void *mp; - - /* System blocks */ - struct nvm_sb_info sb; - - /* Device information */ +/* Device generic information */ +struct nvm_geo { int nr_chnls; + int nr_luns; + int luns_per_chnl; /* -1 if channels are not symmetric */ int nr_planes; - int luns_per_chnl; int sec_per_pg; /* only sectors for a single page */ int pgs_per_blk; int blks_per_lun; @@ -350,14 +339,43 @@ struct nvm_dev { int sec_per_pl; /* all sectors across planes */ int sec_per_blk; int sec_per_lun; +}; + +struct nvm_tgt_dev { + /* Device information */ + struct nvm_geo geo; + + sector_t total_secs; + + struct nvm_id identity; + struct request_queue *q; + + struct nvmm_type *mt; + struct nvm_dev_ops *ops; + + void *parent; +}; + +struct nvm_dev { + struct nvm_dev_ops *ops; + + struct list_head devices; + + /* Media manager */ + struct nvmm_type *mt; + void *mp; + + /* System blocks */ + struct nvm_sb_info sb; + + /* Device information */ + struct nvm_geo geo; /* lower page table */ int lps_per_blk; int *lptbl; - unsigned long total_blocks; unsigned long total_secs; - int nr_luns; unsigned long *lun_map; void *dma_pool; @@ -373,7 +391,7 @@ struct nvm_dev { spinlock_t lock; }; -static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, +static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, struct ppa_addr r) { struct ppa_addr l; @@ -382,22 +400,22 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, l.ppa = 0; - div_u64_rem(ppa, dev->sec_per_pg, &secs); + div_u64_rem(ppa, geo->sec_per_pg, &secs); l.g.sec = secs; - sector_div(ppa, dev->sec_per_pg); - div_u64_rem(ppa, dev->pgs_per_blk, &pgs); + sector_div(ppa, geo->sec_per_pg); + div_u64_rem(ppa, geo->pgs_per_blk, &pgs); l.g.pg = pgs; - sector_div(ppa, dev->pgs_per_blk); - div_u64_rem(ppa, dev->blks_per_lun, &blks); + sector_div(ppa, geo->pgs_per_blk); + div_u64_rem(ppa, geo->blks_per_lun, &blks); l.g.blk = blks; - sector_div(ppa, dev->blks_per_lun); - div_u64_rem(ppa, dev->luns_per_chnl, &luns); + sector_div(ppa, geo->blks_per_lun); + div_u64_rem(ppa, geo->luns_per_chnl, &luns); l.g.lun = luns; - sector_div(ppa, dev->luns_per_chnl); + sector_div(ppa, geo->luns_per_chnl); l.g.ch = ppa; return l; @@ -406,14 +424,15 @@ static inline struct ppa_addr linear_to_generic_addr(struct nvm_dev *dev, static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, struct ppa_addr r) { + struct nvm_geo *geo = &dev->geo; struct ppa_addr l; - l.ppa = ((u64)r.g.blk) << dev->ppaf.blk_offset; - l.ppa |= ((u64)r.g.pg) << dev->ppaf.pg_offset; - l.ppa 
|= ((u64)r.g.sec) << dev->ppaf.sect_offset; - l.ppa |= ((u64)r.g.pl) << dev->ppaf.pln_offset; - l.ppa |= ((u64)r.g.lun) << dev->ppaf.lun_offset; - l.ppa |= ((u64)r.g.ch) << dev->ppaf.ch_offset; + l.ppa = ((u64)r.g.blk) << geo->ppaf.blk_offset; + l.ppa |= ((u64)r.g.pg) << geo->ppaf.pg_offset; + l.ppa |= ((u64)r.g.sec) << geo->ppaf.sect_offset; + l.ppa |= ((u64)r.g.pl) << geo->ppaf.pln_offset; + l.ppa |= ((u64)r.g.lun) << geo->ppaf.lun_offset; + l.ppa |= ((u64)r.g.ch) << geo->ppaf.ch_offset; return l; } @@ -421,24 +440,25 @@ static inline struct ppa_addr generic_to_dev_addr(struct nvm_dev *dev, static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev, struct ppa_addr r) { + struct nvm_geo *geo = &dev->geo; struct ppa_addr l; l.ppa = 0; /* * (r.ppa << X offset) & X len bitmask. X eq. blk, pg, etc. */ - l.g.blk = (r.ppa >> dev->ppaf.blk_offset) & - (((1 << dev->ppaf.blk_len) - 1)); - l.g.pg |= (r.ppa >> dev->ppaf.pg_offset) & - (((1 << dev->ppaf.pg_len) - 1)); - l.g.sec |= (r.ppa >> dev->ppaf.sect_offset) & - (((1 << dev->ppaf.sect_len) - 1)); - l.g.pl |= (r.ppa >> dev->ppaf.pln_offset) & - (((1 << dev->ppaf.pln_len) - 1)); - l.g.lun |= (r.ppa >> dev->ppaf.lun_offset) & - (((1 << dev->ppaf.lun_len) - 1)); - l.g.ch |= (r.ppa >> dev->ppaf.ch_offset) & - (((1 << dev->ppaf.ch_len) - 1)); + l.g.blk = (r.ppa >> geo->ppaf.blk_offset) & + (((1 << geo->ppaf.blk_len) - 1)); + l.g.pg |= (r.ppa >> geo->ppaf.pg_offset) & + (((1 << geo->ppaf.pg_len) - 1)); + l.g.sec |= (r.ppa >> geo->ppaf.sect_offset) & + (((1 << geo->ppaf.sect_len) - 1)); + l.g.pl |= (r.ppa >> geo->ppaf.pln_offset) & + (((1 << geo->ppaf.pln_len) - 1)); + l.g.lun |= (r.ppa >> geo->ppaf.lun_offset) & + (((1 << geo->ppaf.lun_len) - 1)); + l.g.ch |= (r.ppa >> geo->ppaf.ch_offset) & + (((1 << geo->ppaf.ch_len) - 1)); return l; } @@ -456,11 +476,12 @@ static inline void ppa_set_empty(struct ppa_addr *ppa_addr) static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, struct nvm_block *blk) { + struct nvm_geo *geo = &dev->geo; struct ppa_addr ppa; struct nvm_lun *lun = blk->lun; ppa.ppa = 0; - ppa.g.blk = blk->id % dev->blks_per_lun; + ppa.g.blk = blk->id % geo->blks_per_lun; ppa.g.lun = lun->lun_id; ppa.g.ch = lun->chnl_id; @@ -483,7 +504,8 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); +typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int, + int); typedef void (nvm_tgt_exit_fn)(void *); struct nvm_tgt_type { @@ -516,9 +538,6 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *); typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); -typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *, - struct nvm_lun *, unsigned long); -typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); @@ -538,10 +557,6 @@ struct nvmm_type { nvmm_create_tgt_fn *create_tgt; nvmm_remove_tgt_fn *remove_tgt; - /* Block administration callbacks */ - nvmm_get_blk_fn *get_blk; - nvmm_put_blk_fn *put_blk; - nvmm_submit_io_fn *submit_io; nvmm_erase_blk_fn *erase_blk; @@ -563,10 
+578,6 @@ struct nvmm_type { extern int nvm_register_mgr(struct nvmm_type *); extern void nvm_unregister_mgr(struct nvmm_type *); -extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, - unsigned long); -extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); - extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); @@ -611,10 +622,10 @@ extern int nvm_init_sysblock(struct nvm_dev *, struct nvm_sb_info *); extern int nvm_dev_factory(struct nvm_dev *, int flags); -#define nvm_for_each_lun_ppa(dev, ppa, chid, lunid) \ - for ((chid) = 0, (ppa).ppa = 0; (chid) < (dev)->nr_chnls; \ +#define nvm_for_each_lun_ppa(geo, ppa, chid, lunid) \ + for ((chid) = 0, (ppa).ppa = 0; (chid) < (geo)->nr_chnls; \ (chid)++, (ppa).g.ch = (chid)) \ - for ((lunid) = 0; (lunid) < (dev)->luns_per_chnl; \ + for ((lunid) = 0; (lunid) < (geo)->luns_per_chnl; \ (lunid)++, (ppa).g.lun = (lunid)) #else /* CONFIG_NVM */ -- cgit v1.2.3 From 0ac4072eb10c9627415eb1ca511121156e20012c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:07 +0100 Subject: lightnvm: remove get_lun operation on gennvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since LUNs are managed internally on the target, there is no need for the media manager to implement a get_lun operation. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 1f1588c2557e..e56c35227249 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -504,8 +504,8 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, int, - int); +typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, + struct list_head *lun_list); typedef void (nvm_tgt_exit_fn)(void *); struct nvm_tgt_type { @@ -541,7 +541,6 @@ typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); -typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); @@ -562,9 +562,6 @@ struct nvmm_type { /* Bad block mgmt */ nvmm_mark_blk_fn *mark_blk; - /* Configuration management */ - nvmm_get_lun_fn *get_lun; - /* Statistics */ nvmm_lun_info_print_fn *lun_info_print; -- cgit v1.2.3 From eec44565e9ab13bbf5b48864a68871eabf1115c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3%A1lez?= Date: Mon, 28 Nov 2016 22:39:08 +0100 Subject: lightnvm: remove debug lun statistics from gennvm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since LUNs are managed internally on targets, the media manager has no access to the free LUN lists. Thus, debug functions that show LUN information on the device should not be implemented in the media manager, but rather in the target itself.
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e56c35227249..ed04fa642371 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -541,8 +541,6 @@ typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); -typedef void (nvmm_lun_info_print_fn)(struct nvm_dev *); - typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); @@ -562,9 +560,6 @@ struct nvmm_type { /* Bad block mgmt */ nvmm_mark_blk_fn *mark_blk; - /* Statistics */ - nvmm_lun_info_print_fn *lun_info_print; - nvmm_get_area_fn *get_area; nvmm_put_area_fn *put_area; -- cgit v1.2.3 From 2a02e627c245bfa987b97707123d7747d7b0e486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:09 +0100 Subject: lightnvm: eliminate nvm_block abstraction on mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to naturally support multi-target instances on an Open-Channel SSD, targets should own the LUNs they get blocks from and manage provisioning internally. This is done in several steps. A part of this transformation is that targets manage their blocks internally. This patch eliminates the nvm_block abstraction and moves block management to the target logic. The rrpc target is transformed. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 48 ++---------------------------------------------- 1 file changed, 2 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ed04fa642371..cc210cc85c6d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -266,8 +266,6 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) return rqdata + 1; } -struct nvm_block; - struct nvm_lun { int id; @@ -275,19 +273,6 @@ struct nvm_lun { int chnl_id; struct list_head list; - spinlock_t lock; - - /* lun block lists */ - struct list_head used_list; /* In-use blocks */ - struct list_head free_list; /* Not used blocks i.e. released - * and ready for use - */ - struct list_head bb_list; /* Bad blocks. 
Mutually exclusive with - * free_list and used_list - */ - unsigned int nr_free_blocks; /* Number of unused blocks */ @@ -296,15 +281,6 @@ enum { NVM_BLK_ST_BAD = 0x8, /* Bad block */ }; -struct nvm_block { - struct list_head list; - struct nvm_lun *lun; - unsigned long id; - - void *priv; - int state; -}; - /* system block cpu representation */ struct nvm_sb_info { unsigned long seqnr; @@ -473,21 +449,6 @@ static inline void ppa_set_empty(struct ppa_addr *ppa_addr) ppa_addr->ppa = ADDR_EMPTY; } -static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, - struct nvm_block *blk) -{ - struct nvm_geo *geo = &dev->geo; - struct ppa_addr ppa; - struct nvm_lun *lun = blk->lun; - - ppa.ppa = 0; - ppa.g.blk = blk->id % geo->blks_per_lun; - ppa.g.lun = lun->lun_id; - ppa.g.ch = lun->chnl_id; - - return ppa; -} - static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) { if (ppa_empty(ppa1) || ppa_empty(ppa2)) @@ -539,8 +500,7 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *); typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); -typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, int); -typedef void (nvmm_mark_blk_fn)(struct nvm_dev *, struct ppa_addr, int); +typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct ppa_addr *, int); typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); @@ -557,9 +517,6 @@ struct nvmm_type { nvmm_submit_io_fn *submit_io; nvmm_erase_blk_fn *erase_blk; - /* Bad block mgmt */ - nvmm_mark_blk_fn *mark_blk; - nvmm_get_area_fn *get_area; nvmm_put_area_fn *put_area; @@ -573,7 +530,6 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); -extern void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, int type); @@ -584,7 +540,7 @@ extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); -extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *, int); +extern int nvm_erase_blk(struct nvm_dev *, struct ppa_addr *, int); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); -- cgit v1.2.3 From 8e53624d44c1de31b1b0d4f500703669418a4c67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:10 +0100 Subject: lightnvm: eliminate nvm_lun abstraction in mm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to naturally support multi-target instances on an Open-Channel SSD, targets should own the LUNs they get blocks from and manage provisioning internally. This is done in several steps. Since targets own the LUNs they are instantiated on top of and manage the free block list internally, there is no need for a LUN abstraction in the media manager.
LUNs are intrinsically managed as in the physical layout (ch:0,lun:0, ..., ch:0,lun:n, ch:1,lun:0, ch:1,lun:n, ..., ch:m,lun:0, ch:m,lun:n) and given to the targets based on the target creation ioctl. This simplifies LUN management and clears the path for a partition manager to sit directly underneath LightNVM targets. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index cc210cc85c6d..2222853ef969 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -47,6 +47,7 @@ struct ppa_addr { struct nvm_rq; struct nvm_id; struct nvm_dev; +struct nvm_tgt_dev; typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *); @@ -210,7 +211,6 @@ struct nvm_id { struct nvm_target { struct list_head list; - struct list_head lun_list; struct nvm_tgt_dev *dev; struct nvm_tgt_type *type; struct gendisk *disk; @@ -231,7 +231,7 @@ typedef void (nvm_end_io_fn)(struct nvm_rq *); struct nvm_rq { struct nvm_tgt_instance *ins; - struct nvm_dev *dev; + struct nvm_tgt_dev *dev; struct bio *bio; @@ -266,15 +266,6 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) return rqdata + 1; } -struct nvm_lun { - int id; - - int lun_id; - int chnl_id; - - struct list_head list; -}; - enum { NVM_BLK_ST_FREE = 0x1, /* Free block */ NVM_BLK_ST_TGT = 0x2, /* Block in use by target */ @@ -321,6 +312,9 @@ struct nvm_tgt_dev { /* Device information */ struct nvm_geo geo; + /* Base ppas for target LUNs */ + struct ppa_addr *luns; + sector_t total_secs; struct nvm_id identity; @@ -330,6 +324,7 @@ struct nvm_tgt_dev { struct nvm_dev_ops *ops; void *parent; + void *map; }; struct nvm_dev { @@ -363,16 +358,18 @@ struct nvm_dev { char name[DISK_NAME_LEN]; void *private_data; + void *rmap; + struct mutex mlock; spinlock_t lock; }; static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo, - struct ppa_addr r) + u64 pba) { struct ppa_addr l; int secs, pgs, blks, luns; - sector_t ppa = r.ppa; + sector_t ppa = pba; l.ppa = 0; @@ -465,8 +462,7 @@ static inline int ppa_to_slc(struct nvm_dev *dev, int slc_pg) typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, - struct list_head *lun_list); +typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); typedef void (nvm_tgt_exit_fn)(void *); struct nvm_tgt_type { @@ -499,10 +495,11 @@ typedef void (nvmm_unregister_fn)(struct nvm_dev *); typedef int (nvmm_create_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_create *); typedef int (nvmm_remove_tgt_fn)(struct nvm_dev *, struct nvm_ioctl_remove *); -typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); -typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct ppa_addr *, int); +typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *); +typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int); typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); +typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int); struct nvmm_type { const char *name; @@ -520,6 +517,8 @@ struct nvmm_type { nvmm_get_area_fn *get_area; 
nvmm_put_area_fn *put_area; + nvmm_part_to_tgt_fn *part_to_tgt; + struct list_head list; }; @@ -533,14 +532,18 @@ extern void nvm_unregister(struct nvm_dev *); extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, int type); -extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); +extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); -extern int nvm_erase_blk(struct nvm_dev *, struct ppa_addr *, int); +extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); +extern int nvm_get_l2p_tbl(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, + void *); +extern int nvm_get_area(struct nvm_dev *, sector_t *, sector_t); +extern void nvm_put_area(struct nvm_dev *, sector_t); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); -- cgit v1.2.3 From 959e911b31981b52ed3f3d6e351b107bcb9163ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:11 +0100 Subject: lightnvm: introduce helpers for generic ops in rrpc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid calling media manager and device-specific operations directly from rrpc. Create helper functions on lightnvm's core instead. Signed-off-by: Javier González Made it work with null_blk as well. Signed-off-by: Matias Bjørling Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2222853ef969..99cd1e70e451 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -320,10 +320,7 @@ struct nvm_tgt_dev { struct nvm_id identity; struct request_queue *q; - struct nvmm_type *mt; - struct nvm_dev_ops *ops; - - void *parent; + struct nvm_dev *parent; void *map; }; -- cgit v1.2.3 From a279006afa3377493c4240395c70430f2a9b0d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:12 +0100 Subject: lightnvm: introduce max_phys_sects helper function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Target devices do not have access to the device driver operations. Introduce a helper function that exposes the max. number of physical sectors supported by the underlying device. 
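As a rough usage sketch (not part of this patch; the target-side function name is made up), a target can use the helper to bound how many physical sectors it packs into a single nvm_rq:

	/* Hypothetical target code: reject requests that exceed the
	 * per-command limit of the underlying device.
	 */
	static int my_tgt_check_rq(struct nvm_tgt_dev *dev, struct nvm_rq *rqd)
	{
		if (rqd->nr_ppas > nvm_max_phys_sects(dev))
			return -EINVAL;

		return 0;
	}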
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 99cd1e70e451..96375317b479 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -529,6 +529,7 @@ extern void nvm_unregister(struct nvm_dev *); extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, int nr_ppas, int type); +extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); extern void nvm_addr_to_generic_mode(struct nvm_dev *, struct nvm_rq *); -- cgit v1.2.3 From da2d7cb828ce2714c603827ac5a6e1c98a02e861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:13 +0100 Subject: lightnvm: use target nvm on target-specific ops. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On target-specific operations pass on nvm_tgt_dev instead of the generic nvm device. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 96375317b479..e76f9c4aa49b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -538,10 +538,10 @@ extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); extern int nvm_erase_ppa(struct nvm_dev *, struct ppa_addr *, int, int); extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); -extern int nvm_get_l2p_tbl(struct nvm_dev *, u64, u32, nvm_l2p_update_fn *, +extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); -extern int nvm_get_area(struct nvm_dev *, sector_t *, sector_t); -extern void nvm_put_area(struct nvm_dev *, sector_t); +extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); +extern void nvm_put_area(struct nvm_tgt_dev *, sector_t); extern void nvm_end_io(struct nvm_rq *, int); extern int nvm_submit_ppa(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); -- cgit v1.2.3 From 333ba053d145d6f9152f6b0311a345b876f0fed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Mon, 28 Nov 2016 22:39:14 +0100 Subject: lightnvm: transform target get/set bad block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since targets are given a virtual target device, it is necessary to translate all communication between targets and the backend device. Implement the translation layer for get/set bad block table. 
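As an illustrative sketch (hypothetical target code, not part of this patch, and assuming the NVM_BLK_T_GRWN_BAD block type defined elsewhere in this header), a target can mark a block bad in its own address space and let the core translate to device addressing:

	/* Hypothetical: ppa is target-relative; the translation layer
	 * maps it into the device address space before the bad block
	 * table is updated.
	 */
	static void my_tgt_mark_grown_bad(struct nvm_tgt_dev *dev,
					  struct ppa_addr ppa)
	{
		if (nvm_set_tgt_bb_tbl(dev, &ppa, 1, NVM_BLK_T_GRWN_BAD))
			pr_err("my_tgt: could not mark block bad\n");
	}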
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e76f9c4aa49b..7c273bbc5351 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -496,8 +496,15 @@ typedef int (nvmm_submit_io_fn)(struct nvm_tgt_dev *, struct nvm_rq *); typedef int (nvmm_erase_blk_fn)(struct nvm_tgt_dev *, struct ppa_addr *, int); typedef int (nvmm_get_area_fn)(struct nvm_dev *, sector_t *, sector_t); typedef void (nvmm_put_area_fn)(struct nvm_dev *, sector_t); +typedef struct ppa_addr (nvmm_trans_ppa_fn)(struct nvm_tgt_dev *, + struct ppa_addr, int); typedef void (nvmm_part_to_tgt_fn)(struct nvm_dev *, sector_t*, int); +enum { + TRANS_TGT_TO_DEV = 0x0, + TRANS_DEV_TO_TGT = 0x1, +}; + struct nvmm_type { const char *name; unsigned int version[3]; @@ -514,6 +521,7 @@ struct nvmm_type { nvmm_get_area_fn *get_area; nvmm_put_area_fn *put_area; + nvmm_trans_ppa_fn *trans_ppa; nvmm_part_to_tgt_fn *part_to_tgt; struct list_head list; @@ -526,9 +534,9 @@ extern struct nvm_dev *nvm_alloc_dev(int); extern int nvm_register(struct nvm_dev *); extern void nvm_unregister(struct nvm_dev *); -extern int nvm_set_bb_tbl(struct nvm_dev *dev, struct ppa_addr *ppas, - int nr_ppas, int type); - +extern int nvm_set_bb_tbl(struct nvm_dev *, struct ppa_addr *, int, int); +extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, + int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern void nvm_generic_to_addr_mode(struct nvm_dev *, struct nvm_rq *); @@ -549,6 +557,7 @@ extern int nvm_submit_ppa_list(struct nvm_dev *, struct ppa_addr *, int, int, int, void *, int); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_bb_tbl(struct nvm_dev *, struct ppa_addr, u8 *); +extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); /* sysblk.c */ #define NVM_SYSBLK_MAGIC 0x4E564D53 /* "NVMS" */ -- cgit v1.2.3 From e73c23ff736e1ea371dfa419d7bf8e77ee53044a Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 30 Nov 2016 12:28:58 -0800 Subject: block: add async variant of blkdev_issue_zeroout Similar to __blkdev_issue_discard this variant allows submitting the final bio asynchronously and chaining multiple ranges into a single completion. 
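A minimal usage sketch (the caller below is made up; only the declaration added by this patch is real): zero two discontiguous ranges, chain the bios into a single completion, then wait once:

	static int zero_two_ranges(struct block_device *bdev, gfp_t gfp)
	{
		struct bio *bio = NULL;
		int ret;

		/* each call appends to the same bio chain */
		ret = __blkdev_issue_zeroout(bdev, 0, 8, gfp, &bio, false);
		if (!ret)
			ret = __blkdev_issue_zeroout(bdev, 1024, 8, gfp,
						     &bio, false);
		if (!ret && bio) {
			/* one wait covers all chained ranges */
			ret = submit_bio_wait(bio);
			bio_put(bio);
		}
		return ret;
	}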
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 541fdd8787a5..7e9d8a0895be 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1269,6 +1269,9 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct bio **biop); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); +extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, + bool discard); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, bool discard); static inline int sb_issue_discard(struct super_block *sb, sector_t block, -- cgit v1.2.3 From a6f0788ec2881ac14e97ff7fa6a78a807f87b5ba Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 30 Nov 2016 12:28:59 -0800 Subject: block: add support for REQ_OP_WRITE_ZEROES This adds a new block layer operation to zero out a range of LBAs. This allows zeroing to be implemented for devices that don't use either discard with a predictable zero pattern or WRITE SAME of zeroes. The prominent example of that is NVMe with the Write Zeroes command, but in the future, this should also help with improving the way zeroing discards work. For this operation, a suitable entry is exported in sysfs indicating the maximum number of bytes allowed in one write zeroes operation by the device. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 25 ++++++++++++++----------- include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 19 +++++++++++++++++++ 3 files changed, 35 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 70a7244f08a7..b15323934a29 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -76,7 +76,8 @@ static inline bool bio_has_data(struct bio *bio) if (bio && bio->bi_iter.bi_size && bio_op(bio) != REQ_OP_DISCARD && - bio_op(bio) != REQ_OP_SECURE_ERASE) + bio_op(bio) != REQ_OP_SECURE_ERASE && + bio_op(bio) != REQ_OP_WRITE_ZEROES) return true; return false; @@ -86,7 +87,8 @@ static inline bool bio_no_advance_iter(struct bio *bio) { return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE || - bio_op(bio) == REQ_OP_WRITE_SAME; + bio_op(bio) == REQ_OP_WRITE_SAME || + bio_op(bio) == REQ_OP_WRITE_ZEROES; } static inline bool bio_mergeable(struct bio *bio) @@ -188,18 +190,19 @@ static inline unsigned bio_segments(struct bio *bio) struct bvec_iter iter; /* - * We special case discard/write same, because they interpret bi_size - * differently: + * We special case discard/write same/write zeroes, because they + * interpret bi_size differently: */ - if (bio_op(bio) == REQ_OP_DISCARD) - return 1; - - if (bio_op(bio) == REQ_OP_SECURE_ERASE) - return 1; - - if (bio_op(bio) == REQ_OP_WRITE_SAME) + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + case REQ_OP_WRITE_SAME: + case REQ_OP_WRITE_ZEROES: return 1; + default: + break; + } bio_for_each_segment(bv, bio, iter) segs++; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index f57458a6a93b..519ea2c9df61 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -159,6 +159,8 @@ enum
req_opf { REQ_OP_ZONE_RESET = 6, /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, + /* write the zero filled sector many times */ + REQ_OP_WRITE_ZEROES = 8, REQ_OP_LAST, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7e9d8a0895be..ebeef2b79c5a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -323,6 +323,7 @@ struct queue_limits { unsigned int max_discard_sectors; unsigned int max_hw_discard_sectors; unsigned int max_write_same_sectors; + unsigned int max_write_zeroes_sectors; unsigned int discard_granularity; unsigned int discard_alignment; @@ -774,6 +775,9 @@ static inline bool rq_mergeable(struct request *rq) if (req_op(rq) == REQ_OP_FLUSH) return false; + if (req_op(rq) == REQ_OP_WRITE_ZEROES) + return false; + if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; if (rq->rq_flags & RQF_NOMERGE_FLAGS) @@ -1004,6 +1008,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, if (unlikely(op == REQ_OP_WRITE_SAME)) return q->limits.max_write_same_sectors; + if (unlikely(op == REQ_OP_WRITE_ZEROES)) + return q->limits.max_write_zeroes_sectors; + return q->limits.max_sectors; } @@ -1107,6 +1114,8 @@ extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_same_sectors(struct request_queue *q, unsigned int max_write_same_sectors); +extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, + unsigned int max_write_same_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, @@ -1475,6 +1484,16 @@ static inline unsigned int bdev_write_same(struct block_device *bdev) return 0; } +static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + if (q) + return q->limits.max_write_zeroes_sectors; + + return 0; +} + static inline enum blk_zoned_model bdev_zoned_model(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -- cgit v1.2.3 From 3b7c33b28a44d4621e365e090bf8bd332ab232a1 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 30 Nov 2016 12:29:00 -0800 Subject: nvme.h: add Write Zeroes definitions Add the command structure, optional command set support (ONCS) bit and a new error code for the Write Zeroes command. 
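As a sketch of how a driver might fill the new structure (the helper name is hypothetical, and the opcode value 0x08 and the 0's-based block count come from the NVMe specification, not from this patch):

	static void my_setup_write_zeroes(struct nvme_command *c, u32 nsid,
					  u64 slba, u16 nr_blocks)
	{
		memset(c, 0, sizeof(*c));
		c->write_zeroes.opcode = 0x08;	/* Write Zeroes, per spec */
		c->write_zeroes.nsid = cpu_to_le32(nsid);
		c->write_zeroes.slba = cpu_to_le64(slba);
		/* the NLB field is 0's based */
		c->write_zeroes.length = cpu_to_le16(nr_blocks - 1);
	}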
Signed-off-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 18ce9f7cc881..0df9466a7c38 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -237,6 +237,7 @@ enum { NVME_CTRL_ONCS_COMPARE = 1 << 0, NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, }; @@ -543,6 +544,23 @@ struct nvme_dsm_range { __le64 slba; }; +struct nvme_write_zeroes_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + union nvme_data_ptr dptr; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -839,6 +857,7 @@ struct nvme_command { struct nvme_download_firmware dlfw; struct nvme_format_cmd format; struct nvme_dsm_cmd dsm; + struct nvme_write_zeroes_cmd write_zeroes; struct nvme_abort_cmd abort; struct nvme_get_log_page_command get_log_page; struct nvmf_common_command fabrics; @@ -918,6 +937,7 @@ enum { NVME_SC_BAD_ATTRIBUTES = 0x180, NVME_SC_INVALID_PI = 0x181, NVME_SC_READ_ONLY = 0x182, + NVME_SC_ONCS_NOT_SUPPORTED = 0x183, /* * I/O Command Set Specific - Fabrics commands: -- cgit v1.2.3 From a317178e36b52f5f24ad226a77403eeea5ac05c4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Oct 2016 23:51:54 +0300 Subject: parser: add u64 number parser Will be used by the nvme-fabrics FC transport in parsing options Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- include/linux/parser.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/parser.h b/include/linux/parser.h index 39d5b7955b23..884c1e6eb3fe 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -27,6 +27,7 @@ typedef struct { int match_token(char *, const match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); +int match_u64(substring_t *, u64 *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); bool match_wildcard(const char *pattern, const char *str); -- cgit v1.2.3 From cba3bdfd2e89edd706e2c40dfad914aca663b6ac Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 2 Dec 2016 00:28:39 -0800 Subject: nvme-fabrics: Add FC transport error codes to nvme.h Signed-off-by: James Smart Reviewed-by: Christoph Hellwig Reviewed-by: Jay Freyensee Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 0df9466a7c38..5ac1f57226f4 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -963,6 +963,19 @@ enum { NVME_SC_ACCESS_DENIED = 0x286, NVME_SC_DNR = 0x4000, + + + /* + * FC Transport-specific error status values for NVME commands + * + * Transport-specific status code values must be in the range 0xB0..0xBF + */ + + /* Generic FC failure - catchall */ + NVME_SC_FC_TRANSPORT_ERROR = 0x00B0, + + /* I/O failure due to FC ABTS'd */ + NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1, }; struct nvme_completion { -- cgit v1.2.3 From b1ad1475b447a7668ac8bfad77277c4405941883 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 2 Dec 2016 00:28:40 -0800 Subject: nvme-fabrics: Add FC 
transport FC-NVME definitions - Formats for Cmd, Data, Rsp IUs - Formats FC-4 LS definitions - Add to MAINTAINERS file Signed-off-by: James Smart Reviewed-by: Christoph Hellwig Reviewed-by: Jay Freyensee Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc.h | 268 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 include/linux/nvme-fc.h (limited to 'include/linux') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h new file mode 100644 index 000000000000..4b45226bd604 --- /dev/null +++ b/include/linux/nvme-fc.h @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2016 Avago Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful. + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, + * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO + * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. + * See the GNU General Public License for more details, a copy of which + * can be found in the file COPYING included with this package + * + */ + +/* + * This file contains definitions relative to FC-NVME r1.11 and a few + * newer items + */ + +#ifndef _NVME_FC_H +#define _NVME_FC_H 1 + + +#define NVME_CMD_SCSI_ID 0xFD +#define NVME_CMD_FC_ID FC_TYPE_NVME + +/* FC-NVME Cmd IU Flags */ +#define FCNVME_CMD_FLAGS_DIRMASK 0x03 +#define FCNVME_CMD_FLAGS_WRITE 0x01 +#define FCNVME_CMD_FLAGS_READ 0x02 + +struct nvme_fc_cmd_iu { + __u8 scsi_id; + __u8 fc_id; + __be16 iu_len; + __u8 rsvd4[3]; + __u8 flags; + __be64 connection_id; + __be32 csn; + __be32 data_len; + struct nvme_command sqe; + __be32 rsvd88[2]; +}; + +#define NVME_FC_SIZEOF_ZEROS_RSP 12 + +struct nvme_fc_ersp_iu { + __u8 rsvd0[2]; + __be16 iu_len; + __be32 rsn; + __be32 xfrd_len; + __be32 rsvd12; + struct nvme_completion cqe; + /* for now - no additional payload */ +}; + + +/* FC-NVME r1.03/16-119v0 NVME Link Services */ +enum { + FCNVME_LS_RSVD = 0, + FCNVME_LS_RJT = 1, + FCNVME_LS_ACC = 2, + FCNVME_LS_CREATE_ASSOCIATION = 3, + FCNVME_LS_CREATE_CONNECTION = 4, + FCNVME_LS_DISCONNECT = 5, +}; + +/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */ +enum { + FCNVME_LSDESC_RSVD = 0x0, + FCNVME_LSDESC_RQST = 0x1, + FCNVME_LSDESC_RJT = 0x2, + FCNVME_LSDESC_CREATE_ASSOC_CMD = 0x3, + FCNVME_LSDESC_CREATE_CONN_CMD = 0x4, + FCNVME_LSDESC_DISCONN_CMD = 0x5, + FCNVME_LSDESC_CONN_ID = 0x6, + FCNVME_LSDESC_ASSOC_ID = 0x7, +}; + + +/* ********** start of Link Service Descriptors ********** */ + + +/* + * fills in length of a descriptor. Structure minus descriptor header + */ +static inline __be32 fcnvme_lsdesc_len(size_t sz) +{ + return cpu_to_be32(sz - (2 * sizeof(u32))); +} + + +struct fcnvme_ls_rqst_w0 { + u8 ls_cmd; /* FCNVME_LS_xxx */ + u8 zeros[3]; +}; + +/* FCNVME_LSDESC_RQST */ +struct fcnvme_lsdesc_rqst { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + struct fcnvme_ls_rqst_w0 w0; + __be32 rsvd12; +}; + + + + +/* FCNVME_LSDESC_RJT */ +struct fcnvme_lsdesc_rjt { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + u8 rsvd8; + + /* + * Reject reason and explanation codes are generic + * to ELS's from LS-3.
+ */ + u8 reason_code; + u8 reason_explanation; + + u8 vendor; + __be32 rsvd12; +}; + + +#define FCNVME_ASSOC_HOSTID_LEN 64 +#define FCNVME_ASSOC_HOSTNQN_LEN 256 +#define FCNVME_ASSOC_SUBNQN_LEN 256 + +/* FCNVME_LSDESC_CREATE_ASSOC_CMD */ +struct fcnvme_lsdesc_cr_assoc_cmd { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + __be16 ersp_ratio; + __be16 rsvd10; + __be32 rsvd12[9]; + __be16 cntlid; + __be16 sqsize; + __be32 rsvd52; + u8 hostid[FCNVME_ASSOC_HOSTID_LEN]; + u8 hostnqn[FCNVME_ASSOC_HOSTNQN_LEN]; + u8 subnqn[FCNVME_ASSOC_SUBNQN_LEN]; + u8 rsvd632[384]; +}; + +/* FCNVME_LSDESC_CREATE_CONN_CMD */ +struct fcnvme_lsdesc_cr_conn_cmd { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + __be16 ersp_ratio; + __be16 rsvd10; + __be32 rsvd12[9]; + __be16 qid; + __be16 sqsize; + __be32 rsvd52; +}; + +/* Disconnect Scope Values */ +enum { + FCNVME_DISCONN_ASSOCIATION = 0, + FCNVME_DISCONN_CONNECTION = 1, +}; + +/* FCNVME_LSDESC_DISCONN_CMD */ +struct fcnvme_lsdesc_disconn_cmd { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + u8 rsvd8[3]; + /* note: scope is really a 1 bit field */ + u8 scope; /* FCNVME_DISCONN_xxx */ + __be32 rsvd12; + __be64 id; +}; + +/* FCNVME_LSDESC_CONN_ID */ +struct fcnvme_lsdesc_conn_id { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + __be64 connection_id; +}; + +/* FCNVME_LSDESC_ASSOC_ID */ +struct fcnvme_lsdesc_assoc_id { + __be32 desc_tag; /* FCNVME_LSDESC_xxx */ + __be32 desc_len; + __be64 association_id; +}; + +/* r_ctl values */ +enum { + FCNVME_RS_RCTL_DATA = 1, + FCNVME_RS_RCTL_XFER_RDY = 5, + FCNVME_RS_RCTL_RSP = 8, +}; + + +/* ********** start of Link Services ********** */ + + +/* FCNVME_LS_RJT */ +struct fcnvme_ls_rjt { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_rqst rqst; + struct fcnvme_lsdesc_rjt rjt; +}; + +/* FCNVME_LS_ACC */ +struct fcnvme_ls_acc_hdr { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_rqst rqst; + /* Followed by cmd-specific ACC descriptors, see next definitions */ +}; + +/* FCNVME_LS_CREATE_ASSOCIATION */ +struct fcnvme_ls_cr_assoc_rqst { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_cr_assoc_cmd assoc_cmd; +}; + +struct fcnvme_ls_cr_assoc_acc { + struct fcnvme_ls_acc_hdr hdr; + struct fcnvme_lsdesc_assoc_id associd; + struct fcnvme_lsdesc_conn_id connectid; +}; + + +/* FCNVME_LS_CREATE_CONNECTION */ +struct fcnvme_ls_cr_conn_rqst { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_assoc_id associd; + struct fcnvme_lsdesc_cr_conn_cmd connect_cmd; +}; + +struct fcnvme_ls_cr_conn_acc { + struct fcnvme_ls_acc_hdr hdr; + struct fcnvme_lsdesc_conn_id connectid; +}; + +/* FCNVME_LS_DISCONNECT */ +struct fcnvme_ls_disconnect_rqst { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_assoc_id associd; + struct fcnvme_lsdesc_disconn_cmd discon_cmd; +}; + +struct fcnvme_ls_disconnect_acc { + struct fcnvme_ls_acc_hdr hdr; +}; + + +/* + * Yet to be defined in FC-NVME: + */ +#define NVME_FC_CONNECT_TIMEOUT_SEC 2 /* 2 seconds */ +#define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */ +#define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */ + + +#endif /* _NVME_FC_H */ -- cgit v1.2.3 From d6d20012e116904065d192be6146040c99c03c3c Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 2 Dec 2016 00:28:41 -0800 Subject: nvme-fabrics: Add FC transport LLDD api definitions Host: - LLDD registration with the host transport - registering host ports (local ports) 
and target ports seen on fabric (remote ports) - Data structures and call points for FC-4 LS's and FCP IO requests Target: - LLDD registration with the target transport - registering nvme subsystem ports (target ports) - Data structures and call points for reception of FC-4 LS's and FCP IO requests, and callbacks to perform data and rsp transfers for the io. Add to MAINTAINERS file Signed-off-by: James Smart Reviewed-by: Christoph Hellwig Reviewed-by: Jay Freyensee Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 851 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 851 insertions(+) create mode 100644 include/linux/nvme-fc-driver.h (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h new file mode 100644 index 000000000000..f21471f7ee40 --- /dev/null +++ b/include/linux/nvme-fc-driver.h @@ -0,0 +1,851 @@ +/* + * Copyright (c) 2016, Avago Technologies + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _NVME_FC_DRIVER_H +#define _NVME_FC_DRIVER_H 1 + + +/* + * ********************** LLDD FC-NVME Host API ******************** + * + * For FC LLDD's that are the NVME Host role. + * + * ****************************************************************** + */ + + + +/* FC Port role bitmask - can merge with FC Port Roles in fc transport */ +#define FC_PORT_ROLE_NVME_INITIATOR 0x10 +#define FC_PORT_ROLE_NVME_TARGET 0x11 +#define FC_PORT_ROLE_NVME_DISCOVERY 0x12 + + +/** + * struct nvme_fc_port_info - port-specific ids and FC connection-specific + * data element used during NVME Host role + * registrations + * + * Static fields describing the port being registered: + * @node_name: FC WWNN for the port + * @port_name: FC WWPN for the port + * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx) + * + * Initialization values for dynamic port fields: + * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must + * be set to 0. + */ +struct nvme_fc_port_info { + u64 node_name; + u64 port_name; + u32 port_role; + u32 port_id; +}; + + +/** + * struct nvmefc_ls_req - Request structure passed from NVME-FC transport + * to LLDD in order to perform a NVME FC-4 LS + * request and obtain a response. + * + * Values set by the NVME-FC layer prior to calling the LLDD ls_req + * entrypoint. + * @rqstaddr: pointer to request buffer + * @rqstdma: PCI DMA address of request buffer + * @rqstlen: Length, in bytes, of request buffer + * @rspaddr: pointer to response buffer + * @rspdma: PCI DMA address of response buffer + * @rsplen: Length, in bytes, of response buffer + * @timeout: Maximum amount of time, in seconds, to wait for the LS response. + * If timeout exceeded, LLDD to abort LS exchange and complete + * LS request with error status. + * @private: pointer to memory allocated alongside the ls request structure + * that is specifically for the LLDD to use while processing the + * request. The length of the buffer corresponds to the + * lsrqst_priv_sz value specified in the nvme_fc_port_template + * supplied by the LLDD. 
+ * @done: The callback routine the LLDD is to invoke upon completion of + * the LS request. req argument is the pointer to the original LS + * request structure. Status argument must be 0 upon success, a + * negative errno on failure (example: -ENXIO). + */ +struct nvmefc_ls_req { + void *rqstaddr; + dma_addr_t rqstdma; + u32 rqstlen; + void *rspaddr; + dma_addr_t rspdma; + u32 rsplen; + u32 timeout; + + void *private; + + void (*done)(struct nvmefc_ls_req *req, int status); + +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + + +enum nvmefc_fcp_datadir { + NVMEFC_FCP_NODATA, /* payload_length and sg_cnt will be zero */ + NVMEFC_FCP_WRITE, + NVMEFC_FCP_READ, +}; + + +#define NVME_FC_MAX_SEGMENTS 256 + +/** + * struct nvmefc_fcp_req - Request structure passed from NVME-FC transport + * to LLDD in order to perform a NVME FCP IO operation. + * + * Values set by the NVME-FC layer prior to calling the LLDD fcp_io + * entrypoint. + * @cmdaddr: pointer to the FCP CMD IU buffer + * @rspaddr: pointer to the FCP RSP IU buffer + * @cmddma: PCI DMA address of the FCP CMD IU buffer + * @rspdma: PCI DMA address of the FCP RSP IU buffer + * @cmdlen: Length, in bytes, of the FCP CMD IU buffer + * @rsplen: Length, in bytes, of the FCP RSP IU buffer + * @payload_length: Length of DATA_IN or DATA_OUT payload data to transfer + * @sg_table: scatter/gather structure for payload data + * @first_sgl: memory for 1st scatter/gather list segment for payload data + * @sg_cnt: number of elements in the scatter/gather list + * @io_dir: direction of the FCP request (see NVMEFC_FCP_xxx) + * @sqid: The nvme SQID the command is being issued on + * @done: The callback routine the LLDD is to invoke upon completion of + * the FCP operation. req argument is the pointer to the original + * FCP IO operation. + * @private: pointer to memory allocated alongside the FCP operation + * request structure that is specifically for the LLDD to use + * while processing the operation. The length of the buffer + * corresponds to the fcprqst_priv_sz value specified in the + * nvme_fc_port_template supplied by the LLDD. + * + * Values set by the LLDD indicating completion status of the FCP operation. + * Must be set prior to calling the done() callback. + * @transferred_length: amount of payload data, in bytes, that were + * transferred. Should equal payload_length on success. + * @rcv_rsplen: length, in bytes, of the FCP RSP IU received. + * @status: Completion status of the FCP operation. must be 0 upon success, + * NVME_SC_FC_xxx value upon failure. Note: this is NOT a + * reflection of the NVME CQE completion status. Only the status + * of the FCP operation at the NVME-FC level. + */ +struct nvmefc_fcp_req { + void *cmdaddr; + void *rspaddr; + dma_addr_t cmddma; + dma_addr_t rspdma; + u16 cmdlen; + u16 rsplen; + + u32 payload_length; + struct sg_table sg_table; + struct scatterlist *first_sgl; + int sg_cnt; + enum nvmefc_fcp_datadir io_dir; + + __le16 sqid; + + void (*done)(struct nvmefc_fcp_req *req); + + void *private; + + u32 transferred_length; + u16 rcv_rsplen; + u32 status; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + + +/* + * Direct copy of fc_port_state enum. 
For later merging + */ +enum nvme_fc_obj_state { + FC_OBJSTATE_UNKNOWN, + FC_OBJSTATE_NOTPRESENT, + FC_OBJSTATE_ONLINE, + FC_OBJSTATE_OFFLINE, /* User has taken Port Offline */ + FC_OBJSTATE_BLOCKED, + FC_OBJSTATE_BYPASSED, + FC_OBJSTATE_DIAGNOSTICS, + FC_OBJSTATE_LINKDOWN, + FC_OBJSTATE_ERROR, + FC_OBJSTATE_LOOPBACK, + FC_OBJSTATE_DELETED, +}; + + +/** + * struct nvme_fc_local_port - structure used between NVME-FC transport and + * a LLDD to reference a local NVME host port. + * Allocated/created by the nvme_fc_register_localport() + * transport interface. + * + * Fields with static values for the port. Initialized by the + * port_info struct supplied to the registration call. + * @port_num: NVME-FC transport host port number + * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx) + * @node_name: FC WWNN for the port + * @port_name: FC WWPN for the port + * @private: pointer to memory allocated alongside the local port + * structure that is specifically for the LLDD to use. + * The length of the buffer corresponds to the local_priv_sz + * value specified in the nvme_fc_port_template supplied by + * the LLDD. + * + * Fields with dynamic values. Values may change based on link state. LLDD + * may reference fields directly to change them. Initialized by the + * port_info struct supplied to the registration call. + * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must + * be set to 0. + * @port_state: Operational state of the port. + */ +struct nvme_fc_local_port { + /* static/read-only fields */ + u32 port_num; + u32 port_role; + u64 node_name; + u64 port_name; + + void *private; + + /* dynamic fields */ + u32 port_id; + enum nvme_fc_obj_state port_state; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + + +/** + * struct nvme_fc_remote_port - structure used between NVME-FC transport and + * a LLDD to reference a remote NVME subsystem port. + * Allocated/created by the nvme_fc_register_remoteport() + * transport interface. + * + * Fields with static values for the port. Initialized by the + * port_info struct supplied to the registration call. + * @port_num: NVME-FC transport remote subsystem port number + * @port_role: NVME roles are supported on the port (see FC_PORT_ROLE_xxx) + * @node_name: FC WWNN for the port + * @port_name: FC WWPN for the port + * @localport: pointer to the NVME-FC local host port the subsystem is + * connected to. + * @private: pointer to memory allocated alongside the remote port + * structure that is specifically for the LLDD to use. + * The length of the buffer corresponds to the remote_priv_sz + * value specified in the nvme_fc_port_template supplied by + * the LLDD. + * + * Fields with dynamic values. Values may change based on link or login + * state. LLDD may reference fields directly to change them. Initialized by + * the port_info struct supplied to the registration call. + * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must + * be set to 0. + * @port_state: Operational state of the remote port. Valid values are + * ONLINE or UNKNOWN.
+ */ +struct nvme_fc_remote_port { + /* static fields */ + u32 port_num; + u32 port_role; + u64 node_name; + u64 port_name; + + struct nvme_fc_local_port *localport; + + void *private; + + /* dynamic fields */ + u32 port_id; + enum nvme_fc_obj_state port_state; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + + +/** + * struct nvme_fc_port_template - structure containing static entrypoints and + * operational parameters for an LLDD that supports NVME host + * behavior. Passed by reference in port registrations. + * NVME-FC transport remembers template reference and may + * access it during runtime operation. + * + * Host/Initiator Transport Entrypoints/Parameters: + * + * @localport_delete: The LLDD initiates deletion of a localport via + * nvme_fc_deregister_localport(). However, the teardown is + * asynchronous. This routine is called upon the completion of the + * teardown to inform the LLDD that the localport has been deleted. + * Entrypoint is Mandatory. + * + * @remoteport_delete: The LLDD initiates deletion of a remoteport via + * nvme_fc_deregister_remoteport(). However, the teardown is + * asynchronous. This routine is called upon the completion of the + * teardown to inform the LLDD that the remoteport has been deleted. + * Entrypoint is Mandatory. + * + * @create_queue: Upon creating a host<->controller association, queues are + * created such that they can be affinitized to cpus/cores. This + * callback into the LLDD notifies it that a controller queue is being + * created. The LLDD may choose to allocate an associated hw queue + * or map it onto a shared hw queue. Upon return from the call, the + * LLDD specifies a handle that will be given back to it for any + * command that is posted to the controller queue. The handle can + * be used by the LLDD to map quickly to the proper hw queue for + * command execution. The mask of cpu's that will map to this queue + * at the block-level is also passed in. The LLDD should use the + * queue id and/or cpu masks to ensure proper affinitization of the + * controller queue to the hw queue. + * Entrypoint is Optional. + * + * @delete_queue: This is the inverse of the create_queue. During + * host<->controller association teardown, this routine is called + * when a controller queue is being terminated. Any association with + * a hw queue should be terminated. If there is a unique hw queue, the + * hw queue should be torn down. + * Entrypoint is Optional. + * + * @poll_queue: Called to poll for the completion of an io on a blk queue. + * Entrypoint is Optional. + * + * @ls_req: Called to issue a FC-NVME FC-4 LS service request. + * The nvme_fc_ls_req structure will fully describe the buffers for + * the request payload and where to place the response payload. The + * LLDD is to allocate an exchange, issue the LS request, obtain the + * LS response, and call the "done" routine specified in the request + * structure (argument to done is the ls request structure itself). + * Entrypoint is Mandatory. + * + * @fcp_io: called to issue a FC-NVME I/O request. The I/O may be for + * an admin queue or an i/o queue. The nvmefc_fcp_req structure will + * fully describe the io: the buffer containing the FC-NVME CMD IU + * (which contains the SQE), the sg list for the payload if applicable, + * and the buffer to place the FC-NVME RSP IU into.
The LLDD will + * complete the i/o, indicating the amount of data transferred or + * any transport error, and call the "done" routine specified in the + * request structure (argument to done is the fcp request structure + * itself). + * Entrypoint is Mandatory. + * + * @ls_abort: called to request the LLDD to abort the indicated ls request. + * The call may return before the abort has completed. After aborting + * the request, the LLDD must still call the ls request done routine + * indicating an FC transport Aborted status. + * Entrypoint is Mandatory. + * + * @fcp_abort: called to request the LLDD to abort the indicated fcp request. + * The call may return before the abort has completed. After aborting + * the request, the LLDD must still call the fcp request done routine + * indicating an FC transport Aborted status. + * Entrypoint is Mandatory. + * + * @max_hw_queues: indicates the maximum number of hw queues the LLDD + * supports for cpu affinitization. + * Value is Mandatory. Must be at least 1. + * + * @max_sgl_segments: indicates the maximum number of sgl segments supported + * by the LLDD + * Value is Mandatory. Must be at least 1. Recommend at least 256. + * + * @max_dif_sgl_segments: indicates the maximum number of sgl segments + * supported by the LLDD for DIF operations. + * Value is Mandatory. Must be at least 1. Recommend at least 256. + * + * @dma_boundary: indicates the dma address boundary where dma mappings + * will be split across. + * Value is Mandatory. Typical value is 0xFFFFFFFF to split across + * 4Gig address boundaries + * + * @local_priv_sz: The LLDD sets this field to the amount of additional + * memory that it would like the fc nvme layer to allocate on the LLDD's + * behalf whenever a localport is allocated. The additional memory + * area is solely for the use of the LLDD and its location is specified by + * the localport->private pointer. + * Value is Mandatory. Allowed to be zero. + * + * @remote_priv_sz: The LLDD sets this field to the amount of additional + * memory that it would like the fc nvme layer to allocate on the LLDD's + * behalf whenever a remoteport is allocated. The additional memory + * area is solely for the use of the LLDD and its location is specified by + * the remoteport->private pointer. + * Value is Mandatory. Allowed to be zero. + * + * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional + * memory that it would like the fc nvme layer to allocate on the LLDD's + * behalf whenever a ls request structure is allocated. The additional + * memory area is solely for the use of the LLDD and its location is + * specified by the ls_request->private pointer. + * Value is Mandatory. Allowed to be zero. + * + * @fcprqst_priv_sz: The LLDD sets this field to the amount of additional + * memory that it would like the fc nvme layer to allocate on the LLDD's + * behalf whenever a fcp request structure is allocated. The additional + * memory area is solely for the use of the LLDD and its location is + * specified by the fcp_request->private pointer. + * Value is Mandatory. Allowed to be zero.
+ */ +struct nvme_fc_port_template { + /* initiator-based functions */ + void (*localport_delete)(struct nvme_fc_local_port *); + void (*remoteport_delete)(struct nvme_fc_remote_port *); + int (*create_queue)(struct nvme_fc_local_port *, + unsigned int qidx, u16 qsize, + void **handle); + void (*delete_queue)(struct nvme_fc_local_port *, + unsigned int qidx, void *handle); + void (*poll_queue)(struct nvme_fc_local_port *, void *handle); + int (*ls_req)(struct nvme_fc_local_port *, + struct nvme_fc_remote_port *, + struct nvmefc_ls_req *); + int (*fcp_io)(struct nvme_fc_local_port *, + struct nvme_fc_remote_port *, + void *hw_queue_handle, + struct nvmefc_fcp_req *); + void (*ls_abort)(struct nvme_fc_local_port *, + struct nvme_fc_remote_port *, + struct nvmefc_ls_req *); + void (*fcp_abort)(struct nvme_fc_local_port *, + struct nvme_fc_remote_port *, + void *hw_queue_handle, + struct nvmefc_fcp_req *); + + u32 max_hw_queues; + u16 max_sgl_segments; + u16 max_dif_sgl_segments; + u64 dma_boundary; + + /* sizes of additional private data for data structures */ + u32 local_priv_sz; + u32 remote_priv_sz; + u32 lsrqst_priv_sz; + u32 fcprqst_priv_sz; +}; + + +/* + * Initiator/Host functions + */ + +int nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, + struct nvme_fc_port_template *template, + struct device *dev, + struct nvme_fc_local_port **lport_p); + +int nvme_fc_unregister_localport(struct nvme_fc_local_port *localport); + +int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, + struct nvme_fc_port_info *pinfo, + struct nvme_fc_remote_port **rport_p); + +int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport); + + + +/* + * *************** LLDD FC-NVME Target/Subsystem API *************** + * + * For FC LLDD's that are the NVME Subsystem role + * + * ****************************************************************** + */ + +/** + * struct nvmet_fc_port_info - port-specific ids and FC connection-specific + * data element used during NVME Subsystem role + * registrations + * + * Static fields describing the port being registered: + * @node_name: FC WWNN for the port + * @port_name: FC WWPN for the port + * + * Initialization values for dynamic port fields: + * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must + * be set to 0. + */ +struct nvmet_fc_port_info { + u64 node_name; + u64 port_name; + u32 port_id; +}; + + +/** + * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC + * layer to represent the exchange context for + * a FC-NVME Link Service (LS). + * + * The structure is allocated by the LLDD whenever a LS Request is received + * from the FC link. The address of the structure is passed to the nvmet-fc + * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure + * will be passed back to the LLDD when the response is to be transmit. + * The LLDD is to use the address to map back to the LLDD exchange structure + * which maintains information such as the targetport the LS was received + * on, the remote FC NVME initiator that sent the LS, and any FC exchange + * context. Upon completion of the LS response transmit, the address of the + * structure will be passed back to the LS rsp done() routine, allowing the + * nvmet-fc layer to release dma resources. Upon completion of the done() + * routine, no further access will be made by the nvmet-fc layer and the + * LLDD can de-allocate the structure. 
+ * + * Field initialization: + * At the time of the nvmet_fc_rcv_ls_req() call, there is no content that + * is valid in the structure. + * + * When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc + * layer will fully set the fields in order to specify the response + * payload buffer and its length as well as the done routine to be called + * upon compeletion of the transmit. The nvmet-fc layer will also set a + * private pointer for its own use in the done routine. + * + * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp + * entrypoint. + * @rspbuf: pointer to the LS response buffer + * @rspdma: PCI DMA address of the LS response buffer + * @rsplen: Length, in bytes, of the LS response buffer + * @done: The callback routine the LLDD is to invoke upon completion of + * transmitting the LS response. req argument is the pointer to + * the original ls request. + * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used + * as part of the NVMET-FC processing. The LLDD is not to access + * this pointer. + */ +struct nvmefc_tgt_ls_req { + void *rspbuf; + dma_addr_t rspdma; + u16 rsplen; + + void (*done)(struct nvmefc_tgt_ls_req *req); + void *nvmet_fc_private; /* LLDD is not to access !! */ +}; + +/* Operations that NVME-FC layer may request the LLDD to perform for FCP */ +enum { + NVMET_FCOP_READDATA = 1, /* xmt data to initiator */ + NVMET_FCOP_WRITEDATA = 2, /* xmt data from initiator */ + NVMET_FCOP_READDATA_RSP = 3, /* xmt data to initiator and send + * rsp as well + */ + NVMET_FCOP_RSP = 4, /* send rsp frame */ + NVMET_FCOP_ABORT = 5, /* abort exchange via ABTS */ + NVMET_FCOP_BA_ACC = 6, /* send BA_ACC */ + NVMET_FCOP_BA_RJT = 7, /* send BA_RJT */ +}; + +/** + * struct nvmefc_tgt_fcp_req - Structure used between LLDD and NVMET-FC + * layer to represent the exchange context and + * the specific FC-NVME IU operation(s) to perform + * for a FC-NVME FCP IO. + * + * Structure used between LLDD and nvmet-fc layer to represent the exchange + * context for a FC-NVME FCP I/O operation (e.g. a nvme sqe, the sqe-related + * memory transfers, and its assocated cqe transfer). + * + * The structure is allocated by the LLDD whenever a FCP CMD IU is received + * from the FC link. The address of the structure is passed to the nvmet-fc + * layer via the nvmet_fc_rcv_fcp_req() call. The address of the structure + * will be passed back to the LLDD for the data operations and transmit of + * the response. The LLDD is to use the address to map back to the LLDD + * exchange structure which maintains information such as the targetport + * the FCP I/O was received on, the remote FC NVME initiator that sent the + * FCP I/O, and any FC exchange context. Upon completion of the FCP target + * operation, the address of the structure will be passed back to the FCP + * op done() routine, allowing the nvmet-fc layer to release dma resources. + * Upon completion of the done() routine for either RSP or ABORT ops, no + * further access will be made by the nvmet-fc layer and the LLDD can + * de-allocate the structure. + * + * Field initialization: + * At the time of the nvmet_fc_rcv_fcp_req() call, there is no content that + * is valid in the structure. + * + * When the structure is used for an FCP target operation, the nvmet-fc + * layer will fully set the fields in order to specify the scattergather + * list, the transfer length, as well as the done routine to be called + * upon compeletion of the operation. 
The nvmet-fc layer will also set a + * private pointer for its own use in the done routine. + * + * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !! + * + * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op + * entrypoint. + * @op: Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx) + * @hwqid: Specifies the hw queue index (0..N-1, where N is the + * max_hw_queues value from the LLD's nvmet_fc_target_template) + * that the operation is to use. + * @offset: Indicates the DATA_OUT/DATA_IN payload offset to be tranferred. + * Field is only valid on WRITEDATA, READDATA, or READDATA_RSP ops. + * @timeout: amount of time, in seconds, to wait for a response from the NVME + * host. A value of 0 is an infinite wait. + * Valid only for the following ops: + * WRITEDATA: caps the wait for data reception + * READDATA_RSP & RSP: caps wait for FCP_CONF reception (if used) + * @transfer_length: the length, in bytes, of the DATA_OUT or DATA_IN payload + * that is to be transferred. + * Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops. + * @ba_rjt: Contains the BA_RJT payload that is to be transferred. + * Valid only for the NVMET_FCOP_BA_RJT op. + * @sg: Scatter/gather list for the DATA_OUT/DATA_IN payload data. + * Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops. + * @sg_cnt: Number of valid entries in the scatter/gather list. + * Valid only for the WRITEDATA, READDATA, or READDATA_RSP ops. + * @rspaddr: pointer to the FCP RSP IU buffer to be transmit + * Used by RSP and READDATA_RSP ops + * @rspdma: PCI DMA address of the FCP RSP IU buffer + * Used by RSP and READDATA_RSP ops + * @rsplen: Length, in bytes, of the FCP RSP IU buffer + * Used by RSP and READDATA_RSP ops + * @done: The callback routine the LLDD is to invoke upon completion of + * the operation. req argument is the pointer to the original + * FCP subsystem op request. + * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used + * as part of the NVMET-FC processing. The LLDD is not to + * reference this field. + * + * Values set by the LLDD indicating completion status of the FCP operation. + * Must be set prior to calling the done() callback. + * @transferred_length: amount of DATA_OUT payload data received by a + * a WRITEDATA operation. If not a WRITEDATA operation, value must + * be set to 0. Should equal transfer_length on success. + * @fcp_error: status of the FCP operation. Must be 0 on success; on failure + * must be a NVME_SC_FC_xxxx value. + */ +struct nvmefc_tgt_fcp_req { + u8 op; + u16 hwqid; + u32 offset; + u32 timeout; + u32 transfer_length; + struct fc_ba_rjt ba_rjt; + struct scatterlist sg[NVME_FC_MAX_SEGMENTS]; + int sg_cnt; + void *rspaddr; + dma_addr_t rspdma; + u16 rsplen; + + void (*done)(struct nvmefc_tgt_fcp_req *); + + void *nvmet_fc_private; /* LLDD is not to access !! */ + + u32 transferred_length; + int fcp_error; +}; + + +/* Target Features (Bit fields) LLDD supports */ +enum { + NVMET_FCTGTFEAT_READDATA_RSP = (1 << 0), + /* Bit 0: supports the NVMET_FCPOP_READDATA_RSP op, which + * sends (the last) Read Data sequence followed by the RSP + * sequence in one LLDD operation. Errors during Data + * sequence transmit must not allow RSP sequence to be sent. + */ + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1), + /* Bit 1: When 0, the LLDD will deliver FCP CMD + * on the CPU it should be affinitized to. Thus work will + * be scheduled on the cpu received on. When 1, the LLDD + * may not deliver the CMD on the CPU it should be worked + * on. 
The transport should pick a cpu to schedule the work + * on. + */ +}; + + +/** + * struct nvmet_fc_target_port - structure used between NVME-FC transport and + * a LLDD to reference a local NVME subsystem port. + * Allocated/created by the nvme_fc_register_targetport() + * transport interface. + * + * Fields with static values for the port. Initialized by the + * port_info struct supplied to the registration call. + * @port_num: NVME-FC transport subsytem port number + * @node_name: FC WWNN for the port + * @port_name: FC WWPN for the port + * @private: pointer to memory allocated alongside the local port + * structure that is specifically for the LLDD to use. + * The length of the buffer corresponds to the target_priv_sz + * value specified in the nvme_fc_target_template supplied by + * the LLDD. + * + * Fields with dynamic values. Values may change base on link state. LLDD + * may reference fields directly to change them. Initialized by the + * port_info struct supplied to the registration call. + * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must + * be set to 0. + * @port_state: Operational state of the port. + */ +struct nvmet_fc_target_port { + /* static/read-only fields */ + u32 port_num; + u64 node_name; + u64 port_name; + + void *private; + + /* dynamic fields */ + u32 port_id; + enum nvme_fc_obj_state port_state; +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + + +/** + * struct nvmet_fc_target_template - structure containing static entrypoints + * and operational parameters for an LLDD that supports NVME + * subsystem behavior. Passed by reference in port + * registrations. NVME-FC transport remembers template + * reference and may access it during runtime operation. + * + * Subsystem/Target Transport Entrypoints/Parameters: + * + * @targetport_delete: The LLDD initiates deletion of a targetport via + * nvmet_fc_unregister_targetport(). However, the teardown is + * asynchronous. This routine is called upon the completion of the + * teardown to inform the LLDD that the targetport has been deleted. + * Entrypoint is Mandatory. + * + * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service. + * The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange + * structure specified in the nvmet_fc_rcv_ls_req() call made when + * the LS request was received. The structure will fully describe + * the buffers for the response payload and the dma address of the + * payload. The LLDD is to transmit the response (or return a non-zero + * errno status), and upon completion of the transmit, call the + * "done" routine specified in the nvmefc_tgt_ls_req structure + * (argument to done is the ls reqwuest structure itself). + * After calling the done routine, the LLDD shall consider the + * LS handling complete and the nvmefc_tgt_ls_req structure may + * be freed/released. + * Entrypoint is Mandatory. + * + * @fcp_op: Called to perform a data transfer, transmit a response, or + * abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same + * LLDD-supplied exchange structure specified in the + * nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received. + * The op field in the structure shall indicate the operation for + * the LLDD to perform relative to the io. + * NVMET_FCOP_READDATA operation: the LLDD is to send the + * payload data (described by sglist) to the host in 1 or + * more FC sequences (preferrably 1). 
+
+
+/**
+ * struct nvmet_fc_target_template - structure containing static entrypoints
+ *     and operational parameters for an LLDD that supports NVME
+ *     subsystem behavior. Passed by reference in port
+ *     registrations. NVME-FC transport remembers template
+ *     reference and may access it during runtime operation.
+ *
+ * Subsystem/Target Transport Entrypoints/Parameters:
+ *
+ * @targetport_delete: The LLDD initiates deletion of a targetport via
+ *     nvmet_fc_unregister_targetport(). However, the teardown is
+ *     asynchronous. This routine is called upon the completion of the
+ *     teardown to inform the LLDD that the targetport has been deleted.
+ *     Entrypoint is Mandatory.
+ *
+ * @xmt_ls_rsp: Called to transmit the response to an FC-NVME FC-4 LS service.
+ *     The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange
+ *     structure specified in the nvmet_fc_rcv_ls_req() call made when
+ *     the LS request was received. The structure will fully describe
+ *     the buffers for the response payload and the dma address of the
+ *     payload. The LLDD is to transmit the response (or return a non-zero
+ *     errno status), and upon completion of the transmit, call the
+ *     "done" routine specified in the nvmefc_tgt_ls_req structure
+ *     (argument to done is the ls request structure itself).
+ *     After calling the done routine, the LLDD shall consider the
+ *     LS handling complete and the nvmefc_tgt_ls_req structure may
+ *     be freed/released.
+ *     Entrypoint is Mandatory.
+ *
+ * @fcp_op: Called to perform a data transfer, transmit a response, or
+ *     abort an FCP operation. The nvmefc_tgt_fcp_req structure is the same
+ *     LLDD-supplied exchange structure specified in the
+ *     nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received.
+ *     The op field in the structure shall indicate the operation for
+ *     the LLDD to perform relative to the io:
+ *       NVMET_FCOP_READDATA operation: the LLDD is to send the
+ *         payload data (described by sglist) to the host in 1 or
+ *         more FC sequences (preferably 1). Note: the fc-nvme layer
+ *         may call the READDATA operation multiple times for longer
+ *         payloads.
+ *       NVMET_FCOP_WRITEDATA operation: the LLDD is to receive the
+ *         payload data (described by sglist) from the host via 1 or
+ *         more FC sequences (preferably 1). The LLDD is to generate
+ *         the XFER_RDY IU(s) corresponding to the data being requested.
+ *         Note: the FC-NVME layer may call the WRITEDATA operation
+ *         multiple times for longer payloads.
+ *       NVMET_FCOP_READDATA_RSP operation: the LLDD is to send the
+ *         payload data (described by sglist) to the host in 1 or
+ *         more FC sequences (preferably 1). If an error occurs during
+ *         payload data transmission, the LLDD is to set the
+ *         nvmefc_tgt_fcp_req fcp_error and transferred_length fields and
+ *         consider the operation complete. On error, the LLDD is to not
+ *         transmit the FCP_RSP iu. If all payload data is transferred
+ *         successfully, the LLDD is to update the nvmefc_tgt_fcp_req
+ *         transferred_length field and may subsequently transmit the
+ *         FCP_RSP iu payload (described by rspaddr, rspdma, rsplen).
+ *         The LLDD is to await FCP_CONF reception to confirm the RSP
+ *         reception by the host. The LLDD may retransmit the FCP_RSP iu
+ *         if necessary per FC-NVME. Upon reception of FCP_CONF, or upon
+ *         FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req
+ *         fcp_error field and consider the operation complete.
+ *       NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload
+ *         (described by rspaddr, rspdma, rsplen). The LLDD is to await
+ *         FCP_CONF reception to confirm the RSP reception by the host.
+ *         The LLDD may retransmit the FCP_RSP iu if necessary per FC-NVME.
+ *         Upon reception of FCP_CONF, or upon FCP_CONF failure, the
+ *         LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and
+ *         consider the operation complete.
+ *       NVMET_FCOP_ABORT: the LLDD is to terminate the exchange
+ *         corresponding to the fcp operation. The LLDD shall send
+ *         ABTS and follow FC exchange abort-multi rules, including
+ *         ABTS retries and possible logout.
+ *     Upon completing the indicated operation, the LLDD is to set the
+ *     status fields for the operation (transferred_length and fcp_error
+ *     status) in the request, then call the "done" routine
+ *     indicated in the fcp request. Upon return from the "done"
+ *     routine for either an NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation,
+ *     the fc-nvme layer will no longer reference the fcp request,
+ *     allowing the LLDD to free/release the fcp request.
+ *     Note: when calling the done routine for READDATA or WRITEDATA
+ *     operations, the fc-nvme layer may immediately convert, in the same
+ *     thread and before returning to the LLDD, the fcp operation to
+ *     the next operation for the fcp io and call the LLDD's fcp_op
+ *     entrypoint again. If fields in the fcp request are to be accessed
+ *     after the done call, the LLDD should save their values prior to
+ *     calling the done routine, and inspect the saved values after the
+ *     done routine returns.
+ *     Returns 0 on success, -<errno> on failure (ex: -EIO)
+ *     Entrypoint is Mandatory.
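The op dispatch in an LLDD typically reduces to a switch over the NVMET_FCOP_xxx values. The following is a hedged sketch of such an entrypoint; my_hwq() and the my_hw_* helpers are hypothetical driver internals, and the NVMET_FCOP_BA_RJT case is omitted for brevity.

static int my_lldd_fcp_op(struct nvmet_fc_target_port *tgtport,
                          struct nvmefc_tgt_fcp_req *fcpreq)
{
    /* hwqid selects one of the LLDD's max_hw_queues queues */
    struct my_hw_queue *hwq = my_hwq(tgtport, fcpreq->hwqid);

    switch (fcpreq->op) {
    case NVMET_FCOP_WRITEDATA:
        /* generate XFER_RDY, then land transfer_length bytes into
         * fcpreq->sg at payload offset fcpreq->offset */
        return my_hw_recv_data(hwq, fcpreq);
    case NVMET_FCOP_READDATA:
    case NVMET_FCOP_READDATA_RSP:
        /* send the data; for READDATA_RSP also chain the FCP_RSP */
        return my_hw_send_data(hwq, fcpreq);
    case NVMET_FCOP_RSP:
        /* transmit the FCP_RSP IU at rspaddr/rspdma/rsplen */
        return my_hw_send_rsp(hwq, fcpreq);
    case NVMET_FCOP_ABORT:
        my_hw_abort_exchange(hwq, fcpreq);
        return 0;    /* an ABORT request must never fail */
    default:
        return -EINVAL;
    }
}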
+ *
+ * @max_hw_queues: indicates the maximum number of hw queues the LLDD
+ *     supports for cpu affinitization.
+ *     Value is Mandatory. Must be at least 1.
+ *
+ * @max_sgl_segments: indicates the maximum number of sgl segments supported
+ *     by the LLDD
+ *     Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @max_dif_sgl_segments: indicates the maximum number of sgl segments
+ *     supported by the LLDD for DIF operations.
+ *     Value is Mandatory. Must be at least 1. Recommend at least 256.
+ *
+ * @dma_boundary: indicates the dma address boundary across which dma
+ *     mappings will be split.
+ *     Value is Mandatory. Typical value is 0xFFFFFFFF to split across
+ *     4Gig address boundaries.
+ *
+ * @target_features: The LLDD sets bits in this field to correspond to
+ *     optional features that are supported by the LLDD.
+ *     Refer to the NVMET_FCTGTFEAT_xxx values.
+ *     Value is Mandatory. Allowed to be zero.
+ *
+ * @target_priv_sz: The LLDD sets this field to the amount of additional
+ *     memory that it would like the fc nvme layer to allocate on the
+ *     LLDD's behalf whenever a targetport is allocated. The additional
+ *     memory area is solely for the use of the LLDD and its location is
+ *     specified by the targetport->private pointer.
+ *     Value is Mandatory. Allowed to be zero.
+ */
+struct nvmet_fc_target_template {
+    void (*targetport_delete)(struct nvmet_fc_target_port *tgtport);
+    int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport,
+                struct nvmefc_tgt_ls_req *tls_req);
+    int (*fcp_op)(struct nvmet_fc_target_port *tgtport,
+                struct nvmefc_tgt_fcp_req *);
+
+    u32 max_hw_queues;
+    u16 max_sgl_segments;
+    u16 max_dif_sgl_segments;
+    u64 dma_boundary;
+
+    u32 target_features;
+
+    u32 target_priv_sz;
+};
+
+
+int nvmet_fc_register_targetport(struct nvmet_fc_port_info *portinfo,
+            struct nvmet_fc_target_template *template,
+            struct device *dev,
+            struct nvmet_fc_target_port **tgtport_p);
+
+int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport);
+
+int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport,
+            struct nvmefc_tgt_ls_req *lsreq,
+            void *lsreqbuf, u32 lsreqbuf_len);
+
+int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport,
+            struct nvmefc_tgt_fcp_req *fcpreq,
+            void *cmdiubuf, u32 cmdiubuf_len);
+
+#endif /* _NVME_FC_DRIVER_H */
-- 
cgit v1.2.3
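Putting the pieces together, a hedged registration sketch under stated assumptions: the nvmet_fc_port_info fields are assumed to be the WWNN/WWPN/N_Port_ID triple introduced earlier in this header, and my_targetport_delete, my_xmt_ls_rsp, struct my_tgtport_priv, my_tgtport_priv_init() and my_register() are illustrative names (my_lldd_fcp_op is the dispatch sketch shown above).

static struct nvmet_fc_target_template my_tgt_template = {
    .targetport_delete    = my_targetport_delete,
    .xmt_ls_rsp           = my_xmt_ls_rsp,
    .fcp_op               = my_lldd_fcp_op,
    .max_hw_queues        = 4,
    .max_sgl_segments     = 256,
    .max_dif_sgl_segments = 256,
    .dma_boundary         = 0xFFFFFFFF,    /* split at 4Gig boundaries */
    .target_features      = NVMET_FCTGTFEAT_READDATA_RSP,
    .target_priv_sz       = sizeof(struct my_tgtport_priv),
};

static struct nvmet_fc_target_port *my_register(struct device *dev,
                                                u64 wwnn, u64 wwpn,
                                                u32 nport_id)
{
    struct nvmet_fc_port_info pinfo = {
        .node_name = wwnn,
        .port_name = wwpn,
        .port_id   = nport_id,
    };
    struct nvmet_fc_target_port *tgtport;
    int ret;

    ret = nvmet_fc_register_targetport(&pinfo, &my_tgt_template,
                                       dev, &tgtport);
    if (ret)
        return ERR_PTR(ret);

    /* target_priv_sz bytes, private to the LLDD */
    my_tgtport_priv_init(tgtport->private);
    return tgtport;
}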
From f9d03f96b988002027d4b28ea1b7a24729a4c9b5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 8 Dec 2016 15:20:32 -0700
Subject: block: improve handling of the magic discard payload

Instead of allocating a single unused biovec for discard requests, send
them down without any payload. Instead we allow the driver to add a
"special" payload using a biovec embedded into struct request (unioned
over other fields never used while in the driver), and overloading the
number of segments for this case.

This has a couple of advantages:

 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block
   layer is significantly reduced
 - using this same scheme for other request types is trivial, which
   will be important for implementing the new WRITE_ZEROES op on
   devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as we'll
   never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the future
   by merging non-contiguous discard bios into a single request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranged discards and
to remove discard_zeroes_data in favor of always using
REQ_OP_WRITE_ZEROES, so it would be good to get it in quickly.

Signed-off-by: Christoph Hellwig
Signed-off-by: Jens Axboe
---
 include/linux/bio.h    |  3 ++-
 include/linux/blkdev.h | 15 ++++++++++++---
 2 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index b15323934a29..7cf8a6c70a3f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -197,8 +197,9 @@ static inline unsigned bio_segments(struct bio *bio)
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
 	case REQ_OP_SECURE_ERASE:
-	case REQ_OP_WRITE_SAME:
 	case REQ_OP_WRITE_ZEROES:
+		return 0;
+	case REQ_OP_WRITE_SAME:
 		return 1;
 	default:
 		break;

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ebeef2b79c5a..c5393766909d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -120,10 +120,13 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_HASHED		((__force req_flags_t)(1 << 16))
 /* IO stats tracking on */
 #define RQF_STATS		((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+   bio chain. */
+#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ)
+	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
 
 #define BLK_MAX_CDB	16
 
@@ -175,6 +178,7 @@ struct request {
 	 */
 	union {
 		struct rb_node rb_node;	/* sort/lookup */
+		struct bio_vec special_vec;
 		void *completion_data;
 	};
 
@@ -909,8 +913,6 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
-extern void blk_add_request_payload(struct request *rq, struct page *page,
-		int offset, unsigned int len);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 			     struct bio_set *bs, gfp_t gfp_mask,
@@ -1153,6 +1155,13 @@ extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
 
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return 1;
+	return rq->nr_phys_segments;
+}
+
 extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
 extern long nr_blockdev_pages(void);
-- 
cgit v1.2.3
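For a driver, the practical consequence is that a request flagged RQF_SPECIAL_PAYLOAD carries exactly one segment in rq->special_vec rather than in its bio chain. A hedged sketch of a mapping path under that assumption follows; my_drv_map_request() is illustrative, not an existing API.

static int my_drv_map_request(struct request *rq, struct scatterlist *sgl)
{
    /* nr_phys_segments is overloaded: use the helper so a special
     * payload is counted as the single segment it is */
    unsigned short nseg = blk_rq_nr_phys_segments(rq);

    if (!nseg)
        return 0;    /* e.g. a flush: nothing to map */

    sg_init_table(sgl, nseg);

    if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
        /* driver-built payload, e.g. a discard descriptor page */
        sg_set_page(sgl, rq->special_vec.bv_page,
                    rq->special_vec.bv_len,
                    rq->special_vec.bv_offset);
        return 1;
    }

    return blk_rq_map_sg(rq->q, rq, sgl);
}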
From ae911c5e796d51cb2d1ed3a55e73b9cc88d176cf Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 8 Dec 2016 13:19:30 -0700
Subject: blk-mq: add blk_mq_start_stopped_hw_queue()

We have a variant for all hardware queues, but not one for a single
hardware queue.

Signed-off-by: Jens Axboe
Reviewed-by: Hannes Reinecke
---
 include/linux/blk-mq.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 35a0af5ede6d..87e404aae267 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -231,6 +231,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_run_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
-- 
cgit v1.2.3

From 70b3ea056f3074be6d9256c312b64c0d90a4a683 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 7 Dec 2016 08:43:31 -0700
Subject: elevator: make the rqhash helpers exported

Signed-off-by: Jens Axboe
Reviewed-by: Hannes Reinecke
---
 include/linux/elevator.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index f219c9aed360..b276e9ef0e0b 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -108,6 +108,11 @@ struct elevator_type
 
 #define ELV_HASH_BITS 6
 
+void elv_rqhash_del(struct request_queue *q, struct request *rq);
+void elv_rqhash_add(struct request_queue *q, struct request *rq);
+void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
+struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
+
 /*
  * each queue has an elevator_queue associated with it
 */
-- 
cgit v1.2.3
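The single-queue variant pairs naturally with blk_mq_stop_hw_queue() for per-queue flow control. A hedged sketch of that pairing, assuming a driver-local struct my_dev and my_hw_* helpers (none of which are kernel API):

static int my_drv_queue_rq(struct blk_mq_hw_ctx *hctx,
                           const struct blk_mq_queue_data *bd)
{
    struct my_dev *dev = hctx->driver_data;

    if (!my_hw_can_queue(dev)) {
        /* stop only this queue until resources return */
        blk_mq_stop_hw_queue(hctx);
        return BLK_MQ_RQ_QUEUE_BUSY;
    }
    return my_hw_submit(dev, bd->rq);    /* BLK_MQ_RQ_QUEUE_* value */
}

static void my_drv_completion_irq(struct my_dev *dev)
{
    my_hw_reap_completions(dev);
    /* restart the queue only if it was stopped; async=true avoids
     * running the queue from interrupt context */
    blk_mq_start_stopped_hw_queue(dev->hctx, true);
}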
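With the rqhash helpers exported, an I/O scheduler outside elevator.c can reuse the hash, which indexes requests by their end sector, for back-merge lookups. A hedged sketch follows; my_sched_find_back_merge() is illustrative, and a real scheduler would also verify the candidate with the usual elv_bio_merge_ok()-style checks.

static int my_sched_find_back_merge(struct request_queue *q,
                                    struct request **ret_rq,
                                    struct bio *bio)
{
    /* a back-merge candidate ends exactly where this bio begins */
    struct request *rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);

    if (rq) {
        *ret_rq = rq;
        return ELEVATOR_BACK_MERGE;
    }
    return ELEVATOR_NO_MERGE;
}

Once a merge grows a request, the scheduler would call elv_rqhash_reposition() so the hash again reflects the request's new end sector.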