From 5a023cdba50c5f5f2bc351783b3131699deb3937 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 30 Nov 2015 10:20:29 -0800 Subject: block: enable dax for raw block devices If an application wants exclusive access to all of the persistent memory provided by an NVDIMM namespace it can use this raw-block-dax facility to forgo establishing a filesystem. This capability is targeted primarily to hypervisors wanting to provision persistent memory for guests. It can be disabled / enabled dynamically via the new BLKDAXSET ioctl. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Dave Chinner Cc: Andrew Morton Cc: Ross Zwisler Reported-by: kbuild test robot Reviewed-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/fs.h | 11 +++++++++++ include/uapi/linux/fs.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 3aa514254161..96fabc93b583 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -482,6 +482,9 @@ struct block_device { int bd_fsfreeze_count; /* Mutex for freeze */ struct mutex bd_fsfreeze_mutex; +#ifdef CONFIG_FS_DAX + int bd_map_count; +#endif }; /* @@ -2264,6 +2267,14 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); +#ifdef CONFIG_FS_DAX +extern bool blkdev_dax_capable(struct block_device *bdev); +#else +static inline bool blkdev_dax_capable(struct block_device *bdev) +{ + return false; +} +#endif extern struct super_block *blockdev_superblock; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f15d980249b5..401c409e9239 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -152,6 +152,8 @@ struct inodes_stat_t { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKDAXSET _IO(0x12,128) +#define BLKDAXGET 
_IO(0x12,129) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From 9e0e252a048b0ba5066f0dc15c3b2468ffe5c422 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 24 Dec 2015 19:20:32 -0700 Subject: badblocks: Add core badblock management code Take the core badblocks implementation from md, and make it generally available. This follows the same style as kernel implementations of linked lists, rb-trees etc, where you can have a structure that can be embedded anywhere, and accessor functions to manipulate the data. The only changes in this copy of the code are ones to generalize function/variable names from md-specific ones. Also add init and free functions. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/badblocks.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/badblocks.h (limited to 'include') diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h new file mode 100644 index 000000000000..929344630b51 --- /dev/null +++ b/include/linux/badblocks.h @@ -0,0 +1,53 @@ +#ifndef _LINUX_BADBLOCKS_H +#define _LINUX_BADBLOCKS_H + +#include +#include +#include +#include + +#define BB_LEN_MASK (0x00000000000001FFULL) +#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL) +#define BB_ACK_MASK (0x8000000000000000ULL) +#define BB_MAX_LEN 512 +#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) +#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) +#define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) + +/* Bad block numbers are stored sorted in a single page. + * 64bits is used for each block or extent. + * 54 bits are sector number, 9 bits are extent size, + * 1 bit is an 'acknowledged' flag. 
+ */ +#define MAX_BADBLOCKS (PAGE_SIZE/8) + +struct badblocks { + int count; /* count of bad blocks */ + int unacked_exist; /* there probably are unacknowledged + * bad blocks. This is only cleared + * when a read discovers none + */ + int shift; /* shift from sectors to block size + * a -ve shift means badblocks are + * disabled.*/ + u64 *page; /* badblock list */ + int changed; + seqlock_t lock; + sector_t sector; + sector_t size; /* in sectors */ +}; + +int badblocks_check(struct badblocks *bb, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors); +int badblocks_set(struct badblocks *bb, sector_t s, int sectors, + int acknowledged); +int badblocks_clear(struct badblocks *bb, sector_t s, int sectors); +void ack_all_badblocks(struct badblocks *bb); +ssize_t badblocks_show(struct badblocks *bb, char *page, int unack); +ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, + int unack); +int badblocks_init(struct badblocks *bb, int enable); +void badblocks_free(struct badblocks *bb); + +#endif -- cgit v1.2.3 From 99e6608c9e7414ae4f2168df8bf8fae3eb49e41f Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Sat, 9 Jan 2016 08:36:51 -0800 Subject: block: Add badblock management for gendisks NVDIMM devices, which can behave more like DRAM than like block devices, may develop bad cache lines, or 'poison'. A block device exposed by the pmem driver can then consume poison via a read (or write), and cause a machine check. On platforms without machine check recovery features, this would mean a crash. The block device maintaining a runtime list of all known sectors that have poison can directly avoid this, and also provide a path forward to enable proper handling/recovery for DAX faults on such a device. Use the new badblock management interfaces to add a badblocks list to gendisks. 
Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/genhd.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 847cc1d91634..0bbec6880051 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -162,6 +162,7 @@ struct disk_part_tbl { }; struct disk_events; +struct badblocks; #if defined(CONFIG_BLK_DEV_INTEGRITY) @@ -213,6 +214,7 @@ struct gendisk { struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ int node_id; + struct badblocks *bb; }; static inline struct gendisk *part_to_disk(struct hd_struct *part) @@ -433,6 +435,11 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); +int disk_alloc_badblocks(struct gendisk *disk); +extern int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors); +extern int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors); +extern int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); -- cgit v1.2.3 From 0caeef63e6d2f866d85bb507bf63e0ce8ec91cef Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 24 Dec 2015 19:21:43 -0700 Subject: libnvdimm: Add a poison list and export badblocks During region creation, perform Address Range Scrubs (ARS) for the SPA (System Physical Address) ranges to retrieve known poison locations from firmware. Add a new data structure 'nd_poison' which is used as a list in nvdimm_bus to store these poison locations. When creating a pmem namespace, if there is any known poison associated with its physical address space, convert the poison ranges to bad sectors that are exposed using the badblocks interface. 
Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 3f021dc5da8c..bed40dff0e86 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -116,6 +116,7 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } +int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc, struct module *module); #define nvdimm_bus_register(parent, desc) \ -- cgit v1.2.3 From d3b407fb3f782bd915db64e266010ea30a2d381e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Jan 2016 12:19:22 -0800 Subject: badblocks: rename badblocks_free to badblocks_exit For symmetry with badblocks_init() make it clear that this path only destroys incremental allocations of a badblocks instance, and does not free the badblocks instance itself. Signed-off-by: Dan Williams --- include/linux/badblocks.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 929344630b51..2d98c026c57f 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -48,6 +48,6 @@ ssize_t badblocks_show(struct badblocks *bb, char *page, int unack); ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack); int badblocks_init(struct badblocks *bb, int enable); -void badblocks_free(struct badblocks *bb); +void badblocks_exit(struct badblocks *bb); #endif -- cgit v1.2.3 From 16263ff6c72eb4cc00aa287230144dda12ccad12 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 4 Jan 2016 23:50:23 -0800 Subject: block, badblocks: introduce devm_init_badblocks Provide a devres interface for initializing a badblocks instance. 
The pmem driver has several scenarios where it will be beneficial to have this structure automatically freed when the device is disabled / fails probe. Signed-off-by: Dan Williams --- include/linux/badblocks.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/badblocks.h b/include/linux/badblocks.h index 2d98c026c57f..c3bdf8c59480 100644 --- a/include/linux/badblocks.h +++ b/include/linux/badblocks.h @@ -2,6 +2,7 @@ #define _LINUX_BADBLOCKS_H #include +#include #include #include #include @@ -23,6 +24,7 @@ #define MAX_BADBLOCKS (PAGE_SIZE/8) struct badblocks { + struct device *dev; /* set by devm_init_badblocks */ int count; /* count of bad blocks */ int unacked_exist; /* there probably are unacknowledged * bad blocks. This is only cleared @@ -49,5 +51,15 @@ ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack); int badblocks_init(struct badblocks *bb, int enable); void badblocks_exit(struct badblocks *bb); - +struct device; +int devm_init_badblocks(struct device *dev, struct badblocks *bb); +static inline void devm_exit_badblocks(struct device *dev, struct badblocks *bb) +{ + if (bb->dev != dev) { + dev_WARN_ONCE(dev, 1, "%s: badblocks instance not associated\n", + __func__); + return; + } + badblocks_exit(bb); +} #endif -- cgit v1.2.3 From 55f5560d8c18fe33fc169f8d244a9247dcac7612 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 5 Jan 2016 00:28:18 -0800 Subject: block: kill disk_{check|set|clear|alloc}_badblocks These actions are completely managed by a block driver or can use the badblocks api directly. 
Signed-off-by: Dan Williams --- include/linux/genhd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0bbec6880051..5c706765404a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -435,11 +435,6 @@ extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno); -int disk_alloc_badblocks(struct gendisk *disk); -extern int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors, - sector_t *first_bad, int *bad_sectors); -extern int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors); -extern int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); -- cgit v1.2.3