summary | refs | log | tree | commit | diff | stats
path: root/drivers/nvdimm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r-- drivers/nvdimm/Kconfig | 23
-rw-r--r-- drivers/nvdimm/Makefile | 5
-rw-r--r-- drivers/nvdimm/btt.c | 50
-rw-r--r-- drivers/nvdimm/btt.h | 3
-rw-r--r-- drivers/nvdimm/btt_devs.c | 215
-rw-r--r-- drivers/nvdimm/claim.c | 201
-rw-r--r-- drivers/nvdimm/dimm_devs.c | 5
-rw-r--r-- drivers/nvdimm/e820.c | 87
-rw-r--r-- drivers/nvdimm/namespace_devs.c | 89
-rw-r--r-- drivers/nvdimm/nd-core.h | 9
-rw-r--r-- drivers/nvdimm/nd.h | 67
-rw-r--r-- drivers/nvdimm/pfn.h | 35
-rw-r--r-- drivers/nvdimm/pfn_devs.c | 337
-rw-r--r-- drivers/nvdimm/pmem.c | 245
-rw-r--r-- drivers/nvdimm/region.c | 2
-rw-r--r-- drivers/nvdimm/region_devs.c | 20
16 files changed, 1133 insertions, 260 deletions
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 72226acb5c0f..53c11621d5b1 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -21,6 +21,7 @@ config BLK_DEV_PMEM
default LIBNVDIMM
depends on HAS_IOMEM
select ND_BTT if BTT
+ select ND_PFN if NVDIMM_PFN
help
Memory ranges for PMEM are described by either an NFIT
(NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a
@@ -47,12 +48,16 @@ config ND_BLK
(CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
capabilities.
+config ND_CLAIM
+ bool
+
config ND_BTT
tristate
config BTT
bool "BTT: Block Translation Table (atomic sector updates)"
default y if LIBNVDIMM
+ select ND_CLAIM
help
The Block Translation Table (BTT) provides atomic sector
update semantics for persistent memory devices, so that
@@ -65,4 +70,22 @@ config BTT
Select Y if unsure
+config ND_PFN
+ tristate
+
+config NVDIMM_PFN
+ bool "PFN: Map persistent (device) memory"
+ default LIBNVDIMM
+ depends on ZONE_DEVICE
+ select ND_CLAIM
+ help
+ Map persistent memory, i.e. advertise it to the memory
+ management sub-system. By default persistent memory does
+ not support direct I/O, RDMA, or any other usage that
+ requires a 'struct page' to mediate an I/O request. This
+ driver allocates and initializes the infrastructure needed
+ to support those use cases.
+
+ Select Y if unsure
+
endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 594bb97c867a..ea84d3c4e8e5 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -2,6 +2,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_ND_BTT) += nd_btt.o
obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
nd_pmem-y := pmem.o
@@ -9,6 +10,8 @@ nd_btt-y := btt.o
nd_blk-y := blk.o
+nd_e820-y := e820.o
+
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
@@ -17,4 +20,6 @@ libnvdimm-y += region_devs.o
libnvdimm-y += region.o
libnvdimm-y += namespace_devs.o
libnvdimm-y += label.o
+libnvdimm-$(CONFIG_ND_CLAIM) += claim.o
libnvdimm-$(CONFIG_BTT) += btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 341202ed32b4..254239746020 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -583,33 +583,6 @@ static void free_arenas(struct btt *btt)
}
/*
- * This function checks if the metadata layout is valid and error free
- */
-static int arena_is_valid(struct arena_info *arena, struct btt_sb *super,
- u8 *uuid, u32 lbasize)
-{
- u64 checksum;
-
- if (memcmp(super->uuid, uuid, 16))
- return 0;
-
- checksum = le64_to_cpu(super->checksum);
- super->checksum = 0;
- if (checksum != nd_btt_sb_checksum(super))
- return 0;
- super->checksum = cpu_to_le64(checksum);
-
- if (lbasize != le32_to_cpu(super->external_lbasize))
- return 0;
-
- /* TODO: figure out action for this */
- if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
- dev_info(to_dev(arena), "Found arena with an error flag\n");
-
- return 1;
-}
-
-/*
* This function reads an existing valid btt superblock and
* populates the corresponding arena_info struct
*/
@@ -632,8 +605,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
arena->logoff = arena_off + le64_to_cpu(super->logoff);
arena->info2off = arena_off + le64_to_cpu(super->info2off);
- arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) :
- (arena->info2off - arena->infooff + BTT_PG_SIZE);
+ arena->size = (le64_to_cpu(super->nextoff) > 0)
+ ? (le64_to_cpu(super->nextoff))
+ : (arena->info2off - arena->infooff + BTT_PG_SIZE);
arena->flags = le32_to_cpu(super->flags);
}
@@ -665,8 +639,7 @@ static int discover_arenas(struct btt *btt)
if (ret)
goto out;
- if (!arena_is_valid(arena, super, btt->nd_btt->uuid,
- btt->lbasize)) {
+ if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
if (remaining == btt->rawsize) {
btt->init_state = INIT_NOTFOUND;
dev_info(to_dev(arena), "No existing arenas\n");
@@ -755,10 +728,13 @@ static int create_arenas(struct btt *btt)
* It is only called for an uninitialized arena when a write
* to that arena occurs for the first time.
*/
-static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
+static int btt_arena_write_layout(struct arena_info *arena)
{
int ret;
+ u64 sum;
struct btt_sb *super;
+ struct nd_btt *nd_btt = arena->nd_btt;
+ const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
ret = btt_map_init(arena);
if (ret)
@@ -773,7 +749,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
return -ENOMEM;
strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
- memcpy(super->uuid, uuid, 16);
+ memcpy(super->uuid, nd_btt->uuid, 16);
+ memcpy(super->parent_uuid, parent_uuid, 16);
super->flags = cpu_to_le32(arena->flags);
super->version_major = cpu_to_le16(arena->version_major);
super->version_minor = cpu_to_le16(arena->version_minor);
@@ -794,7 +771,8 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid)
super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
super->flags = 0;
- super->checksum = cpu_to_le64(nd_btt_sb_checksum(super));
+ sum = nd_sb_checksum((struct nd_gen_sb *) super);
+ super->checksum = cpu_to_le64(sum);
ret = btt_info_write(arena, super);
@@ -813,7 +791,7 @@ static int btt_meta_init(struct btt *btt)
mutex_lock(&btt->init_lock);
list_for_each_entry(arena, &btt->arena_list, list) {
- ret = btt_arena_write_layout(arena, btt->nd_btt->uuid);
+ ret = btt_arena_write_layout(arena);
if (ret)
goto unlock;
@@ -1447,8 +1425,6 @@ static int __init nd_btt_init(void)
{
int rc;
- BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
-
btt_major = register_blkdev(0, "btt");
if (btt_major < 0)
return btt_major;
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 75b0d80a6bd9..b2f8651e5395 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -182,4 +182,7 @@ struct btt {
int init_state;
int num_arenas;
};
+
+bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
+
#endif
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 6ac8c0fea3ec..59ad54a63d9f 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -21,63 +21,13 @@
#include "btt.h"
#include "nd.h"
-static void __nd_btt_detach_ndns(struct nd_btt *nd_btt)
-{
- struct nd_namespace_common *ndns = nd_btt->ndns;
-
- dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
- || ndns->claim != &nd_btt->dev,
- "%s: invalid claim\n", __func__);
- ndns->claim = NULL;
- nd_btt->ndns = NULL;
- put_device(&ndns->dev);
-}
-
-static void nd_btt_detach_ndns(struct nd_btt *nd_btt)
-{
- struct nd_namespace_common *ndns = nd_btt->ndns;
-
- if (!ndns)
- return;
- get_device(&ndns->dev);
- device_lock(&ndns->dev);
- __nd_btt_detach_ndns(nd_btt);
- device_unlock(&ndns->dev);
- put_device(&ndns->dev);
-}
-
-static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt,
- struct nd_namespace_common *ndns)
-{
- if (ndns->claim)
- return false;
- dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex)
- || nd_btt->ndns,
- "%s: invalid claim\n", __func__);
- ndns->claim = &nd_btt->dev;
- nd_btt->ndns = ndns;
- get_device(&ndns->dev);
- return true;
-}
-
-static bool nd_btt_attach_ndns(struct nd_btt *nd_btt,
- struct nd_namespace_common *ndns)
-{
- bool claimed;
-
- device_lock(&ndns->dev);
- claimed = __nd_btt_attach_ndns(nd_btt, ndns);
- device_unlock(&ndns->dev);
- return claimed;
-}
-
static void nd_btt_release(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
struct nd_btt *nd_btt = to_nd_btt(dev);
dev_dbg(dev, "%s\n", __func__);
- nd_btt_detach_ndns(nd_btt);
+ nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
kfree(nd_btt->uuid);
kfree(nd_btt);
@@ -172,104 +122,15 @@ static ssize_t namespace_show(struct device *dev,
return rc;
}
-static int namespace_match(struct device *dev, void *data)
-{
- char *name = data;
-
- return strcmp(name, dev_name(dev)) == 0;
-}
-
-static bool is_nd_btt_idle(struct device *dev)
-{
- struct nd_region *nd_region = to_nd_region(dev->parent);
- struct nd_btt *nd_btt = to_nd_btt(dev);
-
- if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver)
- return false;
- return true;
-}
-
-static ssize_t __namespace_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
-{
- struct nd_btt *nd_btt = to_nd_btt(dev);
- struct nd_namespace_common *ndns;
- struct device *found;
- char *name;
-
- if (dev->driver) {
- dev_dbg(dev, "%s: -EBUSY\n", __func__);
- return -EBUSY;
- }
-
- name = kstrndup(buf, len, GFP_KERNEL);
- if (!name)
- return -ENOMEM;
- strim(name);
-
- if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
- /* pass */;
- else {
- len = -EINVAL;
- goto out;
- }
-
- ndns = nd_btt->ndns;
- if (strcmp(name, "") == 0) {
- /* detach the namespace and destroy / reset the btt device */
- nd_btt_detach_ndns(nd_btt);
- if (is_nd_btt_idle(dev))
- nd_device_unregister(dev, ND_ASYNC);
- else {
- nd_btt->lbasize = 0;
- kfree(nd_btt->uuid);
- nd_btt->uuid = NULL;
- }
- goto out;
- } else if (ndns) {
- dev_dbg(dev, "namespace already set to: %s\n",
- dev_name(&ndns->dev));
- len = -EBUSY;
- goto out;
- }
-
- found = device_find_child(dev->parent, name, namespace_match);
- if (!found) {
- dev_dbg(dev, "'%s' not found under %s\n", name,
- dev_name(dev->parent));
- len = -ENODEV;
- goto out;
- }
-
- ndns = to_ndns(found);
- if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
- dev_dbg(dev, "%s too small to host btt\n", name);
- len = -ENXIO;
- goto out_attach;
- }
-
- WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev));
- if (!nd_btt_attach_ndns(nd_btt, ndns)) {
- dev_dbg(dev, "%s already claimed\n",
- dev_name(&ndns->dev));
- len = -EBUSY;
- }
-
- out_attach:
- put_device(&ndns->dev); /* from device_find_child */
- out:
- kfree(name);
- return len;
-}
-
static ssize_t namespace_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
+ struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
nvdimm_bus_lock(dev);
device_lock(dev);
- rc = __namespace_store(dev, attr, buf, len);
+ rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
rc, buf, buf[len - 1] == '\n' ? "" : "\n");
device_unlock(dev);
@@ -324,7 +185,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
dev->type = &nd_btt_device_type;
dev->groups = nd_btt_attribute_groups;
device_initialize(&nd_btt->dev);
- if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) {
+ if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
__func__, dev_name(ndns->claim));
put_device(dev);
@@ -342,30 +203,54 @@ struct device *nd_btt_create(struct nd_region *nd_region)
return dev;
}
-/*
- * nd_btt_sb_checksum: compute checksum for btt info block
+static bool uuid_is_null(u8 *uuid)
+{
+ static const u8 null_uuid[16];
+
+ return (memcmp(uuid, null_uuid, 16) == 0);
+}
+
+/**
+ * nd_btt_arena_is_valid - check if the metadata layout is valid
+ * @nd_btt: device with BTT geometry and backing device info
+ * @super: pointer to the arena's info block being tested
+ *
+ * Check consistency of the btt info block with itself by validating
+ * the checksum, and with the parent namespace by comparing a non-null
+ * parent_uuid in the info block against the parent namespace's uuid.
*
- * Returns a fletcher64 checksum of everything in the given info block
- * except the last field (since that's where the checksum lives).
+ * Returns:
+ * false for an invalid info block, true for a valid one
*/
-u64 nd_btt_sb_checksum(struct btt_sb *btt_sb)
+bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
{
- u64 sum;
- __le64 sum_save;
-
- sum_save = btt_sb->checksum;
- btt_sb->checksum = 0;
- sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1);
- btt_sb->checksum = sum_save;
- return sum;
+ const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+ u64 checksum;
+
+ if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
+ return false;
+
+ if (!uuid_is_null(super->parent_uuid))
+ if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
+ return false;
+
+ checksum = le64_to_cpu(super->checksum);
+ super->checksum = 0;
+ if (checksum != nd_sb_checksum((struct nd_gen_sb *) super))
+ return false;
+ super->checksum = cpu_to_le64(checksum);
+
+ /* TODO: figure out action for this */
+ if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0)
+ dev_info(&nd_btt->dev, "Found arena with an error flag\n");
+
+ return true;
}
-EXPORT_SYMBOL(nd_btt_sb_checksum);
+EXPORT_SYMBOL(nd_btt_arena_is_valid);
static int __nd_btt_probe(struct nd_btt *nd_btt,
struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
{
- u64 checksum;
-
if (!btt_sb || !ndns || !nd_btt)
return -ENODEV;
@@ -375,14 +260,8 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
if (nvdimm_namespace_capacity(ndns) < SZ_16M)
return -ENXIO;
- if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0)
- return -ENODEV;
-
- checksum = le64_to_cpu(btt_sb->checksum);
- btt_sb->checksum = 0;
- if (checksum != nd_btt_sb_checksum(btt_sb))
+ if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
return -ENODEV;
- btt_sb->checksum = cpu_to_le64(checksum);
nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
@@ -416,7 +295,9 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__,
rc == 0 ? dev_name(dev) : "<none>");
if (rc < 0) {
- __nd_btt_detach_ndns(to_nd_btt(dev));
+ struct nd_btt *nd_btt = to_nd_btt(dev);
+
+ __nd_detach_ndns(dev, &nd_btt->ndns);
put_device(dev);
}
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
new file mode 100644
index 000000000000..e8f03b0e95e4
--- /dev/null
+++ b/drivers/nvdimm/claim.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "btt.h"
+#include "nd.h"
+
+void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns)
+{
+ struct nd_namespace_common *ndns = *_ndns;
+
+ dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex)
+ || ndns->claim != dev,
+ "%s: invalid claim\n", __func__);
+ ndns->claim = NULL;
+ *_ndns = NULL;
+ put_device(&ndns->dev);
+}
+
+void nd_detach_ndns(struct device *dev,
+ struct nd_namespace_common **_ndns)
+{
+ struct nd_namespace_common *ndns = *_ndns;
+
+ if (!ndns)
+ return;
+ get_device(&ndns->dev);
+ device_lock(&ndns->dev);
+ __nd_detach_ndns(dev, _ndns);
+ device_unlock(&ndns->dev);
+ put_device(&ndns->dev);
+}
+
+bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+ struct nd_namespace_common **_ndns)
+{
+ if (attach->claim)
+ return false;
+ dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex)
+ || *_ndns,
+ "%s: invalid claim\n", __func__);
+ attach->claim = dev;
+ *_ndns = attach;
+ get_device(&attach->dev);
+ return true;
+}
+
+bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+ struct nd_namespace_common **_ndns)
+{
+ bool claimed;
+
+ device_lock(&attach->dev);
+ claimed = __nd_attach_ndns(dev, attach, _ndns);
+ device_unlock(&attach->dev);
+ return claimed;
+}
+
+static int namespace_match(struct device *dev, void *data)
+{
+ char *name = data;
+
+ return strcmp(name, dev_name(dev)) == 0;
+}
+
+static bool is_idle(struct device *dev, struct nd_namespace_common *ndns)
+{
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+ struct device *seed = NULL;
+
+ if (is_nd_btt(dev))
+ seed = nd_region->btt_seed;
+ else if (is_nd_pfn(dev))
+ seed = nd_region->pfn_seed;
+
+ if (seed == dev || ndns || dev->driver)
+ return false;
+ return true;
+}
+
+static void nd_detach_and_reset(struct device *dev,
+ struct nd_namespace_common **_ndns)
+{
+ /* detach the namespace and destroy / reset the device */
+ nd_detach_ndns(dev, _ndns);
+ if (is_idle(dev, *_ndns)) {
+ nd_device_unregister(dev, ND_ASYNC);
+ } else if (is_nd_btt(dev)) {
+ struct nd_btt *nd_btt = to_nd_btt(dev);
+
+ nd_btt->lbasize = 0;
+ kfree(nd_btt->uuid);
+ nd_btt->uuid = NULL;
+ } else if (is_nd_pfn(dev)) {
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+ kfree(nd_pfn->uuid);
+ nd_pfn->uuid = NULL;
+ nd_pfn->mode = PFN_MODE_NONE;
+ }
+}
+
+ssize_t nd_namespace_store(struct device *dev,
+ struct nd_namespace_common **_ndns, const char *buf,
+ size_t len)
+{
+ struct nd_namespace_common *ndns;
+ struct device *found;
+ char *name;
+
+ if (dev->driver) {
+ dev_dbg(dev, "%s: -EBUSY\n", __func__);
+ return -EBUSY;
+ }
+
+ name = kstrndup(buf, len, GFP_KERNEL);
+ if (!name)
+ return -ENOMEM;
+ strim(name);
+
+ if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0)
+ /* pass */;
+ else {
+ len = -EINVAL;
+ goto out;
+ }
+
+ ndns = *_ndns;
+ if (strcmp(name, "") == 0) {
+ nd_detach_and_reset(dev, _ndns);
+ goto out;
+ } else if (ndns) {
+ dev_dbg(dev, "namespace already set to: %s\n",
+ dev_name(&ndns->dev));
+ len = -EBUSY;
+ goto out;
+ }
+
+ found = device_find_child(dev->parent, name, namespace_match);
+ if (!found) {
+ dev_dbg(dev, "'%s' not found under %s\n", name,
+ dev_name(dev->parent));
+ len = -ENODEV;
+ goto out;
+ }
+
+ ndns = to_ndns(found);
+ if (__nvdimm_namespace_capacity(ndns) < SZ_16M) {
+ dev_dbg(dev, "%s too small to host\n", name);
+ len = -ENXIO;
+ goto out_attach;
+ }
+
+ WARN_ON_ONCE(!is_nvdimm_bus_locked(dev));
+ if (!nd_attach_ndns(dev, ndns, _ndns)) {
+ dev_dbg(dev, "%s already claimed\n",
+ dev_name(&ndns->dev));
+ len = -EBUSY;
+ }
+
+ out_attach:
+ put_device(&ndns->dev); /* from device_find_child */
+ out:
+ kfree(name);
+ return len;
+}
+
+/*
+ * nd_sb_checksum: compute checksum for a generic info block
+ *
+ * Returns a fletcher64 checksum of everything in the given info block
+ * except the last field (since that's where the checksum lives).
+ */
+u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb)
+{
+ u64 sum;
+ __le64 sum_save;
+
+ BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K);
+ BUILD_BUG_ON(sizeof(struct nd_pfn_sb) != SZ_4K);
+ BUILD_BUG_ON(sizeof(struct nd_gen_sb) != SZ_4K);
+
+ sum_save = nd_gen_sb->checksum;
+ nd_gen_sb->checksum = 0;
+ sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1);
+ nd_gen_sb->checksum = sum_save;
+ return sum;
+}
+EXPORT_SYMBOL(nd_sb_checksum);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index c05eb807d674..651b8d19d324 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -241,10 +241,7 @@ void nvdimm_drvdata_release(struct kref *kref)
nvdimm_free_dpa(ndd, res);
nvdimm_bus_unlock(dev);
- if (ndd->data && is_vmalloc_addr(ndd->data))
- vfree(ndd->data);
- else
- kfree(ndd->data);
+ kvfree(ndd->data);
kfree(ndd);
put_device(dev);
}
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
new file mode 100644
index 000000000000..8282db2ef99e
--- /dev/null
+++ b/drivers/nvdimm/e820.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2015, Christoph Hellwig.
+ * Copyright (c) 2015, Intel Corporation.
+ */
+#include <linux/platform_device.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+
+static const struct attribute_group *e820_pmem_attribute_groups[] = {
+ &nvdimm_bus_attribute_group,
+ NULL,
+};
+
+static const struct attribute_group *e820_pmem_region_attribute_groups[] = {
+ &nd_region_attribute_group,
+ &nd_device_attribute_group,
+ NULL,
+};
+
+static int e820_pmem_remove(struct platform_device *pdev)
+{
+ struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev);
+
+ nvdimm_bus_unregister(nvdimm_bus);
+ return 0;
+}
+
+static int e820_pmem_probe(struct platform_device *pdev)
+{
+ static struct nvdimm_bus_descriptor nd_desc;
+ struct device *dev = &pdev->dev;
+ struct nvdimm_bus *nvdimm_bus;
+ struct resource *p;
+
+ nd_desc.attr_groups = e820_pmem_attribute_groups;
+ nd_desc.provider_name = "e820";
+ nvdimm_bus = nvdimm_bus_register(dev, &nd_desc);
+ if (!nvdimm_bus)
+ goto err;
+ platform_set_drvdata(pdev, nvdimm_bus);
+
+ for (p = iomem_resource.child; p ; p = p->sibling) {
+ struct nd_region_desc ndr_desc;
+
+ if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0)
+ continue;
+
+ memset(&ndr_desc, 0, sizeof(ndr_desc));
+ ndr_desc.res = p;
+ ndr_desc.attr_groups = e820_pmem_region_attribute_groups;
+ ndr_desc.numa_node = NUMA_NO_NODE;
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+ if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
+ goto err;
+ }
+
+ return 0;
+
+ err:
+ nvdimm_bus_unregister(nvdimm_bus);
+ dev_err(dev, "failed to register legacy persistent memory ranges\n");
+ return -ENXIO;
+}
+
+static struct platform_driver e820_pmem_driver = {
+ .probe = e820_pmem_probe,
+ .remove = e820_pmem_remove,
+ .driver = {
+ .name = "e820_pmem",
+ },
+};
+
+static __init int e820_pmem_init(void)
+{
+ return platform_driver_register(&e820_pmem_driver);
+}
+
+static __exit void e820_pmem_exit(void)
+{
+ platform_driver_unregister(&e820_pmem_driver);
+}
+
+MODULE_ALIAS("platform:e820_pmem*");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
+module_init(e820_pmem_init);
+module_exit(e820_pmem_exit);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index fef0dd80d4ad..0955b2cb10fe 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/slab.h>
+#include <linux/pmem.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
@@ -76,22 +77,54 @@ static bool is_namespace_io(struct device *dev)
return dev ? dev->type == &namespace_io_device_type : false;
}
+bool pmem_should_map_pages(struct device *dev)
+{
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+
+ if (!IS_ENABLED(CONFIG_ZONE_DEVICE))
+ return false;
+
+ if (!test_bit(ND_REGION_PAGEMAP, &nd_region->flags))
+ return false;
+
+ if (is_nd_pfn(dev) || is_nd_btt(dev))
+ return false;
+
+#ifdef ARCH_MEMREMAP_PMEM
+ return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
+#else
+ return false;
+#endif
+}
+EXPORT_SYMBOL(pmem_should_map_pages);
+
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name)
{
struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
- const char *suffix = "";
+ const char *suffix = NULL;
- if (ndns->claim && is_nd_btt(ndns->claim))
- suffix = "s";
+ if (ndns->claim) {
+ if (is_nd_btt(ndns->claim))
+ suffix = "s";
+ else if (is_nd_pfn(ndns->claim))
+ suffix = "m";
+ else
+ dev_WARN_ONCE(&ndns->dev, 1,
+ "unknown claim type by %s\n",
+ dev_name(ndns->claim));
+ }
- if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev))
- sprintf(name, "pmem%d%s", nd_region->id, suffix);
- else if (is_namespace_blk(&ndns->dev)) {
+ if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
+ if (!suffix && pmem_should_map_pages(&ndns->dev))
+ suffix = "m";
+ sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
+ } else if (is_namespace_blk(&ndns->dev)) {
struct nd_namespace_blk *nsblk;
nsblk = to_nd_namespace_blk(&ndns->dev);
- sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix);
+ sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id,
+ suffix ? suffix : "");
} else {
return NULL;
}
@@ -100,6 +133,26 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
}
EXPORT_SYMBOL(nvdimm_namespace_disk_name);
+const u8 *nd_dev_to_uuid(struct device *dev)
+{
+ static const u8 null_uuid[16];
+
+ if (!dev)
+ return null_uuid;
+
+ if (is_namespace_pmem(dev)) {
+ struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+ return nspm->uuid;
+ } else if (is_namespace_blk(dev)) {
+ struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
+
+ return nsblk->uuid;
+ } else
+ return null_uuid;
+}
+EXPORT_SYMBOL(nd_dev_to_uuid);
+
static ssize_t nstype_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1235,12 +1288,22 @@ static const struct attribute_group *nd_namespace_attribute_groups[] = {
struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
{
struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL;
+ struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL;
struct nd_namespace_common *ndns;
resource_size_t size;
- if (nd_btt) {
- ndns = nd_btt->ndns;
- if (!ndns)
+ if (nd_btt || nd_pfn) {
+ struct device *host = NULL;
+
+ if (nd_btt) {
+ host = &nd_btt->dev;
+ ndns = nd_btt->ndns;
+ } else if (nd_pfn) {
+ host = &nd_pfn->dev;
+ ndns = nd_pfn->ndns;
+ }
+
+ if (!ndns || !host)
return ERR_PTR(-ENODEV);
/*
@@ -1251,12 +1314,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
device_unlock(&ndns->dev);
if (ndns->dev.driver) {
dev_dbg(&ndns->dev, "is active, can't bind %s\n",
- dev_name(&nd_btt->dev));
+ dev_name(host));
return ERR_PTR(-EBUSY);
}
- if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev,
+ if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host,
"host (%s) vs claim (%s) mismatch\n",
- dev_name(&nd_btt->dev),
+ dev_name(host),
dev_name(ndns->claim)))
return ERR_PTR(-ENXIO);
} else {
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index e1970c71ad1c..159aed532042 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -80,4 +80,13 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region,
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
void get_ndd(struct nvdimm_drvdata *ndd);
resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
+void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
+void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns);
+bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+ struct nd_namespace_common **_ndns);
+bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach,
+ struct nd_namespace_common **_ndns);
+ssize_t nd_namespace_store(struct device *dev,
+ struct nd_namespace_common **_ndns, const char *buf,
+ size_t len);
#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index c41f53e74277..417e521d299c 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -29,6 +29,13 @@ enum {
ND_MAX_LANES = 256,
SECTOR_SHIFT = 9,
INT_LBASIZE_ALIGNMENT = 64,
+#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+ ND_PFN_ALIGN = PAGES_PER_SECTION * PAGE_SIZE,
+ ND_PFN_MASK = ND_PFN_ALIGN - 1,
+#else
+ ND_PFN_ALIGN = 0,
+ ND_PFN_MASK = 0,
+#endif
};
struct nvdimm_drvdata {
@@ -92,8 +99,11 @@ struct nd_region {
struct device dev;
struct ida ns_ida;
struct ida btt_ida;
+ struct ida pfn_ida;
+ unsigned long flags;
struct device *ns_seed;
struct device *btt_seed;
+ struct device *pfn_seed;
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
@@ -133,6 +143,22 @@ struct nd_btt {
int id;
};
+enum nd_pfn_mode {
+ PFN_MODE_NONE,
+ PFN_MODE_RAM,
+ PFN_MODE_PMEM,
+};
+
+struct nd_pfn {
+ int id;
+ u8 *uuid;
+ struct device dev;
+ unsigned long npfns;
+ enum nd_pfn_mode mode;
+ struct nd_pfn_sb *pfn_sb;
+ struct nd_namespace_common *ndns;
+};
+
enum nd_async_mode {
ND_SYNC,
ND_ASYNC,
@@ -159,14 +185,19 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
struct nd_btt *to_nd_btt(struct device *dev);
-struct btt_sb;
-u64 nd_btt_sb_checksum(struct btt_sb *btt_sb);
+
+struct nd_gen_sb {
+ char reserved[SZ_4K - 8];
+ __le64 checksum;
+};
+
+u64 nd_sb_checksum(struct nd_gen_sb *sb);
#if IS_ENABLED(CONFIG_BTT)
int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata);
bool is_nd_btt(struct device *dev);
struct device *nd_btt_create(struct nd_region *nd_region);
#else
-static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
+static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata)
{
return -ENODEV;
}
@@ -180,8 +211,36 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region)
{
return NULL;
}
+#endif
+struct nd_pfn *to_nd_pfn(struct device *dev);
+#if IS_ENABLED(CONFIG_NVDIMM_PFN)
+int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata);
+bool is_nd_pfn(struct device *dev);
+struct device *nd_pfn_create(struct nd_region *nd_region);
+int nd_pfn_validate(struct nd_pfn *nd_pfn);
+#else
+static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+ return -ENODEV;
+}
+
+static inline bool is_nd_pfn(struct device *dev)
+{
+ return false;
+}
+
+static inline struct device *nd_pfn_create(struct nd_region *nd_region)
+{
+ return NULL;
+}
+
+static inline int nd_pfn_validate(struct nd_pfn *nd_pfn)
+{
+ return -ENODEV;
+}
#endif
+
struct nd_region *to_nd_region(struct device *dev);
int nd_region_to_nstype(struct nd_region *nd_region);
int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
@@ -217,4 +276,6 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
}
void nd_iostat_end(struct bio *bio, unsigned long start);
resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
+const u8 *nd_dev_to_uuid(struct device *dev);
+bool pmem_should_map_pages(struct device *dev);
#endif /* __ND_H__ */
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
new file mode 100644
index 000000000000..cc243754acef
--- /dev/null
+++ b/drivers/nvdimm/pfn.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __NVDIMM_PFN_H
+#define __NVDIMM_PFN_H
+
+#include <linux/types.h>
+
+#define PFN_SIG_LEN 16
+#define PFN_SIG "NVDIMM_PFN_INFO\0"
+
+struct nd_pfn_sb {
+ u8 signature[PFN_SIG_LEN];
+ u8 uuid[16];
+ u8 parent_uuid[16];
+ __le32 flags;
+ __le16 version_major;
+ __le16 version_minor;
+ __le64 dataoff;
+ __le64 npfns;
+ __le32 mode;
+ u8 padding[4012];
+ __le64 checksum;
+};
+#endif /* __NVDIMM_PFN_H */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
new file mode 100644
index 000000000000..3fd7d0d81a47
--- /dev/null
+++ b/drivers/nvdimm/pfn_devs.c
@@ -0,0 +1,337 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/blkdev.h>
+#include <linux/device.h>
+#include <linux/genhd.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include "nd-core.h"
+#include "pfn.h"
+#include "nd.h"
+
+/*
+ * Device-type release callback for a pfn device (wired up through
+ * nd_pfn_device_type). Drops the namespace claim, returns the instance
+ * id to the parent region's pfn_ida, then frees the uuid buffer and the
+ * nd_pfn itself.
+ */
+static void nd_pfn_release(struct device *dev)
+{
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+ dev_dbg(dev, "%s\n", __func__);
+ /* release the namespace before the nd_pfn memory goes away */
+ nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
+ ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
+ kfree(nd_pfn->uuid);
+ kfree(nd_pfn);
+}
+
+/*
+ * Device type shared by every pfn device. is_nd_pfn() identifies pfn
+ * devices by comparing dev->type against the address of this object.
+ */
+static struct device_type nd_pfn_device_type = {
+ .name = "nd_pfn",
+ .release = nd_pfn_release,
+};
+
+/* Report whether @dev is a pfn device; a NULL @dev is never one. */
+bool is_nd_pfn(struct device *dev)
+{
+	if (!dev)
+		return false;
+
+	return dev->type == &nd_pfn_device_type;
+}
+EXPORT_SYMBOL(is_nd_pfn);
+
+/*
+ * Convert an embedded struct device back to its containing nd_pfn.
+ * Warns (but still performs the conversion) when @dev is not a pfn device.
+ */
+struct nd_pfn *to_nd_pfn(struct device *dev)
+{
+	WARN_ON(!is_nd_pfn(dev));
+
+	return container_of(dev, struct nd_pfn, dev);
+}
+EXPORT_SYMBOL(to_nd_pfn);
+
+/* sysfs 'mode' read: print "ram", "pmem" or "none" for the current mode. */
+static ssize_t mode_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	if (nd_pfn->mode == PFN_MODE_RAM)
+		return sprintf(buf, "ram\n");
+	if (nd_pfn->mode == PFN_MODE_PMEM)
+		return sprintf(buf, "pmem\n");
+
+	/* any other value is reported as "none" */
+	return sprintf(buf, "none\n");
+}
+
+/*
+ * sysfs 'mode' write: select the pfn mode by name.
+ *
+ * Accepts exactly "ram", "none" or "pmem", each with an optional trailing
+ * newline. The whole token is compared with sysfs_streq(); comparing only
+ * the first (len - 1) bytes would let empty or truncated input such as
+ * "\n" or "pm\n" pass as a valid mode name.
+ *
+ * Returns @len on success, -EBUSY while a driver is bound to @dev,
+ * -ENOTTY for "pmem" (not implemented yet) and -EINVAL for anything else.
+ */
+static ssize_t mode_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc = 0;
+
+	/* lock order: device lock first, then the bus lock */
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	if (dev->driver) {
+		rc = -EBUSY;
+	} else if (sysfs_streq(buf, "pmem")) {
+		/* TODO: allocate from PMEM support */
+		rc = -ENOTTY;
+	} else if (sysfs_streq(buf, "ram")) {
+		nd_pfn->mode = PFN_MODE_RAM;
+	} else if (sysfs_streq(buf, "none")) {
+		nd_pfn->mode = PFN_MODE_NONE;
+	} else {
+		rc = -EINVAL;
+	}
+	/* only peek at the last byte when there is one */
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, len && buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(mode);
+
+/* sysfs 'uuid' read: print the uuid, or an empty line when none is set. */
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	if (!nd_pfn->uuid)
+		return sprintf(buf, "\n");
+
+	return sprintf(buf, "%pUb\n", nd_pfn->uuid);
+}
+
+/*
+ * sysfs 'uuid' write: hand the input to nd_uuid_store() under the device
+ * lock. Returns @len when nd_uuid_store() reports 0, otherwise its result.
+ */
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
+	device_lock(dev);
+	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	device_unlock(dev);
+
+	if (rc)
+		return rc;
+
+	return len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+/*
+ * sysfs 'namespace' read: print the name of the attached namespace device,
+ * or an empty line when no namespace is attached. Sampled under the bus lock.
+ */
+static ssize_t namespace_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	const char *ns_name = "";
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nd_pfn->ndns)
+		ns_name = dev_name(&nd_pfn->ndns->dev);
+	rc = sprintf(buf, "%s\n", ns_name);
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+
+/*
+ * sysfs 'namespace' write: attach or detach the backing namespace through
+ * nd_namespace_store() and return its result unchanged.
+ *
+ * Lock order is device lock first, then the nvdimm bus lock. This is the
+ * order mode_store() uses, and the order the probe path uses when
+ * nd_pfn_probe() takes the bus lock; taking the two locks in the opposite
+ * order here would allow an ABBA deadlock against those paths.
+ */
+static ssize_t namespace_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RW(namespace);
+
+/* pfn-specific sysfs attributes: mode, namespace and uuid (NULL-terminated) */
+static struct attribute *nd_pfn_attributes[] = {
+ &dev_attr_mode.attr,
+ &dev_attr_namespace.attr,
+ &dev_attr_uuid.attr,
+ NULL,
+};
+
+/* unnamed group wrapping the pfn-specific attributes above */
+static struct attribute_group nd_pfn_attribute_group = {
+ .attrs = nd_pfn_attributes,
+};
+
+/*
+ * All attribute groups installed on a pfn device (assigned to dev->groups
+ * in __nd_pfn_create()): the pfn group plus the nd device and numa groups
+ * defined elsewhere.
+ */
+static const struct attribute_group *nd_pfn_attribute_groups[] = {
+ &nd_pfn_attribute_group,
+ &nd_device_attribute_group,
+ &nd_numa_attribute_group,
+ NULL,
+};
+
+/*
+ * Allocate and initialize (but do not register) a pfn device under
+ * @nd_region.
+ *
+ * @uuid: optional 16-byte uuid; it is duplicated, the caller keeps ownership
+ *        of its own buffer
+ * @mode: initial pfn mode
+ * @ndns: optional namespace to claim for the new device
+ *
+ * Returns the initialized device, or NULL when the region is not pmem, an
+ * allocation or id reservation fails, or @ndns cannot be attached.
+ */
+static struct device *__nd_pfn_create(struct nd_region *nd_region,
+ u8 *uuid, enum nd_pfn_mode mode,
+ struct nd_namespace_common *ndns)
+{
+ struct nd_pfn *nd_pfn;
+ struct device *dev;
+
+ /* we can only create pages for contiguous ranges of pmem */
+ if (!is_nd_pmem(&nd_region->dev))
+ return NULL;
+
+ nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
+ if (!nd_pfn)
+ return NULL;
+
+ nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL);
+ if (nd_pfn->id < 0) {
+ kfree(nd_pfn);
+ return NULL;
+ }
+
+ nd_pfn->mode = mode;
+ /*
+ * NOTE(review): the kmemdup() result is not checked, so an allocation
+ * failure leaves the device with a NULL uuid instead of failing here.
+ */
+ if (uuid)
+ uuid = kmemdup(uuid, 16, GFP_KERNEL);
+ nd_pfn->uuid = uuid;
+ dev = &nd_pfn->dev;
+ dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
+ dev->parent = &nd_region->dev;
+ dev->type = &nd_pfn_device_type;
+ dev->groups = nd_pfn_attribute_groups;
+ device_initialize(&nd_pfn->dev);
+ /* once initialized, the device is torn down with put_device() */
+ if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
+ dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
+ __func__, dev_name(ndns->claim));
+ put_device(dev);
+ return NULL;
+ }
+ return dev;
+}
+
+/*
+ * Create and register an unconfigured pfn device (no uuid, no namespace,
+ * mode "none") under @nd_region. Returns the device, or NULL on failure.
+ */
+struct device *nd_pfn_create(struct nd_region *nd_region)
+{
+	struct device *dev;
+
+	dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, NULL);
+	if (!dev)
+		return NULL;
+
+	__nd_device_register(dev);
+	return dev;
+}
+
+/*
+ * Read the pfn info block of the attached namespace into nd_pfn->pfn_sb
+ * and check it.
+ *
+ * Returns 0 when the info block is valid, otherwise:
+ *   -ENODEV  no buffer or namespace, parent is not pmem, namespace smaller
+ *            than ND_PFN_ALIGN, or signature/checksum mismatch
+ *   -ENXIO   the info block could not be read, or has an unknown mode
+ *   -ENOTTY  the info block requests PFN_MODE_PMEM (not implemented)
+ *   -ENOMEM  the uuid could not be duplicated
+ *   -EINVAL  the stored uuid differs from the one already set on @nd_pfn
+ *   -EBUSY   the namespace start is misaligned or dataoff is out of range
+ */
+int nd_pfn_validate(struct nd_pfn *nd_pfn)
+{
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ struct nd_namespace_io *nsio;
+ u64 checksum, offset;
+
+ if (!pfn_sb || !ndns)
+ return -ENODEV;
+
+ if (!is_nd_pmem(nd_pfn->dev.parent))
+ return -ENODEV;
+
+ /* section alignment for simple hotplug */
+ if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN)
+ return -ENODEV;
+
+ /* the info block lives at byte offset SZ_4K within the namespace */
+ if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)))
+ return -ENXIO;
+
+ if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0)
+ return -ENODEV;
+
+ /*
+ * The checksum is computed with the checksum field zeroed; the stored
+ * value is put back only after it has been verified.
+ */
+ checksum = le64_to_cpu(pfn_sb->checksum);
+ pfn_sb->checksum = 0;
+ if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb))
+ return -ENODEV;
+ pfn_sb->checksum = cpu_to_le64(checksum);
+
+ switch (le32_to_cpu(pfn_sb->mode)) {
+ case PFN_MODE_RAM:
+ break;
+ case PFN_MODE_PMEM:
+ /* TODO: allocate from PMEM support */
+ return -ENOTTY;
+ default:
+ return -ENXIO;
+ }
+
+ if (!nd_pfn->uuid) {
+ /* from probe we allocate */
+ nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
+ if (!nd_pfn->uuid)
+ return -ENOMEM;
+ } else {
+ /* from init we validate */
+ if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
+ return -EINVAL;
+ }
+
+ /*
+ * These warnings are verbose because they can only trigger in
+ * the case where the physical address alignment of the
+ * namespace has changed since the pfn superblock was
+ * established.
+ */
+ offset = le64_to_cpu(pfn_sb->dataoff);
+ nsio = to_nd_namespace_io(&ndns->dev);
+ if (nsio->res.start & ND_PFN_MASK) {
+ dev_err(&nd_pfn->dev,
+ "init failed: %s not section aligned\n",
+ dev_name(&ndns->dev));
+ return -EBUSY;
+ } else if (offset >= resource_size(&nsio->res)) {
+ dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
+ dev_name(&ndns->dev));
+ return -EBUSY;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(nd_pfn_validate);
+
+/*
+ * Check whether @ndns carries a valid pfn info block and, if it does,
+ * register a pfn device that claims the namespace.
+ *
+ * @drvdata is stored as the driver data of the new pfn device.
+ *
+ * Returns 0 when a pfn device was registered, -ENODEV when the namespace
+ * is forced to raw mode, -ENOMEM when the device could not be created, or
+ * the error from nd_pfn_validate().
+ */
+int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata)
+{
+ int rc;
+ struct device *dev;
+ struct nd_pfn *nd_pfn;
+ struct nd_pfn_sb *pfn_sb;
+ struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+
+ if (ndns->force_raw)
+ return -ENODEV;
+
+ nvdimm_bus_lock(&ndns->dev);
+ dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, ndns);
+ nvdimm_bus_unlock(&ndns->dev);
+ if (!dev)
+ return -ENOMEM;
+ dev_set_drvdata(dev, drvdata);
+ /*
+ * The allocation result is not checked here: nd_pfn_validate() returns
+ * -ENODEV for a NULL pfn_sb, and the kfree() below accepts NULL.
+ */
+ pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
+ nd_pfn = to_nd_pfn(dev);
+ nd_pfn->pfn_sb = pfn_sb;
+ rc = nd_pfn_validate(nd_pfn);
+ /* the buffer is only needed for validation; drop it either way */
+ nd_pfn->pfn_sb = NULL;
+ kfree(pfn_sb);
+ dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__,
+ rc == 0 ? dev_name(dev) : "<none>");
+ if (rc < 0) {
+ /* no valid info block: release the claim and discard the device */
+ __nd_detach_ndns(dev, &nd_pfn->ndns);
+ put_device(dev);
+ } else
+ __nd_device_register(&nd_pfn->dev);
+
+ return rc;
+}
+EXPORT_SYMBOL(nd_pfn_probe);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4c079d5cb539..b9525385c0dc 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -21,18 +21,24 @@
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
+#include <linux/memory_hotplug.h>
#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
+#include "pfn.h"
#include "nd.h"
struct pmem_device {
struct request_queue *pmem_queue;
struct gendisk *pmem_disk;
+ struct nd_namespace_common *ndns;
/* One contiguous memory region per device */
phys_addr_t phys_addr;
+ /* when non-zero this device is hosting a 'pfn' instance */
+ phys_addr_t data_offset;
void __pmem *virt_addr;
size_t size;
};
@@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
sector_t sector)
{
void *mem = kmap_atomic(page);
- size_t pmem_off = sector << 9;
+ phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
if (rw == READ) {
@@ -92,19 +98,26 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
}
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void **kaddr, unsigned long *pfn, long size)
+ void __pmem **kaddr, unsigned long *pfn)
{
struct pmem_device *pmem = bdev->bd_disk->private_data;
- size_t offset = sector << 9;
-
- if (!pmem)
- return -ENODEV;
+ resource_size_t offset = sector * 512 + pmem->data_offset;
+ resource_size_t size;
+
+ if (pmem->data_offset) {
+ /*
+ * Limit the direct_access() size to what is covered by
+ * the memmap
+ */
+ size = (pmem->size - offset) & ~ND_PFN_MASK;
+ } else
+ size = pmem->size - offset;
/* FIXME convert DAX to comprehend that this mapping has a lifetime */
- *kaddr = (void __force *) pmem->virt_addr + offset;
+ *kaddr = pmem->virt_addr + offset;
*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
- return pmem->size - offset;
+ return size;
}
static const struct block_device_operations pmem_fops = {
@@ -119,27 +132,33 @@ static struct pmem_device *pmem_alloc(struct device *dev,
{
struct pmem_device *pmem;
- pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
+ pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem)
return ERR_PTR(-ENOMEM);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
- if (!arch_has_pmem_api())
+ if (!arch_has_wmb_pmem())
dev_warn(dev, "unable to guarantee persistence of writes\n");
- if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
+ if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size,
+ dev_name(dev))) {
dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
&pmem->phys_addr, pmem->size);
- kfree(pmem);
return ERR_PTR(-EBUSY);
}
- pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size);
- if (!pmem->virt_addr) {
- release_mem_region(pmem->phys_addr, pmem->size);
- kfree(pmem);
- return ERR_PTR(-ENXIO);
+ if (pmem_should_map_pages(dev)) {
+ void *addr = devm_memremap_pages(dev, res);
+
+ if (IS_ERR(addr))
+ return addr;
+ pmem->virt_addr = (void __pmem *) addr;
+ } else {
+ pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr,
+ pmem->size);
+ if (!pmem->virt_addr)
+ return ERR_PTR(-ENXIO);
}
return pmem;
@@ -147,13 +166,16 @@ static struct pmem_device *pmem_alloc(struct device *dev,
static void pmem_detach_disk(struct pmem_device *pmem)
{
+ if (!pmem->pmem_disk)
+ return;
+
del_gendisk(pmem->pmem_disk);
put_disk(pmem->pmem_disk);
blk_cleanup_queue(pmem->pmem_queue);
}
-static int pmem_attach_disk(struct nd_namespace_common *ndns,
- struct pmem_device *pmem)
+static int pmem_attach_disk(struct device *dev,
+ struct nd_namespace_common *ndns, struct pmem_device *pmem)
{
struct gendisk *disk;
@@ -162,6 +184,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns,
return -ENOMEM;
blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
+ blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);
@@ -179,8 +202,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns,
disk->queue = pmem->pmem_queue;
disk->flags = GENHD_FL_EXT_DEVT;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
- disk->driverfs_dev = &ndns->dev;
- set_capacity(disk, pmem->size >> 9);
+ disk->driverfs_dev = dev;
+ set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
pmem->pmem_disk = disk;
add_disk(disk);
@@ -209,11 +232,152 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns,
return 0;
}
-static void pmem_free(struct pmem_device *pmem)
+/*
+ * Make sure the namespace behind @nd_pfn has a valid pfn info block,
+ * writing a fresh one when validation fails for a reason other than
+ * -EBUSY.
+ *
+ * On success nd_pfn->pfn_sb points at the in-memory copy of the info
+ * block. Returns 0 on success, -ENOMEM if the buffer cannot be allocated,
+ * -EBUSY straight from nd_pfn_validate(), -ENODEV when the namespace is
+ * too small or misaligned, and -ENXIO for every failure routed through
+ * the err label (read-only region, mode "none", write error).
+ *
+ * NOTE(review): the -EBUSY and -ENODEV returns leave the freshly allocated
+ * buffer attached to nd_pfn->pfn_sb rather than freeing it here - confirm
+ * the caller releases it.
+ */
+static int nd_pfn_init(struct nd_pfn *nd_pfn)
+{
+ struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
+ struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ struct nd_region *nd_region;
+ unsigned long npfns;
+ phys_addr_t offset;
+ u64 checksum;
+ int rc;
+
+ if (!pfn_sb)
+ return -ENOMEM;
+
+ nd_pfn->pfn_sb = pfn_sb;
+ rc = nd_pfn_validate(nd_pfn);
+ /* a valid block (0) or an alignment/capacity conflict (-EBUSY) ends here */
+ if (rc == 0 || rc == -EBUSY)
+ return rc;
+
+ /* section alignment for simple hotplug */
+ if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN
+ || pmem->phys_addr & ND_PFN_MASK)
+ return -ENODEV;
+
+ nd_region = to_nd_region(nd_pfn->dev.parent);
+ if (nd_region->ro) {
+ dev_info(&nd_pfn->dev,
+ "%s is read-only, unable to init metadata\n",
+ dev_name(&nd_region->dev));
+ goto err;
+ }
+
+ /* start from a clean block; validation may have filled the buffer */
+ memset(pfn_sb, 0, sizeof(*pfn_sb));
+ npfns = (pmem->size - SZ_8K) / SZ_4K;
+ /*
+ * Note, we use 64 here for the standard size of struct page,
+ * debugging options may cause it to be larger in which case the
+ * implementation will limit the pfns advertised through
+ * ->direct_access() to those that are included in the memmap.
+ */
+ if (nd_pfn->mode == PFN_MODE_PMEM)
+ offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE);
+ else if (nd_pfn->mode == PFN_MODE_RAM)
+ offset = SZ_8K;
+ else
+ goto err;
+
+ /* recompute the frame count now that the data offset is known */
+ npfns = (pmem->size - offset) / SZ_4K;
+ pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
+ pfn_sb->dataoff = cpu_to_le64(offset);
+ pfn_sb->npfns = cpu_to_le64(npfns);
+ memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
+ memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
+ pfn_sb->version_major = cpu_to_le16(1);
+ /* checksum field is still zero from the memset above */
+ checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
+ pfn_sb->checksum = cpu_to_le64(checksum);
+
+ rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+ if (rc)
+ goto err;
+
+ return 0;
+ err:
+ /* every failure routed here is reported as -ENXIO, whatever rc held */
+ nd_pfn->pfn_sb = NULL;
+ kfree(pfn_sb);
+ return -ENXIO;
+}
+
+/*
+ * Undo nvdimm_namespace_attach_pfn() for the pfn device that currently
+ * claims @ndns: tear down the pmem disk and drop the cached info block.
+ * Always returns 0.
+ */
+static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
+{
+	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+	struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
+
+	/* free pmem disk */
+	pmem_detach_disk(pmem);
+
+	/* release nd_pfn resources */
+	kfree(nd_pfn->pfn_sb);
+	nd_pfn->pfn_sb = NULL;
+
+	return 0;
+}
+
+/*
+ * Bring up the pfn device that claims @ndns: make sure a valid info block
+ * exists (nd_pfn_init()), remap the namespace with devm_memremap_pages(),
+ * and attach the pmem disk with the data offset taken from the info block.
+ *
+ * Only PFN_MODE_RAM with a data offset of SZ_8K is accepted. Returns 0 on
+ * success or a negative errno.
+ *
+ * NOTE(review): the early 'return' statements after nd_pfn_init() has
+ * succeeded bypass the err label, so nd_pfn->pfn_sb is not freed on those
+ * paths - confirm it is released elsewhere.
+ */
+static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
 {
- memunmap_pmem(pmem->virt_addr);
- release_mem_region(pmem->phys_addr, pmem->size);
- kfree(pmem);
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+ struct device *dev = &nd_pfn->dev;
+ struct vmem_altmap *altmap;
+ struct nd_region *nd_region;
+ struct nd_pfn_sb *pfn_sb;
+ struct pmem_device *pmem;
+ phys_addr_t offset;
+ int rc;
+
+ if (!nd_pfn->uuid || !nd_pfn->ndns)
+ return -ENODEV;
+
+ nd_region = to_nd_region(dev->parent);
+ rc = nd_pfn_init(nd_pfn);
+ if (rc)
+ return rc;
+
+ if (PAGE_SIZE != SZ_4K) {
+ dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n");
+ return -ENXIO;
+ }
+ if (nsio->res.start & ND_PFN_MASK) {
+ dev_err(dev, "%s not memory hotplug section aligned\n",
+ dev_name(&ndns->dev));
+ return -ENXIO;
+ }
+
+ /* the info block, not the sysfs setting, decides the final mode */
+ pfn_sb = nd_pfn->pfn_sb;
+ offset = le64_to_cpu(pfn_sb->dataoff);
+ nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
+ if (nd_pfn->mode == PFN_MODE_RAM) {
+ if (offset != SZ_8K)
+ return -EINVAL;
+ nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
+ altmap = NULL;
+ } else {
+ rc = -ENXIO;
+ goto err;
+ }
+
+ /* establish pfn range for lookup, and switch to direct map */
+ pmem = dev_get_drvdata(dev);
+ memunmap_pmem(dev, pmem->virt_addr);
+ pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res);
+ if (IS_ERR(pmem->virt_addr)) {
+ rc = PTR_ERR(pmem->virt_addr);
+ goto err;
+ }
+
+ /* attach pmem disk in "pfn-mode" */
+ pmem->data_offset = offset;
+ rc = pmem_attach_disk(dev, ndns, pmem);
+ if (rc)
+ goto err;
+
+ return rc;
+ err:
+ /* detach frees the cached info block and any disk that was attached */
+ nvdimm_namespace_detach_pfn(ndns);
+ return rc;
 }
static int nd_pmem_probe(struct device *dev)
@@ -222,7 +386,6 @@ static int nd_pmem_probe(struct device *dev)
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
struct pmem_device *pmem;
- int rc;
ndns = nvdimm_namespace_common_probe(dev);
if (IS_ERR(ndns))
@@ -233,18 +396,27 @@ static int nd_pmem_probe(struct device *dev)
if (IS_ERR(pmem))
return PTR_ERR(pmem);
+ pmem->ndns = ndns;
dev_set_drvdata(dev, pmem);
ndns->rw_bytes = pmem_rw_bytes;
+
if (is_nd_btt(dev))
- rc = nvdimm_namespace_attach_btt(ndns);
- else if (nd_btt_probe(ndns, pmem) == 0) {
+ return nvdimm_namespace_attach_btt(ndns);
+
+ if (is_nd_pfn(dev))
+ return nvdimm_namespace_attach_pfn(ndns);
+
+ if (nd_btt_probe(ndns, pmem) == 0) {
/* we'll come back as btt-pmem */
- rc = -ENXIO;
- } else
- rc = pmem_attach_disk(ndns, pmem);
- if (rc)
- pmem_free(pmem);
- return rc;
+ return -ENXIO;
+ }
+
+ if (nd_pfn_probe(ndns, pmem) == 0) {
+ /* we'll come back as pfn-pmem */
+ return -ENXIO;
+ }
+
+ return pmem_attach_disk(dev, ndns, pmem);
}
static int nd_pmem_remove(struct device *dev)
@@ -252,10 +424,11 @@ static int nd_pmem_remove(struct device *dev)
struct pmem_device *pmem = dev_get_drvdata(dev);
if (is_nd_btt(dev))
- nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
+ nvdimm_namespace_detach_btt(pmem->ndns);
+ else if (is_nd_pfn(dev))
+ nvdimm_namespace_detach_pfn(pmem->ndns);
else
pmem_detach_disk(pmem);
- pmem_free(pmem);
return 0;
}
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index f28f78ccff19..7da63eac78ee 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -53,6 +53,7 @@ static int nd_region_probe(struct device *dev)
return -ENODEV;
nd_region->btt_seed = nd_btt_create(nd_region);
+ nd_region->pfn_seed = nd_pfn_create(nd_region);
if (err == 0)
return 0;
@@ -84,6 +85,7 @@ static int nd_region_remove(struct device *dev)
nvdimm_bus_lock(dev);
nd_region->ns_seed = NULL;
nd_region->btt_seed = NULL;
+ nd_region->pfn_seed = NULL;
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 7384455792bf..529f3f02e7b2 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -345,6 +345,23 @@ static ssize_t btt_seed_show(struct device *dev,
}
static DEVICE_ATTR_RO(btt_seed);
+/*
+ * sysfs 'pfn_seed' read: print the name of the region's seed pfn device,
+ * or an empty line when there is none. Sampled under the bus lock.
+ */
+static ssize_t pfn_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	rc = nd_region->pfn_seed
+		? sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed))
+		: sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(pfn_seed);
+
static ssize_t read_only_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -373,6 +390,7 @@ static struct attribute *nd_region_attributes[] = {
&dev_attr_nstype.attr,
&dev_attr_mappings.attr,
&dev_attr_btt_seed.attr,
+ &dev_attr_pfn_seed.attr,
&dev_attr_read_only.attr,
&dev_attr_set_cookie.attr,
&dev_attr_available_size.attr,
@@ -740,10 +758,12 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
nd_region->provider_data = ndr_desc->provider_data;
nd_region->nd_set = ndr_desc->nd_set;
nd_region->num_lanes = ndr_desc->num_lanes;
+ nd_region->flags = ndr_desc->flags;
nd_region->ro = ro;
nd_region->numa_node = ndr_desc->numa_node;
ida_init(&nd_region->ns_ida);
ida_init(&nd_region->btt_ida);
+ ida_init(&nd_region->pfn_ida);
dev = &nd_region->dev;
dev_set_name(dev, "region%d", nd_region->id);
dev->parent = &nvdimm_bus->dev;