summaryrefslogtreecommitdiffstats
path: root/drivers/dax/bus.c
diff options
context:
space:
mode:
author: Dan Williams <dan.j.williams@intel.com> 2023-02-10 01:07:13 -0800
committer: Dan Williams <dan.j.williams@intel.com> 2023-02-10 17:33:40 -0800
commit: e9ee9fe3a9d4ae0e1e935fc2ec1218b66a043cae (patch)
tree: 118a412eb5423cb52995a6c6feadbb072279001e /drivers/dax/bus.c
parent: 7dab174e2e27eeaf10273e597ffbef4f8ea032bb (diff)
download: linux-e9ee9fe3a9d4ae0e1e935fc2ec1218b66a043cae.tar.gz
linux-e9ee9fe3a9d4ae0e1e935fc2ec1218b66a043cae.tar.bz2
linux-e9ee9fe3a9d4ae0e1e935fc2ec1218b66a043cae.zip
dax: Assign RAM regions to memory-hotplug by default
The default mode for device-dax instances is backwards for RAM-regions as evidenced by the fact that it tends to catch end users by surprise: "Where is my memory?"

Recall that platforms are increasingly shipping with performance-differentiated memory pools beyond typical DRAM and NUMA effects. This includes HBM (high-bandwidth-memory) and CXL (dynamic interleave, varied media types, and future fabric attached possibilities). For this reason the EFI_MEMORY_SP (EFI Special Purpose Memory => Linux 'Soft Reserved') attribute is expected to be applied to all memory-pools that are not the general purpose pool. This designation gives an Operating System a chance to defer usage of a memory pool until later in the boot process where its performance properties can be interrogated and administrator policy can be applied.

'Soft Reserved' memory can be anything from too limited and precious to be part of the general purpose pool (HBM), too slow to host hot kernel data structures (some PMEM media), or anything in between. However, in the absence of an explicit policy, the memory should at least be made usable by default.

The current device-dax default hides all non-general-purpose memory behind a device interface. The expectation is that the distribution of users that want the memory online by default vs device-dedicated-access by default follows the Pareto principle. A small number of enlightened users may want to do userspace memory management through a device, but general users just want the kernel to make the memory available with an option to get more advanced later.

Arrange for all device-dax instances not backed by PMEM to default to attaching to the dax_kmem driver. From there the baseline memory hotplug policy (CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE / memhp_default_state=) gates whether the memory comes online or stays offline. If it stays offline, it can be reliably converted back to device-mode where it can be partitioned, or fronted by a userspace allocator.
So, if someone wants device-dax instances for their 'Soft Reserved' memory:

1/ Build a kernel with CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=n or boot with memhp_default_state=offline, or roll the dice and hope that the kernel has not pinned a page in that memory before step 2.

2/ Write a udev rule to convert the target dax device(s) from 'system-ram' mode to 'devdax' mode:

    daxctl reconfigure-device $dax -m devdax -f

Cc: Michal Hocko <mhocko@suse.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Gregory Price <gregory.price@memverge.com>
Tested-by: Fan Ni <fan.ni@samsung.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/167602003336.1924368.6809503401422267885.stgit@dwillia2-xfh.jf.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dax/bus.c')
-rw-r--r-- drivers/dax/bus.c 53
1 files changed, 22 insertions, 31 deletions
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 1dad813ee4a6..012d576004e9 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -56,6 +56,25 @@ static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev)
return match;
}
+static int dax_match_type(struct dax_device_driver *dax_drv, struct device *dev)
+{
+ enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
+ struct dev_dax *dev_dax = to_dev_dax(dev);
+
+ if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
+ type = DAXDRV_KMEM_TYPE;
+
+ if (dax_drv->type == type)
+ return 1;
+
+ /* default to device mode if dax_kmem is disabled */
+ if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
+ !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
+ return 1;
+
+ return 0;
+}
+
enum id_action {
ID_REMOVE,
ID_ADD,
@@ -216,14 +235,9 @@ static int dax_bus_match(struct device *dev, struct device_driver *drv)
{
struct dax_device_driver *dax_drv = to_dax_drv(drv);
- /*
- * All but the 'device-dax' driver, which has 'match_always'
- * set, requires an exact id match.
- */
- if (dax_drv->match_always)
+ if (dax_match_id(dax_drv, dev))
return 1;
-
- return dax_match_id(dax_drv, dev);
+ return dax_match_type(dax_drv, dev);
}
/*
@@ -1413,13 +1427,10 @@ err_id:
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);
-static int match_always_count;
-
int __dax_driver_register(struct dax_device_driver *dax_drv,
struct module *module, const char *mod_name)
{
struct device_driver *drv = &dax_drv->drv;
- int rc = 0;
/*
* dax_bus_probe() calls dax_drv->probe() unconditionally.
@@ -1434,26 +1445,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
drv->mod_name = mod_name;
drv->bus = &dax_bus_type;
- /* there can only be one default driver */
- mutex_lock(&dax_bus_lock);
- match_always_count += dax_drv->match_always;
- if (match_always_count > 1) {
- match_always_count--;
- WARN_ON(1);
- rc = -EINVAL;
- }
- mutex_unlock(&dax_bus_lock);
- if (rc)
- return rc;
-
- rc = driver_register(drv);
- if (rc && dax_drv->match_always) {
- mutex_lock(&dax_bus_lock);
- match_always_count -= dax_drv->match_always;
- mutex_unlock(&dax_bus_lock);
- }
-
- return rc;
+ return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);
@@ -1463,7 +1455,6 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv)
struct dax_id *dax_id, *_id;
mutex_lock(&dax_bus_lock);
- match_always_count -= dax_drv->match_always;
list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
list_del(&dax_id->list);
kfree(dax_id);