diff options
48 files changed, 1266 insertions, 863 deletions
diff --git a/drivers/Makefile b/drivers/Makefile index 020780b6b4d2..f735c4955143 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,9 +72,9 @@ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DAX) += dax/ -obj-$(CONFIG_CXL_BUS) += cxl/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ +obj-y += cxl/ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index b67d2ee77cd1..86fa61a21826 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -443,7 +443,7 @@ static void acpi_bus_osc_negotiate_usb_control(void) } osc_sb_native_usb4_control = - control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD]; + control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fe61f617a943..ae5f4acf2675 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1230,7 +1230,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, if (rc) return rc; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); @@ -1247,7 +1247,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, break; } } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1267,10 +1267,10 @@ static ssize_t scrub_show(struct device *dev, ssize_t rc = -ENXIO; bool busy; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (!nd_desc) { - nfit_device_unlock(dev); + device_unlock(dev); return rc; } acpi_desc = to_acpi_desc(nd_desc); @@ -1287,7 +1287,7 @@ static ssize_t scrub_show(struct device *dev, } mutex_unlock(&acpi_desc->init_mutex); - nfit_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1304,14 +1304,14 @@ static ssize_t scrub_store(struct device *dev, if (val != 1) return -EINVAL; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1697,9 +1697,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) struct acpi_device *adev = data; struct device *dev = &adev->dev; - nfit_device_lock(dev->parent); + device_lock(dev->parent); __acpi_nvdimm_notify(dev, event); - nfit_device_unlock(dev->parent); + device_unlock(dev->parent); } static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) @@ -3152,8 +3152,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) struct device *dev = acpi_desc->dev; /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ - nfit_device_lock(dev); - nfit_device_unlock(dev); + device_lock(dev); + device_unlock(dev); /* Bounce the init_mutex to complete initial registration */ mutex_lock(&acpi_desc->init_mutex); @@ -3305,8 +3305,8 @@ void acpi_nfit_shutdown(void *data) * acpi_nfit_ars_rescan() submissions have had a chance to * either submit or see ->cancel set. */ - nfit_device_lock(bus_dev); - nfit_device_unlock(bus_dev); + device_lock(bus_dev); + device_unlock(bus_dev); flush_workqueue(nfit_wq); } @@ -3449,9 +3449,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify); static void acpi_nfit_notify(struct acpi_device *adev, u32 event) { - nfit_device_lock(&adev->dev); + device_lock(&adev->dev); __acpi_nfit_notify(&adev->dev, adev->handle, event); - nfit_device_unlock(&adev->dev); + device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 50882bdbeb96..6023ad61831a 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -337,30 +337,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc( return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); } -#ifdef CONFIG_PROVE_LOCKING -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); - mutex_lock(&dev->lockdep_mutex); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); - device_unlock(dev); -} -#else -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif - const guid_t *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); void acpi_nfit_shutdown(void *data); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index b3b507f20e87..d57cf8454b93 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -140,6 +140,17 @@ static struct pci_osc_bit_struct pci_osc_control_bit[] = { { OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" }, }; +static struct pci_osc_bit_struct cxl_osc_support_bit[] = { + { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" }, + { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" }, + { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" }, + { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" }, +}; + +static struct pci_osc_bit_struct cxl_osc_control_bit[] = { + { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" }, +}; + static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word, struct pci_osc_bit_struct *table, int size) { @@ -168,33 +179,73 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_support_bit, + ARRAY_SIZE(cxl_osc_support_bit)); +} + +static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_control_bit, + ARRAY_SIZE(cxl_osc_control_bit)); +} + +static inline bool is_pcie(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; +} + +static inline bool is_cxl(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_CXL; +} + static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; +static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"; -static acpi_status acpi_pci_run_osc(acpi_handle handle, - const u32 *capbuf, u32 *retval) +static char *to_uuid(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return cxl_osc_uuid_str; + return pci_osc_uuid_str; +} + +static int cap_length(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS; + return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS; +} + +static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, + const u32 *capbuf, u32 *pci_control, + u32 *cxl_control) { struct acpi_osc_context context = { - .uuid_str = pci_osc_uuid_str, + .uuid_str = to_uuid(root), .rev = 1, - .cap.length = 12, + .cap.length = cap_length(root), .cap.pointer = (void *)capbuf, }; acpi_status status; - status = acpi_run_osc(handle, &context); + status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { - *retval = *((u32 *)(context.ret.pointer + 8)); + *pci_control = acpi_osc_ctx_get_pci_control(&context); + if (is_cxl(root)) + *cxl_control = acpi_osc_ctx_get_cxl_control(&context); kfree(context.ret.pointer); } return status; } -static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, - u32 support, - u32 *control) +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support, + u32 *control, u32 cxl_support, + u32 *cxl_control) { acpi_status status; - u32 result, capbuf[3]; + u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -202,10 +253,28 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; - status = acpi_pci_run_osc(root->device->handle, capbuf, &result); + if (is_cxl(root)) { + cxl_support |= root->osc_ext_support_set; + capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support; + capbuf[OSC_EXT_CONTROL_DWORD] = *cxl_control | root->osc_ext_control_set; + } + +retry: + status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - *control = result; + *control = pci_result; + if (is_cxl(root)) { + root->osc_ext_support_set = cxl_support; + *cxl_control = cxl_result; + } + } else if (is_cxl(root)) { + /* + * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC + * upon any failure using CXL _OSC. + */ + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + goto retry; } return status; } @@ -321,6 +390,8 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). * @mask: Mask of _OSC bits to request control of, place to store control mask. * @support: _OSC supported capability. + * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask. + * @cxl_support: CXL _OSC supported capability. * * Run _OSC query for @mask and if that is successful, compare the returned * mask of control bits with @req. If all of the @req bits are set in the @@ -331,12 +402,14 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * _OSC bits the BIOS has granted control of, but its contents are meaningless * on failure. **/ -static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support) +static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, + u32 support, u32 *cxl_mask, + u32 cxl_support) { u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[3]; + u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -348,20 +421,42 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s ctrl = *mask; *mask |= root->osc_control_set; + if (is_cxl(root)) { + cxl_ctrl = *cxl_mask; + *cxl_mask |= root->osc_ext_control_set; + } + /* Need to check the available controls bits before requesting them. */ do { - status = acpi_pci_query_osc(root, support, mask); + u32 pci_missing = 0, cxl_missing = 0; + + status = acpi_pci_query_osc(root, support, mask, cxl_support, + cxl_mask); if (ACPI_FAILURE(status)) return status; - if (ctrl == *mask) - break; - decode_osc_control(root, "platform does not support", - ctrl & ~(*mask)); + if (is_cxl(root)) { + if (ctrl == *mask && cxl_ctrl == *cxl_mask) + break; + pci_missing = ctrl & ~(*mask); + cxl_missing = cxl_ctrl & ~(*cxl_mask); + } else { + if (ctrl == *mask) + break; + pci_missing = ctrl & ~(*mask); + } + if (pci_missing) + decode_osc_control(root, "platform does not support", + pci_missing); + if (cxl_missing) + decode_cxl_osc_control(root, "CXL platform does not support", + cxl_missing); ctrl = *mask; - } while (*mask); + cxl_ctrl = *cxl_mask; + } while (*mask || *cxl_mask); /* No need to request _OSC if the control was already granted. */ - if ((root->osc_control_set & ctrl) == ctrl) + if ((root->osc_control_set & ctrl) == ctrl && + (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl) return AE_OK; if ((ctrl & req) != req) { @@ -373,11 +468,17 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(handle, capbuf, mask); + if (is_cxl(root)) { + capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set; + capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl; + } + + status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask); if (ACPI_FAILURE(status)) return status; root->osc_control_set = *mask; + root->osc_ext_control_set = *cxl_mask; return AE_OK; } @@ -403,6 +504,53 @@ static u32 calculate_support(void) return support; } +/* + * Background on hotplug support, and making it depend on only + * CONFIG_HOTPLUG_PCI_PCIE vs. also considering CONFIG_MEMORY_HOTPLUG: + * + * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but + * there is no existing _OSC for memory hotplug support. The reason is that + * ACPI memory hotplug requires the OS to acknowledge / coordinate with + * memory plug events via a scan handler. On the CXL side the equivalent + * would be if Linux supported the Mechanical Retention Lock [1], or + * otherwise had some coordination for the driver of a PCI device + * undergoing hotplug to be consulted on whether the hotplug should + * proceed or not. + * + * The concern is that if Linux says no to supporting CXL hotplug then + * the BIOS may say no to giving the OS hotplug control of any other PCIe + * device. So the question here is not whether hotplug is enabled, it's + * whether it is handled natively by the at all OS, and if + * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes". + * + * Otherwise, the plan for CXL coordinated remove, since the kernel does + * not support blocking hotplug, is to require the memory device to be + * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is + * disabled that step will fail and the remove attempt cancelled by the + * user. If that is not honored and the card is removed anyway then it + * does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will + * cause a crash and other badness. + * + * Therefore, just say yes to CXL hotplug and require removal to + * be coordinated by userspace unless and until the kernel grows better + * mechanisms for doing "managed" removal of devices in consultation with + * the driver. + * + * [1]: https://lore.kernel.org/all/20201122014203.4706-1-ashok.raj@intel.com/ + */ +static u32 calculate_cxl_support(void) +{ + u32 support; + + support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + if (pci_aer_available()) + support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + support |= OSC_CXL_NATIVE_HP_SUPPORT; + + return support; +} + static u32 calculate_control(void) { u32 control; @@ -434,6 +582,16 @@ static u32 calculate_control(void) return control; } +static u32 calculate_cxl_control(void) +{ + u32 control = 0; + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) + control |= OSC_CXL_ERROR_REPORTING_CONTROL; + + return control; +} + static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) { struct acpi_device *device = root->device; @@ -452,10 +610,10 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) return true; } -static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, - bool is_pcie) +static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; + u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0; acpi_status status; struct acpi_device *device = root->device; acpi_handle handle = device->handle; @@ -479,10 +637,20 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, if (os_control_query_checks(root, support)) requested = control = calculate_control(); - status = acpi_pci_osc_control_set(handle, &control, support); + if (is_cxl(root)) { + cxl_support = calculate_cxl_support(); + decode_cxl_osc_support(root, "OS supports", cxl_support); + cxl_requested = cxl_control = calculate_cxl_control(); + } + + status = acpi_pci_osc_control_set(handle, &control, support, + &cxl_control, cxl_support); if (ACPI_SUCCESS(status)) { if (control) decode_osc_control(root, "OS now controls", control); + if (cxl_control) + decode_cxl_osc_control(root, "OS now controls", + cxl_control); if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) { /* @@ -504,13 +672,18 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, *no_aspm = 1; /* _OSC is optional for PCI host bridges */ - if ((status == AE_NOT_FOUND) && !is_pcie) + if (status == AE_NOT_FOUND && !is_pcie(root)) return; if (control) { decode_osc_control(root, "OS requested", requested); decode_osc_control(root, "platform willing to grant", control); } + if (cxl_control) { + decode_cxl_osc_control(root, "OS requested", cxl_requested); + decode_cxl_osc_control(root, "platform willing to grant", + cxl_control); + } dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n", acpi_format_exception(status)); @@ -527,7 +700,7 @@ static int acpi_pci_root_add(struct acpi_device *device, acpi_handle handle = device->handle; int no_aspm = 0; bool hotadd = system_state == SYSTEM_RUNNING; - bool is_pcie; + const char *acpi_hid; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -585,8 +758,15 @@ static int acpi_pci_root_add(struct acpi_device *device, root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle); - is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0; - negotiate_os_control(root, &no_aspm, is_pcie); + acpi_hid = acpi_device_hid(root->device); + if (strcmp(acpi_hid, "PNP0A08") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + else if (strcmp(acpi_hid, "ACPI0016") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_CXL; + else + dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n"); + + negotiate_os_control(root, &no_aspm); /* * TBD: Need PCI interface for enumeration/configuration of roots. diff --git a/drivers/base/core.c b/drivers/base/core.c index 3d6430eb0c6a..2eede2ec3d64 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2864,9 +2864,6 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); -#ifdef CONFIG_PROVE_LOCKING - mutex_init(&dev->lockdep_mutex); -#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index b88ab956bb7c..f64e3984689f 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -98,4 +98,8 @@ config CXL_PORT default CXL_BUS tristate +config CXL_SUSPEND + def_bool y + depends on SUSPEND && CXL_MEM + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index ce267ef11d93..a78270794150 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CXL_BUS) += core/ +obj-y += core/ obj-$(CONFIG_CXL_PCI) += cxl_pci.o obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d15a6aec0331..40286f5df812 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -275,6 +275,13 @@ static int add_root_nvdimm_bridge(struct device *match, void *data) return 1; } +static struct lock_class_key cxl_root_key; + +static void cxl_acpi_lock_reset_class(void *dev) +{ + device_lock_reset_class(dev); +} + static int cxl_acpi_probe(struct platform_device *pdev) { int rc; @@ -283,6 +290,12 @@ static int cxl_acpi_probe(struct platform_device *pdev) struct acpi_device *adev = ACPI_COMPANION(host); struct cxl_cfmws_context ctx; + device_lock_set_class(&pdev->dev, &cxl_root_key); + rc = devm_add_action_or_reset(&pdev->dev, cxl_acpi_lock_reset_class, + &pdev->dev); + if (rc) + return rc; + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 6d37cd78b151..9d35085d25af 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += cxl_core.o +obj-$(CONFIG_CXL_SUSPEND) += suspend.o ccflags-y += -I$(srctree)/drivers/cxl cxl_core-y := port.o diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index be61a0d8016b..54f434733b56 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -35,6 +35,7 @@ static bool cxl_raw_allow_all; .flags = _flags, \ } +#define CXL_VARIABLE_PAYLOAD ~0U /* * This table defines the supported mailbox commands for the driver. This table * is made up of a UAPI structure. Non-negative values as parameters in the @@ -44,26 +45,26 @@ static bool cxl_raw_allow_all; static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), #ifdef CONFIG_CXL_MEM_RAW_COMMANDS - CXL_CMD(RAW, ~0, ~0, 0), + CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0), #endif - CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(GET_FW_INFO, 0, 0x50, 0), CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0), - CXL_CMD(GET_LSA, 0x8, ~0, 0), + CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0), - CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0), - CXL_CMD(SET_LSA, ~0, 0, 0), + CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0), CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0), CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0), CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), - CXL_CMD(GET_POISON, 0x10, ~0, 0), + CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(INJECT_POISON, 0x8, 0, 0), CXL_CMD(CLEAR_POISON, 0x48, 0, 0), CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), - CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0), + CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0), }; /* @@ -127,6 +128,17 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) return NULL; } +static const char *cxl_mem_opcode_to_name(u16 opcode) +{ + struct cxl_mem_command *c; + + c = cxl_mem_find_command(opcode); + if (!c) + return NULL; + + return cxl_command_names[c->info.id].name; +} + /** * cxl_mbox_send_cmd() - Send a mailbox command to a device. * @cxlds: The device data for the operation @@ -136,7 +148,7 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) * @out: Caller allocated buffer for the output. * @out_size: Expected size of output. * - * Context: Any context. Will acquire and release mbox_mutex. + * Context: Any context. * Return: * * %>=0 - Number of bytes returned in @out. * * %-E2BIG - Payload is too large for hardware. @@ -169,17 +181,17 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, if (rc) return rc; - /* TODO: Map return code to proper kernel style errno */ - if (mbox_cmd.return_code != CXL_MBOX_SUCCESS) - return -ENXIO; + if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) + return cxl_mbox_cmd_rc2errno(&mbox_cmd); /* * Variable sized commands can't be validated and so it's up to the * caller to do that if they wish. */ - if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size) - return -EIO; - + if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) { + if (mbox_cmd.size_out != out_size) + return -EIO; + } return 0; } EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL); @@ -208,75 +220,122 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) } /** - * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. - * @cxlds: The device data for the operation - * @send_cmd: &struct cxl_send_command copied in from userspace. - * @out_cmd: Sanitized and populated &struct cxl_mem_command. + * cxl_payload_from_user_allowed() - Check contents of in_payload. + * @opcode: The mailbox command opcode. + * @payload_in: Pointer to the input payload passed in from user space. * * Return: - * * %0 - @out_cmd is ready to send. - * * %-ENOTTY - Invalid command specified. - * * %-EINVAL - Reserved fields or invalid values were used. - * * %-ENOMEM - Input or output buffer wasn't sized properly. - * * %-EPERM - Attempted to use a protected command. - * * %-EBUSY - Kernel has claimed exclusive access to this opcode + * * true - payload_in passes check for @opcode. + * * false - payload_in contains invalid or unsupported values. * - * The result of this command is a fully validated command in @out_cmd that is - * safe to send to the hardware. + * The driver may inspect payload contents before sending a mailbox + * command from user space to the device. The intent is to reject + * commands with input payloads that are known to be unsafe. This + * check is not intended to replace the users careful selection of + * mailbox command parameters and makes no guarantee that the user + * command will succeed, nor that it is appropriate. * - * See handle_mailbox_cmd_from_user() + * The specific checks are determined by the opcode. */ -static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_send_command *send_cmd, - struct cxl_mem_command *out_cmd) +static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) { - const struct cxl_command_info *info; - struct cxl_mem_command *c; + switch (opcode) { + case CXL_MBOX_OP_SET_PARTITION_INFO: { + struct cxl_mbox_set_partition_info *pi = payload_in; - if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) - return -ENOTTY; + if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) + return false; + break; + } + default: + break; + } + return true; +} - /* - * The user can never specify an input payload larger than what hardware - * supports, but output can be arbitrarily large (simply write out as - * much data as the hardware provides). - */ - if (send_cmd->in.size > cxlds->payload_size) +static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, + struct cxl_dev_state *cxlds, u16 opcode, + size_t in_size, size_t out_size, u64 in_payload) +{ + *mbox = (struct cxl_mbox_cmd) { + .opcode = opcode, + .size_in = in_size, + }; + + if (in_size) { + mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), + in_size); + if (IS_ERR(mbox->payload_in)) + return PTR_ERR(mbox->payload_in); + + if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { + dev_dbg(cxlds->dev, "%s: input payload not allowed\n", + cxl_mem_opcode_to_name(opcode)); + kvfree(mbox->payload_in); + return -EBUSY; + } + } + + /* Prepare to handle a full payload for variable sized output */ + if (out_size == CXL_VARIABLE_PAYLOAD) + mbox->size_out = cxlds->payload_size; + else + mbox->size_out = out_size; + + if (mbox->size_out) { + mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL); + if (!mbox->payload_out) { + kvfree(mbox->payload_in); + return -ENOMEM; + } + } + return 0; +} + +static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox) +{ + kvfree(mbox->payload_in); + kvfree(mbox->payload_out); +} + +static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) +{ + if (send_cmd->raw.rsvd) return -EINVAL; /* - * Checks are bypassed for raw commands but a WARN/taint will occur - * later in the callchain + * Unlike supported commands, the output size of RAW commands + * gets passed along without further checking, so it must be + * validated here. */ - if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) { - const struct cxl_mem_command temp = { - .info = { - .id = CXL_MEM_COMMAND_ID_RAW, - .flags = 0, - .size_in = send_cmd->in.size, - .size_out = send_cmd->out.size, - }, - .opcode = send_cmd->raw.opcode - }; + if (send_cmd->out.size > cxlds->payload_size) + return -EINVAL; - if (send_cmd->raw.rsvd) - return -EINVAL; + if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) + return -EPERM; - /* - * Unlike supported commands, the output size of RAW commands - * gets passed along without further checking, so it must be - * validated here. - */ - if (send_cmd->out.size > cxlds->payload_size) - return -EINVAL; + dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n"); - if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) - return -EPERM; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = CXL_MEM_COMMAND_ID_RAW, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = send_cmd->raw.opcode + }; - memcpy(out_cmd, &temp, sizeof(temp)); + return 0; +} - return 0; - } +static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) +{ + struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; + const struct cxl_command_info *info = &c->info; if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK) return -EINVAL; @@ -287,10 +346,6 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, if (send_cmd->in.rsvd || send_cmd->out.rsvd) return -EINVAL; - /* Convert user's command into the internal representation */ - c = &cxl_mem_commands[send_cmd->id]; - info = &c->info; - /* Check that the command is enabled for hardware */ if (!test_bit(info->id, cxlds->enabled_cmds)) return -ENOTTY; @@ -300,22 +355,74 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, return -EBUSY; /* Check the input buffer is the expected size */ - if (info->size_in >= 0 && info->size_in != send_cmd->in.size) + if (info->size_in != send_cmd->in.size) return -ENOMEM; /* Check the output buffer is at least large enough */ - if (info->size_out >= 0 && send_cmd->out.size < info->size_out) + if (send_cmd->out.size < info->size_out) return -ENOMEM; - memcpy(out_cmd, c, sizeof(*c)); - out_cmd->info.size_in = send_cmd->in.size; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = info->id, + .flags = info->flags, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = c->opcode + }; + + return 0; +} + +/** + * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. + * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. + * @cxlds: The device data for the operation + * @send_cmd: &struct cxl_send_command copied in from userspace. + * + * Return: + * * %0 - @out_cmd is ready to send. + * * %-ENOTTY - Invalid command specified. + * * %-EINVAL - Reserved fields or invalid values were used. + * * %-ENOMEM - Input or output buffer wasn't sized properly. + * * %-EPERM - Attempted to use a protected command. + * * %-EBUSY - Kernel has claimed exclusive access to this opcode + * + * The result of this command is a fully validated command in @mbox_cmd that is + * safe to send to the hardware. + */ +static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, + struct cxl_dev_state *cxlds, + const struct cxl_send_command *send_cmd) +{ + struct cxl_mem_command mem_cmd; + int rc; + + if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) + return -ENOTTY; + /* - * XXX: out_cmd->info.size_out will be controlled by the driver, and the - * specified number of bytes @send_cmd->out.size will be copied back out - * to userspace. + * The user can never specify an input payload larger than what hardware + * supports, but output can be arbitrarily large (simply write out as + * much data as the hardware provides). */ + if (send_cmd->in.size > cxlds->payload_size) + return -EINVAL; - return 0; + /* Sanitize and construct a cxl_mem_command */ + if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds); + else + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds); + + if (rc) + return rc; + + /* Sanitize and construct a cxl_mbox_cmd */ + return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode, + mem_cmd.info.size_in, mem_cmd.info.size_out, + send_cmd->in.payload); } int cxl_query_cmd(struct cxl_memdev *cxlmd, @@ -355,8 +462,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. * @cxlds: The device data for the operation - * @cmd: The validated command. - * @in_payload: Pointer to userspace's input payload. + * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. * (Output) Payload size hardware generated. @@ -371,51 +477,27 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * %-EINTR - Mailbox acquisition interrupted. * * %-EXXX - Transaction level failures. * - * Creates the appropriate mailbox command and dispatches it on behalf of a - * userspace request. The input and output payloads are copied between - * userspace. + * Dispatches a mailbox command on behalf of a userspace request. + * The output payload is copied to userspace. * * See cxl_send_cmd(). */ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_mem_command *cmd, - u64 in_payload, u64 out_payload, - s32 *size_out, u32 *retval) + struct cxl_mbox_cmd *mbox_cmd, + u64 out_payload, s32 *size_out, + u32 *retval) { struct device *dev = cxlds->dev; - struct cxl_mbox_cmd mbox_cmd = { - .opcode = cmd->opcode, - .size_in = cmd->info.size_in, - .size_out = cmd->info.size_out, - }; int rc; - if (cmd->info.size_out) { - mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL); - if (!mbox_cmd.payload_out) - return -ENOMEM; - } - - if (cmd->info.size_in) { - mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload), - cmd->info.size_in); - if (IS_ERR(mbox_cmd.payload_in)) { - kvfree(mbox_cmd.payload_out); - return PTR_ERR(mbox_cmd.payload_in); - } - } - dev_dbg(dev, "Submitting %s command for user\n" "\topcode: %x\n" - "\tsize: %ub\n", - cxl_command_names[cmd->info.id].name, mbox_cmd.opcode, - cmd->info.size_in); - - dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW, - "raw command path used\n"); + "\tsize: %zx\n", + cxl_mem_opcode_to_name(mbox_cmd->opcode), + mbox_cmd->opcode, mbox_cmd->size_in); - rc = cxlds->mbox_send(cxlds, &mbox_cmd); + rc = cxlds->mbox_send(cxlds, mbox_cmd); if (rc) goto out; @@ -424,22 +506,21 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, * to userspace. While the payload may have written more output than * this it will have to be ignored. */ - if (mbox_cmd.size_out) { - dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out, + if (mbox_cmd->size_out) { + dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out, "Invalid return size\n"); if (copy_to_user(u64_to_user_ptr(out_payload), - mbox_cmd.payload_out, mbox_cmd.size_out)) { + mbox_cmd->payload_out, mbox_cmd->size_out)) { rc = -EFAULT; goto out; } } - *size_out = mbox_cmd.size_out; - *retval = mbox_cmd.return_code; + *size_out = mbox_cmd->size_out; + *retval = mbox_cmd->return_code; out: - kvfree(mbox_cmd.payload_in); - kvfree(mbox_cmd.payload_out); + cxl_mbox_cmd_dtor(mbox_cmd); return rc; } @@ -448,7 +529,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxlmd->dev; struct cxl_send_command send; - struct cxl_mem_command c; + struct cxl_mbox_cmd mbox_cmd; int rc; dev_dbg(dev, "Send IOCTL\n"); @@ -456,17 +537,12 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(cxlmd->cxlds, &send, &c); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send); if (rc) return rc; - /* Prepare to handle a full payload for variable sized output */ - if (c.info.size_out < 0) - c.info.size_out = cxlds->payload_size; - - rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload, - send.out.payload, &send.out.size, - &send.retval); + rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload, + &send.out.size, &send.retval); if (rc) return rc; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1f76b28f9826..f7cdcd33504a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -228,6 +228,8 @@ static void detach_memdev(struct work_struct *work) put_device(&cxlmd->dev); } +static struct lock_class_key cxl_memdev_key; + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -247,6 +249,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev = &cxlmd->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_memdev_key); dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c9a494d6976a..c4c99ff7b55e 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include <linux/io-64-nonatomic-lo-hi.h> #include <linux/device.h> +#include <linux/delay.h> #include <linux/pci.h> #include <cxlpci.h> +#include <cxlmem.h> #include <cxl.h> #include "core.h" @@ -13,6 +16,10 @@ * a set of helpers for CXL interactions which occur via PCIe. */ +static unsigned short media_ready_timeout = 60; +module_param(media_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; @@ -94,3 +101,360 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port) return ctx.count; } EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); + +/* + * Wait up to @media_ready_timeout for the device to report memory + * active. + */ +int cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + for (i = media_ready_timeout; i; i--) { + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + media_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); + +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + u16 ctrl; + int rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc < 0) + return rc; + + if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + return 1; + ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl |= val; + + rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + if (rc < 0) + return rc; + + return 0; +} + +static void clear_mem_enable(void *cxlds) +{ + cxl_set_mem_enable(cxlds, 0); +} + +static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) +{ + int rc; + + rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + if (rc < 0) + return rc; + if (rc > 0) + return 0; + return devm_add_action_or_reset(host, clear_mem_enable, cxlds); +} + +static bool range_contains(struct range *r1, struct range *r2) +{ + return r1->start <= r2->start && r1->end >= r2->end; +} + +/* require dvsec ranges to be covered by a locked platform window */ +static int dvsec_range_allowed(struct device *dev, void *arg) +{ + struct range *dev_range = arg; + struct cxl_decoder *cxld; + struct range root_range; + + if (!is_root_decoder(dev)) + return 0; + + cxld = to_cxl_decoder(dev); + + if (!(cxld->flags & CXL_DECODER_F_LOCK)) + return 0; + if (!(cxld->flags & CXL_DECODER_F_RAM)) + return 0; + + root_range = (struct range) { + .start = cxld->platform_res.start, + .end = cxld->platform_res.end, + }; + + return range_contains(&root_range, dev_range); +} + +static void disable_hdm(void *_cxlhdm) +{ + u32 global_ctrl; + struct cxl_hdm *cxlhdm = _cxlhdm; + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); +} + +static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + return devm_add_action_or_reset(host, disable_hdm, cxlhdm); +} + +static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; + struct device *dev = cxlds->dev; + struct cxl_port *root; + int i, rc, allowed; + u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + /* + * If the HDM Decoder Capability is already enabled then assume + * that some other agent like platform firmware set it up. + */ + if (global_ctrl & CXL_HDM_DECODER_ENABLE) { + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; + return true; + } + + root = to_cxl_port(port->dev.parent); + while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) + root = to_cxl_port(root->dev.parent); + if (!is_cxl_root(root)) { + dev_err(dev, "Failed to acquire root port for HDM enable\n"); + return false; + } + + for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { + struct device *cxld_dev; + + cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], + dvsec_range_allowed); + if (!cxld_dev) { + dev_dbg(dev, "DVSEC Range%d denied by platform\n", i); + continue; + } + dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i); + put_device(cxld_dev); + allowed++; + } + + if (!allowed) { + cxl_set_mem_enable(cxlds, 0); + info->mem_enabled = 0; + } + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. If at least one DVSEC range is enabled and allowed, skip HDM + * Decoder Capability Enable. + */ + if (info->mem_enabled) + return false; + + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); + if (rc) + return false; + + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; + + return true; +} + +/** + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * @cxlhdm: Mapped HDM decoder Capability + * + * Try to enable the endpoint's HDM Decoder Capability + */ +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct cxl_endpoint_dvsec_info info = { 0 }; + int hdm_count, rc, i, ranges = 0; + struct device *dev = &pdev->dev; + int d = cxlds->cxl_dvsec; + u16 cap, ctrl; + + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); + return -ENXIO; + } + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); + return -ENXIO; + } + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); + return rc; + } + + /* + * The current DVSEC values are moot if the memory capability is + * disabled, and they will remain moot after the HDM Decoder + * capability is enabled. + */ + info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info.mem_enabled) + goto hdm_init; + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info.dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + ranges++; + } + + info.ranges = ranges; + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ +hdm_init: + if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { + dev_err(dev, + "Legacy range registers configuration prevents HDM operation.\n"); + return -EBUSY; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8de240c4d96b..bec7cfb54ebf 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -80,6 +80,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); +static struct lock_class_key cxl_nvdimm_bridge_key; + static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -99,6 +101,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) cxl_nvb->port = port; cxl_nvb->state = CXL_NVB_NEW; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; @@ -121,10 +124,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. */ - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() @@ -214,6 +217,8 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL); +static struct lock_class_key cxl_nvdimm_key; + static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) { struct cxl_nvdimm *cxl_nvd; @@ -226,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); dev->parent = &cxlmd->dev; dev->bus = &cxl_bus_type; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2ab1ba4499b3..ea60abda6500 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -312,10 +312,10 @@ static void cxl_port_release(struct device *dev) struct cxl_port *port = to_cxl_port(dev); struct cxl_ep *ep, *_e; - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry_safe(ep, _e, &port->endpoints, list) cxl_ep_release(ep); - cxl_device_unlock(dev); + device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -391,6 +391,8 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) return devm_add_action_or_reset(host, cxl_unlink_uport, port); } +static struct lock_class_key cxl_port_key; + static struct cxl_port *cxl_port_alloc(struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port) @@ -415,9 +417,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, * description. */ dev = &port->dev; - if (parent_port) + if (parent_port) { dev->parent = &parent_port->dev; - else + port->depth = parent_port->depth + 1; + } else dev->parent = uport; port->uport = uport; @@ -427,6 +430,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, INIT_LIST_HEAD(&port->endpoints); device_initialize(dev); + lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_port_type; @@ -457,8 +461,6 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, if (IS_ERR(port)) return port; - if (parent_port) - port->depth = parent_port->depth + 1; dev = &port->dev; if (is_cxl_memdev(uport)) rc = dev_set_name(dev, "endpoint%d", port->id); @@ -554,7 +556,7 @@ static int match_root_child(struct device *dev, const void *match) return 0; port = to_cxl_port(dev); - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry(dport, &port->dports, list) { iter = match; while (iter) { @@ -564,7 +566,7 @@ static int match_root_child(struct device *dev, const void *match) } } out: - cxl_device_unlock(dev); + device_unlock(dev); return !!iter; } @@ -623,13 +625,13 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) static void cond_cxl_root_lock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_lock(&port->dev); + device_lock(&port->dev); } static void cond_cxl_root_unlock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); } static void cxl_dport_remove(void *data) @@ -736,15 +738,15 @@ static int add_ep(struct cxl_port *port, struct cxl_ep *new) { struct cxl_ep *dup; - cxl_device_lock(&port->dev); + device_lock(&port->dev); if (port->dead) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return -ENXIO; } dup = find_ep(port, new->ep); if (!dup) list_add_tail(&new->list, &port->endpoints); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dup ? -EEXIST : 0; } @@ -854,12 +856,12 @@ static void delete_endpoint(void *data) goto out; parent = &parent_port->dev; - cxl_device_lock(parent); + device_lock(parent); if (parent->driver && endpoint->uport) { devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } - cxl_device_unlock(parent); + device_unlock(parent); put_device(parent); out: put_device(&endpoint->dev); @@ -920,7 +922,7 @@ static void cxl_detach_ep(void *data) } parent_port = to_cxl_port(port->dev.parent); - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { /* * The bottom-up race to delete the port lost to a @@ -928,12 +930,12 @@ static void cxl_detach_ep(void *data) * parent_port ->remove() will have cleaned up all * descendants. */ - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); put_device(&port->dev); continue; } - cxl_device_lock(&port->dev); + device_lock(&port->dev); ep = find_ep(port, &cxlmd->dev); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); @@ -948,7 +950,7 @@ static void cxl_detach_ep(void *data) port->dead = true; list_splice_init(&port->dports, &reap_dports); } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); if (!list_empty(&reap_dports)) { dev_dbg(&cxlmd->dev, "delete %s\n", @@ -956,7 +958,7 @@ static void cxl_detach_ep(void *data) delete_switch_port(port, &reap_dports); } put_device(&port->dev); - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); } } @@ -1004,7 +1006,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return -EAGAIN; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_warn(&cxlmd->dev, "port %s:%s disabled, failed to enumerate CXL.mem\n", @@ -1022,7 +1024,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, get_device(&port->dev); } out: - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); if (IS_ERR(port)) rc = PTR_ERR(port); @@ -1133,14 +1135,14 @@ struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, { struct cxl_dport *dport; - cxl_device_lock(&port->dev); + device_lock(&port->dev); list_for_each_entry(dport, &port->dports, list) if (dport->dport == dev) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dport; } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return NULL; } EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); @@ -1173,6 +1175,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, return rc; } +static struct lock_class_key cxl_decoder_key; + /** * cxl_decoder_alloc - Allocate a new CXL decoder * @port: owning port of this decoder @@ -1214,6 +1218,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, seqlock_init(&cxld->target_lock); dev = &cxld->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_decoder_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; @@ -1379,9 +1384,9 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); - cxl_device_lock(&port->dev); + device_lock(&port->dev); rc = cxl_decoder_add_locked(cxld, target_map); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return rc; } @@ -1452,14 +1457,7 @@ static int cxl_bus_probe(struct device *dev) { int rc; - /* - * Take the CXL nested lock since the driver core only holds - * @dev->mutex and not @dev->lockdep_mutex. - */ - cxl_nested_lock(dev); rc = to_cxl_drv(dev->driver)->probe(dev); - cxl_nested_unlock(dev); - dev_dbg(dev, "probe: %d\n", rc); return rc; } @@ -1468,10 +1466,8 @@ static void cxl_bus_remove(struct device *dev) { struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); - cxl_nested_lock(dev); if (cxl_drv->remove) cxl_drv->remove(dev); - cxl_nested_unlock(dev); } static struct workqueue_struct *cxl_bus_wq; diff --git a/drivers/cxl/core/suspend.c b/drivers/cxl/core/suspend.c new file mode 100644 index 000000000000..a5984d96ea1d --- /dev/null +++ b/drivers/cxl/core/suspend.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include <linux/atomic.h> +#include <linux/export.h> +#include "cxlmem.h" + +static atomic_t mem_active; + +bool cxl_mem_active(void) +{ + return atomic_read(&mem_active) != 0; +} + +void cxl_mem_active_inc(void) +{ + atomic_inc(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_inc, CXL); + +void cxl_mem_active_dec(void) +{ + atomic_dec(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_dec, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 990b6670222e..140dc3278cde 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -405,82 +405,4 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #define __mock static #endif -#ifdef CONFIG_PROVE_CXL_LOCKING -enum cxl_lock_class { - CXL_ANON_LOCK, - CXL_NVDIMM_LOCK, - CXL_NVDIMM_BRIDGE_LOCK, - CXL_PORT_LOCK, - /* - * Be careful to add new lock classes here, CXL_PORT_LOCK is - * extended by the port depth, so a maximum CXL port topology - * depth would need to be defined first. - */ -}; - -static inline void cxl_nested_lock(struct device *dev) -{ - if (is_cxl_port(dev)) { - struct cxl_port *port = to_cxl_port(dev); - - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth); - } else if (is_cxl_decoder(dev)) { - struct cxl_port *port = to_cxl_port(dev->parent); - - /* - * A decoder is the immediate child of a port, so set - * its lock class equal to other child device siblings. - */ - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth + 1); - } else if (is_cxl_nvdimm_bridge(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); - else if (is_cxl_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); - else - mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); -} - -static inline void cxl_nested_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void cxl_device_lock(struct device *dev) -{ - /* - * For double lock errors the lockup will happen before lockdep - * warns at cxl_nested_lock(), so assert explicitly. - */ - lockdep_assert_not_held(&dev->lockdep_mutex); - - device_lock(dev); - cxl_nested_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - cxl_nested_unlock(dev); - device_unlock(dev); -} -#else -static inline void cxl_nested_lock(struct device *dev) -{ -} - -static inline void cxl_nested_unlock(struct device *dev) -{ -} - -static inline void cxl_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 5d33ce24fe09..60d10ee1e7fc 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -85,10 +85,61 @@ struct cxl_mbox_cmd { size_t size_in; size_t size_out; u16 return_code; -#define CXL_MBOX_SUCCESS 0 }; /* + * Per CXL 2.0 Section 8.2.8.4.5.1 + */ +#define CMD_CMD_RC_TABLE \ + C(SUCCESS, 0, NULL), \ + C(BACKGROUND, -ENXIO, "background cmd started successfully"), \ + C(INPUT, -ENXIO, "cmd input was invalid"), \ + C(UNSUPPORTED, -ENXIO, "cmd is not supported"), \ + C(INTERNAL, -ENXIO, "internal device error"), \ + C(RETRY, -ENXIO, "temporary error, retry once"), \ + C(BUSY, -ENXIO, "ongoing background operation"), \ + C(MEDIADISABLED, -ENXIO, "media access is disabled"), \ + C(FWINPROGRESS, -ENXIO, "one FW package can be transferred at a time"), \ + C(FWOOO, -ENXIO, "FW package content was transferred out of order"), \ + C(FWAUTH, -ENXIO, "FW package authentication failed"), \ + C(FWSLOT, -ENXIO, "FW slot is not supported for requested operation"), \ + C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"), \ + C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ + C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ + C(PADDR, -ENXIO, "physical address specified is invalid"), \ + C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ + C(ABORT, -ENXIO, "background cmd was aborted by device"), \ + C(SECURITY, -ENXIO, "not valid in the current security state"), \ + C(PASSPHRASE, -ENXIO, "phrase doesn't match current set passphrase"), \ + C(MBUNSUPPORTED, -ENXIO, "unsupported on the mailbox it was issued on"),\ + C(PAYLOADLEN, -ENXIO, "invalid payload length") + +#undef C +#define C(a, b, c) CXL_MBOX_CMD_RC_##a +enum { CMD_CMD_RC_TABLE }; +#undef C +#define C(a, b, c) { b, c } +struct cxl_mbox_cmd_rc { + int err; + const char *desc; +}; + +static const +struct cxl_mbox_cmd_rc cxl_mbox_cmd_rctable[] ={ CMD_CMD_RC_TABLE }; +#undef C + +static inline const char *cxl_mbox_cmd_rc2str(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].desc; +} + +static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].err; +} + +/* * CXL 2.0 - Memory capacity multiplier * See Section 8.2.9.5 * @@ -141,7 +192,6 @@ struct cxl_endpoint_dvsec_info { * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands - * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -172,11 +222,9 @@ struct cxl_dev_state { u64 next_persistent_bytes; resource_size_t component_reg_phys; - struct cxl_endpoint_dvsec_info info; u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); - int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { @@ -262,6 +310,13 @@ struct cxl_mbox_set_lsa { u8 data[]; } __packed; +struct cxl_mbox_set_partition_info { + __le64 volatile_capacity; + u8 flags; +} __packed; + +#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0) + /** * struct cxl_mem_command - Driver representation of a memory device command * @info: Command information as it exists for the UAPI @@ -290,11 +345,23 @@ struct cxl_mem_command { int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, size_t in_size, void *out, size_t out_size); int cxl_dev_state_identify(struct cxl_dev_state *cxlds); +int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_dev_state *cxlds); int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +#ifdef CONFIG_CXL_SUSPEND +void cxl_mem_active_inc(void); +void cxl_mem_active_dec(void); +#else +static inline void cxl_mem_active_inc(void) +{ +} +static inline void cxl_mem_active_dec(void) +{ +} +#endif struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 329e7ea3f36a..fce1c11729c2 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -72,4 +72,6 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, } int devm_cxl_port_enumerate_dports(struct cxl_port *port); +struct cxl_dev_state; +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 49a4b1c47299..c310f1fd3db0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -24,27 +24,6 @@ * in higher level operations. */ -static int wait_for_media(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - int rc; - - if (!info->mem_enabled) - return -EBUSY; - - rc = cxlds->wait_media_ready(cxlds); - if (rc) - return rc; - - /* - * We know the device is active, and enabled, if any ranges are non-zero - * we'll need to check later before adding the port since that owns the - * HDM decoder registers. - */ - return 0; -} - static int create_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *parent_port) { @@ -67,72 +46,14 @@ static int create_endpoint(struct cxl_memdev *cxlmd, return cxl_endpoint_autoremove(cxlmd, endpoint); } -/** - * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint - * @cxlds: Device state - * - * Additionally, enables global HDM decoding. Warning: don't call this outside - * of probe. Once probe is complete, the port driver owns all access to the HDM - * decoder registers. - * - * Returns: false if DVSEC Ranges are being used instead of HDM - * decoders, or if it can not be determined if DVSEC Ranges are in use. - * Otherwise, returns true. - */ -__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +static void enable_suspend(void *data) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - struct cxl_register_map map; - struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, do_hdm_init = false; - void __iomem *crb; - u32 global_ctrl; - - /* map hdm decoder */ - crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - dev_dbg(cxlds->dev, "Failed to map component registers\n"); - return false; - } - - cxl_probe_component_regs(cxlds->dev, crb, cmap); - if (!cmap->hdm_decoder.valid) { - dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); - goto out; - } - - global_ctrl = readl(crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; - if (!global_enable && info->ranges) { - dev_dbg(cxlds->dev, - "DVSEC ranges already programmed and HDM decoders not enabled.\n"); - goto out; - } - - do_hdm_init = true; - - /* - * Permanently (for this boot at least) opt the device into HDM - * operation. Individual HDM decoders still need to be enabled after - * this point. - */ - if (!global_enable) { - dev_dbg(cxlds->dev, "Enabling HDM decode\n"); - writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - } - -out: - iounmap(crb); - return do_hdm_init; + cxl_mem_active_dec(); } static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -147,44 +68,6 @@ static int cxl_mem_probe(struct device *dev) if (work_pending(&cxlmd->detach_work)) return -EBUSY; - rc = wait_for_media(cxlmd); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - - /* - * If DVSEC ranges are being used instead of HDM decoder registers there - * is no use in trying to manage those. - */ - if (!cxl_dvsec_decode_init(cxlds)) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - int i; - - /* */ - for (i = 0; i < 2; i++) { - u64 base, size; - - /* - * Give a nice warning to the user that BIOS has really - * botched things for them if it didn't place DVSEC - * ranges in the memory map. - */ - base = info->dvsec_range[i].start; - size = range_len(&info->dvsec_range[i]); - if (size && !region_intersects(base, size, - IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE)) { - dev_err(dev, - "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", - base, base + size - 1); - } - } - dev_err(dev, - "Active DVSEC range registers in use. Will not bind.\n"); - return -EBUSY; - } - rc = devm_cxl_enumerate_ports(cxlmd); if (rc) return rc; @@ -195,19 +78,36 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_err(dev, "CXL port topology %s not enabled\n", dev_name(&parent_port->dev)); rc = -ENXIO; - goto out; + goto unlock; } rc = create_endpoint(cxlmd, parent_port); -out: - cxl_device_unlock(&parent_port->dev); +unlock: + device_unlock(&parent_port->dev); put_device(&parent_port->dev); - return rc; + if (rc) + return rc; + + /* + * The kernel may be operating out of CXL memory on this device, + * there is no spec defined way to determine whether this device + * preserves contents over suspend, and there is no simple way + * to arrange for the suspend image to avoid CXL memory which + * would setup a circular dependency between PCI resume and save + * state restoration. + * + * TODO: support suspend when all the regions this device is + * hosting are locked and covered by the system address map, + * i.e. platform firmware owns restoring the HDM configuration + * that it locked. + */ + cxl_mem_active_inc(); + return devm_add_action_or_reset(dev, enable_suspend, NULL); } static struct cxl_driver cxl_mem_driver = { diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 3f2182d66829..5a0ae46d4989 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -48,8 +48,7 @@ */ static unsigned short mbox_ready_timeout = 60; module_param(mbox_ready_timeout, ushort, 0644); -MODULE_PARM_DESC(mbox_ready_timeout, - "seconds to wait for mailbox ready / memory active status"); +MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { @@ -177,9 +176,10 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); - if (mbox_cmd->return_code != 0) { - dev_dbg(dev, "Mailbox operation had an error\n"); - return 0; + if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { + dev_dbg(dev, "Mailbox operation had an error: %s\n", + cxl_mbox_cmd_rc2str(mbox_cmd)); + return 0; /* completed but caller must check return_code */ } /* #7 */ @@ -386,164 +386,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } -static int wait_for_valid(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec, rc; - u32 val; - - /* - * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high - * and Size Low registers are valid. Must be set within 1 second of - * deassertion of reset to CXL device. Likely it is already set by the - * time this runs, but otherwise give a 1.5 second timeout in case of - * clock skew. - */ - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - msleep(1500); - - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - return -ETIMEDOUT; -} - -/* - * Wait up to @mbox_ready_timeout for the device to report memory - * active. - */ -static int wait_for_media_ready(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec; - bool active = false; - u64 md_status; - int rc, i; - - rc = wait_for_valid(cxlds); - if (rc) - return rc; - - for (i = mbox_ready_timeout; i; i--) { - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); - if (rc) - return rc; - - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); - if (active) - break; - msleep(1000); - } - - if (!active) { - dev_err(&pdev->dev, - "timeout awaiting memory active after %d seconds\n", - mbox_ready_timeout); - return -ETIMEDOUT; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!CXLMDEV_READY(md_status)) - return -EIO; - - return 0; -} - -static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec; - int hdm_count, rc, i; - u16 cap, ctrl; - - if (!d) - return -ENXIO; - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) - return -ENXIO; - - /* - * It is not allowed by spec for MEM.capable to be set and have 0 legacy - * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this - * driver is for a spec defined class code which must be CXL.mem - * capable, there is no point in continuing to enable CXL.mem. - */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); - if (!hdm_count || hdm_count > 2) - return -EINVAL; - - rc = wait_for_valid(cxlds); - if (rc) - return rc; - - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - - for (i = 0; i < hdm_count; i++) { - u64 base, size; - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); - if (rc) - return rc; - - size = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); - if (rc) - return rc; - - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); - if (rc) - return rc; - - base = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); - if (rc) - return rc; - - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - - info->dvsec_range[i] = (struct range) { - .start = base, - .end = base + size - 1 - }; - - if (size) - info->ranges++; - } - - return 0; -} - static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -573,8 +415,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); - cxlds->wait_media_ready = wait_for_media_ready; - rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; @@ -610,11 +450,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_dvsec_ranges(cxlds); - if (rc) - dev_warn(&pdev->dev, - "Failed to get DVSEC range information (%d)\n", rc); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 15ad666ab03e..bbeef91e637e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - cxl_device_lock(dev); + device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - cxl_device_unlock(dev); + device_unlock(dev); return 0; } @@ -344,7 +344,6 @@ static __init int cxl_pmem_init(void) { int rc; - set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d420da5fc39c..3cf308f114c4 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -36,14 +36,8 @@ static int cxl_port_probe(struct device *dev) struct cxl_hdm *cxlhdm; int rc; - if (is_cxl_endpoint(port)) { - struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); - get_device(&cxlmd->dev); - rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); - if (rc) - return rc; - } else { + if (!is_cxl_endpoint(port)) { rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) return rc; @@ -55,6 +49,26 @@ static int cxl_port_probe(struct device *dev) if (IS_ERR(cxlhdm)) return PTR_ERR(cxlhdm); + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + get_device(&cxlmd->dev); + rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); + if (rc) + return rc; + + rc = cxl_hdm_decode_init(cxlds, cxlhdm); + if (rc) + return rc; + + rc = cxl_await_media_ready(cxlds); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + } + rc = devm_cxl_enumerate_decoders(cxlhdm); if (rc) { dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index e5a58520d398..fabbb31f2c35 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -50,14 +50,14 @@ static ssize_t sector_size_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -79,11 +79,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -108,13 +108,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -126,14 +126,14 @@ static ssize_t size_show(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = sprintf(buf, "%llu\n", nd_btt->size); else { /* no size to convey if the btt instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -178,6 +178,8 @@ bool is_nd_btt(struct device *dev) } EXPORT_SYMBOL(is_nd_btt); +static struct lock_class_key nvdimm_btt_key; + static struct device *__nd_btt_create(struct nd_region *nd_region, unsigned long lbasize, uuid_t *uuid, struct nd_namespace_common *ndns) @@ -205,6 +207,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->parent = &nd_region->dev; dev->type = &nd_btt_device_type; device_initialize(&nd_btt->dev); + lockdep_set_class(&nd_btt->dev.mutex, &nvdimm_btt_key); if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -225,7 +228,7 @@ struct device *nd_btt_create(struct nd_region *nd_region) { struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -324,7 +327,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (!nd_btt->uuid) return -ENOMEM; - __nd_device_register(&nd_btt->dev); + nd_device_register(&nd_btt->dev); return 0; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7b0d1443217a..a4fc17db707c 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -88,10 +88,7 @@ static int nvdimm_bus_probe(struct device *dev) dev->driver->name, dev_name(dev)); nvdimm_bus_probe_start(nvdimm_bus); - debug_nvdimm_lock(dev); rc = nd_drv->probe(dev); - debug_nvdimm_unlock(dev); - if ((rc == 0 || rc == -EOPNOTSUPP) && dev->parent && is_nd_region(dev->parent)) nd_region_advance_seeds(to_nd_region(dev->parent), dev); @@ -111,11 +108,8 @@ static void nvdimm_bus_remove(struct device *dev) struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - if (nd_drv->remove) { - debug_nvdimm_lock(dev); + if (nd_drv->remove) nd_drv->remove(dev); - debug_nvdimm_unlock(dev); - } dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name, dev_name(dev)); @@ -139,7 +133,7 @@ static void nvdimm_bus_shutdown(struct device *dev) void nd_device_notify(struct device *dev, enum nvdimm_event event) { - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_device_driver *nd_drv; @@ -147,7 +141,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event) if (nd_drv->notify) nd_drv->notify(dev, event); } - nd_device_unlock(dev); + device_unlock(dev); } EXPORT_SYMBOL(nd_device_notify); @@ -334,6 +328,8 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_to_bus); +static struct lock_class_key nvdimm_bus_key; + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nd_desc) { @@ -360,6 +356,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.bus = &nvdimm_bus_type; nvdimm_bus->dev.of_node = nd_desc->of_node; device_initialize(&nvdimm_bus->dev); + lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key); device_set_pm_not_required(&nvdimm_bus->dev); rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); if (rc) @@ -511,7 +508,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) put_device(dev); } -void __nd_device_register(struct device *dev) +void nd_device_register(struct device *dev) { if (!dev) return; @@ -537,12 +534,6 @@ void __nd_device_register(struct device *dev) async_schedule_dev_domain(nd_async_device_register, dev, &nd_async_domain); } - -void nd_device_register(struct device *dev) -{ - device_initialize(dev); - __nd_device_register(dev); -} EXPORT_SYMBOL(nd_device_register); void nd_device_unregister(struct device *dev, enum nd_async_mode mode) @@ -572,9 +563,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode) * or otherwise let the async path handle it if the * unregistration was already queued. */ - nd_device_lock(dev); + device_lock(dev); killed = kill_device(dev); - nd_device_unlock(dev); + device_unlock(dev); if (!killed) return; @@ -724,6 +715,8 @@ static void ndctl_release(struct device *dev) kfree(dev); } +static struct lock_class_key nvdimm_ndctl_key; + int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); @@ -734,6 +727,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) if (!dev) return -ENOMEM; device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_ndctl_key); device_set_pm_not_required(dev); dev->class = nd_class; dev->parent = &nvdimm_bus->dev; @@ -930,10 +924,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) if (nvdimm_bus->probe_active == 0) break; nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); wait_event(nvdimm_bus->wait, nvdimm_bus->probe_active == 0); - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); } while (true); } @@ -1167,7 +1161,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, goto out; } - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); if (rc) @@ -1189,7 +1183,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, out_unlock: nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); out: kfree(in_env); kfree(out_env); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 69a03358817f..d91799b71d23 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); * * Enforce that uuids can only be changed while the device is disabled * (driver detached) - * LOCKING: expects nd_device_lock() is held on entry + * LOCKING: expects device_lock() is held on entry */ int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf, size_t len) @@ -316,15 +316,15 @@ static DEVICE_ATTR_RO(provider); static int flush_namespaces(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); return 0; } static int flush_regions_dimms(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); device_for_each_child(dev, NULL, flush_namespaces); return 0; } @@ -368,9 +368,7 @@ static ssize_t capability_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); switch (cap) { case NVDIMM_FWA_CAP_QUIESCE: @@ -395,10 +393,8 @@ static ssize_t activate_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); state = nd_desc->fw_ops->activate_state(nd_desc); - nvdimm_bus_unlock(dev); if (cap < NVDIMM_FWA_CAP_QUIESCE) return -EOPNOTSUPP; @@ -443,7 +439,6 @@ static ssize_t activate_store(struct device *dev, else return -EINVAL; - nvdimm_bus_lock(dev); state = nd_desc->fw_ops->activate_state(nd_desc); switch (state) { @@ -461,7 +456,6 @@ static ssize_t activate_store(struct device *dev, default: rc = -ENXIO; } - nvdimm_bus_unlock(dev); if (rc == 0) rc = len; @@ -484,10 +478,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut if (!nd_desc->fw_ops) return 0; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); - if (cap < NVDIMM_FWA_CAP_QUIESCE) return 0; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 99965077bac4..7f4a9d28b670 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -80,7 +80,7 @@ struct device *nd_dax_create(struct nd_region *nd_region) nd_dax = nd_dax_alloc(nd_region); if (nd_dax) dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -119,7 +119,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); } else - __nd_device_register(dax_dev); + nd_device_register(dax_dev); return rc; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index ee507eed42b5..c7c980577491 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -341,9 +341,9 @@ static ssize_t available_slots_show(struct device *dev, { ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = __available_slots_show(dev_get_drvdata(dev), buf); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -386,12 +386,12 @@ static ssize_t security_store(struct device *dev, * done while probing is idle and the DIMM is not in active use * in any region. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = nvdimm_security_store(dev, buf, len); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -570,6 +570,8 @@ bool is_nvdimm(struct device *dev) return dev->type == &nvdimm_device_type; } +static struct lock_class_key nvdimm_key; + struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, @@ -613,6 +615,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, /* get security state and extended (master) state */ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_key); nd_device_register(dev); return nvdimm; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 62b83b2e26e3..bf4f5c09d9b1 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -264,7 +264,7 @@ static ssize_t alt_name_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __alt_name_store(dev, buf, len); @@ -272,7 +272,7 @@ static ssize_t alt_name_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -846,7 +846,7 @@ static ssize_t size_store(struct device *dev, if (rc) return rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __size_store(dev, val); @@ -868,7 +868,7 @@ static ssize_t size_store(struct device *dev, dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1043,7 +1043,7 @@ static ssize_t uuid_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (to_ndns(dev)->claim) @@ -1059,7 +1059,7 @@ static ssize_t uuid_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1118,7 +1118,7 @@ static ssize_t sector_size_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (to_ndns(dev)->claim) rc = -EBUSY; @@ -1129,7 +1129,7 @@ static ssize_t sector_size_store(struct device *dev, dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote", buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -1239,9 +1239,9 @@ static ssize_t holder_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1278,7 +1278,7 @@ static ssize_t holder_class_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); int rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __holder_class_store(dev, buf); @@ -1286,7 +1286,7 @@ static ssize_t holder_class_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%d)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1297,7 +1297,7 @@ static ssize_t holder_class_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (ndns->claim_class == NVDIMM_CCLASS_NONE) rc = sprintf(buf, "\n"); else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || @@ -1309,7 +1309,7 @@ static ssize_t holder_class_show(struct device *dev, rc = sprintf(buf, "dax\n"); else rc = sprintf(buf, "<unknown>\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1323,7 +1323,7 @@ static ssize_t mode_show(struct device *dev, char *mode; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); claim = ndns->claim; if (claim && is_nd_btt(claim)) mode = "safe"; @@ -1336,7 +1336,7 @@ static ssize_t mode_show(struct device *dev, else mode = "raw"; rc = sprintf(buf, "%s\n", mode); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1456,8 +1456,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) * Flush any in-progess probes / removals in the driver * for the raw personality of this namespace. */ - nd_device_lock(&ndns->dev); - nd_device_unlock(&ndns->dev); + device_lock(&ndns->dev); + device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", dev_name(dev)); @@ -1830,6 +1830,8 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) return dev; } +static struct lock_class_key nvdimm_namespace_key; + void nd_region_create_ns_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); @@ -1845,8 +1847,12 @@ void nd_region_create_ns_seed(struct nd_region *nd_region) */ if (!nd_region->ns_seed) dev_err(&nd_region->dev, "failed to create namespace\n"); - else + else { + device_initialize(nd_region->ns_seed); + lockdep_set_class(&nd_region->ns_seed->mutex, + &nvdimm_namespace_key); nd_device_register(nd_region->ns_seed); + } } void nd_region_create_dax_seed(struct nd_region *nd_region) @@ -2200,6 +2206,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err) if (id < 0) break; dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_namespace_key); nd_device_register(dev); } if (i) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 448f9dcb4bb7..cc86ee09d7c0 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -106,7 +106,7 @@ void nd_region_create_dax_seed(struct nd_region *nd_region); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); void nd_synchronize(void); -void __nd_device_register(struct device *dev); +void nd_device_register(struct device *dev); struct nd_label_id; char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid, u32 flags); @@ -161,70 +161,4 @@ static inline void devm_nsio_disable(struct device *dev, { } #endif - -#ifdef CONFIG_PROVE_NVDIMM_LOCKING -extern struct class *nd_class; - -enum { - LOCK_BUS, - LOCK_NDCTL, - LOCK_REGION, - LOCK_DIMM = LOCK_REGION, - LOCK_NAMESPACE, - LOCK_CLAIM, -}; - -static inline void debug_nvdimm_lock(struct device *dev) -{ - if (is_nd_region(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION); - else if (is_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM); - else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM); - else if (dev->parent && (is_nd_region(dev->parent))) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE); - else if (is_nvdimm_bus(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS); - else if (dev->class && dev->class == nd_class) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL); - else - dev_WARN(dev, "unknown lock level\n"); -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); - debug_nvdimm_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - debug_nvdimm_unlock(dev); - device_unlock(dev); -} -#else -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - device_unlock(dev); -} - -static inline void debug_nvdimm_lock(struct device *dev) -{ -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ -} -#endif #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index c31e184bfa45..0e92ab4b3283 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -55,7 +55,7 @@ static ssize_t mode_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (dev->driver) rc = -EBUSY; @@ -77,7 +77,7 @@ static ssize_t mode_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -123,14 +123,14 @@ static ssize_t align_store(struct device *dev, unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, nd_pfn_supported_alignments(aligns)); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -152,11 +152,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -181,13 +181,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -199,7 +199,7 @@ static ssize_t resource_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -213,7 +213,7 @@ static ssize_t resource_show(struct device *dev, /* no address to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -225,7 +225,7 @@ static ssize_t size_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -241,7 +241,7 @@ static ssize_t size_show(struct device *dev, /* no size to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -291,6 +291,8 @@ bool is_nd_pfn(struct device *dev) } EXPORT_SYMBOL(is_nd_pfn); +static struct lock_class_key nvdimm_pfn_key; + struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { @@ -303,6 +305,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, nd_pfn->align = nd_pfn_default_alignment(); dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); + lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -346,7 +349,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) nd_pfn = nd_pfn_alloc(nd_region); dev = nd_pfn_devinit(nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -643,7 +646,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); } else - __nd_device_register(pfn_dev); + nd_device_register(pfn_dev); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 6b24ecada695..629d10fcf53b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -660,7 +660,7 @@ static void nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)); else { /* - * Note, this assumes nd_device_lock() context to not + * Note, this assumes device_lock() context to not * race nd_pmem_notify() */ sysfs_put(pmem->bb_state); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 188560b1c110..390123d293ea 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -95,7 +95,7 @@ static void nd_region_remove(struct device *dev) nvdimm_bus_unlock(dev); /* - * Note, this assumes nd_device_lock() context to not race + * Note, this assumes device_lock() context to not race * nd_region_notify() */ sysfs_put(nd_region->bb_state); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 0cb274c2b508..d976260eca7a 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -279,7 +279,7 @@ static ssize_t set_cookie_show(struct device *dev, * the v1.1 namespace label cookie definition. To read all this * data we need to wait for probing to settle. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (nd_region->ndr_mappings) { @@ -296,7 +296,7 @@ static ssize_t set_cookie_show(struct device *dev, } } nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); if (rc) return rc; @@ -353,12 +353,12 @@ static ssize_t available_size_show(struct device *dev, * memory nvdimm_bus_lock() is dropped, but that's userspace's * problem to not race itself. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_available_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -370,12 +370,12 @@ static ssize_t max_available_extent_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); unsigned long long available = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_allocatable_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -549,12 +549,12 @@ static ssize_t region_badblocks_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = badblocks_show(&nd_region->bb, buf, 0); else rc = -ENXIO; - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -949,6 +949,8 @@ static unsigned long default_align(struct nd_region *nd_region) return align; } +static struct lock_class_key nvdimm_region_key; + static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc, const struct device_type *dev_type, const char *caller) @@ -1035,6 +1037,8 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, else nd_region->flush = NULL; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_region_key); nd_device_register(dev); return nd_region; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 772590e2eddb..0dc1ea0b52f5 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -585,14 +585,22 @@ int unregister_acpi_bus_type(struct acpi_bus_type *); int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); +enum acpi_bridge_type { + ACPI_BRIDGE_TYPE_PCIE = 1, + ACPI_BRIDGE_TYPE_CXL, +}; + struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; + int bridge_type; struct resource secondary; /* downstream bus range */ - u32 osc_support_set; /* _OSC state of support bits */ - u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_ext_support_set; /* _OSC state of extended support bits */ + u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 03465db16b68..f3fdfd784a6c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -550,10 +550,16 @@ struct acpi_osc_context { acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); -/* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ +/* Number of _OSC capability DWORDS depends on bridge type */ +#define OSC_PCI_CAPABILITY_DWORDS 3 +#define OSC_CXL_CAPABILITY_DWORDS 5 + +/* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ +#define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ +#define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ @@ -610,6 +616,29 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +/* CXL _OSC: Capabilities DWORD 4: Support Field */ +#define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 +#define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 +#define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 +#define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 + +/* CXL _OSC: Capabilities DWORD 5: Control Field */ +#define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 + +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_CONTROL_DWORD]; +} + +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_EXT_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1006,6 +1035,17 @@ static inline int acpi_register_wakeup_handler(int wake_irq, static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } +struct acpi_osc_context; +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + return 0; +} + +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC diff --git a/include/linux/device.h b/include/linux/device.h index 93459724dcde..073f1b0126ac 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -400,8 +400,6 @@ struct dev_msi_info { * This identifies the device type and carries type-specific * information. * @mutex: Mutex to synchronize calls to its driver. - * @lockdep_mutex: An optional debug lock that a subsystem can use as a - * peer lock to gain localized lockdep coverage of the device_lock. * @bus: Type of bus device is on. * @driver: Which driver has allocated this * @platform_data: Platform data specific to the device. @@ -499,9 +497,6 @@ struct device { core doesn't touch it */ void *driver_data; /* Driver data, set and get with dev_set_drvdata/dev_get_drvdata */ -#ifdef CONFIG_PROVE_LOCKING - struct mutex lockdep_mutex; -#endif struct mutex mutex; /* mutex to synchronize calls to * its driver. */ @@ -850,6 +845,49 @@ static inline bool device_supports_offline(struct device *dev) return dev->bus && dev->bus->offline && dev->bus->online; } +#define __device_lock_set_class(dev, name, key) \ +do { \ + struct device *__d2 __maybe_unused = dev; \ + lock_set_class(&__d2->mutex.dep_map, name, key, 0, _THIS_IP_); \ +} while (0) + +/** + * device_lock_set_class - Specify a temporary lock class while a device + * is attached to a driver + * @dev: device to modify + * @key: lock class key data + * + * This must be called with the device_lock() already held, for example + * from driver ->probe(). Take care to only override the default + * lockdep_no_validate class. + */ +#ifdef CONFIG_LOCKDEP +#define device_lock_set_class(dev, key) \ +do { \ + struct device *__d = dev; \ + dev_WARN_ONCE(__d, !lockdep_match_class(&__d->mutex, \ + &__lockdep_no_validate__), \ + "overriding existing custom lock class\n"); \ + __device_lock_set_class(__d, #key, key); \ +} while (0) +#else +#define device_lock_set_class(dev, key) __device_lock_set_class(dev, #key, key) +#endif + +/** + * device_lock_reset_class - Return a device to the default lockdep novalidate state + * @dev: device to modify + * + * This must be called with the device_lock() already held, for example + * from driver ->remove(). + */ +#define device_lock_reset_class(dev) \ +do { \ + struct device *__d __maybe_unused = dev; \ + lock_set_novalidate_class(&__d->mutex.dep_map, "&dev->mutex", \ + _THIS_IP_); \ +} while (0) + void lock_device_hotplug(void); void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 37951c17908e..b6829b970093 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -286,6 +286,9 @@ extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); +#define lock_set_novalidate_class(l, n, i) \ + lock_set_class(l, n, &__lockdep_no_validate__, 0, i) + static inline void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, unsigned long ip) { @@ -353,7 +356,8 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) -# define lock_set_class(l, n, k, s, i) do { } while (0) +# define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0) +# define lock_set_novalidate_class(l, n, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ diff --git a/include/linux/pm.h b/include/linux/pm.h index ffe941958501..70ec69d8bafd 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -36,6 +36,15 @@ static inline void pm_vt_switch_unregister(struct device *dev) } #endif /* CONFIG_VT_CONSOLE_SLEEP */ +#ifdef CONFIG_CXL_SUSPEND +bool cxl_mem_active(void); +#else +static inline bool cxl_mem_active(void) +{ + return false; +} +#endif + /* * Device power management */ diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 8d206f27bb6d..c71021a2a9ed 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -68,8 +68,8 @@ static const struct { * struct cxl_command_info - Command information returned from a query. * @id: ID number for the command. * @flags: Flags that specify command behavior. - * @size_in: Expected input size, or -1 if variable length. - * @size_out: Expected output size, or -1 if variable length. + * @size_in: Expected input size, or ~0 if variable length. + * @size_out: Expected output size, or ~0 if variable length. * * Represents a single command that is supported by both the driver and the * hardware. This is returned as part of an array from the query ioctl. The @@ -78,7 +78,7 @@ static const struct { * * - @id = 10 * - @flags = 0 - * - @size_in = -1 + * - @size_in = ~0 * - @size_out = 0 * * See struct cxl_mem_query_commands. @@ -89,8 +89,8 @@ struct cxl_command_info { __u32 flags; #define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0) - __s32 size_in; - __s32 size_out; + __u32 size_in; + __u32 size_out; }; /** @@ -169,13 +169,13 @@ struct cxl_send_command { __u32 retval; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } in; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } out; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 938d5c78b421..20a66bf9f465 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -83,7 +83,7 @@ bool hibernation_available(void) { return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION) && - !secretmem_active(); + !secretmem_active() && !cxl_mem_active(); } /** diff --git a/kernel/power/main.c b/kernel/power/main.c index 5242bf2ee469..e3694034b753 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -127,7 +127,9 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *s = buf; suspend_state_t i; - for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { + if (i >= PM_SUSPEND_MEM && cxl_mem_active()) + continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; @@ -136,6 +138,7 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, else s += sprintf(s, "%s ", label); } + } /* Convert the last space to a newline if needed. */ if (s != buf) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 6fcdee7e87a5..827075944d28 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -236,7 +236,8 @@ EXPORT_SYMBOL_GPL(suspend_valid_only_mem); static bool sleep_state_supported(suspend_state_t state) { - return state == PM_SUSPEND_TO_IDLE || valid_state(state); + return state == PM_SUSPEND_TO_IDLE || + (valid_state(state) && !cxl_mem_active()); } static int platform_suspend_prepare(suspend_state_t state) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4cea85a83321..2e24db4bff19 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1490,29 +1490,6 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. -choice - prompt "Lock debugging: prove subsystem device_lock() correctness" - depends on PROVE_LOCKING - help - For subsystems that have instrumented their usage of the device_lock() - with nested annotations, enable lock dependency checking. The locking - hierarchy 'subclass' identifiers are not compatible across - sub-systems, so only one can be enabled at a time. - -config PROVE_NVDIMM_LOCKING - bool "NVDIMM" - depends on LIBNVDIMM - help - Enable lockdep to validate nd_device_lock() usage. - -config PROVE_CXL_LOCKING - bool "CXL" - depends on CXL_BUS - help - Enable lockdep to validate cxl_device_lock() usage. - -endchoice - endmenu # lock debugging config TRACE_IRQFLAGS diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 82e49ab0937d..33543231d453 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -8,6 +8,8 @@ ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders +ldflags-y += --wrap=cxl_await_media_ready +ldflags-y += --wrap=cxl_hdm_decode_init DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -34,7 +36,6 @@ cxl_port-y += config_check.o obj-m += cxl_mem.o cxl_mem-y := $(CXL_SRC)/mem.o -cxl_mem-y += mock_mem.o cxl_mem-y += config_check.o obj-m += cxl_core.o diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c deleted file mode 100644 index d1dec5845139..000000000000 --- a/tools/testing/cxl/mock_mem.c +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ - -#include <linux/types.h> - -struct cxl_dev_state; -bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) -{ - return true; -} diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index b6b726eff3e2..6b9239b2afd4 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -237,25 +237,11 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } -static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) -{ - msleep(100); - return 0; -} - static void label_area_release(void *lsa) { vfree(lsa); } -static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info; - - info = &cxlds->info; - info->mem_enabled = true; -} - static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -278,7 +264,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; - cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); @@ -293,8 +278,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mock_validate_dvsec_ranges(cxlds); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6e8c9d63c92d..f1f8c40948c5 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -193,6 +193,35 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); +int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(cxlds->dev)) + rc = 0; + else + rc = cxl_await_media_ready(cxlds); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); + +bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm) +{ + int rc = 0, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_dev(cxlds->dev)) + rc = cxl_hdm_decode_init(cxlds, cxlhdm); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); |