49 files changed, 2649 insertions, 836 deletions
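Note: the series below converts the kernel-internal CXL mailbox helper from cxl_mbox_send_cmd() to cxl_internal_send_cmd(), which takes a caller-built struct cxl_mbox_cmd and adds a min_out field for variable-sized output payloads. The following is a minimal illustrative sketch (not part of the patch) of the new calling convention, modeled on the converted callers in drivers/cxl/core/mbox.c; the wrapper name example_identify() is hypothetical.

/*
 * Hypothetical caller showing the struct-based mailbox convention
 * introduced by this series; it mirrors cxl_dev_state_identify() in
 * drivers/cxl/core/mbox.c and is not part of the patch itself.
 */
static int example_identify(struct cxl_dev_state *cxlds,
                            struct cxl_mbox_identify *id)
{
        struct cxl_mbox_cmd mbox_cmd = {
                .opcode = CXL_MBOX_OP_IDENTIFY,
                .size_out = sizeof(*id),
                .payload_out = id,
                /*
                 * min_out == 0 (the default) requires the device to return
                 * the full size_out; a smaller min_out tolerates shorter,
                 * variable-sized output.
                 */
                .min_out = 0,
        };

        /* 0 on success; -E2BIG if the payload exceeds cxlds->payload_size */
        return cxl_internal_send_cmd(cxlds, &mbox_cmd);
}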
diff --git a/Documentation/ABI/testing/sysfs-bus-nvdimm b/Documentation/ABI/testing/sysfs-bus-nvdimm index 1c1f5acbf53d..de8c5a59c77f 100644 --- a/Documentation/ABI/testing/sysfs-bus-nvdimm +++ b/Documentation/ABI/testing/sysfs-bus-nvdimm @@ -41,3 +41,17 @@ KernelVersion: 5.18 Contact: Kajol Jain <kjain@linux.ibm.com> Description: (RO) This sysfs file exposes the cpumask which is designated to to retrieve nvdimm pmu event counter data. + +What: /sys/bus/nd/devices/nmemX/cxl/id +Date: November 2022 +KernelVersion: 6.2 +Contact: Dave Jiang <dave.jiang@intel.com> +Description: (RO) Show the id (serial) of the device. This is CXL specific. + +What: /sys/bus/nd/devices/nmemX/cxl/provider +Date: November 2022 +KernelVersion: 6.2 +Contact: Dave Jiang <dave.jiang@intel.com> +Description: (RO) Shows the CXL bridge device that ties to a CXL memory device + to this NVDIMM device. I.e. the parent of the device returned is + a /sys/bus/cxl/devices/memX instance. diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 187f43a03200..bdafeb4b66dc 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -83,6 +83,7 @@ This structure has the form:: int (*mmio_enabled)(struct pci_dev *dev); int (*slot_reset)(struct pci_dev *dev); void (*resume)(struct pci_dev *dev); + void (*cor_error_detected)(struct pci_dev *dev); }; The possible channel states are:: @@ -422,5 +423,11 @@ That is, the recovery API only requires that: - drivers/net/cxgb3 - drivers/net/s2io.c + The cor_error_detected() callback is invoked in handle_error_source() when + the error severity is "correctable". The callback is optional and allows + additional logging to be done if desired. See example: + + - drivers/cxl/pci.c + The End ------- diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 68a2a04c02e5..276b3baf1c50 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,6 +69,7 @@ config X86 select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 2e5a045731de..ef34ba21aa92 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -20,6 +20,7 @@ #include <linux/kernel.h> #include <linux/cc_platform.h> #include <linux/set_memory.h> +#include <linux/memregion.h> #include <asm/e820/api.h> #include <asm/processor.h> @@ -330,6 +331,23 @@ void arch_invalidate_pmem(void *addr, size_t size) EXPORT_SYMBOL_GPL(arch_invalidate_pmem); #endif +#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION +bool cpu_cache_has_invalidate_memregion(void) +{ + return !cpu_feature_enabled(X86_FEATURE_HYPERVISOR); +} +EXPORT_SYMBOL_NS_GPL(cpu_cache_has_invalidate_memregion, DEVMEM); + +int cpu_cache_invalidate_memregion(int res_desc) +{ + if (WARN_ON_ONCE(!cpu_cache_has_invalidate_memregion())) + return -ENXIO; + wbinvd_on_all_cpus(); + return 0; +} +EXPORT_SYMBOL_NS_GPL(cpu_cache_invalidate_memregion, DEVMEM); +#endif + static void __cpa_flush_all(void *arg) { unsigned long cache = (unsigned long)arg; diff --git a/drivers/acpi/nfit/intel.c b/drivers/acpi/nfit/intel.c index 8dd792a55730..3902759abcba 100644 --- a/drivers/acpi/nfit/intel.c +++ b/drivers/acpi/nfit/intel.c @@ -3,6 +3,7 @@ #include <linux/libnvdimm.h> #include 
<linux/ndctl.h> #include <linux/acpi.h> +#include <linux/memregion.h> #include <asm/smp.h> #include "intel.h" #include "nfit.h" @@ -190,8 +191,6 @@ static int intel_security_change_key(struct nvdimm *nvdimm, } } -static void nvdimm_invalidate_cache(void); - static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm, const struct nvdimm_key_data *key_data) { @@ -227,9 +226,6 @@ static int __maybe_unused intel_security_unlock(struct nvdimm *nvdimm, return -EIO; } - /* DIMM unlocked, invalidate all CPU caches before we read it */ - nvdimm_invalidate_cache(); - return 0; } @@ -297,8 +293,6 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm, if (!test_bit(cmd, &nfit_mem->dsm_mask)) return -ENOTTY; - /* flush all cache before we erase DIMM */ - nvdimm_invalidate_cache(); memcpy(nd_cmd.cmd.passphrase, key->data, sizeof(nd_cmd.cmd.passphrase)); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); @@ -317,8 +311,6 @@ static int __maybe_unused intel_security_erase(struct nvdimm *nvdimm, return -ENXIO; } - /* DIMM erased, invalidate all CPU caches before we read it */ - nvdimm_invalidate_cache(); return 0; } @@ -354,8 +346,6 @@ static int __maybe_unused intel_security_query_overwrite(struct nvdimm *nvdimm) return -ENXIO; } - /* flush all cache before we make the nvdimms available */ - nvdimm_invalidate_cache(); return 0; } @@ -380,8 +370,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm, if (!test_bit(NVDIMM_INTEL_OVERWRITE, &nfit_mem->dsm_mask)) return -ENOTTY; - /* flush all cache before we erase DIMM */ - nvdimm_invalidate_cache(); memcpy(nd_cmd.cmd.passphrase, nkey->data, sizeof(nd_cmd.cmd.passphrase)); rc = nvdimm_ctl(nvdimm, ND_CMD_CALL, &nd_cmd, sizeof(nd_cmd), NULL); @@ -401,22 +389,6 @@ static int __maybe_unused intel_security_overwrite(struct nvdimm *nvdimm, } } -/* - * TODO: define a cross arch wbinvd equivalent when/if - * NVDIMM_FAMILY_INTEL command support arrives on another arch. - */ -#ifdef CONFIG_X86 -static void nvdimm_invalidate_cache(void) -{ - wbinvd_on_all_cpus(); -} -#else -static void nvdimm_invalidate_cache(void) -{ - WARN_ON_ONCE("cache invalidation required after unlock\n"); -} -#endif - static const struct nvdimm_security_ops __intel_security_ops = { .get_flags = intel_security_flags, .freeze = intel_security_freeze, diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 4e3db20e9cbb..b3c202d2a433 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -493,6 +493,7 @@ static u32 calculate_cxl_support(void) u32 support; support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + support |= OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT; if (pci_aer_available()) support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 768ced3d6fe8..0ac53c422c31 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -111,4 +111,22 @@ config CXL_REGION select MEMREGION select GET_FREE_REGION +config CXL_REGION_INVALIDATION_TEST + bool "CXL: Region Cache Management Bypass (TEST)" + depends on CXL_REGION + help + CXL Region management and security operations potentially invalidate + the content of CPU caches without notifiying those caches to + invalidate the affected cachelines. The CXL Region driver attempts + to invalidate caches when those events occur. If that invalidation + fails the region will fail to enable. 
Reasons for cache + invalidation failure are due to the CPU not providing a cache + invalidation mechanism. For example usage of wbinvd is restricted to + bare metal x86. However, for testing purposes toggling this option + can disable that data integrity safety and proceed with enabling + regions when there might be conflicting contents in the CPU cache. + + If unsure, or if this kernel is meant for production environments, + say N. + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index a78270794150..db321f48ba52 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -9,5 +9,5 @@ obj-$(CONFIG_CXL_PORT) += cxl_port.o cxl_mem-y := mem.o cxl_pci-y := pci.o cxl_acpi-y := acpi.o -cxl_pmem-y := pmem.o +cxl_pmem-y := pmem.o security.o cxl_port-y := port.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index fb649683dd3a..ad0849af42d7 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,9 +6,120 @@ #include <linux/kernel.h> #include <linux/acpi.h> #include <linux/pci.h> +#include <asm/div64.h> #include "cxlpci.h" #include "cxl.h" +#define CXL_RCRB_SIZE SZ_8K + +struct cxl_cxims_data { + int nr_maps; + u64 xormaps[]; +}; + +/* + * Find a targets entry (n) in the host bridge interleave list. + * CXL Specfication 3.0 Table 9-22 + */ +static int cxl_xor_calc_n(u64 hpa, struct cxl_cxims_data *cximsd, int iw, + int ig) +{ + int i = 0, n = 0; + u8 eiw; + + /* IW: 2,4,6,8,12,16 begin building 'n' using xormaps */ + if (iw != 3) { + for (i = 0; i < cximsd->nr_maps; i++) + n |= (hweight64(hpa & cximsd->xormaps[i]) & 1) << i; + } + /* IW: 3,6,12 add a modulo calculation to 'n' */ + if (!is_power_of_2(iw)) { + if (ways_to_eiw(iw, &eiw)) + return -1; + hpa &= GENMASK_ULL(51, eiw + ig); + n |= do_div(hpa, 3) << i; + } + return n; +} + +static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos) +{ + struct cxl_cxims_data *cximsd = cxlrd->platform_data; + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxl_decoder *cxld = &cxlsd->cxld; + int ig = cxld->interleave_granularity; + int iw = cxld->interleave_ways; + int n = 0; + u64 hpa; + + if (dev_WARN_ONCE(&cxld->dev, + cxld->interleave_ways != cxlsd->nr_targets, + "misconfigured root decoder\n")) + return NULL; + + hpa = cxlrd->res->start + pos * ig; + + /* Entry (n) is 0 for no interleave (iw == 1) */ + if (iw != 1) + n = cxl_xor_calc_n(hpa, cximsd, iw, ig); + + if (n < 0) + return NULL; + + return cxlrd->cxlsd.target[n]; +} + +struct cxl_cxims_context { + struct device *dev; + struct cxl_root_decoder *cxlrd; +}; + +static int cxl_parse_cxims(union acpi_subtable_headers *header, void *arg, + const unsigned long end) +{ + struct acpi_cedt_cxims *cxims = (struct acpi_cedt_cxims *)header; + struct cxl_cxims_context *ctx = arg; + struct cxl_root_decoder *cxlrd = ctx->cxlrd; + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct device *dev = ctx->dev; + struct cxl_cxims_data *cximsd; + unsigned int hbig, nr_maps; + int rc; + + rc = eig_to_granularity(cxims->hbig, &hbig); + if (rc) + return rc; + + /* Does this CXIMS entry apply to the given CXL Window? 
*/ + if (hbig != cxld->interleave_granularity) + return 0; + + /* IW 1,3 do not use xormaps and skip this parsing entirely */ + if (is_power_of_2(cxld->interleave_ways)) + /* 2, 4, 8, 16 way */ + nr_maps = ilog2(cxld->interleave_ways); + else + /* 6, 12 way */ + nr_maps = ilog2(cxld->interleave_ways / 3); + + if (cxims->nr_xormaps < nr_maps) { + dev_dbg(dev, "CXIMS nr_xormaps[%d] expected[%d]\n", + cxims->nr_xormaps, nr_maps); + return -ENXIO; + } + + cximsd = devm_kzalloc(dev, struct_size(cximsd, xormaps, nr_maps), + GFP_KERNEL); + if (!cximsd) + return -ENOMEM; + memcpy(cximsd->xormaps, cxims->xormap_list, + nr_maps * sizeof(*cximsd->xormaps)); + cximsd->nr_maps = nr_maps; + cxlrd->platform_data = cximsd; + + return 0; +} + static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; @@ -33,8 +144,10 @@ static int cxl_acpi_cfmws_verify(struct device *dev, int rc, expected_len; unsigned int ways; - if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) { - dev_err(dev, "CFMWS Unsupported Interleave Arithmetic\n"); + if (cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_MODULO && + cfmws->interleave_arithmetic != ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { + dev_err(dev, "CFMWS Unknown Interleave Arithmetic: %d\n", + cfmws->interleave_arithmetic); return -EINVAL; } @@ -48,7 +161,7 @@ static int cxl_acpi_cfmws_verify(struct device *dev, return -EINVAL; } - rc = cxl_to_ways(cfmws->interleave_ways, &ways); + rc = eiw_to_ways(cfmws->interleave_ways, &ways); if (rc) { dev_err(dev, "CFMWS Interleave Ways (%d) invalid\n", cfmws->interleave_ways); @@ -70,6 +183,10 @@ static int cxl_acpi_cfmws_verify(struct device *dev, return 0; } +/* + * Note, @dev must be the first member, see 'struct cxl_chbs_context' + * and mock_acpi_table_parse_cedt() + */ struct cxl_cfmws_context { struct device *dev; struct cxl_port *root_port; @@ -84,9 +201,11 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, struct cxl_cfmws_context *ctx = arg; struct cxl_port *root_port = ctx->root_port; struct resource *cxl_res = ctx->cxl_res; + struct cxl_cxims_context cxims_ctx; struct cxl_root_decoder *cxlrd; struct device *dev = ctx->dev; struct acpi_cedt_cfmws *cfmws; + cxl_calc_hb_fn cxl_calc_hb; struct cxl_decoder *cxld; unsigned int ways, i, ig; struct resource *res; @@ -102,10 +221,10 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, return 0; } - rc = cxl_to_ways(cfmws->interleave_ways, &ways); + rc = eiw_to_ways(cfmws->interleave_ways, &ways); if (rc) return rc; - rc = cxl_to_granularity(cfmws->granularity, &ig); + rc = eig_to_granularity(cfmws->granularity, &ig); if (rc) return rc; for (i = 0; i < ways; i++) @@ -128,7 +247,12 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, if (rc) goto err_insert; - cxlrd = cxl_root_decoder_alloc(root_port, ways); + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) + cxl_calc_hb = cxl_hb_modulo; + else + cxl_calc_hb = cxl_hb_xor; + + cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb); if (IS_ERR(cxlrd)) return 0; @@ -148,7 +272,25 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, ig = CXL_DECODER_MIN_GRANULARITY; cxld->interleave_granularity = ig; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { + if (ways != 1 && ways != 3) { + cxims_ctx = (struct cxl_cxims_context) { + .dev = dev, + .cxlrd = cxlrd, + }; + rc = acpi_table_parse_cedt(ACPI_CEDT_TYPE_CXIMS, + 
cxl_parse_cxims, &cxims_ctx); + if (rc < 0) + goto err_xormap; + if (!cxlrd->platform_data) { + dev_err(dev, "No CXIMS for HBIG %u\n", ig); + rc = -EINVAL; + goto err_xormap; + } + } + } rc = cxl_decoder_add(cxld, target_map); +err_xormap: if (rc) put_device(&cxld->dev); else @@ -193,34 +335,39 @@ static int add_host_bridge_uport(struct device *match, void *arg) { struct cxl_port *root_port = arg; struct device *host = root_port->dev.parent; - struct acpi_device *bridge = to_cxl_host_bridge(host, match); + struct acpi_device *hb = to_cxl_host_bridge(host, match); struct acpi_pci_root *pci_root; struct cxl_dport *dport; struct cxl_port *port; + struct device *bridge; int rc; - if (!bridge) + if (!hb) return 0; - dport = cxl_find_dport_by_dev(root_port, match); + pci_root = acpi_pci_find_root(hb->handle); + bridge = pci_root->bus->bridge; + dport = cxl_find_dport_by_dev(root_port, bridge); if (!dport) { dev_dbg(host, "host bridge expected and not found\n"); return 0; } - /* - * Note that this lookup already succeeded in - * to_cxl_host_bridge(), so no need to check for failure here - */ - pci_root = acpi_pci_find_root(bridge->handle); - rc = devm_cxl_register_pci_bus(host, match, pci_root->bus); + if (dport->rch) { + dev_info(bridge, "host supports CXL (restricted)\n"); + return 0; + } + + rc = devm_cxl_register_pci_bus(host, bridge, pci_root->bus); if (rc) return rc; - port = devm_cxl_add_port(host, match, dport->component_reg_phys, dport); + port = devm_cxl_add_port(host, bridge, dport->component_reg_phys, + dport); if (IS_ERR(port)) return PTR_ERR(port); - dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); + + dev_info(bridge, "host supports CXL\n"); return 0; } @@ -228,7 +375,9 @@ static int add_host_bridge_uport(struct device *match, void *arg) struct cxl_chbs_context { struct device *dev; unsigned long long uid; + resource_size_t rcrb; resource_size_t chbcr; + u32 cxl_version; }; static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg, @@ -244,51 +393,86 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg, if (ctx->uid != chbs->uid) return 0; - ctx->chbcr = chbs->base; + + ctx->cxl_version = chbs->cxl_version; + ctx->rcrb = CXL_RESOURCE_NONE; + ctx->chbcr = CXL_RESOURCE_NONE; + + if (!chbs->base) + return 0; + + if (chbs->cxl_version != ACPI_CEDT_CHBS_VERSION_CXL11) { + ctx->chbcr = chbs->base; + return 0; + } + + if (chbs->length != CXL_RCRB_SIZE) + return 0; + + ctx->rcrb = chbs->base; + ctx->chbcr = cxl_rcrb_to_component(ctx->dev, chbs->base, + CXL_RCRB_DOWNSTREAM); return 0; } static int add_host_bridge_dport(struct device *match, void *arg) { - acpi_status status; + acpi_status rc; + struct device *bridge; unsigned long long uid; struct cxl_dport *dport; struct cxl_chbs_context ctx; + struct acpi_pci_root *pci_root; struct cxl_port *root_port = arg; struct device *host = root_port->dev.parent; - struct acpi_device *bridge = to_cxl_host_bridge(host, match); + struct acpi_device *hb = to_cxl_host_bridge(host, match); - if (!bridge) + if (!hb) return 0; - status = acpi_evaluate_integer(bridge->handle, METHOD_NAME__UID, NULL, - &uid); - if (status != AE_OK) { - dev_err(host, "unable to retrieve _UID of %s\n", - dev_name(match)); + rc = acpi_evaluate_integer(hb->handle, METHOD_NAME__UID, NULL, &uid); + if (rc != AE_OK) { + dev_err(match, "unable to retrieve _UID\n"); return -ENODEV; } + dev_dbg(match, "UID found: %lld\n", uid); + ctx = (struct cxl_chbs_context) { - .dev = host, + .dev = match, .uid = uid, }; 
acpi_table_parse_cedt(ACPI_CEDT_TYPE_CHBS, cxl_get_chbcr, &ctx); - if (ctx.chbcr == 0) { - dev_warn(host, "No CHBS found for Host Bridge: %s\n", - dev_name(match)); + if (!ctx.chbcr) { + dev_warn(match, "No CHBS found for Host Bridge (UID %lld)\n", + uid); return 0; } - dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr); - if (IS_ERR(dport)) { - dev_err(host, "failed to add downstream port: %s\n", - dev_name(match)); - return PTR_ERR(dport); + if (ctx.rcrb != CXL_RESOURCE_NONE) + dev_dbg(match, "RCRB found for UID %lld: %pa\n", uid, &ctx.rcrb); + + if (ctx.chbcr == CXL_RESOURCE_NONE) { + dev_warn(match, "CHBCR invalid for Host Bridge (UID %lld)\n", + uid); + return 0; } - dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match)); + + dev_dbg(match, "CHBCR found: %pa\n", &ctx.chbcr); + + pci_root = acpi_pci_find_root(hb->handle); + bridge = pci_root->bus->bridge; + if (ctx.cxl_version == ACPI_CEDT_CHBS_VERSION_CXL11) + dport = devm_cxl_add_rch_dport(root_port, bridge, uid, + ctx.chbcr, ctx.rcrb); + else + dport = devm_cxl_add_dport(root_port, bridge, uid, + ctx.chbcr); + if (IS_ERR(dport)) + return PTR_ERR(dport); + return 0; } @@ -466,7 +650,6 @@ static int cxl_acpi_probe(struct platform_device *pdev) root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); - dev_dbg(host, "add: %s\n", dev_name(&root_port->dev)); rc = bus_for_each_dev(adev->dev.bus, NULL, root_port, add_host_bridge_dport); @@ -512,7 +695,8 @@ static int cxl_acpi_probe(struct platform_device *pdev) return rc; /* In case PCI is scanned before ACPI re-trigger memdev attach */ - return cxl_bus_rescan(); + cxl_bus_rescan(); + return 0; } static const struct acpi_device_id cxl_acpi_ids[] = { @@ -536,7 +720,20 @@ static struct platform_driver cxl_acpi_driver = { .id_table = cxl_test_ids, }; -module_platform_driver(cxl_acpi_driver); +static int __init cxl_acpi_init(void) +{ + return platform_driver_register(&cxl_acpi_driver); +} + +static void __exit cxl_acpi_exit(void) +{ + platform_driver_unregister(&cxl_acpi_driver); + cxl_bus_drain(); +} + +module_init(cxl_acpi_init); +module_exit(cxl_acpi_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); MODULE_IMPORT_NS(ACPI); +MODULE_SOFTDEP("pre: cxl_pmem"); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1d8f87be283f..8c04672dca56 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -58,14 +58,6 @@ extern struct rw_semaphore cxl_dpa_rwsem; bool is_switch_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); -static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, - struct cxl_memdev *cxlmd) -{ - if (!port) - return NULL; - - return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); -} int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index d1d2caea5c62..dcc16d7cb8f3 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -82,18 +82,23 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) cxlhdm->interleave_mask |= GENMASK(14, 12); } -static void __iomem *map_hdm_decoder_regs(struct cxl_port *port, - void __iomem *crb) +static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb, + struct cxl_component_regs *regs) { - struct cxl_component_reg_map map; + struct cxl_register_map map = { + .resource = port->component_reg_phys, + .base = crb, + .max_size = CXL_COMPONENT_REG_BLOCK_SIZE, + }; - 
cxl_probe_component_regs(&port->dev, crb, &map); - if (!map.hdm_decoder.valid) { + cxl_probe_component_regs(&port->dev, crb, &map.component_map); + if (!map.component_map.hdm_decoder.valid) { dev_err(&port->dev, "HDM decoder registers invalid\n"); - return IOMEM_ERR_PTR(-ENXIO); + return -ENXIO; } - return crb + map.hdm_decoder.offset; + return cxl_map_component_regs(&port->dev, regs, &map, + BIT(CXL_CM_CAP_CAP_ID_HDM)); } /** @@ -103,25 +108,25 @@ static void __iomem *map_hdm_decoder_regs(struct cxl_port *port, struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) { struct device *dev = &port->dev; - void __iomem *crb, *hdm; struct cxl_hdm *cxlhdm; + void __iomem *crb; + int rc; cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); if (!cxlhdm) return ERR_PTR(-ENOMEM); cxlhdm->port = port; - crb = devm_cxl_iomap_block(dev, port->component_reg_phys, - CXL_COMPONENT_REG_BLOCK_SIZE); + crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { dev_err(dev, "No component registers mapped\n"); return ERR_PTR(-ENXIO); } - hdm = map_hdm_decoder_regs(port, crb); - if (IS_ERR(hdm)) - return ERR_CAST(hdm); - cxlhdm->regs.hdm_decoder = hdm; + rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs); + iounmap(crb); + if (rc) + return ERR_PTR(rc); parse_hdm_decoder_caps(cxlhdm); if (cxlhdm->decoder_count == 0) { @@ -489,10 +494,10 @@ static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl) * Input validation ensures these warns never fire, but otherwise * suppress unititalized variable usage warnings. */ - if (WARN_ONCE(ways_to_cxl(cxld->interleave_ways, &eiw), + if (WARN_ONCE(ways_to_eiw(cxld->interleave_ways, &eiw), "invalid interleave_ways: %d\n", cxld->interleave_ways)) return; - if (WARN_ONCE(granularity_to_cxl(cxld->interleave_granularity, &eig), + if (WARN_ONCE(granularity_to_eig(cxld->interleave_granularity, &eig), "invalid interleave_granularity: %d\n", cxld->interleave_granularity)) return; @@ -736,16 +741,16 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, } cxld->target_type = CXL_DECODER_EXPANDER; } - rc = cxl_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl), - &cxld->interleave_ways); + rc = eiw_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl), + &cxld->interleave_ways); if (rc) { dev_warn(&port->dev, "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n", port->id, cxld->id, ctrl); return rc; } - rc = cxl_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl), - &cxld->interleave_granularity); + rc = eig_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl), + &cxld->interleave_granularity); if (rc) return rc; diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 0c90f13870a4..b03fba212799 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -140,13 +140,9 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) } /** - * cxl_mbox_send_cmd() - Send a mailbox command to a device. + * cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command * @cxlds: The device data for the operation - * @opcode: Opcode for the mailbox command. - * @in: The input payload for the mailbox command. - * @in_size: The length of the input payload - * @out: Caller allocated buffer for the output. - * @out_size: Expected size of output. + * @mbox_cmd: initialized command to execute * * Context: Any context. * Return: @@ -161,40 +157,40 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) * error. 
While this distinction can be useful for commands from userspace, the * kernel will only be able to use results when both are successful. */ -int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, - size_t in_size, void *out, size_t out_size) +int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *mbox_cmd) { - const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode); - struct cxl_mbox_cmd mbox_cmd = { - .opcode = opcode, - .payload_in = in, - .size_in = in_size, - .size_out = out_size, - .payload_out = out, - }; + size_t out_size, min_out; int rc; - if (in_size > cxlds->payload_size || out_size > cxlds->payload_size) + if (mbox_cmd->size_in > cxlds->payload_size || + mbox_cmd->size_out > cxlds->payload_size) return -E2BIG; - rc = cxlds->mbox_send(cxlds, &mbox_cmd); + out_size = mbox_cmd->size_out; + min_out = mbox_cmd->min_out; + rc = cxlds->mbox_send(cxlds, mbox_cmd); if (rc) return rc; - if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) - return cxl_mbox_cmd_rc2errno(&mbox_cmd); + if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) + return cxl_mbox_cmd_rc2errno(mbox_cmd); + + if (!out_size) + return 0; /* - * Variable sized commands can't be validated and so it's up to the - * caller to do that if they wish. + * Variable sized output needs to at least satisfy the caller's + * minimum if not the fully requested size. */ - if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) { - if (mbox_cmd.size_out != out_size) - return -EIO; - } + if (min_out == 0) + min_out = out_size; + + if (mbox_cmd->size_out < min_out) + return -EIO; return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_internal_send_cmd, CXL); static bool cxl_mem_raw_command_allowed(u16 opcode) { @@ -561,15 +557,25 @@ static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 while (remaining) { u32 xfer_size = min_t(u32, remaining, cxlds->payload_size); - struct cxl_mbox_get_log log = { + struct cxl_mbox_cmd mbox_cmd; + struct cxl_mbox_get_log log; + int rc; + + log = (struct cxl_mbox_get_log) { .uuid = *uuid, .offset = cpu_to_le32(offset), - .length = cpu_to_le32(xfer_size) + .length = cpu_to_le32(xfer_size), }; - int rc; - rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LOG, &log, sizeof(log), - out, xfer_size); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_LOG, + .size_in = sizeof(log), + .payload_in = &log, + .size_out = xfer_size, + .payload_out = out, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); if (rc < 0) return rc; @@ -615,19 +621,27 @@ static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel) static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds) { struct cxl_mbox_get_supported_logs *ret; + struct cxl_mbox_cmd mbox_cmd; int rc; ret = kvmalloc(cxlds->payload_size, GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); - rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL, 0, ret, - cxlds->payload_size); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS, + .size_out = cxlds->payload_size, + .payload_out = ret, + /* At least the record number field must be valid */ + .min_out = 2, + }; + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); if (rc < 0) { kvfree(ret); return ERR_PTR(rc); } + return ret; } @@ -697,7 +711,6 @@ int cxl_enumerate_cmds(struct cxl_dev_state *cxlds) /* Found the required CEL */ rc = 0; } - out: kvfree(gsl); return rc; @@ -719,11 +732,15 @@ EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL); static 
int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds) { struct cxl_mbox_get_partition_info pi; + struct cxl_mbox_cmd mbox_cmd; int rc; - rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0, - &pi, sizeof(pi)); - + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_PARTITION_INFO, + .size_out = sizeof(pi), + .payload_out = &pi, + }; + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); if (rc) return rc; @@ -752,10 +769,15 @@ int cxl_dev_state_identify(struct cxl_dev_state *cxlds) { /* See CXL 2.0 Table 175 Identify Memory Device Output Payload */ struct cxl_mbox_identify id; + struct cxl_mbox_cmd mbox_cmd; int rc; - rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id, - sizeof(id)); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_IDENTIFY, + .size_out = sizeof(id), + .payload_out = &id, + }; + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); if (rc < 0) return rc; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 20ce488a7754..a74a93310d26 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -344,6 +344,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds) * needed as this is ordered with cdev_add() publishing the device. */ cxlmd->cxlds = cxlds; + cxlds->cxlmd = cxlmd; cdev = &cxlmd->cdev; rc = cdev_device_add(cdev, dev); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9240df53ed87..57764e9cd19d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -54,16 +54,13 @@ static int match_add_dports(struct pci_dev *pdev, void *data) dev_dbg(&port->dev, "failed to find component registers\n"); port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - dport = devm_cxl_add_dport(port, &pdev->dev, port_num, - cxl_regmap_to_base(pdev, &map)); + dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource); if (IS_ERR(dport)) { ctx->error = PTR_ERR(dport); return PTR_ERR(dport); } ctx->count++; - dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - return 0; } diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 36aa5070d902..f3d2169b6731 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -99,7 +99,6 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) dev = &cxl_nvb->dev; cxl_nvb->port = port; - cxl_nvb->state = CXL_NVB_NEW; device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key); device_set_pm_not_required(dev); @@ -117,28 +116,7 @@ err: static void unregister_nvb(void *_cxl_nvb) { struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb; - bool flush; - /* - * If the bridge was ever activated then there might be in-flight state - * work to flush. Once the state has been changed to 'dead' then no new - * work can be queued by user-triggered bind. - */ - device_lock(&cxl_nvb->dev); - flush = cxl_nvb->state != CXL_NVB_NEW; - cxl_nvb->state = CXL_NVB_DEAD; - device_unlock(&cxl_nvb->dev); - - /* - * Even though the device core will trigger device_release_driver() - * before the unregister, it does not know about the fact that - * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver - * release not and flush it before tearing down the nvdimm device - * hierarchy. 
- */ - device_release_driver(&cxl_nvb->dev); - if (flush) - flush_work(&cxl_nvb->state_work); device_unregister(&cxl_nvb->dev); } @@ -188,7 +166,6 @@ static void cxl_nvdimm_release(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); - xa_destroy(&cxl_nvd->pmem_regions); kfree(cxl_nvd); } @@ -220,7 +197,8 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL); static struct lock_class_key cxl_nvdimm_key; -static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) +static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_nvdimm_bridge *cxl_nvb, + struct cxl_memdev *cxlmd) { struct cxl_nvdimm *cxl_nvd; struct device *dev; @@ -231,38 +209,78 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; - xa_init(&cxl_nvd->pmem_regions); + cxlmd->cxl_nvd = cxl_nvd; device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); dev->parent = &cxlmd->dev; dev->bus = &cxl_bus_type; dev->type = &cxl_nvdimm_type; + /* + * A "%llx" string is 17-bytes vs dimm_id that is max + * NVDIMM_KEY_DESC_LEN + */ + BUILD_BUG_ON(sizeof(cxl_nvd->dev_id) < 17 || + sizeof(cxl_nvd->dev_id) > NVDIMM_KEY_DESC_LEN); + sprintf(cxl_nvd->dev_id, "%llx", cxlmd->cxlds->serial); return cxl_nvd; } -static void cxl_nvd_unregister(void *dev) +static void cxl_nvd_unregister(void *_cxl_nvd) { - device_unregister(dev); + struct cxl_nvdimm *cxl_nvd = _cxl_nvd; + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb; + + /* + * Either the bridge is in ->remove() context under the device_lock(), + * or cxlmd_release_nvdimm() is cancelling the bridge's release action + * for @cxl_nvd and doing it itself (while manually holding the bridge + * lock). + */ + device_lock_assert(&cxl_nvb->dev); + cxl_nvd->cxlmd = NULL; + cxlmd->cxl_nvd = NULL; + device_unregister(&cxl_nvd->dev); +} + +static void cxlmd_release_nvdimm(void *_cxlmd) +{ + struct cxl_memdev *cxlmd = _cxlmd; + struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb; + + device_lock(&cxl_nvb->dev); + if (cxlmd->cxl_nvd) + devm_release_action(&cxl_nvb->dev, cxl_nvd_unregister, + cxlmd->cxl_nvd); + device_unlock(&cxl_nvb->dev); + put_device(&cxl_nvb->dev); } /** * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm - * @host: same host as @cxlmd * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations * * Return: 0 on success negative error code on failure. */ -int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd) +int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd) { + struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct device *dev; int rc; - cxl_nvd = cxl_nvdimm_alloc(cxlmd); - if (IS_ERR(cxl_nvd)) - return PTR_ERR(cxl_nvd); + cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); + if (!cxl_nvb) + return -ENODEV; + + cxl_nvd = cxl_nvdimm_alloc(cxl_nvb, cxlmd); + if (IS_ERR(cxl_nvd)) { + rc = PTR_ERR(cxl_nvd); + goto err_alloc; + } + cxlmd->cxl_nvb = cxl_nvb; dev = &cxl_nvd->dev; rc = dev_set_name(dev, "pmem%d", cxlmd->id); @@ -273,13 +291,34 @@ int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd) if (rc) goto err; - dev_dbg(host, "%s: register %s\n", dev_name(dev->parent), - dev_name(dev)); + dev_dbg(&cxlmd->dev, "register %s\n", dev_name(dev)); + + /* + * The two actions below arrange for @cxl_nvd to be deleted when either + * the top-level PMEM bridge goes down, or the endpoint device goes + * through ->remove(). 
+ */ + device_lock(&cxl_nvb->dev); + if (cxl_nvb->dev.driver) + rc = devm_add_action_or_reset(&cxl_nvb->dev, cxl_nvd_unregister, + cxl_nvd); + else + rc = -ENXIO; + device_unlock(&cxl_nvb->dev); - return devm_add_action_or_reset(host, cxl_nvd_unregister, dev); + if (rc) + goto err_alloc; + + /* @cxlmd carries a reference on @cxl_nvb until cxlmd_release_nvdimm */ + return devm_add_action_or_reset(&cxlmd->dev, cxlmd_release_nvdimm, cxlmd); err: put_device(dev); +err_alloc: + cxlmd->cxl_nvb = NULL; + cxlmd->cxl_nvd = NULL; + put_device(&cxl_nvb->dev); + return rc; } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm, CXL); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index e7556864ea80..b631a0520456 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -628,6 +628,8 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, iter = to_cxl_port(iter->dev.parent); if (iter->host_bridge) port->host_bridge = iter->host_bridge; + else if (parent_dport->rch) + port->host_bridge = parent_dport->dport; else port->host_bridge = iter->uport; dev_dbg(uport, "host-bridge: %s\n", dev_name(port->host_bridge)); @@ -655,16 +657,10 @@ err: return ERR_PTR(rc); } -/** - * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy - * @host: host device for devm operations - * @uport: "physical" device implementing this upstream port - * @component_reg_phys: (optional) for configurable cxl_port instances - * @parent_dport: next hop up in the CXL memory decode hierarchy - */ -struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, - resource_size_t component_reg_phys, - struct cxl_dport *parent_dport) +static struct cxl_port *__devm_cxl_add_port(struct device *host, + struct device *uport, + resource_size_t component_reg_phys, + struct cxl_dport *parent_dport) { struct cxl_port *port; struct device *dev; @@ -702,6 +698,41 @@ err: put_device(dev); return ERR_PTR(rc); } + +/** + * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy + * @host: host device for devm operations + * @uport: "physical" device implementing this upstream port + * @component_reg_phys: (optional) for configurable cxl_port instances + * @parent_dport: next hop up in the CXL memory decode hierarchy + */ +struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, + resource_size_t component_reg_phys, + struct cxl_dport *parent_dport) +{ + struct cxl_port *port, *parent_port; + + port = __devm_cxl_add_port(host, uport, component_reg_phys, + parent_dport); + + parent_port = parent_dport ? parent_dport->port : NULL; + if (IS_ERR(port)) { + dev_dbg(uport, "Failed to add %s%s%s%s: %ld\n", + dev_name(&port->dev), + parent_port ? " to " : "", + parent_port ? dev_name(&parent_port->dev) : "", + parent_port ? "" : " (root port)", + PTR_ERR(port)); + } else { + dev_dbg(uport, "%s added%s%s%s\n", + dev_name(&port->dev), + parent_port ? " to " : "", + parent_port ? dev_name(&parent_port->dev) : "", + parent_port ? 
"" : " (root port)"); + } + + return port; +} EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) @@ -870,20 +901,10 @@ static void cxl_dport_unlink(void *data) sysfs_remove_link(&port->dev.kobj, link_name); } -/** - * devm_cxl_add_dport - append downstream port data to a cxl_port - * @port: the cxl_port that references this dport - * @dport_dev: firmware or PCI device representing the dport - * @port_id: identifier for this dport in a decoder's target list - * @component_reg_phys: optional location of CXL component registers - * - * Note that dports are appended to the devm release action's of the - * either the port's host (for root ports), or the port itself (for - * switch ports) - */ -struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, - struct device *dport_dev, int port_id, - resource_size_t component_reg_phys) +static struct cxl_dport * +__devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, + int port_id, resource_size_t component_reg_phys, + resource_size_t rcrb) { char link_name[CXL_TARGET_STRLEN]; struct cxl_dport *dport; @@ -913,6 +934,9 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, dport->port_id = port_id; dport->component_reg_phys = component_reg_phys; dport->port = port; + if (rcrb != CXL_RESOURCE_NONE) + dport->rch = true; + dport->rcrb = rcrb; cond_cxl_root_lock(port); rc = add_dport(port, dport); @@ -935,8 +959,74 @@ struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, return dport; } + +/** + * devm_cxl_add_dport - append VH downstream port data to a cxl_port + * @port: the cxl_port that references this dport + * @dport_dev: firmware or PCI device representing the dport + * @port_id: identifier for this dport in a decoder's target list + * @component_reg_phys: optional location of CXL component registers + * + * Note that dports are appended to the devm release action's of the + * either the port's host (for root ports), or the port itself (for + * switch ports) + */ +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, + struct device *dport_dev, int port_id, + resource_size_t component_reg_phys) +{ + struct cxl_dport *dport; + + dport = __devm_cxl_add_dport(port, dport_dev, port_id, + component_reg_phys, CXL_RESOURCE_NONE); + if (IS_ERR(dport)) { + dev_dbg(dport_dev, "failed to add dport to %s: %ld\n", + dev_name(&port->dev), PTR_ERR(dport)); + } else { + dev_dbg(dport_dev, "dport added to %s\n", + dev_name(&port->dev)); + } + + return dport; +} EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL); +/** + * devm_cxl_add_rch_dport - append RCH downstream port data to a cxl_port + * @port: the cxl_port that references this dport + * @dport_dev: firmware or PCI device representing the dport + * @port_id: identifier for this dport in a decoder's target list + * @component_reg_phys: optional location of CXL component registers + * @rcrb: mandatory location of a Root Complex Register Block + * + * See CXL 3.0 9.11.8 CXL Devices Attached to an RCH + */ +struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, + struct device *dport_dev, int port_id, + resource_size_t component_reg_phys, + resource_size_t rcrb) +{ + struct cxl_dport *dport; + + if (rcrb == CXL_RESOURCE_NONE) { + dev_dbg(&port->dev, "failed to add RCH dport, missing RCRB\n"); + return ERR_PTR(-EINVAL); + } + + dport = __devm_cxl_add_dport(port, dport_dev, port_id, + component_reg_phys, rcrb); + if (IS_ERR(dport)) { + dev_dbg(dport_dev, "failed to add RCH dport to %s: %ld\n", + dev_name(&port->dev), 
PTR_ERR(dport)); + } else { + dev_dbg(dport_dev, "RCH dport added to %s\n", + dev_name(&port->dev)); + } + + return dport; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_rch_dport, CXL); + static int add_ep(struct cxl_ep *new) { struct cxl_port *port = new->dport->port; @@ -1122,47 +1212,6 @@ static void reap_dports(struct cxl_port *port) } } -int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd, - struct cxl_dport *parent_dport) -{ - struct cxl_port *parent_port = parent_dport->port; - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_port *endpoint, *iter, *down; - int rc; - - /* - * Now that the path to the root is established record all the - * intervening ports in the chain. - */ - for (iter = parent_port, down = NULL; !is_cxl_root(iter); - down = iter, iter = to_cxl_port(iter->dev.parent)) { - struct cxl_ep *ep; - - ep = cxl_ep_load(iter, cxlmd); - ep->next = down; - } - - endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, - cxlds->component_reg_phys, parent_dport); - if (IS_ERR(endpoint)) - return PTR_ERR(endpoint); - - dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); - - rc = cxl_endpoint_autoremove(cxlmd, endpoint); - if (rc) - return rc; - - if (!endpoint->dev.driver) { - dev_err(&cxlmd->dev, "%s failed probe\n", - dev_name(&endpoint->dev)); - return -ENXIO; - } - - return 0; -} -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_endpoint, CXL); - static void cxl_detach_ep(void *data) { struct cxl_memdev *cxlmd = data; @@ -1243,7 +1292,7 @@ static resource_size_t find_component_registers(struct device *dev) pdev = to_pci_dev(dev); cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); - return cxl_regmap_to_base(pdev, &map); + return map.resource; } static int add_port_attach_ep(struct cxl_memdev *cxlmd, @@ -1320,6 +1369,13 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) struct device *iter; int rc; + /* + * Skip intermediate port enumeration in the RCH case, there + * are no ports in between a host bridge and an endpoint. + */ + if (cxlmd->cxlds->rcd) + return 0; + rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd); if (rc) return rc; @@ -1428,7 +1484,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, return rc; } -static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) +struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) { struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; struct cxl_decoder *cxld = &cxlsd->cxld; @@ -1441,6 +1497,7 @@ static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos) return cxlrd->cxlsd.target[pos % iw]; } +EXPORT_SYMBOL_NS_GPL(cxl_hb_modulo, CXL); static struct lock_class_key cxl_decoder_key; @@ -1502,6 +1559,7 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * cxl_root_decoder_alloc - Allocate a root level decoder * @port: owning CXL root of this decoder * @nr_targets: static number of downstream targets + * @calc_hb: which host bridge covers the n'th position by granularity * * Return: A new cxl decoder to be registered by cxl_decoder_add(). A * 'CXL root' decoder is one that decodes from a top-level / static platform @@ -1509,7 +1567,8 @@ static int cxl_switch_decoder_init(struct cxl_port *port, * topology. 
*/ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets) + unsigned int nr_targets, + cxl_calc_hb_fn calc_hb) { struct cxl_root_decoder *cxlrd; struct cxl_switch_decoder *cxlsd; @@ -1531,7 +1590,7 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, return ERR_PTR(rc); } - cxlrd->calc_hb = cxl_hb_modulo; + cxlrd->calc_hb = calc_hb; cxld = &cxlsd->cxld; cxld->dev.type = &cxl_decoder_root_type; @@ -1797,12 +1856,27 @@ static void cxl_bus_remove(struct device *dev) static struct workqueue_struct *cxl_bus_wq; -int cxl_bus_rescan(void) +static void cxl_bus_rescan_queue(struct work_struct *w) { - return bus_rescan_devices(&cxl_bus_type); + int rc = bus_rescan_devices(&cxl_bus_type); + + pr_debug("CXL bus rescan result: %d\n", rc); +} + +void cxl_bus_rescan(void) +{ + static DECLARE_WORK(rescan_work, cxl_bus_rescan_queue); + + queue_work(cxl_bus_wq, &rescan_work); } EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL); +void cxl_bus_drain(void) +{ + drain_workqueue(cxl_bus_wq); +} +EXPORT_SYMBOL_NS_GPL(cxl_bus_drain, CXL); + bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) { return queue_work(cxl_bus_wq, &cxlmd->detach_work); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f9ae5ad284ff..02f28da519e3 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -324,7 +324,7 @@ static ssize_t interleave_ways_store(struct device *dev, if (rc) return rc; - rc = ways_to_cxl(val, &iw); + rc = ways_to_eiw(val, &iw); if (rc) return rc; @@ -391,7 +391,7 @@ static ssize_t interleave_granularity_store(struct device *dev, if (rc) return rc; - rc = granularity_to_cxl(val, &ig); + rc = granularity_to_eig(val, &ig); if (rc) return rc; @@ -1028,7 +1028,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, parent_iw = parent_cxld->interleave_ways; } - rc = granularity_to_cxl(parent_ig, &peig); + rc = granularity_to_eig(parent_ig, &peig); if (rc) { dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n", dev_name(parent_port->uport), @@ -1036,7 +1036,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, return rc; } - rc = ways_to_cxl(parent_iw, &peiw); + rc = ways_to_eiw(parent_iw, &peiw); if (rc) { dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n", dev_name(parent_port->uport), @@ -1045,7 +1045,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, } iw = cxl_rr->nr_targets; - rc = ways_to_cxl(iw, &eiw); + rc = ways_to_eiw(iw, &eiw); if (rc) { dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n", dev_name(port->uport), dev_name(&port->dev), iw); @@ -1065,7 +1065,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, eig = peig; } - rc = cxl_to_granularity(eig, &ig); + rc = eig_to_granularity(eig, &ig); if (rc) { dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n", dev_name(port->uport), dev_name(&port->dev), @@ -1226,7 +1226,7 @@ static int cxl_region_attach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled_target; struct cxl_memdev *cxlmd_target; - cxled_target = p->targets[pos]; + cxled_target = p->targets[i]; if (!cxled_target) continue; @@ -1403,6 +1403,8 @@ static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos) goto out; down_read(&cxl_dpa_rwsem); rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos); + if (rc == 0) + set_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); up_read(&cxl_dpa_rwsem); up_write(&cxl_region_rwsem); out: @@ -1812,6 +1814,7 @@ static struct lock_class_key cxl_pmem_region_key; static struct 
cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; + struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_pmem_region *cxlr_pmem; struct device *dev; int i; @@ -1839,6 +1842,18 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; + /* + * Regions never span CXL root devices, so by definition the + * bridge for one device is the same for all. + */ + if (i == 0) { + cxl_nvb = cxl_find_nvdimm_bridge(&cxlmd->dev); + if (!cxl_nvb) { + cxlr_pmem = ERR_PTR(-ENODEV); + goto out; + } + cxlr->cxl_nvb = cxl_nvb; + } m->cxlmd = cxlmd; get_device(&cxlmd->dev); m->start = cxled->dpa_res->start; @@ -1848,6 +1863,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr) dev = &cxlr_pmem->dev; cxlr_pmem->cxlr = cxlr; + cxlr->cxlr_pmem = cxlr_pmem; device_initialize(dev); lockdep_set_class(&dev->mutex, &cxl_pmem_region_key); device_set_pm_not_required(dev); @@ -1860,9 +1876,36 @@ out: return cxlr_pmem; } -static void cxlr_pmem_unregister(void *dev) +static void cxlr_pmem_unregister(void *_cxlr_pmem) { - device_unregister(dev); + struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; + struct cxl_region *cxlr = cxlr_pmem->cxlr; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; + + /* + * Either the bridge is in ->remove() context under the device_lock(), + * or cxlr_release_nvdimm() is cancelling the bridge's release action + * for @cxlr_pmem and doing it itself (while manually holding the bridge + * lock). + */ + device_lock_assert(&cxl_nvb->dev); + cxlr->cxlr_pmem = NULL; + cxlr_pmem->cxlr = NULL; + device_unregister(&cxlr_pmem->dev); +} + +static void cxlr_release_nvdimm(void *_cxlr) +{ + struct cxl_region *cxlr = _cxlr; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; + + device_lock(&cxl_nvb->dev); + if (cxlr->cxlr_pmem) + devm_release_action(&cxl_nvb->dev, cxlr_pmem_unregister, + cxlr->cxlr_pmem); + device_unlock(&cxl_nvb->dev); + cxlr->cxl_nvb = NULL; + put_device(&cxl_nvb->dev); } /** @@ -1874,12 +1917,14 @@ static void cxlr_pmem_unregister(void *dev) static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) { struct cxl_pmem_region *cxlr_pmem; + struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; int rc; cxlr_pmem = cxl_pmem_region_alloc(cxlr); if (IS_ERR(cxlr_pmem)) return PTR_ERR(cxlr_pmem); + cxl_nvb = cxlr->cxl_nvb; dev = &cxlr_pmem->dev; rc = dev_set_name(dev, "pmem_region%d", cxlr->id); @@ -1893,13 +1938,52 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr) dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), dev_name(dev)); - return devm_add_action_or_reset(&cxlr->dev, cxlr_pmem_unregister, dev); + device_lock(&cxl_nvb->dev); + if (cxl_nvb->dev.driver) + rc = devm_add_action_or_reset(&cxl_nvb->dev, + cxlr_pmem_unregister, cxlr_pmem); + else + rc = -ENXIO; + device_unlock(&cxl_nvb->dev); + + if (rc) + goto err_bridge; + + /* @cxlr carries a reference on @cxl_nvb until cxlr_release_nvdimm */ + return devm_add_action_or_reset(&cxlr->dev, cxlr_release_nvdimm, cxlr); err: put_device(dev); +err_bridge: + put_device(&cxl_nvb->dev); + cxlr->cxl_nvb = NULL; return rc; } +static int cxl_region_invalidate_memregion(struct cxl_region *cxlr) +{ + if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags)) + return 0; + + if (!cpu_cache_has_invalidate_memregion()) { + if (IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)) { + dev_warn( + &cxlr->dev, + "Bypassing 
cpu_cache_invalidate_memregion() for testing!\n"); + clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); + return 0; + } else { + dev_err(&cxlr->dev, + "Failed to synchronize CPU cache state\n"); + return -ENXIO; + } + } + + cpu_cache_invalidate_memregion(IORES_DESC_CXL); + clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags); + return 0; +} + static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); @@ -1915,14 +1999,21 @@ static int cxl_region_probe(struct device *dev) if (p->state < CXL_CONFIG_COMMIT) { dev_dbg(&cxlr->dev, "config state: %d\n", p->state); rc = -ENXIO; + goto out; } + rc = cxl_region_invalidate_memregion(cxlr); + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. */ +out: up_read(&cxl_region_rwsem); + if (rc) + return rc; + switch (cxlr->mode) { case CXL_DECODER_PMEM: return devm_cxl_add_pmem_region(cxlr); @@ -1950,4 +2041,5 @@ void cxl_region_exit(void) } MODULE_IMPORT_NS(CXL); +MODULE_IMPORT_NS(DEVMEM); MODULE_ALIAS_CXL(CXL_DEVICE_REGION); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 39a129c57d40..1476a0299c9b 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -7,6 +7,8 @@ #include <cxlmem.h> #include <cxlpci.h> +#include "core.h" + /** * DOC: cxl registers * @@ -59,36 +61,48 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, for (cap = 1; cap <= cap_count; cap++) { void __iomem *register_block; - u32 hdr; - int decoder_cnt; + struct cxl_reg_map *rmap; u16 cap_id, offset; - u32 length; + u32 length, hdr; hdr = readl(base + cap * 0x4); cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr); offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr); register_block = base + offset; + hdr = readl(register_block); + rmap = NULL; switch (cap_id) { - case CXL_CM_CAP_CAP_ID_HDM: + case CXL_CM_CAP_CAP_ID_HDM: { + int decoder_cnt; + dev_dbg(dev, "found HDM decoder capability (0x%x)\n", offset); - hdr = readl(register_block); - decoder_cnt = cxl_hdm_decoder_count(hdr); length = 0x20 * decoder_cnt + 0x10; - - map->hdm_decoder.valid = true; - map->hdm_decoder.offset = CXL_CM_OFFSET + offset; - map->hdm_decoder.size = length; + rmap = &map->hdm_decoder; + break; + } + case CXL_CM_CAP_CAP_ID_RAS: + dev_dbg(dev, "found RAS capability (0x%x)\n", + offset); + length = CXL_RAS_CAPABILITY_LENGTH; + rmap = &map->ras; break; default: dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id, offset); break; } + + if (!rmap) + continue; + rmap->valid = true; + rmap->id = cap_id; + rmap->offset = CXL_CM_OFFSET + offset; + rmap->size = length; } } EXPORT_SYMBOL_NS_GPL(cxl_probe_component_regs, CXL); @@ -117,6 +131,7 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base, cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array); for (cap = 1; cap <= cap_count; cap++) { + struct cxl_reg_map *rmap; u32 offset, length; u16 cap_id; @@ -125,28 +140,22 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base, offset = readl(base + cap * 0x10 + 0x4); length = readl(base + cap * 0x10 + 0x8); + rmap = NULL; switch (cap_id) { case CXLDEV_CAP_CAP_ID_DEVICE_STATUS: dev_dbg(dev, "found Status capability (0x%x)\n", offset); - - map->status.valid = true; - map->status.offset = offset; - map->status.size = length; + rmap = &map->status; break; case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX: dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset); - map->mbox.valid = true; - map->mbox.offset = offset; - map->mbox.size = 
length; + rmap = &map->mbox; break; case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX: dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset); break; case CXLDEV_CAP_CAP_ID_MEMDEV: dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset); - map->memdev.valid = true; - map->memdev.offset = offset; - map->memdev.size = length; + rmap = &map->memdev; break; default: if (cap_id >= 0x8000) @@ -155,6 +164,13 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base, dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset); break; } + + if (!rmap) + continue; + rmap->valid = true; + rmap->id = cap_id; + rmap->offset = offset; + rmap->size = length; } } EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL); @@ -165,6 +181,9 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, void __iomem *ret_val; struct resource *res; + if (WARN_ON_ONCE(addr == CXL_RESOURCE_NONE)) + return NULL; + res = devm_request_mem_region(dev, addr, length, dev_name(dev)); if (!res) { resource_size_t end = addr + length - 1; @@ -180,67 +199,65 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, return ret_val; } -int cxl_map_component_regs(struct pci_dev *pdev, - struct cxl_component_regs *regs, - struct cxl_register_map *map) +int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs, + struct cxl_register_map *map, unsigned long map_mask) { - struct device *dev = &pdev->dev; - resource_size_t phys_addr; - resource_size_t length; - - phys_addr = pci_resource_start(pdev, map->barno); - phys_addr += map->block_offset; + struct mapinfo { + struct cxl_reg_map *rmap; + void __iomem **addr; + } mapinfo[] = { + { &map->component_map.hdm_decoder, ®s->hdm_decoder }, + { &map->component_map.ras, ®s->ras }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(mapinfo); i++) { + struct mapinfo *mi = &mapinfo[i]; + resource_size_t phys_addr; + resource_size_t length; - phys_addr += map->component_map.hdm_decoder.offset; - length = map->component_map.hdm_decoder.size; - regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length); - if (!regs->hdm_decoder) - return -ENOMEM; + if (!mi->rmap->valid) + continue; + if (!test_bit(mi->rmap->id, &map_mask)) + continue; + phys_addr = map->resource + mi->rmap->offset; + length = mi->rmap->size; + *(mi->addr) = devm_cxl_iomap_block(dev, phys_addr, length); + if (!*(mi->addr)) + return -ENOMEM; + } return 0; } EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL); -int cxl_map_device_regs(struct pci_dev *pdev, +int cxl_map_device_regs(struct device *dev, struct cxl_device_regs *regs, struct cxl_register_map *map) { - struct device *dev = &pdev->dev; - resource_size_t phys_addr; - - phys_addr = pci_resource_start(pdev, map->barno); - phys_addr += map->block_offset; - - if (map->device_map.status.valid) { - resource_size_t addr; + resource_size_t phys_addr = map->resource; + struct mapinfo { + struct cxl_reg_map *rmap; + void __iomem **addr; + } mapinfo[] = { + { &map->device_map.status, ®s->status, }, + { &map->device_map.mbox, ®s->mbox, }, + { &map->device_map.memdev, ®s->memdev, }, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(mapinfo); i++) { + struct mapinfo *mi = &mapinfo[i]; resource_size_t length; - - addr = phys_addr + map->device_map.status.offset; - length = map->device_map.status.size; - regs->status = devm_cxl_iomap_block(dev, addr, length); - if (!regs->status) - return -ENOMEM; - } - - if (map->device_map.mbox.valid) { resource_size_t addr; - resource_size_t length; - addr = phys_addr + 
map->device_map.mbox.offset; - length = map->device_map.mbox.size; - regs->mbox = devm_cxl_iomap_block(dev, addr, length); - if (!regs->mbox) - return -ENOMEM; - } - - if (map->device_map.memdev.valid) { - resource_size_t addr; - resource_size_t length; + if (!mi->rmap->valid) + continue; - addr = phys_addr + map->device_map.memdev.offset; - length = map->device_map.memdev.size; - regs->memdev = devm_cxl_iomap_block(dev, addr, length); - if (!regs->memdev) + addr = phys_addr + mi->rmap->offset; + length = mi->rmap->size; + *(mi->addr) = devm_cxl_iomap_block(dev, addr, length); + if (!*(mi->addr)) return -ENOMEM; } @@ -248,13 +265,24 @@ int cxl_map_device_regs(struct pci_dev *pdev, } EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL); -static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, +static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - map->block_offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); - map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + u64 offset = ((u64)reg_hi << 32) | + (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + + if (offset > pci_resource_len(pdev, bar)) { + dev_warn(&pdev->dev, + "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar, + &pdev->resource[bar], &offset, map->reg_type); + return false; + } + map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); + map->resource = pci_resource_start(pdev, bar) + offset; + map->max_size = pci_resource_len(pdev, bar) - offset; + return true; } /** @@ -274,7 +302,7 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, u32 regloc_size, regblocks; int regloc, i; - map->block_offset = U64_MAX; + map->resource = CXL_RESOURCE_NONE; regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_REG_LOCATOR); if (!regloc) @@ -292,13 +320,79 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, pci_read_config_dword(pdev, regloc, ®_lo); pci_read_config_dword(pdev, regloc + 4, ®_hi); - cxl_decode_regblock(reg_lo, reg_hi, map); + if (!cxl_decode_regblock(pdev, reg_lo, reg_hi, map)) + continue; if (map->reg_type == type) return 0; } - map->block_offset = U64_MAX; + map->resource = CXL_RESOURCE_NONE; return -ENODEV; } EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL); + +resource_size_t cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + resource_size_t component_reg_phys; + void __iomem *addr; + u32 bar0, bar1; + u16 cmd; + u32 id; + + if (which == CXL_RCRB_UPSTREAM) + rcrb += SZ_4K; + + /* + * RCRB's BAR[0..1] point to component block containing CXL + * subsystem component registers. MEMBAR extraction follows + * the PCI Base spec here, esp. 64 bit extraction and memory + * ranges alignment (6.0, 7.5.1.2.1). 
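For illustration, a minimal sketch of how a caller consumes the reworked cxl_register_map: cxl_find_regblock() now records an absolute physical address in map.resource (CXL_RESOURCE_NONE when no block is found) plus the remaining BAR space in map.max_size, and component-register mapping is filtered by a bitmask of capability IDs. The helper name below is hypothetical, and map.component_map is assumed to have been populated by cxl_probe_component_regs() beforehand:

	static int example_map_ras(struct pci_dev *pdev, struct cxl_dev_state *cxlds)
	{
		struct cxl_register_map map;
		int rc;

		rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
		if (rc)
			return rc;	/* map.resource is CXL_RESOURCE_NONE here */

		/* no pci_resource_start() + block_offset arithmetic needed */
		cxlds->component_reg_phys = map.resource;

		/* map only the RAS capability out of the component block */
		return cxl_map_component_regs(&pdev->dev, &cxlds->regs.component,
					      &map, BIT(CXL_CM_CAP_CAP_ID_RAS));
	}
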
+ */ + if (!request_mem_region(rcrb, SZ_4K, "CXL RCRB")) + return CXL_RESOURCE_NONE; + addr = ioremap(rcrb, SZ_4K); + if (!addr) { + dev_err(dev, "Failed to map region %pr\n", addr); + release_mem_region(rcrb, SZ_4K); + return CXL_RESOURCE_NONE; + } + + id = readl(addr + PCI_VENDOR_ID); + cmd = readw(addr + PCI_COMMAND); + bar0 = readl(addr + PCI_BASE_ADDRESS_0); + bar1 = readl(addr + PCI_BASE_ADDRESS_1); + iounmap(addr); + release_mem_region(rcrb, SZ_4K); + + /* + * Sanity check, see CXL 3.0 Figure 9-8 CXL Device that Does Not + * Remap Upstream Port and Component Registers + */ + if (id == U32_MAX) { + if (which == CXL_RCRB_DOWNSTREAM) + dev_err(dev, "Failed to access Downstream Port RCRB\n"); + return CXL_RESOURCE_NONE; + } + if (!(cmd & PCI_COMMAND_MEMORY)) + return CXL_RESOURCE_NONE; + /* The RCRB is a Memory Window, and the MEM_TYPE_1M bit is obsolete */ + if (bar0 & (PCI_BASE_ADDRESS_MEM_TYPE_1M | PCI_BASE_ADDRESS_SPACE_IO)) + return CXL_RESOURCE_NONE; + + component_reg_phys = bar0 & PCI_BASE_ADDRESS_MEM_MASK; + if (bar0 & PCI_BASE_ADDRESS_MEM_TYPE_64) + component_reg_phys |= ((u64)bar1) << 32; + + if (!component_reg_phys) + return CXL_RESOURCE_NONE; + + /* MEMBAR is block size (64k) aligned. */ + if (!IS_ALIGNED(component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE)) + return CXL_RESOURCE_NONE; + + return component_reg_phys; +} +EXPORT_SYMBOL_NS_GPL(cxl_rcrb_to_component, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ac75554b5d76..1b1cf459ac77 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -33,6 +33,7 @@ #define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) #define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) +#define CXL_CM_CAP_CAP_ID_RAS 0x2 #define CXL_CM_CAP_CAP_ID_HDM 0x5 #define CXL_CM_CAP_CAP_HDM_VERSION 1 @@ -61,6 +62,10 @@ #define CXL_HDM_DECODER0_SKIP_LOW(i) CXL_HDM_DECODER0_TL_LOW(i) #define CXL_HDM_DECODER0_SKIP_HIGH(i) CXL_HDM_DECODER0_TL_HIGH(i) +/* HDM decoder control register constants CXL 3.0 8.2.5.19.7 */ +#define CXL_DECODER_MIN_GRANULARITY 256 +#define CXL_DECODER_MAX_ENCODED_IG 6 + static inline int cxl_hdm_decoder_count(u32 cap_hdr) { int val = FIELD_GET(CXL_HDM_DECODER_COUNT_MASK, cap_hdr); @@ -69,23 +74,23 @@ static inline int cxl_hdm_decoder_count(u32 cap_hdr) } /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ -static inline int cxl_to_granularity(u16 ig, unsigned int *val) +static inline int eig_to_granularity(u16 eig, unsigned int *granularity) { - if (ig > 6) + if (eig > CXL_DECODER_MAX_ENCODED_IG) return -EINVAL; - *val = 256 << ig; + *granularity = CXL_DECODER_MIN_GRANULARITY << eig; return 0; } /* Encode defined in CXL ECN "3, 6, 12 and 16-way memory Interleaving" */ -static inline int cxl_to_ways(u8 eniw, unsigned int *val) +static inline int eiw_to_ways(u8 eiw, unsigned int *ways) { - switch (eniw) { + switch (eiw) { case 0 ... 4: - *val = 1 << eniw; + *ways = 1 << eiw; break; case 8 ... 
10: - *val = 3 << (eniw - 8); + *ways = 3 << (eiw - 8); break; default: return -EINVAL; @@ -94,20 +99,21 @@ static inline int cxl_to_ways(u8 eniw, unsigned int *val) return 0; } -static inline int granularity_to_cxl(int g, u16 *ig) +static inline int granularity_to_eig(int granularity, u16 *eig) { - if (g > SZ_16K || g < 256 || !is_power_of_2(g)) + if (granularity > SZ_16K || granularity < CXL_DECODER_MIN_GRANULARITY || + !is_power_of_2(granularity)) return -EINVAL; - *ig = ilog2(g) - 8; + *eig = ilog2(granularity) - 8; return 0; } -static inline int ways_to_cxl(unsigned int ways, u8 *iw) +static inline int ways_to_eiw(unsigned int ways, u8 *eiw) { if (ways > 16) return -EINVAL; if (is_power_of_2(ways)) { - *iw = ilog2(ways); + *eiw = ilog2(ways); return 0; } if (ways % 3) @@ -115,10 +121,26 @@ static inline int ways_to_cxl(unsigned int ways, u8 *iw) ways /= 3; if (!is_power_of_2(ways)) return -EINVAL; - *iw = ilog2(ways) + 8; + *eiw = ilog2(ways) + 8; return 0; } +/* RAS Registers CXL 2.0 8.2.5.9 CXL RAS Capability Structure */ +#define CXL_RAS_UNCORRECTABLE_STATUS_OFFSET 0x0 +#define CXL_RAS_UNCORRECTABLE_STATUS_MASK (GENMASK(16, 14) | GENMASK(11, 0)) +#define CXL_RAS_UNCORRECTABLE_MASK_OFFSET 0x4 +#define CXL_RAS_UNCORRECTABLE_MASK_MASK (GENMASK(16, 14) | GENMASK(11, 0)) +#define CXL_RAS_UNCORRECTABLE_SEVERITY_OFFSET 0x8 +#define CXL_RAS_UNCORRECTABLE_SEVERITY_MASK (GENMASK(16, 14) | GENMASK(11, 0)) +#define CXL_RAS_CORRECTABLE_STATUS_OFFSET 0xC +#define CXL_RAS_CORRECTABLE_STATUS_MASK GENMASK(6, 0) +#define CXL_RAS_CORRECTABLE_MASK_OFFSET 0x10 +#define CXL_RAS_CORRECTABLE_MASK_MASK GENMASK(6, 0) +#define CXL_RAS_CAP_CONTROL_OFFSET 0x14 +#define CXL_RAS_CAP_CONTROL_FE_MASK GENMASK(5, 0) +#define CXL_RAS_HEADER_LOG_OFFSET 0x18 +#define CXL_RAS_CAPABILITY_LENGTH 0x58 + /* CXL 2.0 8.2.8.1 Device Capabilities Array Register */ #define CXLDEV_CAP_ARRAY_OFFSET 0x0 #define CXLDEV_CAP_ARRAY_CAP_ID 0 @@ -153,9 +175,11 @@ struct cxl_regs { /* * Common set of CXL Component register block base pointers * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure */ struct_group_tagged(cxl_component_regs, component, void __iomem *hdm_decoder; + void __iomem *ras; ); /* * Common set of CXL Device register block base pointers @@ -170,12 +194,14 @@ struct cxl_regs { struct cxl_reg_map { bool valid; + int id; unsigned long offset; unsigned long size; }; struct cxl_component_reg_map { struct cxl_reg_map hdm_decoder; + struct cxl_reg_map ras; }; struct cxl_device_reg_map { @@ -187,17 +213,17 @@ struct cxl_device_reg_map { /** * struct cxl_register_map - DVSEC harvested register block mapping parameters * @base: virtual base of the register-block-BAR + @block_offset - * @block_offset: offset to start of register block in @barno + * @resource: physical resource base of the register block + * @max_size: maximum mapping size to perform register search * @reg_type: see enum cxl_regloc_type - * @barno: PCI BAR number containing the register block * @component_map: cxl_reg_map for component registers * @device_map: cxl_reg_maps for device registers */ struct cxl_register_map { void __iomem *base; - u64 block_offset; + resource_size_t resource; + resource_size_t max_size; u8 reg_type; - u8 barno; union { struct cxl_component_reg_map component_map; struct cxl_device_reg_map device_map; @@ -208,18 +234,23 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device 
*dev, void __iomem *base, struct cxl_device_reg_map *map); -int cxl_map_component_regs(struct pci_dev *pdev, - struct cxl_component_regs *regs, - struct cxl_register_map *map); -int cxl_map_device_regs(struct pci_dev *pdev, - struct cxl_device_regs *regs, +int cxl_map_component_regs(struct device *dev, struct cxl_component_regs *regs, + struct cxl_register_map *map, + unsigned long map_mask); +int cxl_map_device_regs(struct device *dev, struct cxl_device_regs *regs, struct cxl_register_map *map); enum cxl_regloc_type; int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); -void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, - resource_size_t length); + +enum cxl_rcrb { + CXL_RCRB_DOWNSTREAM, + CXL_RCRB_UPSTREAM, +}; +resource_size_t cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 @@ -248,7 +279,6 @@ enum cxl_decoder_type { */ #define CXL_DECODER_MAX_INTERLEAVE 16 -#define CXL_DECODER_MIN_GRANULARITY 256 /** * struct cxl_decoder - Common CXL HDM Decoder Attributes @@ -324,18 +354,23 @@ struct cxl_switch_decoder { struct cxl_dport *target[]; }; +struct cxl_root_decoder; +typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd, + int pos); /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @region_id: region id for next region provisioning event * @calc_hb: which host bridge covers the n'th position by granularity + * @platform_data: platform specific configuration data * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { struct resource *res; atomic_t region_id; - struct cxl_dport *(*calc_hb)(struct cxl_root_decoder *cxlrd, int pos); + cxl_calc_hb_fn calc_hb; + void *platform_data; struct cxl_switch_decoder cxlsd; }; @@ -379,12 +414,21 @@ struct cxl_region_params { int nr_targets; }; +/* + * Flag whether this region needs to have its HPA span synchronized with + * CPU cache state at region activation time. + */ +#define CXL_REGION_F_INCOHERENT 0 + /** * struct cxl_region - CXL region * @dev: This region's device * @id: This region's id. 
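A condensed sketch of how CXL_REGION_F_INCOHERENT is intended to be consumed, following the cxl_region_invalidate_memregion() logic earlier in this diff (the function name below is hypothetical and the test-config bypass branch is omitted):

	static int example_region_sync_cache(struct cxl_region *cxlr)
	{
		if (!test_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags))
			return 0;

		/* e.g. running under a hypervisor where wbinvd is unavailable */
		if (!cpu_cache_has_invalidate_memregion())
			return -ENXIO;

		cpu_cache_invalidate_memregion(IORES_DESC_CXL);
		clear_bit(CXL_REGION_F_INCOHERENT, &cxlr->flags);
		return 0;
	}

Paths that change a committed region's HPA decode are expected to set the flag so that the next region probe flushes CPU caches before a memory driver attaches.
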
Id is globally unique across all regions * @mode: Endpoint decoder allocation / access mode * @type: Endpoint decoder target type + * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown + * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge + * @flags: Region state flags * @params: active + config params for the region */ struct cxl_region { @@ -392,38 +436,26 @@ struct cxl_region { int id; enum cxl_decoder_mode mode; enum cxl_decoder_type type; + struct cxl_nvdimm_bridge *cxl_nvb; + struct cxl_pmem_region *cxlr_pmem; + unsigned long flags; struct cxl_region_params params; }; -/** - * enum cxl_nvdimm_brige_state - state machine for managing bus rescans - * @CXL_NVB_NEW: Set at bridge create and after cxl_pmem_wq is destroyed - * @CXL_NVB_DEAD: Set at brige unregistration to preclude async probing - * @CXL_NVB_ONLINE: Target state after successful ->probe() - * @CXL_NVB_OFFLINE: Target state after ->remove() or failed ->probe() - */ -enum cxl_nvdimm_brige_state { - CXL_NVB_NEW, - CXL_NVB_DEAD, - CXL_NVB_ONLINE, - CXL_NVB_OFFLINE, -}; - struct cxl_nvdimm_bridge { int id; struct device dev; struct cxl_port *port; struct nvdimm_bus *nvdimm_bus; struct nvdimm_bus_descriptor nd_desc; - struct work_struct state_work; - enum cxl_nvdimm_brige_state state; }; +#define CXL_DEV_ID_LEN 19 + struct cxl_nvdimm { struct device dev; struct cxl_memdev *cxlmd; - struct cxl_nvdimm_bridge *bridge; - struct xarray pmem_regions; + u8 dev_id[CXL_DEV_ID_LEN]; /* for nvdimm, string of 'serial' */ }; struct cxl_pmem_region_mapping { @@ -438,7 +470,6 @@ struct cxl_pmem_region { struct device dev; struct cxl_region *cxlr; struct nd_region *nd_region; - struct cxl_nvdimm_bridge *bridge; struct range hpa_range; int nr_mappings; struct cxl_pmem_region_mapping mapping[]; @@ -500,12 +531,16 @@ cxl_find_dport_by_dev(struct cxl_port *port, const struct device *dport_dev) * @dport: PCI bridge or firmware device representing the downstream link * @port_id: unique hardware identifier for dport in decoder target list * @component_reg_phys: downstream port component registers + * @rcrb: base address for the Root Complex Register Block + * @rch: Indicate whether this dport was enumerated in RCH or VH mode * @port: reference to cxl_port that contains this downstream port */ struct cxl_dport { struct device *dport; int port_id; resource_size_t component_reg_phys; + resource_size_t rcrb; + bool rch; struct cxl_port *port; }; @@ -562,11 +597,10 @@ struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_dport *parent_dport); -int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd, - struct cxl_dport *parent_dport); struct cxl_port *find_cxl_root(struct device *dev); int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); -int cxl_bus_rescan(void); +void cxl_bus_rescan(void); +void cxl_bus_drain(void); struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd, struct cxl_dport **dport); bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd); @@ -574,6 +608,10 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd); struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, resource_size_t component_reg_phys); +struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, + struct device *dport_dev, int port_id, + resource_size_t component_reg_phys, + resource_size_t rcrb); struct cxl_decoder *to_cxl_decoder(struct device *dev); struct 
cxl_root_decoder *to_cxl_root_decoder(struct device *dev); @@ -581,7 +619,9 @@ struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev); bool is_root_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, - unsigned int nr_targets); + unsigned int nr_targets, + cxl_calc_hb_fn calc_hb); +struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); @@ -637,7 +677,7 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host, struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm(struct device *dev); bool is_cxl_nvdimm_bridge(struct device *dev); -int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd); +int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd); struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *dev); #ifdef CONFIG_CXL_REGION diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 88e3a8e54b6a..ab138004f644 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -35,6 +35,8 @@ * @cdev: char dev core object for ioctl operations * @cxlds: The device state backing this device * @detach_work: active memdev lost a port in its ancestry + * @cxl_nvb: coordinate removal of @cxl_nvd if present + * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @id: id number of this memdev instance. */ struct cxl_memdev { @@ -42,6 +44,8 @@ struct cxl_memdev { struct cdev cdev; struct cxl_dev_state *cxlds; struct work_struct detach_work; + struct cxl_nvdimm_bridge *cxl_nvb; + struct cxl_nvdimm *cxl_nvd; int id; }; @@ -76,6 +80,15 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); +static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port, + struct cxl_memdev *cxlmd) +{ + if (!port) + return NULL; + + return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev); +} + /** * struct cxl_mbox_cmd - A command to be submitted to hardware. * @opcode: (input) The command set and command submitted to hardware. @@ -88,6 +101,7 @@ struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); * outputs commands this is always expected to be deterministic. For * variable sized output commands, it tells the exact number of bytes * written. + * @min_out: (input) internal command output payload size validation * @return_code: (output) Error code returned from hardware. * * This is the primary mechanism used to send commands to the hardware. @@ -102,6 +116,7 @@ struct cxl_mbox_cmd { void *payload_out; size_t size_in; size_t size_out; + size_t min_out; u16 return_code; }; @@ -186,8 +201,10 @@ struct cxl_endpoint_dvsec_info { * Currently only memory devices are represented. 
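As a hedged illustration of the reworked mailbox interface (the helper below is hypothetical; it reuses the CXL_MBOX_OP_GET_LSA opcode and struct cxl_mbox_get_lsa payload seen later in this diff): callers now fill a struct cxl_mbox_cmd, optionally setting ->min_out so the core can reject short completions, and submit it with cxl_internal_send_cmd() instead of passing opcode and buffer arguments individually.

	static int example_read_lsa(struct cxl_dev_state *cxlds, void *buf,
				    u32 offset, u32 length)
	{
		struct cxl_mbox_get_lsa get_lsa = {
			.offset = cpu_to_le32(offset),
			.length = cpu_to_le32(length),
		};
		struct cxl_mbox_cmd mbox_cmd = {
			.opcode = CXL_MBOX_OP_GET_LSA,
			.payload_in = &get_lsa,
			.size_in = sizeof(get_lsa),
			.size_out = length,
			.payload_out = buf,
			.min_out = length,	/* optional: reject short reads */
		};

		return cxl_internal_send_cmd(cxlds, &mbox_cmd);
	}
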
* * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev * @regs: Parsed register blocks * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) * @payload_size: Size of space for payload * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area @@ -218,10 +235,12 @@ struct cxl_endpoint_dvsec_info { */ struct cxl_dev_state { struct device *dev; + struct cxl_memdev *cxlmd; struct cxl_regs regs; int cxl_dvsec; + bool rcd; size_t payload_size; size_t lsa_size; struct mutex mbox_mutex; /* Protects device mailbox and firmware */ @@ -273,6 +292,12 @@ enum cxl_opcode { CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS = 0x4303, CXL_MBOX_OP_SCAN_MEDIA = 0x4304, CXL_MBOX_OP_GET_SCAN_MEDIA = 0x4305, + CXL_MBOX_OP_GET_SECURITY_STATE = 0x4500, + CXL_MBOX_OP_SET_PASSPHRASE = 0x4501, + CXL_MBOX_OP_DISABLE_PASSPHRASE = 0x4502, + CXL_MBOX_OP_UNLOCK = 0x4503, + CXL_MBOX_OP_FREEZE_SECURITY = 0x4504, + CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE = 0x4505, CXL_MBOX_OP_MAX = 0x10000 }; @@ -372,8 +397,43 @@ struct cxl_mem_command { #define CXL_CMD_FLAG_FORCE_ENABLE BIT(0) }; -int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, - size_t in_size, void *out, size_t out_size); +#define CXL_PMEM_SEC_STATE_USER_PASS_SET 0x01 +#define CXL_PMEM_SEC_STATE_MASTER_PASS_SET 0x02 +#define CXL_PMEM_SEC_STATE_LOCKED 0x04 +#define CXL_PMEM_SEC_STATE_FROZEN 0x08 +#define CXL_PMEM_SEC_STATE_USER_PLIMIT 0x10 +#define CXL_PMEM_SEC_STATE_MASTER_PLIMIT 0x20 + +/* set passphrase input payload */ +struct cxl_set_pass { + u8 type; + u8 reserved[31]; + /* CXL field using NVDIMM define, same length */ + u8 old_pass[NVDIMM_PASSPHRASE_LEN]; + u8 new_pass[NVDIMM_PASSPHRASE_LEN]; +} __packed; + +/* disable passphrase input payload */ +struct cxl_disable_pass { + u8 type; + u8 reserved[31]; + u8 pass[NVDIMM_PASSPHRASE_LEN]; +} __packed; + +/* passphrase secure erase payload */ +struct cxl_pass_erase { + u8 type; + u8 reserved[31]; + u8 pass[NVDIMM_PASSPHRASE_LEN]; +} __packed; + +enum { + CXL_PMEM_SEC_PASS_MASTER = 0, + CXL_PMEM_SEC_PASS_USER, +}; + +int cxl_internal_send_cmd(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd); int cxl_dev_state_identify(struct cxl_dev_state *cxlds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_dev_state *cxlds); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index eec597dbe763..920909791bb9 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -62,15 +62,6 @@ enum cxl_regloc_type { CXL_REGLOC_RBI_TYPES }; -static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, - struct cxl_register_map *map) -{ - if (map->block_offset == U64_MAX) - return CXL_RESOURCE_NONE; - - return pci_resource_start(pdev, map->barno) + map->block_offset; -} - int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 64ccf053d32c..39c4b54f0715 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -45,9 +45,60 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data) return 0; } +static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport) +{ + struct cxl_port *parent_port = parent_dport->port; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *endpoint, *iter, 
*down; + resource_size_t component_reg_phys; + int rc; + + /* + * Now that the path to the root is established record all the + * intervening ports in the chain. + */ + for (iter = parent_port, down = NULL; !is_cxl_root(iter); + down = iter, iter = to_cxl_port(iter->dev.parent)) { + struct cxl_ep *ep; + + ep = cxl_ep_load(iter, cxlmd); + ep->next = down; + } + + /* + * The component registers for an RCD might come from the + * host-bridge RCRB if they are not already mapped via the + * typical register locator mechanism. + */ + if (parent_dport->rch && cxlds->component_reg_phys == CXL_RESOURCE_NONE) + component_reg_phys = cxl_rcrb_to_component( + &cxlmd->dev, parent_dport->rcrb, CXL_RCRB_UPSTREAM); + else + component_reg_phys = cxlds->component_reg_phys; + endpoint = devm_cxl_add_port(host, &cxlmd->dev, component_reg_phys, + parent_dport); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return 0; +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct device *endpoint_parent; struct cxl_port *parent_port; struct cxl_dport *dport; struct dentry *dentry; @@ -80,21 +131,34 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - device_lock(&parent_port->dev); - if (!parent_port->dev.driver) { + if (dport->rch) + endpoint_parent = parent_port->uport; + else + endpoint_parent = &parent_port->dev; + + device_lock(endpoint_parent); + if (!endpoint_parent->driver) { dev_err(dev, "CXL port topology %s not enabled\n", - dev_name(&parent_port->dev)); + dev_name(endpoint_parent)); rc = -ENXIO; goto unlock; } - rc = devm_cxl_add_endpoint(cxlmd, dport); + rc = devm_cxl_add_endpoint(endpoint_parent, cxlmd, dport); unlock: - device_unlock(&parent_port->dev); + device_unlock(endpoint_parent); put_device(&parent_port->dev); if (rc) return rc; + if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) { + rc = devm_cxl_add_nvdimm(cxlmd); + if (rc == -ENODEV) + dev_info(dev, "PMEM disabled by platform\n"); + else + return rc; + } + /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index faeb5d9d7a7a..33083a522fd1 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -9,10 +9,13 @@ #include <linux/list.h> #include <linux/pci.h> #include <linux/pci-doe.h> +#include <linux/aer.h> #include <linux/io.h> #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" +#define CREATE_TRACE_POINTS +#include <trace/events/cxl.h> /** * DOC: cxl pci @@ -276,35 +279,22 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map) { - void __iomem *addr; - int bar = map->barno; struct device *dev = &pdev->dev; - resource_size_t offset = map->block_offset; - - /* Basic sanity check that BAR is big enough */ - if (pci_resource_len(pdev, bar) < offset) { - dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar, - &pdev->resource[bar], &offset); - return -ENXIO; - } - addr = pci_iomap(pdev, bar, 0); - if (!addr) { + map->base = ioremap(map->resource, map->max_size); + if (!map->base) { dev_err(dev, "failed to map registers\n"); return -ENOMEM; } - dev_dbg(dev, "Mapped CXL Memory Device 
resource bar %u @ %pa\n", - bar, &offset); - - map->base = addr + map->block_offset; + dev_dbg(dev, "Mapped CXL Memory Device resource %pa\n", &map->resource); return 0; } static void cxl_unmap_regblock(struct pci_dev *pdev, struct cxl_register_map *map) { - pci_iounmap(pdev, map->base - map->block_offset); + iounmap(map->base); map->base = NULL; } @@ -324,6 +314,9 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map) return -ENXIO; } + if (!comp_map->ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + dev_dbg(dev, "Set up component registers\n"); break; case CXL_REGLOC_RBI_MEMDEV: @@ -347,27 +340,6 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map) return 0; } -static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map) -{ - struct device *dev = cxlds->dev; - struct pci_dev *pdev = to_pci_dev(dev); - - switch (map->reg_type) { - case CXL_REGLOC_RBI_COMPONENT: - cxl_map_component_regs(pdev, &cxlds->regs.component, map); - dev_dbg(dev, "Mapping component registers...\n"); - break; - case CXL_REGLOC_RBI_MEMDEV: - cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map); - dev_dbg(dev, "Probing device registers...\n"); - break; - default: - break; - } - - return 0; -} - static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map) { @@ -418,6 +390,11 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds) continue; } + if (!pci_request_config_region_exclusive(pdev, off, + PCI_DOE_CAP_SIZEOF, + dev_name(dev))) + pci_err(pdev, "Failed to exclude DOE registers\n"); + if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) { dev_err(dev, "xa_insert failed to insert MB @ %x\n", off); @@ -428,6 +405,20 @@ static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds) } } +/* + * Assume that any RCIEP that emits the CXL memory expander class code + * is an RCD + */ +static bool is_cxl_restricted(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; +} + +static void disable_aer(void *pdev) +{ + pci_disable_pcie_error_reporting(pdev); +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -449,7 +440,9 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds = cxl_dev_state_create(&pdev->dev); if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + pci_set_drvdata(pdev, cxlds); + cxlds->rcd = is_cxl_restricted(pdev); cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); @@ -461,7 +454,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_map_regs(cxlds, &map); + rc = cxl_map_device_regs(&pdev->dev, &cxlds->regs.device_regs, &map); if (rc) return rc; @@ -474,10 +467,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) dev_warn(&pdev->dev, "No component registers (%d)\n", rc); - cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map); + cxlds->component_reg_phys = map.resource; devm_cxl_pci_create_doe(cxlds); + rc = cxl_map_component_regs(&pdev->dev, &cxlds->regs.component, + &map, BIT(CXL_CM_CAP_CAP_ID_RAS)); + if (rc) + dev_dbg(&pdev->dev, "Failed to map RAS capability.\n"); + rc = cxl_pci_setup_mailbox(cxlds); if (rc) return rc; @@ -498,8 +496,13 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if 
(resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) - rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd); + if (cxlds->regs.ras) { + pci_enable_pcie_error_reporting(pdev); + rc = devm_add_action_or_reset(&pdev->dev, disable_aer, pdev); + if (rc) + return rc; + } + pci_save_state(pdev); return rc; } @@ -511,10 +514,151 @@ static const struct pci_device_id cxl_mem_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. + */ +static bool cxl_report_and_clear(struct cxl_dev_state *cxlds) +{ + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!cxlds->regs.ras) + return false; + + addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET; + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr))); + } else { + fe = status; + } + + header_log_copy(cxlds, hl); + trace_cxl_aer_uncorrectable_error(dev, status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +static pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. 
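A worked illustration, with hypothetical values, of the first-error selection in cxl_report_and_clear() above: when more than one uncorrectable error is latched, the header log describes only the error named by the First Error Pointer field of the RAS Capability Control register, so that single bit is reported rather than the whole status mask.

	static u32 example_first_error(u32 status, u32 cap_control)
	{
		/* e.g. status = 0x11 with a first-error pointer of 4 yields 0x10 */
		if (hweight32(status) > 1)
			return BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, cap_control));
		return status;
	}
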
+ */ + ue = cxl_report_and_clear(cxlds); + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + + dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n", + dev_name(dev)); + pci_restore_state(pdev); + if (device_attach(dev) <= 0) + return PCI_ERS_RESULT_DISCONNECT; + return PCI_ERS_RESULT_RECOVERED; +} + +static void cxl_error_resume(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + + dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev), + dev->driver ? "successful" : "failed"); +} + +static void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + void __iomem *addr; + u32 status; + + if (!cxlds->regs.ras) + return; + + addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(dev, status); + } +} + +static const struct pci_error_handlers cxl_error_handlers = { + .error_detected = cxl_error_detected, + .slot_reset = cxl_slot_reset, + .resume = cxl_error_resume, + .cor_error_detected = cxl_cor_error_detected, +}; + static struct pci_driver cxl_pci_driver = { .name = KBUILD_MODNAME, .id_table = cxl_mem_pci_tbl, .probe = cxl_pci_probe, + .err_handler = &cxl_error_handlers, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 4c627d67281a..eedefebc4283 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -11,12 +11,7 @@ #include "cxlmem.h" #include "cxl.h" -/* - * Ordered workqueue for cxl nvdimm device arrival and departure - * to coordinate bus rescans when a bridge arrives and trigger remove - * operations when the bridge is removed. 
- */ -static struct workqueue_struct *cxl_pmem_wq; +extern const struct nvdimm_security_ops *cxl_security_ops; static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); @@ -27,78 +22,81 @@ static void clear_exclusive(void *cxlds) static void unregister_nvdimm(void *nvdimm) { - struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); - struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; - struct cxl_pmem_region *cxlr_pmem; - unsigned long index; + nvdimm_delete(nvdimm); +} - device_lock(&cxl_nvb->dev); - dev_set_drvdata(&cxl_nvd->dev, NULL); - xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { - get_device(&cxlr_pmem->dev); - device_unlock(&cxl_nvb->dev); +static ssize_t provider_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); - device_release_driver(&cxlr_pmem->dev); - put_device(&cxlr_pmem->dev); + return sysfs_emit(buf, "%s\n", dev_name(&cxl_nvd->dev)); +} +static DEVICE_ATTR_RO(provider); - device_lock(&cxl_nvb->dev); - } - device_unlock(&cxl_nvb->dev); +static ssize_t id_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct nvdimm *nvdimm = to_nvdimm(dev); + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_dev_state *cxlds = cxl_nvd->cxlmd->cxlds; - nvdimm_delete(nvdimm); - cxl_nvd->bridge = NULL; + return sysfs_emit(buf, "%lld\n", cxlds->serial); } +static DEVICE_ATTR_RO(id); + +static struct attribute *cxl_dimm_attributes[] = { + &dev_attr_id.attr, + &dev_attr_provider.attr, + NULL +}; + +static const struct attribute_group cxl_dimm_attribute_group = { + .name = "cxl", + .attrs = cxl_dimm_attributes, +}; + +static const struct attribute_group *cxl_dimm_attribute_groups[] = { + &cxl_dimm_attribute_group, + NULL +}; static int cxl_nvdimm_probe(struct device *dev) { struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_nvdimm_bridge *cxl_nvb = cxlmd->cxl_nvb; unsigned long flags = 0, cmd_mask = 0; struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_nvdimm_bridge *cxl_nvb; struct nvdimm *nvdimm; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(dev); - if (!cxl_nvb) - return -ENXIO; - - device_lock(&cxl_nvb->dev); - if (!cxl_nvb->nvdimm_bus) { - rc = -ENXIO; - goto out; - } - set_exclusive_cxl_commands(cxlds, exclusive_cmds); rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds); if (rc) - goto out; + return rc; set_bit(NDD_LABELING, &flags); set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask); - nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags, - cmd_mask, 0, NULL); - if (!nvdimm) { - rc = -ENOMEM; - goto out; - } + nvdimm = __nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, + cxl_dimm_attribute_groups, flags, + cmd_mask, 0, NULL, cxl_nvd->dev_id, + cxl_security_ops, NULL); + if (!nvdimm) + return -ENOMEM; dev_set_drvdata(dev, nvdimm); - cxl_nvd->bridge = cxl_nvb; - rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); -out: - device_unlock(&cxl_nvb->dev); - put_device(&cxl_nvb->dev); - - return rc; + return devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); } static struct cxl_driver cxl_nvdimm_driver = { .name = "cxl_nvdimm", .probe = cxl_nvdimm_probe, .id = CXL_DEVICE_NVDIMM, + .drv = { + .suppress_bind_attrs = true, + }, }; static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds, @@ -121,6 +119,7 @@ static int 
cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, unsigned int buf_len) { struct cxl_mbox_get_lsa get_lsa; + struct cxl_mbox_cmd mbox_cmd; int rc; if (sizeof(*cmd) > buf_len) @@ -132,9 +131,15 @@ static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, .offset = cpu_to_le32(cmd->in_offset), .length = cpu_to_le32(cmd->in_length), }; + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_LSA, + .payload_in = &get_lsa, + .size_in = sizeof(get_lsa), + .size_out = cmd->in_length, + .payload_out = cmd->out_buf, + }; - rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa, - sizeof(get_lsa), cmd->out_buf, cmd->in_length); + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); cmd->status = 0; return rc; @@ -145,6 +150,7 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, unsigned int buf_len) { struct cxl_mbox_set_lsa *set_lsa; + struct cxl_mbox_cmd mbox_cmd; int rc; if (sizeof(*cmd) > buf_len) @@ -163,10 +169,13 @@ static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, .offset = cpu_to_le32(cmd->in_offset), }; memcpy(set_lsa->data, cmd->in_buf, cmd->in_length); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_LSA, + .payload_in = set_lsa, + .size_in = struct_size(set_lsa, data, cmd->in_length), + }; - rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa, - struct_size(set_lsa, data, cmd->in_length), - NULL, 0); + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); /* * Set "firmware" status (4-packed bytes at the end of the input @@ -216,204 +225,48 @@ static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc, return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len); } -static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb) -{ - if (cxl_nvb->nvdimm_bus) - return true; - cxl_nvb->nvdimm_bus = - nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc); - return cxl_nvb->nvdimm_bus != NULL; -} - -static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb) -{ - struct cxl_nvdimm *cxl_nvd; - - if (!is_cxl_nvdimm(dev)) - return 0; - - cxl_nvd = to_cxl_nvdimm(dev); - if (cxl_nvd->bridge != cxl_nvb) - return 0; - - device_release_driver(dev); - return 0; -} - -static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb) -{ - struct cxl_pmem_region *cxlr_pmem; - - if (!is_cxl_pmem_region(dev)) - return 0; - - cxlr_pmem = to_cxl_pmem_region(dev); - if (cxlr_pmem->bridge != cxl_nvb) - return 0; - - device_release_driver(dev); - return 0; -} - -static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb, - struct nvdimm_bus *nvdimm_bus) +static void unregister_nvdimm_bus(void *_cxl_nvb) { - if (!nvdimm_bus) - return; + struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb; + struct nvdimm_bus *nvdimm_bus = cxl_nvb->nvdimm_bus; - /* - * Set the state of cxl_nvdimm devices to unbound / idle before - * nvdimm_bus_unregister() rips the nvdimm objects out from - * underneath them. 
- */ - bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, - cxl_pmem_region_release_driver); - bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, - cxl_nvdimm_release_driver); + cxl_nvb->nvdimm_bus = NULL; nvdimm_bus_unregister(nvdimm_bus); } -static void cxl_nvb_update_state(struct work_struct *work) -{ - struct cxl_nvdimm_bridge *cxl_nvb = - container_of(work, typeof(*cxl_nvb), state_work); - struct nvdimm_bus *victim_bus = NULL; - bool release = false, rescan = false; - - device_lock(&cxl_nvb->dev); - switch (cxl_nvb->state) { - case CXL_NVB_ONLINE: - if (!online_nvdimm_bus(cxl_nvb)) { - dev_err(&cxl_nvb->dev, - "failed to establish nvdimm bus\n"); - release = true; - } else - rescan = true; - break; - case CXL_NVB_OFFLINE: - case CXL_NVB_DEAD: - victim_bus = cxl_nvb->nvdimm_bus; - cxl_nvb->nvdimm_bus = NULL; - break; - default: - break; - } - device_unlock(&cxl_nvb->dev); - - if (release) - device_release_driver(&cxl_nvb->dev); - if (rescan) { - int rc = bus_rescan_devices(&cxl_bus_type); - - dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc); - } - offline_nvdimm_bus(cxl_nvb, victim_bus); - - put_device(&cxl_nvb->dev); -} - -static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb) -{ - /* - * Take a reference that the workqueue will drop if new work - * gets queued. - */ - get_device(&cxl_nvb->dev); - if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work)) - put_device(&cxl_nvb->dev); -} - -static void cxl_nvdimm_bridge_remove(struct device *dev) -{ - struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev); - - if (cxl_nvb->state == CXL_NVB_ONLINE) - cxl_nvb->state = CXL_NVB_OFFLINE; - cxl_nvdimm_bridge_state_work(cxl_nvb); -} - static int cxl_nvdimm_bridge_probe(struct device *dev) { struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev); - if (cxl_nvb->state == CXL_NVB_DEAD) - return -ENXIO; - - if (cxl_nvb->state == CXL_NVB_NEW) { - cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) { - .provider_name = "CXL", - .module = THIS_MODULE, - .ndctl = cxl_pmem_ctl, - }; + cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) { + .provider_name = "CXL", + .module = THIS_MODULE, + .ndctl = cxl_pmem_ctl, + }; - INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state); - } + cxl_nvb->nvdimm_bus = + nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc); - cxl_nvb->state = CXL_NVB_ONLINE; - cxl_nvdimm_bridge_state_work(cxl_nvb); + if (!cxl_nvb->nvdimm_bus) + return -ENOMEM; - return 0; + return devm_add_action_or_reset(dev, unregister_nvdimm_bus, cxl_nvb); } static struct cxl_driver cxl_nvdimm_bridge_driver = { .name = "cxl_nvdimm_bridge", .probe = cxl_nvdimm_bridge_probe, - .remove = cxl_nvdimm_bridge_remove, .id = CXL_DEVICE_NVDIMM_BRIDGE, + .drv = { + .suppress_bind_attrs = true, + }, }; -static int match_cxl_nvdimm(struct device *dev, void *data) -{ - return is_cxl_nvdimm(dev); -} - static void unregister_nvdimm_region(void *nd_region) { nvdimm_region_delete(nd_region); } -static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, - struct cxl_pmem_region *cxlr_pmem) -{ - int rc; - - rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, - cxlr_pmem, GFP_KERNEL); - if (rc) - return rc; - - get_device(&cxlr_pmem->dev); - return 0; -} - -static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, - struct cxl_pmem_region *cxlr_pmem) -{ - /* - * It is possible this is called without a corresponding - * cxl_nvdimm_add_region for @cxlr_pmem - */ - cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); - if (cxlr_pmem) - put_device(&cxlr_pmem->dev); -} - 
-static void release_mappings(void *data) -{ - int i; - struct cxl_pmem_region *cxlr_pmem = data; - struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; - - device_lock(&cxl_nvb->dev); - for (i = 0; i < cxlr_pmem->nr_mappings; i++) { - struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; - struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; - - cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); - } - device_unlock(&cxl_nvb->dev); -} - static void cxlr_pmem_remove_resource(void *res) { remove_resource(res); @@ -429,8 +282,8 @@ static int cxl_pmem_region_probe(struct device *dev) struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev); struct cxl_region *cxlr = cxlr_pmem->cxlr; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr->cxl_nvb; struct cxl_pmem_region_info *info = NULL; - struct cxl_nvdimm_bridge *cxl_nvb; struct nd_interleave_set *nd_set; struct nd_region_desc ndr_desc; struct cxl_nvdimm *cxl_nvd; @@ -438,28 +291,12 @@ static int cxl_pmem_region_probe(struct device *dev) struct resource *res; int rc, i = 0; - cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev); - if (!cxl_nvb) { - dev_dbg(dev, "bridge not found\n"); - return -ENXIO; - } - cxlr_pmem->bridge = cxl_nvb; - - device_lock(&cxl_nvb->dev); - if (!cxl_nvb->nvdimm_bus) { - dev_dbg(dev, "nvdimm bus not found\n"); - rc = -ENXIO; - goto out_nvb; - } - memset(&mappings, 0, sizeof(mappings)); memset(&ndr_desc, 0, sizeof(ndr_desc)); res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); - if (!res) { - rc = -ENOMEM; - goto out_nvb; - } + if (!res) + return -ENOMEM; res->name = "Persistent Memory"; res->start = cxlr_pmem->hpa_range.start; @@ -469,11 +306,11 @@ static int cxl_pmem_region_probe(struct device *dev) rc = insert_resource(&iomem_resource, res); if (rc) - goto out_nvb; + return rc; rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); if (rc) - goto out_nvb; + return rc; ndr_desc.res = res; ndr_desc.provider_data = cxlr_pmem; @@ -487,43 +324,23 @@ static int cxl_pmem_region_probe(struct device *dev) } nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); - if (!nd_set) { - rc = -ENOMEM; - goto out_nvb; - } + if (!nd_set) + return -ENOMEM; ndr_desc.memregion = cxlr->id; set_bit(ND_REGION_CXL, &ndr_desc.flags); set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); - if (!info) { - rc = -ENOMEM; - goto out_nvb; - } - - rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); - if (rc) - goto out_nvd; + if (!info) + return -ENOMEM; for (i = 0; i < cxlr_pmem->nr_mappings; i++) { struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; struct cxl_memdev *cxlmd = m->cxlmd; struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct device *d; - - d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm); - if (!d) { - dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, - dev_name(&cxlmd->dev)); - rc = -ENODEV; - goto out_nvd; - } - /* safe to drop ref now with bridge lock held */ - put_device(d); - - cxl_nvd = to_cxl_nvdimm(d); + cxl_nvd = cxlmd->cxl_nvd; nvdimm = dev_get_drvdata(&cxl_nvd->dev); if (!nvdimm) { dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, @@ -532,14 +349,6 @@ static int cxl_pmem_region_probe(struct device *dev) goto out_nvd; } - /* - * Pin the region per nvdimm device as those may be released - * out-of-order with respect to the region, and a single nvdimm - * maybe associated with multiple regions - */ - rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); - if (rc) 
- goto out_nvd; m->cxl_nvd = cxl_nvd; mappings[i] = (struct nd_mapping_desc) { .nvdimm = nvdimm, @@ -572,9 +381,6 @@ static int cxl_pmem_region_probe(struct device *dev) cxlr_pmem->nd_region); out_nvd: kfree(info); -out_nvb: - device_unlock(&cxl_nvb->dev); - put_device(&cxl_nvb->dev); return rc; } @@ -583,33 +389,11 @@ static struct cxl_driver cxl_pmem_region_driver = { .name = "cxl_pmem_region", .probe = cxl_pmem_region_probe, .id = CXL_DEVICE_PMEM_REGION, + .drv = { + .suppress_bind_attrs = true, + }, }; -/* - * Return all bridges to the CXL_NVB_NEW state to invalidate any - * ->state_work referring to the now destroyed cxl_pmem_wq. - */ -static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) -{ - struct cxl_nvdimm_bridge *cxl_nvb; - - if (!is_cxl_nvdimm_bridge(dev)) - return 0; - - cxl_nvb = to_cxl_nvdimm_bridge(dev); - device_lock(dev); - cxl_nvb->state = CXL_NVB_NEW; - device_unlock(dev); - - return 0; -} - -static void destroy_cxl_pmem_wq(void) -{ - destroy_workqueue(cxl_pmem_wq); - bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset); -} - static __init int cxl_pmem_init(void) { int rc; @@ -617,13 +401,9 @@ static __init int cxl_pmem_init(void) set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds); - cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0); - if (!cxl_pmem_wq) - return -ENXIO; - rc = cxl_driver_register(&cxl_nvdimm_bridge_driver); if (rc) - goto err_bridge; + return rc; rc = cxl_driver_register(&cxl_nvdimm_driver); if (rc) @@ -639,8 +419,6 @@ err_region: cxl_driver_unregister(&cxl_nvdimm_driver); err_nvdimm: cxl_driver_unregister(&cxl_nvdimm_bridge_driver); -err_bridge: - destroy_cxl_pmem_wq(); return rc; } @@ -649,7 +427,6 @@ static __exit void cxl_pmem_exit(void) cxl_driver_unregister(&cxl_pmem_region_driver); cxl_driver_unregister(&cxl_nvdimm_driver); cxl_driver_unregister(&cxl_nvdimm_bridge_driver); - destroy_cxl_pmem_wq(); } MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/security.c b/drivers/cxl/security.c new file mode 100644 index 000000000000..4ad4bda2d18e --- /dev/null +++ b/drivers/cxl/security.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ +#include <linux/libnvdimm.h> +#include <asm/unaligned.h> +#include <linux/module.h> +#include <linux/async.h> +#include <linux/slab.h> +#include <linux/memregion.h> +#include "cxlmem.h" +#include "cxl.h" + +static unsigned long cxl_pmem_get_security_flags(struct nvdimm *nvdimm, + enum nvdimm_passphrase_type ptype) +{ + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + unsigned long security_flags = 0; + struct cxl_get_security_output { + __le32 flags; + } out; + struct cxl_mbox_cmd mbox_cmd; + u32 sec_out; + int rc; + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_GET_SECURITY_STATE, + .size_out = sizeof(out), + .payload_out = &out, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc < 0) + return 0; + + sec_out = le32_to_cpu(out.flags); + if (ptype == NVDIMM_MASTER) { + if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) + set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags); + else + set_bit(NVDIMM_SECURITY_DISABLED, &security_flags); + if (sec_out & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) + set_bit(NVDIMM_SECURITY_FROZEN, &security_flags); + return security_flags; + } + + if (sec_out & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + if (sec_out & CXL_PMEM_SEC_STATE_FROZEN || + sec_out & CXL_PMEM_SEC_STATE_USER_PLIMIT) + set_bit(NVDIMM_SECURITY_FROZEN, &security_flags); + + if (sec_out & CXL_PMEM_SEC_STATE_LOCKED) + set_bit(NVDIMM_SECURITY_LOCKED, &security_flags); + else + set_bit(NVDIMM_SECURITY_UNLOCKED, &security_flags); + } else { + set_bit(NVDIMM_SECURITY_DISABLED, &security_flags); + } + + return security_flags; +} + +static int cxl_pmem_security_change_key(struct nvdimm *nvdimm, + const struct nvdimm_key_data *old_data, + const struct nvdimm_key_data *new_data, + enum nvdimm_passphrase_type ptype) +{ + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mbox_cmd mbox_cmd; + struct cxl_set_pass set_pass; + + set_pass = (struct cxl_set_pass) { + .type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER : + CXL_PMEM_SEC_PASS_USER, + }; + memcpy(set_pass.old_pass, old_data->data, NVDIMM_PASSPHRASE_LEN); + memcpy(set_pass.new_pass, new_data->data, NVDIMM_PASSPHRASE_LEN); + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_SET_PASSPHRASE, + .size_in = sizeof(set_pass), + .payload_in = &set_pass, + }; + + return cxl_internal_send_cmd(cxlds, &mbox_cmd); +} + +static int __cxl_pmem_security_disable(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data, + enum nvdimm_passphrase_type ptype) +{ + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_disable_pass dis_pass; + struct cxl_mbox_cmd mbox_cmd; + + dis_pass = (struct cxl_disable_pass) { + .type = ptype == NVDIMM_MASTER ? 
CXL_PMEM_SEC_PASS_MASTER : + CXL_PMEM_SEC_PASS_USER, + }; + memcpy(dis_pass.pass, key_data->data, NVDIMM_PASSPHRASE_LEN); + + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_DISABLE_PASSPHRASE, + .size_in = sizeof(dis_pass), + .payload_in = &dis_pass, + }; + + return cxl_internal_send_cmd(cxlds, &mbox_cmd); +} + +static int cxl_pmem_security_disable(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data) +{ + return __cxl_pmem_security_disable(nvdimm, key_data, NVDIMM_USER); +} + +static int cxl_pmem_security_disable_master(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data) +{ + return __cxl_pmem_security_disable(nvdimm, key_data, NVDIMM_MASTER); +} + +static int cxl_pmem_security_freeze(struct nvdimm *nvdimm) +{ + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mbox_cmd mbox_cmd = { + .opcode = CXL_MBOX_OP_FREEZE_SECURITY, + }; + + return cxl_internal_send_cmd(cxlds, &mbox_cmd); +} + +static int cxl_pmem_security_unlock(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data) +{ + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + u8 pass[NVDIMM_PASSPHRASE_LEN]; + struct cxl_mbox_cmd mbox_cmd; + int rc; + + memcpy(pass, key_data->data, NVDIMM_PASSPHRASE_LEN); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_UNLOCK, + .size_in = NVDIMM_PASSPHRASE_LEN, + .payload_in = pass, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc < 0) + return rc; + + return 0; +} + +static int cxl_pmem_security_passphrase_erase(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key, + enum nvdimm_passphrase_type ptype) +{ + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_mbox_cmd mbox_cmd; + struct cxl_pass_erase erase; + int rc; + + erase = (struct cxl_pass_erase) { + .type = ptype == NVDIMM_MASTER ? CXL_PMEM_SEC_PASS_MASTER : + CXL_PMEM_SEC_PASS_USER, + }; + memcpy(erase.pass, key->data, NVDIMM_PASSPHRASE_LEN); + mbox_cmd = (struct cxl_mbox_cmd) { + .opcode = CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE, + .size_in = sizeof(erase), + .payload_in = &erase, + }; + + rc = cxl_internal_send_cmd(cxlds, &mbox_cmd); + if (rc < 0) + return rc; + + return 0; +} + +static const struct nvdimm_security_ops __cxl_security_ops = { + .get_flags = cxl_pmem_get_security_flags, + .change_key = cxl_pmem_security_change_key, + .disable = cxl_pmem_security_disable, + .freeze = cxl_pmem_security_freeze, + .unlock = cxl_pmem_security_unlock, + .erase = cxl_pmem_security_passphrase_erase, + .disable_master = cxl_pmem_security_disable_master, +}; + +const struct nvdimm_security_ops *cxl_security_ops = &__cxl_security_ops; diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 5a29046e3319..79d93126453d 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -114,4 +114,16 @@ config NVDIMM_TEST_BUILD core devm_memremap_pages() implementation and other infrastructure. +config NVDIMM_SECURITY_TEST + bool "Enable NVDIMM security unit tests" + depends on NVDIMM_KEYS + help + The NVDIMM and CXL subsystems support unit testing of their device + security state machines. The NVDIMM_SECURITY_TEST option disables CPU + cache maintenance operations around events like secure erase and + overwrite. 
Also, when enabled, the NVDIMM subsystem core helps the unit + test implement a mock state machine. + + Select N if unsure. + endif diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c7c980577491..1fc081dcf631 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -349,11 +349,18 @@ static ssize_t available_slots_show(struct device *dev, } static DEVICE_ATTR_RO(available_slots); -__weak ssize_t security_show(struct device *dev, +ssize_t security_show(struct device *dev, struct device_attribute *attr, char *buf) { struct nvdimm *nvdimm = to_nvdimm(dev); + /* + * For the test version we need to poll the "hardware" in order + * to get the updated status for unlock testing. + */ + if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); + if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags)) return sprintf(buf, "overwrite\n"); if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 390123d293ea..88dc062af5f8 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -2,6 +2,7 @@ /* * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. */ +#include <linux/memregion.h> #include <linux/cpumask.h> #include <linux/module.h> #include <linux/device.h> @@ -100,6 +101,16 @@ static void nd_region_remove(struct device *dev) */ sysfs_put(nd_region->bb_state); nd_region->bb_state = NULL; + + /* + * Try to flush caches here since a disabled region may be subject to + * secure erase while disabled, and previous dirty data should not be + * written back to a new instance of the region. This only matters on + * bare metal where security commands are available, so silent failure + * here is ok. 
+ */ + if (cpu_cache_has_invalidate_memregion()) + cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY); } static int child_notify(struct device *dev, void *data) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e0875d369762..83dbf398ea84 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -59,9 +59,51 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm, return 0; } +static int nd_region_invalidate_memregion(struct nd_region *nd_region) +{ + int i, incoherent = 0; + + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + if (test_bit(NDD_INCOHERENT, &nvdimm->flags)) { + incoherent++; + break; + } + } + + if (!incoherent) + return 0; + + if (!cpu_cache_has_invalidate_memregion()) { + if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) { + dev_warn( + &nd_region->dev, + "Bypassing cpu_cache_invalidate_memergion() for testing!\n"); + goto out; + } else { + dev_err(&nd_region->dev, + "Failed to synchronize CPU cache state\n"); + return -ENXIO; + } + } + + cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY); +out: + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + struct nvdimm *nvdimm = nd_mapping->nvdimm; + + clear_bit(NDD_INCOHERENT, &nvdimm->flags); + } + + return 0; +} + int nd_region_activate(struct nd_region *nd_region) { - int i, j, num_flush = 0; + int i, j, rc, num_flush = 0; struct nd_region_data *ndrd; struct device *dev = &nd_region->dev; size_t flush_data_size = sizeof(void *); @@ -85,6 +127,10 @@ int nd_region_activate(struct nd_region *nd_region) } nvdimm_bus_unlock(&nd_region->dev); + rc = nd_region_invalidate_memregion(nd_region); + if (rc) + return rc; + ndrd = devm_kzalloc(dev, sizeof(*ndrd) + flush_data_size, GFP_KERNEL); if (!ndrd) return -ENOMEM; @@ -1222,3 +1268,5 @@ int nd_region_conflict(struct nd_region *nd_region, resource_size_t start, return device_for_each_child(&nvdimm_bus->dev, &ctx, region_conflict); } + +MODULE_IMPORT_NS(DEVMEM); diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c index 8aefb60c42ff..a03e3c45f297 100644 --- a/drivers/nvdimm/security.c +++ b/drivers/nvdimm/security.c @@ -177,6 +177,10 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) || !nvdimm->sec.flags) return -EIO; + /* cxl_test needs this to pre-populate the security state */ + if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); + /* No need to go further if security is disabled */ if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) return 0; @@ -204,6 +208,8 @@ static int __nvdimm_security_unlock(struct nvdimm *nvdimm) rc = nvdimm->sec.ops->unlock(nvdimm, data); dev_dbg(dev, "key: %d unlock: %s\n", key_serial(key), rc == 0 ? 
"success" : "fail"); + if (rc == 0) + set_bit(NDD_INCOHERENT, &nvdimm->flags); nvdimm_put_key(key); nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); @@ -239,7 +245,8 @@ static int check_security_state(struct nvdimm *nvdimm) return 0; } -static int security_disable(struct nvdimm *nvdimm, unsigned int keyid) +static int security_disable(struct nvdimm *nvdimm, unsigned int keyid, + enum nvdimm_passphrase_type pass_type) { struct device *dev = &nvdimm->dev; struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); @@ -250,8 +257,13 @@ static int security_disable(struct nvdimm *nvdimm, unsigned int keyid) /* The bus lock should be held at the top level of the call stack */ lockdep_assert_held(&nvdimm_bus->reconfig_mutex); - if (!nvdimm->sec.ops || !nvdimm->sec.ops->disable - || !nvdimm->sec.flags) + if (!nvdimm->sec.ops || !nvdimm->sec.flags) + return -EOPNOTSUPP; + + if (pass_type == NVDIMM_USER && !nvdimm->sec.ops->disable) + return -EOPNOTSUPP; + + if (pass_type == NVDIMM_MASTER && !nvdimm->sec.ops->disable_master) return -EOPNOTSUPP; rc = check_security_state(nvdimm); @@ -263,12 +275,21 @@ static int security_disable(struct nvdimm *nvdimm, unsigned int keyid) if (!data) return -ENOKEY; - rc = nvdimm->sec.ops->disable(nvdimm, data); - dev_dbg(dev, "key: %d disable: %s\n", key_serial(key), + if (pass_type == NVDIMM_MASTER) { + rc = nvdimm->sec.ops->disable_master(nvdimm, data); + dev_dbg(dev, "key: %d disable_master: %s\n", key_serial(key), rc == 0 ? "success" : "fail"); + } else { + rc = nvdimm->sec.ops->disable(nvdimm, data); + dev_dbg(dev, "key: %d disable: %s\n", key_serial(key), + rc == 0 ? "success" : "fail"); + } nvdimm_put_key(key); - nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); + if (pass_type == NVDIMM_MASTER) + nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + else + nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); return rc; } @@ -355,6 +376,8 @@ static int security_erase(struct nvdimm *nvdimm, unsigned int keyid, return -ENOKEY; rc = nvdimm->sec.ops->erase(nvdimm, data, pass_type); + if (rc == 0) + set_bit(NDD_INCOHERENT, &nvdimm->flags); dev_dbg(dev, "key: %d erase%s: %s\n", key_serial(key), pass_type == NVDIMM_MASTER ? "(master)" : "(user)", rc == 0 ? "success" : "fail"); @@ -389,6 +412,8 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid) return -ENOKEY; rc = nvdimm->sec.ops->overwrite(nvdimm, data); + if (rc == 0) + set_bit(NDD_INCOHERENT, &nvdimm->flags); dev_dbg(dev, "key: %d overwrite submission: %s\n", key_serial(key), rc == 0 ? 
"success" : "fail"); @@ -473,6 +498,7 @@ void nvdimm_security_overwrite_query(struct work_struct *work) #define OPS \ C( OP_FREEZE, "freeze", 1), \ C( OP_DISABLE, "disable", 2), \ + C( OP_DISABLE_MASTER, "disable_master", 2), \ C( OP_UPDATE, "update", 3), \ C( OP_ERASE, "erase", 2), \ C( OP_OVERWRITE, "overwrite", 2), \ @@ -524,7 +550,10 @@ ssize_t nvdimm_security_store(struct device *dev, const char *buf, size_t len) rc = nvdimm_security_freeze(nvdimm); } else if (i == OP_DISABLE) { dev_dbg(dev, "disable %u\n", key); - rc = security_disable(nvdimm, key); + rc = security_disable(nvdimm, key, NVDIMM_USER); + } else if (i == OP_DISABLE_MASTER) { + dev_dbg(dev, "disable_master %u\n", key); + rc = security_disable(nvdimm, key, NVDIMM_MASTER); } else if (i == OP_UPDATE || i == OP_MASTER_UPDATE) { dev_dbg(dev, "%s %u %u\n", ops[i].name, key, newkey); rc = security_update(nvdimm, key, newkey, i == OP_UPDATE diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 0a2eeb82cebd..6c250eb214e8 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -756,6 +756,13 @@ static ssize_t pci_write_config(struct file *filp, struct kobject *kobj, if (ret) return ret; + if (resource_is_exclusive(&dev->driver_exclusive_resource, off, + count)) { + pci_warn_once(dev, "%s: Unexpected write to kernel-exclusive config offset %llx", + current->comm, off); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } + if (off > dev->cfg_size) return 0; if (off + count > dev->cfg_size) { diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index e2d8a74f83c3..625f7b2cafe4 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -961,8 +961,14 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) if (aer) pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS, info->status); - if (pcie_aer_is_native(dev)) + if (pcie_aer_is_native(dev)) { + struct pci_driver *pdrv = dev->driver; + + if (pdrv && pdrv->err_handler && + pdrv->err_handler->cor_error_detected) + pdrv->err_handler->cor_error_detected(dev); pcie_clear_device_status(dev); + } } else if (info->severity == AER_NONFATAL) pcie_do_recovery(dev, pci_channel_io_normal, aer_root_reset); else if (info->severity == AER_FATAL) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index fdd7e56ddf40..334fd91f01e1 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2305,6 +2305,12 @@ struct pci_dev *pci_alloc_dev(struct pci_bus *bus) INIT_LIST_HEAD(&dev->bus_list); dev->dev.type = &pci_dev_type; dev->bus = pci_bus_get(bus); + dev->driver_exclusive_resource = (struct resource) { + .name = "PCI Exclusive", + .start = 0, + .end = -1, + }; + #ifdef CONFIG_PCI_MSI raw_spin_lock_init(&dev->msi_lock); #endif diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 27642ca15d93..4ae3c541ea6f 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -318,6 +318,8 @@ extern void __devm_release_region(struct device *dev, struct resource *parent, resource_size_t start, resource_size_t n); extern int iomem_map_sanity_check(resource_size_t addr, unsigned long size); extern bool iomem_is_exclusive(u64 addr); +extern bool resource_is_exclusive(struct resource *resource, u64 addr, + resource_size_t size); extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index c74acfa1a3fe..af38252ad704 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -35,6 +35,11 @@ enum { NDD_WORK_PENDING = 4, 
/* dimm supports namespace labels */ NDD_LABELING = 6, + /* + * dimm contents have changed requiring invalidation of CPU caches prior + * to activation of a region that includes this device + */ + NDD_INCOHERENT = 7, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, @@ -183,6 +188,8 @@ struct nvdimm_security_ops { int (*overwrite)(struct nvdimm *nvdimm, const struct nvdimm_key_data *key_data); int (*query_overwrite)(struct nvdimm *nvdimm); + int (*disable_master)(struct nvdimm *nvdimm, + const struct nvdimm_key_data *key_data); }; enum nvdimm_fwa_state { diff --git a/include/linux/memregion.h b/include/linux/memregion.h index c04c4fd2e209..bf83363807ac 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,6 +3,7 @@ #define _MEMREGION_H_ #include <linux/types.h> #include <linux/errno.h> +#include <linux/bug.h> struct memregion_info { int target_node; @@ -20,4 +21,41 @@ static inline void memregion_free(int id) { } #endif + +/** + * cpu_cache_invalidate_memregion - drop any CPU cached data for + * memregions described by @res_desc + * @res_desc: one of the IORES_DESC_* types + * + * Perform cache maintenance after a memory event / operation that + * changes the contents of physical memory in a cache-incoherent manner. + * For example, device memory technologies like NVDIMM and CXL have + * device secure erase, and dynamic region provision that can replace + * the memory mapped to a given physical address. + * + * Limit the functionality to architectures that have an efficient way + * to writeback and invalidate potentially terabytes of address space at + * once. Note that this routine may or may not write back any dirty + * contents while performing the invalidation. It is only exported for + * the explicit usage of the NVDIMM and CXL modules in the 'DEVMEM' + * symbol namespace on bare platforms. + * + * Returns 0 on success or negative error code on a failure to perform + * the cache maintenance. 
+ */ +#ifdef CONFIG_ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION +int cpu_cache_invalidate_memregion(int res_desc); +bool cpu_cache_has_invalidate_memregion(void); +#else +static inline bool cpu_cache_has_invalidate_memregion(void) +{ + return false; +} + +static inline int cpu_cache_invalidate_memregion(int res_desc) +{ + WARN_ON_ONCE("CPU cache invalidation required"); + return -ENXIO; +} +#endif #endif /* _MEMREGION_H_ */ diff --git a/include/linux/pci.h b/include/linux/pci.h index aa514b54c681..c0d939f3169c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -410,6 +410,7 @@ struct pci_dev { */ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ + struct resource driver_exclusive_resource; /* driver exclusive resource ranges */ bool match_driver; /* Skip attaching driver */ @@ -844,6 +845,9 @@ struct pci_error_handlers { /* Device driver may resume normal operations */ void (*resume)(struct pci_dev *dev); + + /* Allow device driver to record more details of a correctable error */ + void (*cor_error_detected)(struct pci_dev *dev); }; @@ -1408,6 +1412,21 @@ int pci_request_selected_regions(struct pci_dev *, int, const char *); int pci_request_selected_regions_exclusive(struct pci_dev *, int, const char *); void pci_release_selected_regions(struct pci_dev *, int); +static inline __must_check struct resource * +pci_request_config_region_exclusive(struct pci_dev *pdev, unsigned int offset, + unsigned int len, const char *name) +{ + return __request_region(&pdev->driver_exclusive_resource, offset, len, + name, IORESOURCE_EXCLUSIVE); +} + +static inline void pci_release_config_region(struct pci_dev *pdev, + unsigned int offset, + unsigned int len) +{ + __release_region(&pdev->driver_exclusive_resource, offset, len); +} + /* drivers/pci/bus.c */ void pci_add_resource(struct list_head *resources, struct resource *res); void pci_add_resource_offset(struct list_head *resources, struct resource *res, @@ -2505,6 +2524,7 @@ void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); #define pci_crit(pdev, fmt, arg...) dev_crit(&(pdev)->dev, fmt, ##arg) #define pci_err(pdev, fmt, arg...) dev_err(&(pdev)->dev, fmt, ##arg) #define pci_warn(pdev, fmt, arg...) dev_warn(&(pdev)->dev, fmt, ##arg) +#define pci_warn_once(pdev, fmt, arg...) dev_warn_once(&(pdev)->dev, fmt, ##arg) #define pci_notice(pdev, fmt, arg...) dev_notice(&(pdev)->dev, fmt, ##arg) #define pci_info(pdev, fmt, arg...) dev_info(&(pdev)->dev, fmt, ##arg) #define pci_dbg(pdev, fmt, arg...) 
dev_dbg(&(pdev)->dev, fmt, ##arg) diff --git a/include/trace/events/cxl.h b/include/trace/events/cxl.h new file mode 100644 index 000000000000..ad085a2534ef --- /dev/null +++ b/include/trace/events/cxl.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM cxl + +#if !defined(_CXL_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _CXL_EVENTS_H + +#include <linux/tracepoint.h> + +#define CXL_HEADERLOG_SIZE SZ_512 +#define CXL_HEADERLOG_SIZE_U32 SZ_512 / sizeof(u32) + +#define CXL_RAS_UC_CACHE_DATA_PARITY BIT(0) +#define CXL_RAS_UC_CACHE_ADDR_PARITY BIT(1) +#define CXL_RAS_UC_CACHE_BE_PARITY BIT(2) +#define CXL_RAS_UC_CACHE_DATA_ECC BIT(3) +#define CXL_RAS_UC_MEM_DATA_PARITY BIT(4) +#define CXL_RAS_UC_MEM_ADDR_PARITY BIT(5) +#define CXL_RAS_UC_MEM_BE_PARITY BIT(6) +#define CXL_RAS_UC_MEM_DATA_ECC BIT(7) +#define CXL_RAS_UC_REINIT_THRESH BIT(8) +#define CXL_RAS_UC_RSVD_ENCODE BIT(9) +#define CXL_RAS_UC_POISON BIT(10) +#define CXL_RAS_UC_RECV_OVERFLOW BIT(11) +#define CXL_RAS_UC_INTERNAL_ERR BIT(14) +#define CXL_RAS_UC_IDE_TX_ERR BIT(15) +#define CXL_RAS_UC_IDE_RX_ERR BIT(16) + +#define show_uc_errs(status) __print_flags(status, " | ", \ + { CXL_RAS_UC_CACHE_DATA_PARITY, "Cache Data Parity Error" }, \ + { CXL_RAS_UC_CACHE_ADDR_PARITY, "Cache Address Parity Error" }, \ + { CXL_RAS_UC_CACHE_BE_PARITY, "Cache Byte Enable Parity Error" }, \ + { CXL_RAS_UC_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ + { CXL_RAS_UC_MEM_DATA_PARITY, "Memory Data Parity Error" }, \ + { CXL_RAS_UC_MEM_ADDR_PARITY, "Memory Address Parity Error" }, \ + { CXL_RAS_UC_MEM_BE_PARITY, "Memory Byte Enable Parity Error" }, \ + { CXL_RAS_UC_MEM_DATA_ECC, "Memory Data ECC Error" }, \ + { CXL_RAS_UC_REINIT_THRESH, "REINIT Threshold Hit" }, \ + { CXL_RAS_UC_RSVD_ENCODE, "Received Unrecognized Encoding" }, \ + { CXL_RAS_UC_POISON, "Received Poison From Peer" }, \ + { CXL_RAS_UC_RECV_OVERFLOW, "Receiver Overflow" }, \ + { CXL_RAS_UC_INTERNAL_ERR, "Component Specific Error" }, \ + { CXL_RAS_UC_IDE_TX_ERR, "IDE Tx Error" }, \ + { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ +) + +TRACE_EVENT(cxl_aer_uncorrectable_error, + TP_PROTO(const struct device *dev, u32 status, u32 fe, u32 *hl), + TP_ARGS(dev, status, fe, hl), + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u32, status) + __field(u32, first_error) + __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) + ), + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->status = status; + __entry->first_error = fe; + /* + * Embed the 512B headerlog data for user app retrieval and + * parsing, but no need to print this in the trace buffer. 
+ */ + memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); + ), + TP_printk("%s: status: '%s' first_error: '%s'", + __get_str(dev_name), + show_uc_errs(__entry->status), + show_uc_errs(__entry->first_error) + ) +); + +#define CXL_RAS_CE_CACHE_DATA_ECC BIT(0) +#define CXL_RAS_CE_MEM_DATA_ECC BIT(1) +#define CXL_RAS_CE_CRC_THRESH BIT(2) +#define CLX_RAS_CE_RETRY_THRESH BIT(3) +#define CXL_RAS_CE_CACHE_POISON BIT(4) +#define CXL_RAS_CE_MEM_POISON BIT(5) +#define CXL_RAS_CE_PHYS_LAYER_ERR BIT(6) + +#define show_ce_errs(status) __print_flags(status, " | ", \ + { CXL_RAS_CE_CACHE_DATA_ECC, "Cache Data ECC Error" }, \ + { CXL_RAS_CE_MEM_DATA_ECC, "Memory Data ECC Error" }, \ + { CXL_RAS_CE_CRC_THRESH, "CRC Threshold Hit" }, \ + { CLX_RAS_CE_RETRY_THRESH, "Retry Threshold" }, \ + { CXL_RAS_CE_CACHE_POISON, "Received Cache Poison From Peer" }, \ + { CXL_RAS_CE_MEM_POISON, "Received Memory Poison From Peer" }, \ + { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ +) + +TRACE_EVENT(cxl_aer_correctable_error, + TP_PROTO(const struct device *dev, u32 status), + TP_ARGS(dev, status), + TP_STRUCT__entry( + __string(dev_name, dev_name(dev)) + __field(u32, status) + ), + TP_fast_assign( + __assign_str(dev_name, dev_name(dev)); + __entry->status = status; + ), + TP_printk("%s: status: '%s'", + __get_str(dev_name), show_ce_errs(__entry->status) + ) +); + +#endif /* _CXL_EVENTS_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE cxl +#include <trace/define_trace.h> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 57b8e2ffb1dd..82a03ea954af 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1119,6 +1119,7 @@ #define PCI_DOE_STATUS_DATA_OBJECT_READY 0x80000000 /* Data Object Ready */ #define PCI_DOE_WRITE 0x10 /* DOE Write Data Mailbox Register */ #define PCI_DOE_READ 0x14 /* DOE Read Data Mailbox Register */ +#define PCI_DOE_CAP_SIZEOF 0x18 /* Size of DOE register block */ /* DOE Data Object - note not actually registers */ #define PCI_DOE_DATA_OBJECT_HEADER_1_VID 0x0000ffff diff --git a/kernel/resource.c b/kernel/resource.c index 4c5e80b92f2f..82ed54cd1f0d 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1707,18 +1707,15 @@ static int strict_iomem_checks; * * Returns true if exclusive to the kernel, otherwise returns false. 
*/ -bool iomem_is_exclusive(u64 addr) +bool resource_is_exclusive(struct resource *root, u64 addr, resource_size_t size) { const unsigned int exclusive_system_ram = IORESOURCE_SYSTEM_RAM | IORESOURCE_EXCLUSIVE; bool skip_children = false, err = false; - int size = PAGE_SIZE; struct resource *p; - addr = addr & PAGE_MASK; - read_lock(&resource_lock); - for_each_resource(&iomem_resource, p, skip_children) { + for_each_resource(root, p, skip_children) { if (p->start >= addr + size) break; if (p->end < addr) { @@ -1757,6 +1754,12 @@ bool iomem_is_exclusive(u64 addr) return err; } +bool iomem_is_exclusive(u64 addr) +{ + return resource_is_exclusive(&iomem_resource, addr & PAGE_MASK, + PAGE_SIZE); +} + struct resource_entry *resource_list_create_entry(struct resource *res, size_t extra_size) { diff --git a/lib/Kconfig b/lib/Kconfig index 9bbf8a4b2108..9eb514abcdec 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -672,6 +672,9 @@ config ARCH_HAS_PMEM_API config MEMREGION bool +config ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION + bool + config ARCH_HAS_MEMREMAP_COMPAT_ALIGN bool diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 500be85729cc..0805f08af8b3 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -10,6 +10,7 @@ ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready ldflags-y += --wrap=cxl_hdm_decode_init +ldflags-y += --wrap=cxl_rcrb_to_component DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -26,6 +27,7 @@ cxl_acpi-y += config_check.o obj-m += cxl_pmem.o cxl_pmem-y := $(CXL_SRC)/pmem.o +cxl_pmem-y += $(CXL_SRC)/security.o cxl_pmem-y += config_check.o obj-m += cxl_port.o diff --git a/tools/testing/cxl/config_check.c b/tools/testing/cxl/config_check.c index de5e5b3652fd..c4c457e59841 100644 --- a/tools/testing/cxl/config_check.c +++ b/tools/testing/cxl/config_check.c @@ -10,4 +10,6 @@ void check(void) BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_BUS)); BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_ACPI)); BUILD_BUG_ON(!IS_MODULE(CONFIG_CXL_PMEM)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_CXL_REGION_INVALIDATION_TEST)); + BUILD_BUG_ON(!IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)); } diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 7edce12fd2ce..30ee680d38ff 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -11,11 +11,15 @@ #include <cxlmem.h> #include "mock.h" +static int interleave_arithmetic; + #define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_SINGLE_HOST 1 +#define NR_CXL_RCH 1 #define NR_CXL_ROOT_PORTS 2 #define NR_CXL_SWITCH_PORTS 2 #define NR_CXL_PORT_DECODERS 8 +#define NR_BRIDGES (NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + NR_CXL_RCH) static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; @@ -35,6 +39,8 @@ static struct platform_device *cxl_swd_single[NR_MEM_SINGLE]; struct platform_device *cxl_mem[NR_MEM_MULTI]; struct platform_device *cxl_mem_single[NR_MEM_SINGLE]; +static struct platform_device *cxl_rch[NR_CXL_RCH]; +static struct platform_device *cxl_rcd[NR_CXL_RCH]; static inline bool is_multi_bridge(struct device *dev) { @@ -57,7 +63,7 @@ static inline bool is_single_bridge(struct device *dev) } static struct acpi_device acpi0017_mock; -static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] = { +static struct acpi_device host_bridge[NR_BRIDGES] = { [0] = { .handle = &host_bridge[0], }, @@ -67,7 +73,9 @@ static struct acpi_device 
host_bridge[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST] [2] = { .handle = &host_bridge[2], }, - + [3] = { + .handle = &host_bridge[3], + }, }; static bool is_mock_dev(struct device *dev) @@ -80,6 +88,9 @@ static bool is_mock_dev(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) if (dev == &cxl_mem_single[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) + if (dev == &cxl_rcd[i]->dev) + return true; if (dev == &cxl_acpi->dev) return true; return false; @@ -101,7 +112,7 @@ static bool is_mock_adev(struct acpi_device *adev) static struct { struct acpi_table_cedt cedt; - struct acpi_cedt_chbs chbs[NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST]; + struct acpi_cedt_chbs chbs[NR_BRIDGES]; struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; @@ -122,6 +133,26 @@ static struct { struct acpi_cedt_cfmws cfmws; u32 target[1]; } cfmws4; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws5; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[1]; + } cfmws6; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[2]; + } cfmws7; + struct { + struct acpi_cedt_cfmws cfmws; + u32 target[4]; + } cfmws8; + struct { + struct acpi_cedt_cxims cxims; + u64 xormap_list[2]; + } cxims0; } __packed mock_cedt = { .cedt = { .header = { @@ -154,6 +185,14 @@ static struct { .uid = 2, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, + .chbs[3] = { + .header = { + .type = ACPI_CEDT_TYPE_CHBS, + .length = sizeof(mock_cedt.chbs[0]), + }, + .uid = 3, + .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL11, + }, .cfmws0 = { .cfmws = { .header = { @@ -229,6 +268,81 @@ static struct { }, .target = { 2 }, }, + .cfmws5 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws5), + }, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, + .qtg_id = 5, + .window_size = SZ_256M, + }, + .target = { 3 }, + }, + /* .cfmws6,7,8 use ACPI_CEDT_CFMWS_ARITHMETIC_XOR */ + .cfmws6 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws6), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 0, + .granularity = 4, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 0, + .window_size = SZ_256M * 8UL, + }, + .target = { 0, }, + }, + .cfmws7 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws7), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 1, + .granularity = 0, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 1, + .window_size = SZ_256M * 8UL, + }, + .target = { 0, 1, }, + }, + .cfmws8 = { + .cfmws = { + .header = { + .type = ACPI_CEDT_TYPE_CFMWS, + .length = sizeof(mock_cedt.cfmws8), + }, + .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, + .interleave_ways = 2, + .granularity = 0, + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + ACPI_CEDT_CFMWS_RESTRICT_PMEM, + .qtg_id = 0, + .window_size = SZ_256M * 16UL, + }, + .target = { 0, 1, 0, 1, }, + }, + .cxims0 = { + .cxims = { + .header = { + .type = ACPI_CEDT_TYPE_CXIMS, + .length = sizeof(mock_cedt.cxims0), + }, + .hbig = 0, + .nr_xormaps = 2, + }, + .xormap_list = { 0x404100, 0x808200, }, + }, }; struct acpi_cedt_cfmws *mock_cfmws[] = { @@ -237,6 +351,22 @@ struct acpi_cedt_cfmws *mock_cfmws[] = { [2] = &mock_cedt.cfmws2.cfmws, [3] = &mock_cedt.cfmws3.cfmws, [4] = 
&mock_cedt.cfmws4.cfmws, + [5] = &mock_cedt.cfmws5.cfmws, + /* Modulo Math above, XOR Math below */ + [6] = &mock_cedt.cfmws6.cfmws, + [7] = &mock_cedt.cfmws7.cfmws, + [8] = &mock_cedt.cfmws8.cfmws, +}; + +static int cfmws_start; +static int cfmws_end; +#define CFMWS_MOD_ARRAY_START 0 +#define CFMWS_MOD_ARRAY_END 5 +#define CFMWS_XOR_ARRAY_START 6 +#define CFMWS_XOR_ARRAY_END 8 + +struct acpi_cedt_cxims *mock_cxims[1] = { + [0] = &mock_cedt.cxims0.cxims, }; struct cxl_mock_res { @@ -262,11 +392,11 @@ static void depopulate_all_mock_resources(void) mutex_unlock(&mock_res_lock); } -static struct cxl_mock_res *alloc_mock_res(resource_size_t size) +static struct cxl_mock_res *alloc_mock_res(resource_size_t size, int align) { struct cxl_mock_res *res = kzalloc(sizeof(*res), GFP_KERNEL); struct genpool_data_align data = { - .align = SZ_256M, + .align = align, }; unsigned long phys; @@ -301,17 +431,17 @@ static int populate_cedt(void) else size = ACPI_CEDT_CHBS_LENGTH_CXL11; - res = alloc_mock_res(size); + res = alloc_mock_res(size, size); if (!res) return -ENOMEM; chbs->base = res->range.start; chbs->length = size; } - for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + for (i = cfmws_start; i <= cfmws_end; i++) { struct acpi_cedt_cfmws *window = mock_cfmws[i]; - res = alloc_mock_res(window->window_size); + res = alloc_mock_res(window->window_size, SZ_256M); if (!res) return -ENOMEM; window->base_hpa = res->range.start; @@ -320,10 +450,12 @@ static int populate_cedt(void) return 0; } +static bool is_mock_port(struct device *dev); + /* - * WARNING, this hack assumes the format of 'struct - * cxl_cfmws_context' and 'struct cxl_chbs_context' share the property that - * the first struct member is the device being probed by the cxl_acpi + * WARNING, this hack assumes the format of 'struct cxl_cfmws_context' + * and 'struct cxl_chbs_context' share the property that the first + * struct member is a cxl_test device being probed by the cxl_acpi * driver. 
*/ struct cxl_cedt_context { @@ -340,7 +472,7 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, unsigned long end; int i; - if (dev != &cxl_acpi->dev) + if (!is_mock_port(dev) && !is_mock_dev(dev)) return acpi_table_parse_cedt(id, handler_arg, arg); if (id == ACPI_CEDT_TYPE_CHBS) @@ -351,12 +483,19 @@ static int mock_acpi_table_parse_cedt(enum acpi_cedt_type id, } if (id == ACPI_CEDT_TYPE_CFMWS) - for (i = 0; i < ARRAY_SIZE(mock_cfmws); i++) { + for (i = cfmws_start; i <= cfmws_end; i++) { h = (union acpi_subtable_headers *) mock_cfmws[i]; end = (unsigned long) h + mock_cfmws[i]->header.length; handler_arg(h, arg, end); } + if (id == ACPI_CEDT_TYPE_CXIMS) + for (i = 0; i < ARRAY_SIZE(mock_cxims); i++) { + h = (union acpi_subtable_headers *)mock_cxims[i]; + end = (unsigned long)h + mock_cxims[i]->header.length; + handler_arg(h, arg, end); + } + return 0; } @@ -370,6 +509,10 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_hb_single); i++) if (dev == &cxl_hb_single[i]->dev) return true; + for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) + if (dev == &cxl_rch[i]->dev) + return true; + return false; } @@ -439,14 +582,21 @@ mock_acpi_evaluate_integer(acpi_handle handle, acpi_string pathname, return AE_OK; } -static struct pci_bus mock_pci_bus[NR_CXL_HOST_BRIDGES]; -static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { +static struct pci_bus mock_pci_bus[NR_BRIDGES]; +static struct acpi_pci_root mock_pci_root[ARRAY_SIZE(mock_pci_bus)] = { [0] = { .bus = &mock_pci_bus[0], }, [1] = { .bus = &mock_pci_bus[1], }, + [2] = { + .bus = &mock_pci_bus[2], + }, + [3] = { + .bus = &mock_pci_bus[3], + }, + }; static bool is_mock_bus(struct pci_bus *bus) @@ -634,7 +784,6 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) static int mock_cxl_port_enumerate_dports(struct cxl_port *port) { - struct device *dev = &port->dev; struct platform_device **array; int i, array_size; @@ -684,19 +833,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - if (IS_ERR(dport)) { - dev_err(dev, "failed to add dport: %s (%ld)\n", - dev_name(&pdev->dev), PTR_ERR(dport)); + if (IS_ERR(dport)) return PTR_ERR(dport); - } - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, - dev_name(&pdev->dev)); } return 0; } +resource_size_t mock_cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + dev_dbg(dev, "rcrb: %pa which: %d\n", &rcrb, which); + + return (resource_size_t) which + 1; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -705,6 +857,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .is_mock_dev = is_mock_dev, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, + .cxl_rcrb_to_component = mock_cxl_rcrb_to_component, .acpi_pci_find_root = mock_acpi_pci_find_root, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .devm_cxl_setup_hdm = mock_cxl_setup_hdm, @@ -729,6 +882,87 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_512G (SZ_64G * 8) #endif +static __init int cxl_rch_init(void) +{ + int rc, i; + + for (i = 0; i < ARRAY_SIZE(cxl_rch); i++) { + int idx = NR_CXL_HOST_BRIDGES + NR_CXL_SINGLE_HOST + i; + struct acpi_device *adev = &host_bridge[idx]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_host_bridge", idx); + if (!pdev) + goto err_bridge; + 
+ mock_companion(adev, &pdev->dev); + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_bridge; + } + + cxl_rch[i] = pdev; + mock_pci_bus[idx].bridge = &pdev->dev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "firmware_node"); + if (rc) + goto err_bridge; + } + + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { + int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; + struct platform_device *rch = cxl_rch[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_rcd", idx); + if (!pdev) + goto err_mem; + pdev->dev.parent = &rch->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_rcd[i] = pdev; + } + + return 0; + +err_mem: + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); +err_bridge: + for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_rch[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "firmware_node"); + platform_device_unregister(cxl_rch[i]); + } + + return rc; +} + +static void cxl_rch_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); + for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_rch[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "firmware_node"); + platform_device_unregister(cxl_rch[i]); + } +} + static __init int cxl_single_init(void) { int i, rc; @@ -751,6 +985,7 @@ static __init int cxl_single_init(void) } cxl_hb_single[i] = pdev; + mock_pci_bus[i + NR_CXL_HOST_BRIDGES].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); if (rc) @@ -897,6 +1132,16 @@ static __init int cxl_test_init(void) if (rc) goto err_gen_pool_add; + if (interleave_arithmetic == 1) { + cfmws_start = CFMWS_XOR_ARRAY_START; + cfmws_end = CFMWS_XOR_ARRAY_END; + dev_dbg(NULL, "cxl_test loading xor math option\n"); + } else { + cfmws_start = CFMWS_MOD_ARRAY_START; + cfmws_end = CFMWS_MOD_ARRAY_END; + dev_dbg(NULL, "cxl_test loading modulo math option\n"); + } + rc = populate_cedt(); if (rc) goto err_populate; @@ -917,6 +1162,7 @@ static __init int cxl_test_init(void) } cxl_host_bridge[i] = pdev; + mock_pci_bus[i].bridge = &pdev->dev; rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, "physical_node"); if (rc) @@ -999,9 +1245,13 @@ static __init int cxl_test_init(void) if (rc) goto err_mem; + rc = cxl_rch_init(); + if (rc) + goto err_single; + cxl_acpi = platform_device_alloc("cxl_acpi", 0); if (!cxl_acpi) - goto err_single; + goto err_rch; mock_companion(&acpi0017_mock, &cxl_acpi->dev); acpi0017_mock.dev.bus = &platform_bus_type; @@ -1014,6 +1264,8 @@ static __init int cxl_test_init(void) err_add: platform_device_put(cxl_acpi); +err_rch: + cxl_rch_exit(); err_single: cxl_single_exit(); err_mem: @@ -1051,6 +1303,7 @@ static __exit void cxl_test_exit(void) int i; platform_device_unregister(cxl_acpi); + cxl_rch_exit(); cxl_single_exit(); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); @@ -1073,6 +1326,8 @@ static __exit void cxl_test_exit(void) unregister_cxl_mock_ops(&cxl_mock_ops); } +module_param(interleave_arithmetic, int, 0000); +MODULE_PARM_DESC(interleave_arithmetic, "Modulo:0, XOR:1"); module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 
aa2df3a15051..5e4ecd93f1d2 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -65,6 +65,18 @@ static struct { }, }; +#define PASS_TRY_LIMIT 3 + +struct cxl_mockmem_data { + void *lsa; + u32 security_state; + u8 user_pass[NVDIMM_PASSPHRASE_LEN]; + u8 master_pass[NVDIMM_PASSPHRASE_LEN]; + int user_limit; + int master_limit; + +}; + static int mock_gsl(struct cxl_mbox_cmd *cmd) { if (cmd->size_out < sizeof(mock_gsl_payload)) @@ -100,6 +112,24 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } +static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_identify id = { + .fw_revision = { "mock fw v1 " }, + .total_capacity = + cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + .volatile_capacity = + cpu_to_le64(DEV_SIZE / CXL_CAPACITY_MULTIPLIER), + }; + + if (cmd->size_out < sizeof(id)) + return -EINVAL; + + memcpy(cmd->payload_out, &id, sizeof(id)); + + return 0; +} + static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_identify id = { @@ -137,10 +167,334 @@ static int mock_partition_info(struct cxl_dev_state *cxlds, return 0; } +static int mock_get_security_state(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in) + return -EINVAL; + + if (cmd->size_out != sizeof(u32)) + return -EINVAL; + + memcpy(cmd->payload_out, &mdata->security_state, sizeof(u32)); + + return 0; +} + +static void master_plimit_check(struct cxl_mockmem_data *mdata) +{ + if (mdata->master_limit == PASS_TRY_LIMIT) + return; + mdata->master_limit++; + if (mdata->master_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PLIMIT; +} + +static void user_plimit_check(struct cxl_mockmem_data *mdata) +{ + if (mdata->user_limit == PASS_TRY_LIMIT) + return; + mdata->user_limit++; + if (mdata->user_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; +} + +static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_set_pass *set_pass; + + if (cmd->size_in != sizeof(*set_pass)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + set_pass = cmd->payload_in; + switch (set_pass->type) { + case CXL_PMEM_SEC_PASS_MASTER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + /* + * CXL spec rev3.0 8.2.9.8.6.2, The master pasphrase shall only be set in + * the security disabled state when the user passphrase is not set. 
+ */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (memcmp(mdata->master_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + memcpy(mdata->master_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN); + mdata->security_state |= CXL_PMEM_SEC_STATE_MASTER_PASS_SET; + return 0; + + case CXL_PMEM_SEC_PASS_USER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (memcmp(mdata->user_pass, set_pass->old_pass, NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + memcpy(mdata->user_pass, set_pass->new_pass, NVDIMM_PASSPHRASE_LEN); + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PASS_SET; + return 0; + + default: + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + } + return -EINVAL; +} + +static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_disable_pass *dis_pass; + + if (cmd->size_in != sizeof(*dis_pass)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + dis_pass = cmd->payload_in; + switch (dis_pass->type) { + case CXL_PMEM_SEC_PASS_MASTER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(dis_pass->pass, mdata->master_pass, NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->master_limit = 0; + memset(mdata->master_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~CXL_PMEM_SEC_STATE_MASTER_PASS_SET; + return 0; + + case CXL_PMEM_SEC_PASS_USER: + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(dis_pass->pass, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->user_limit = 0; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~(CXL_PMEM_SEC_STATE_USER_PASS_SET | + CXL_PMEM_SEC_STATE_LOCKED); + return 0; + + default: + cmd->return_code = CXL_MBOX_CMD_RC_INPUT; + return -EINVAL; + } + + return 0; +} + +static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in != 0) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) + return 0; + + mdata->security_state |= CXL_PMEM_SEC_STATE_FROZEN; + return 0; +} + +static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + + if (cmd->size_in != NVDIMM_PASSPHRASE_LEN) + return -EINVAL; + + if 
(cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (memcmp(cmd->payload_in, mdata->user_pass, NVDIMM_PASSPHRASE_LEN)) { + if (++mdata->user_limit == PASS_TRY_LIMIT) + mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED; + return 0; +} + +static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + struct cxl_pass_erase *erase; + + if (cmd->size_in != sizeof(*erase)) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + erase = cmd->payload_in; + if (mdata->security_state & CXL_PMEM_SEC_STATE_FROZEN) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PLIMIT && + erase->type == CXL_PMEM_SEC_PASS_USER) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PLIMIT && + erase->type == CXL_PMEM_SEC_PASS_MASTER) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + switch (erase->type) { + case CXL_PMEM_SEC_PASS_MASTER: + /* + * The spec does not clearly define the behavior of the scenario + * where a master passphrase is passed in while the master + * passphrase is not set and user passphrase is not set. The + * code will take the assumption that it will behave the same + * as a CXL secure erase command without passphrase (0x4401). + */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_MASTER_PASS_SET) { + if (memcmp(mdata->master_pass, erase->pass, + NVDIMM_PASSPHRASE_LEN)) { + master_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + mdata->master_limit = 0; + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + mdata->security_state &= ~CXL_PMEM_SEC_STATE_LOCKED; + } else { + /* + * CXL rev3 8.2.9.8.6.3 Disable Passphrase + * When master passphrase is disabled, the device shall + * return Invalid Input for the Passphrase Secure Erase + * command with master passphrase. + */ + return -EINVAL; + } + /* Scramble encryption keys so that data is effectively erased */ + break; + case CXL_PMEM_SEC_PASS_USER: + /* + * The spec does not clearly define the behavior of the scenario + * where a user passphrase is passed in while the user + * passphrase is not set. The code will take the assumption that + * it will behave the same as a CXL secure erase command without + * passphrase (0x4401). 
+ */ + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + if (memcmp(mdata->user_pass, erase->pass, + NVDIMM_PASSPHRASE_LEN)) { + user_plimit_check(mdata); + cmd->return_code = CXL_MBOX_CMD_RC_PASSPHRASE; + return -ENXIO; + } + mdata->user_limit = 0; + mdata->security_state &= ~CXL_PMEM_SEC_STATE_USER_PASS_SET; + memset(mdata->user_pass, 0, NVDIMM_PASSPHRASE_LEN); + } + + /* + * CXL rev3 Table 8-118 + * If user passphrase is not set or supported by device, current + * passphrase value is ignored. Will make the assumption that + * the operation will proceed as secure erase w/o passphrase + * since spec is not explicit. + */ + + /* Scramble encryption keys so that data is effectively erased */ + break; + default: + return -EINVAL; + } + + return 0; +} + static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlds->dev); + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + void *lsa = mdata->lsa; u32 offset, length; if (sizeof(*get_lsa) > cmd->size_in) @@ -159,7 +513,8 @@ static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; - void *lsa = dev_get_drvdata(cxlds->dev); + struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + void *lsa = mdata->lsa; u32 offset, length; if (sizeof(*set_lsa) > cmd->size_in) @@ -216,7 +571,10 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * rc = mock_get_log(cxlds, cmd); break; case CXL_MBOX_OP_IDENTIFY: - rc = mock_id(cxlds, cmd); + if (cxlds->rcd) + rc = mock_rcd_id(cxlds, cmd); + else + rc = mock_id(cxlds, cmd); break; case CXL_MBOX_OP_GET_LSA: rc = mock_get_lsa(cxlds, cmd); @@ -230,6 +588,24 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * case CXL_MBOX_OP_GET_HEALTH_INFO: rc = mock_health_info(cxlds, cmd); break; + case CXL_MBOX_OP_GET_SECURITY_STATE: + rc = mock_get_security_state(cxlds, cmd); + break; + case CXL_MBOX_OP_SET_PASSPHRASE: + rc = mock_set_passphrase(cxlds, cmd); + break; + case CXL_MBOX_OP_DISABLE_PASSPHRASE: + rc = mock_disable_passphrase(cxlds, cmd); + break; + case CXL_MBOX_OP_FREEZE_SECURITY: + rc = mock_freeze_security(cxlds, cmd); + break; + case CXL_MBOX_OP_UNLOCK: + rc = mock_unlock_security(cxlds, cmd); + break; + case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: + rc = mock_passphrase_secure_erase(cxlds, cmd); + break; default: break; } @@ -245,21 +621,32 @@ static void label_area_release(void *lsa) vfree(lsa); } +static bool is_rcd(struct platform_device *pdev) +{ + const struct platform_device_id *id = platform_get_device_id(pdev); + + return !!id->driver_data; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct cxl_memdev *cxlmd; struct cxl_dev_state *cxlds; - void *lsa; + struct cxl_mockmem_data *mdata; int rc; - lsa = vmalloc(LSA_SIZE); - if (!lsa) + mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); + if (!mdata) return -ENOMEM; - rc = devm_add_action_or_reset(dev, label_area_release, lsa); + dev_set_drvdata(dev, mdata); + + mdata->lsa = vmalloc(LSA_SIZE); + if (!mdata->lsa) + return -ENOMEM; + rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa); if (rc) return rc; - dev_set_drvdata(dev, lsa); cxlds = cxl_dev_state_create(dev); if (IS_ERR(cxlds)) @@ -268,6 +655,10 @@ static int 
cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; cxlds->payload_size = SZ_4K; + if (is_rcd(pdev)) { + cxlds->rcd = true; + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + } rc = cxl_enumerate_cmds(cxlds); if (rc) @@ -285,14 +676,51 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) - rc = devm_cxl_add_nvdimm(dev, cxlmd); - return 0; } +static ssize_t security_lock_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%u\n", + !!(mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED)); +} + +static ssize_t security_lock_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + u32 mask = CXL_PMEM_SEC_STATE_FROZEN | CXL_PMEM_SEC_STATE_USER_PLIMIT | + CXL_PMEM_SEC_STATE_MASTER_PLIMIT; + int val; + + if (kstrtoint(buf, 0, &val) < 0) + return -EINVAL; + + if (val == 1) { + if (!(mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET)) + return -ENXIO; + mdata->security_state |= CXL_PMEM_SEC_STATE_LOCKED; + mdata->security_state &= ~mask; + } else { + return -EINVAL; + } + return count; +} + +static DEVICE_ATTR_RW(security_lock); + +static struct attribute *cxl_mock_mem_attrs[] = { + &dev_attr_security_lock.attr, + NULL +}; +ATTRIBUTE_GROUPS(cxl_mock_mem); + static const struct platform_device_id cxl_mock_mem_ids[] = { - { .name = "cxl_mem", }, + { .name = "cxl_mem", 0 }, + { .name = "cxl_rcd", 1 }, { }, }; MODULE_DEVICE_TABLE(platform, cxl_mock_mem_ids); @@ -302,6 +730,7 @@ static struct platform_driver cxl_mock_mem_driver = { .id_table = cxl_mock_mem_ids, .driver = { .name = KBUILD_MODNAME, + .dev_groups = cxl_mock_mem_groups, }, }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index bce6a21df0d5..5dface08e0de 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -224,6 +224,25 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); +resource_size_t __wrap_cxl_rcrb_to_component(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which) +{ + int index; + resource_size_t component_reg_phys; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(dev)) + component_reg_phys = + ops->cxl_rcrb_to_component(dev, rcrb, which); + else + component_reg_phys = cxl_rcrb_to_component(dev, rcrb, which); + put_cxl_mock_ops(index); + + return component_reg_phys; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcrb_to_component, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 738f24e3988a..ef33f159375e 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -15,6 +15,9 @@ struct cxl_mock_ops { acpi_string pathname, struct acpi_object_list *arguments, unsigned long long *data); + resource_size_t (*cxl_rcrb_to_component)(struct device *dev, + resource_size_t rcrb, + enum cxl_rcrb which); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 5eb5c23b062f..8153251ea389 100644 --- 
a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -79,7 +79,6 @@ libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o libnvdimm-$(CONFIG_NVDIMM_KEYS) += $(NVDIMM_SRC)/security.o -libnvdimm-y += dimm_devs.o libnvdimm-y += libnvdimm_test.o libnvdimm-y += config_check.o diff --git a/tools/testing/nvdimm/dimm_devs.c b/tools/testing/nvdimm/dimm_devs.c deleted file mode 100644 index 57bd27dedf1f..000000000000 --- a/tools/testing/nvdimm/dimm_devs.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright Intel Corp. 2018 */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/nd.h> -#include "pmem.h" -#include "pfn.h" -#include "nd.h" -#include "nd-core.h" - -ssize_t security_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct nvdimm *nvdimm = to_nvdimm(dev); - - /* - * For the test version we need to poll the "hardware" in order - * to get the updated status for unlock testing. - */ - nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); - - if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags)) - return sprintf(buf, "disabled\n"); - if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags)) - return sprintf(buf, "unlocked\n"); - if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags)) - return sprintf(buf, "locked\n"); - return -ENOTTY; -} |
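
As a usage sketch (not part of the series above), the DEVMEM cache-maintenance helpers declared in include/linux/memregion.h are meant to be consumed the way drivers/nvdimm/region_devs.c does in this series: probe for the capability first, warn and continue only when the NVDIMM_SECURITY_TEST unit-test option is enabled, and otherwise fail the operation. The function name below is hypothetical; the helper calls, the IORES_DESC_PERSISTENT_MEMORY descriptor, and the DEVMEM symbol namespace are taken from the patches themselves.

/*
 * Hypothetical consumer of cpu_cache_has_invalidate_memregion() /
 * cpu_cache_invalidate_memregion(), mirroring the
 * nd_region_invalidate_memregion() pattern introduced above.
 */
#include <linux/memregion.h>
#include <linux/ioport.h>
#include <linux/device.h>
#include <linux/module.h>

static int example_flush_incoherent_region(struct device *dev)
{
        if (!cpu_cache_has_invalidate_memregion()) {
                if (IS_ENABLED(CONFIG_NVDIMM_SECURITY_TEST)) {
                        /* security unit tests opt out of cache maintenance */
                        dev_warn(dev, "bypassing CPU cache invalidation for testing\n");
                        return 0;
                }
                dev_err(dev, "failed to synchronize CPU cache state\n");
                return -ENXIO;
        }

        /* drop cached data for persistent memory ranges */
        return cpu_cache_invalidate_memregion(IORES_DESC_PERSISTENT_MEMORY);
}

/* The helpers are exported in the DEVMEM namespace and must be imported. */
MODULE_IMPORT_NS(DEVMEM);

The capability check matters because, per the memregion.h kernel-doc above, the helpers are only exported for bare-metal platforms that can efficiently write back and invalidate very large address ranges; callers on other platforms must treat the missing capability as an error or, for the unit tests, as an explicit bypass.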