summaryrefslogtreecommitdiffstats
path: root/drivers/cxl/core/pci.c
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2022-12-08 09:02:00 -0800
committerDan Williams <dan.j.williams@intel.com>2023-01-04 17:11:11 -0800
commit4a20bc3e207488064e08fc5d7220d6acf95c80dd (patch)
tree2e568943c101fc5108c7d025eafde5219048ae63 /drivers/cxl/core/pci.c
parent88603b6dc419445847923fcb7fe5080067a30f98 (diff)
downloadlinux-stable-4a20bc3e207488064e08fc5d7220d6acf95c80dd.tar.gz
linux-stable-4a20bc3e207488064e08fc5d7220d6acf95c80dd.tar.bz2
linux-stable-4a20bc3e207488064e08fc5d7220d6acf95c80dd.zip
cxl/pci: Move tracepoint definitions to drivers/cxl/core/
CXL is using tracepoints for reporting RAS capability register payloads for AER events, and has plans to use tracepoints for the output payload of Get Poison List and Get Event Records commands. For organization purposes it would be nice to keep those all under a single + local CXL trace system. This also organization also potentially helps in the future when CXL drivers expand beyond generic memory expanders, however that would also entail a move away from the expander-specific cxl_dev_state context, save that for later. Note that the powerpc-specific drivers/misc/cxl/ also defines a 'cxl' trace system, however, it is unlikely that a single platform will ever load both drivers simultaneously. Cc: Steven Rostedt <rostedt@goodmis.org> Tested-by: Alison Schofield <alison.schofield@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/167051869176.436579.9728373544811641087.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/cxl/core/pci.c')
-rw-r--r--drivers/cxl/core/pci.c112
1 files changed, 112 insertions, 0 deletions
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 57764e9cd19d..1d1492440287 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -9,6 +9,7 @@
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
+#include "trace.h"
/**
* DOC: cxl core pci
@@ -622,3 +623,114 @@ void read_cdat_data(struct cxl_port *port)
}
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
+
+void cxl_cor_error_detected(struct pci_dev *pdev)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ void __iomem *addr;
+ u32 status;
+
+ if (!cxlds->regs.ras)
+ return;
+
+ addr = cxlds->regs.ras + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
+ writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
+ trace_cxl_aer_correctable_error(dev, status);
+ }
+}
+EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, CXL);
+
+/* CXL spec rev3.0 8.2.4.16.1 */
+static void header_log_copy(struct cxl_dev_state *cxlds, u32 *log)
+{
+ void __iomem *addr;
+ u32 *log_addr;
+ int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);
+
+ addr = cxlds->regs.ras + CXL_RAS_HEADER_LOG_OFFSET;
+ log_addr = log;
+
+ for (i = 0; i < log_u32_size; i++) {
+ *log_addr = readl(addr);
+ log_addr++;
+ addr += sizeof(u32);
+ }
+}
+
+/*
+ * Log the state of the RAS status registers and prepare them to log the
+ * next error status. Return 1 if reset needed.
+ */
+static bool cxl_report_and_clear(struct cxl_dev_state *cxlds)
+{
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ u32 hl[CXL_HEADERLOG_SIZE_U32];
+ void __iomem *addr;
+ u32 status;
+ u32 fe;
+
+ if (!cxlds->regs.ras)
+ return false;
+
+ addr = cxlds->regs.ras + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
+ status = readl(addr);
+ if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
+ return false;
+
+ /* If multiple errors, log header points to first error from ctrl reg */
+ if (hweight32(status) > 1) {
+ addr = cxlds->regs.ras + CXL_RAS_CAP_CONTROL_OFFSET;
+ fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, readl(addr)));
+ } else {
+ fe = status;
+ }
+
+ header_log_copy(cxlds, hl);
+ trace_cxl_aer_uncorrectable_error(dev, status, fe, hl);
+ writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);
+
+ return true;
+}
+
+pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+ struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
+ struct cxl_memdev *cxlmd = cxlds->cxlmd;
+ struct device *dev = &cxlmd->dev;
+ bool ue;
+
+ /*
+ * A frozen channel indicates an impending reset which is fatal to
+ * CXL.mem operation, and will likely crash the system. On the off
+ * chance the situation is recoverable dump the status of the RAS
+ * capability registers and bounce the active state of the memdev.
+ */
+ ue = cxl_report_and_clear(cxlds);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ if (ue) {
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ }
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ dev_warn(&pdev->dev,
+ "%s: frozen state error detected, disable CXL.mem\n",
+ dev_name(dev));
+ device_release_driver(dev);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ dev_warn(&pdev->dev,
+ "failure state error detected, request disconnect\n");
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_error_detected, CXL);