scsi: lpfc: Change default IRQ model on AMD architectures

The current driver attempts to allocate an interrupt vector per CPU using the system's managed IRQ allocator (flag PCI_IRQ_AFFINITY). The system IRQ allocator will either provide the per-CPU vectors or return fewer vectors. When fewer vectors are returned, they are evenly spread between the NUMA nodes on the system.

When run on an AMD architecture, if interrupts occur on a CPU that is not in the same NUMA node as the adapter generating the interrupt, there are extreme costs and overheads in performance. Thus, if 1:1 vector allocation is used, or if the "balanced" vectors land in the other NUMA nodes, performance can be hit significantly.

A much more performant model is to allocate interrupts only on the CPUs that are in the NUMA node where the adapter resides. I/O completion is still performed by the CPU where the I/O was generated. Unfortunately, there is no flag to request that the managed IRQ subsystem allocate vectors only for the CPUs in the same NUMA node as the adapter.

On AMD architectures, revert the IRQ allocation to the normal (non-managed) style and then use irq_set_affinity_hint() to set the CPU affinity and disable user-space rebalancing.

Tie the support into CPU offline/online. If the CPU being offlined owns a vector, the vector is re-affinitized to one of the other CPUs on the same NUMA node. If there are no more CPUs on the NUMA node, all affinity is removed from the vector and the system determines where it is serviced. Similarly, when the CPU that owned a vector comes back online, the vector is re-affinitized to that CPU.

Link: https://lore.kernel.org/r/20191105005708.7399-10-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
author: James Smart <jsmart2021@gmail.com> 2019-11-04 16:57:06 -0800
committer: Martin K. Petersen <martin.petersen@oracle.com> 2019-11-06 00:04:04 -0500
commit: dcaa213679387e95a315dca05c57dbb15273703c (patch)
tree: 841877bd096a0e4d1d633e29b5213395604efceb /drivers/scsi/lpfc/lpfc_sli4.h
parent: 93a4d6f40198dffcca35d9a928c409f9290f1fe0 (diff)
download: linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.tar.gz
linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.tar.bz2
linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.zip
1 files changed, 15 insertions, 4 deletions
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index ef32159248d7..d963ca871383 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -41,8 +41,13 @@
 
 /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */
 #define LPFC_HBA_HDWQ_MIN	0
-#define LPFC_HBA_HDWQ_MAX	128
-#define LPFC_HBA_HDWQ_DEF	0
+#define LPFC_HBA_HDWQ_MAX	256
+#define LPFC_HBA_HDWQ_DEF	LPFC_HBA_HDWQ_MIN
+
+/* irq_chann range, values */
+#define LPFC_IRQ_CHANN_MIN	0
+#define LPFC_IRQ_CHANN_MAX	256
+#define LPFC_IRQ_CHANN_DEF	LPFC_IRQ_CHANN_MIN
 
 /* FCP MQ queue count limiting */
 #define LPFC_FCP_MQ_THRESHOLD_MIN	0
@@ -467,11 +472,17 @@ struct lpfc_hba;
 #define LPFC_SLI4_HANDLER_NAME_SZ	16
 struct lpfc_hba_eq_hdl {
 	uint32_t idx;
+	uint16_t irq;
 	char handler_name[LPFC_SLI4_HANDLER_NAME_SZ];
 	struct lpfc_hba *phba;
 	struct lpfc_queue *eq;
+	struct cpumask aff_mask;
 };
 
+#define lpfc_get_eq_hdl(eqidx) (&phba->sli4_hba.hba_eq_hdl[eqidx])
+#define lpfc_get_aff_mask(eqidx) (&phba->sli4_hba.hba_eq_hdl[eqidx].aff_mask)
+#define lpfc_get_irq(eqidx) (phba->sli4_hba.hba_eq_hdl[eqidx].irq)
+
 /*BB Credit recovery value*/
 struct lpfc_bbscn_params {
 	uint32_t word0;
@@ -561,11 +572,10 @@ struct lpfc_sli4_lnk_info {
 #define LPFC_SLI4_HANDLER_CNT		(LPFC_HBA_IO_CHAN_MAX+ \
 					 LPFC_FOF_IO_CHAN_NUM)
 
-/* Used for IRQ vector to CPU mapping */
+/* Used for tracking CPU mapping attributes */
 struct lpfc_vector_map_info {
 	uint16_t	phys_id;
 	uint16_t	core_id;
-	uint16_t	irq;
 	uint16_t	eq;
 	uint16_t	hdwq;
 	uint16_t	flag;
@@ -908,6 +918,7 @@ struct lpfc_sli4_hba {
 	struct lpfc_vector_map_info *cpu_map;
 	uint16_t num_possible_cpu;
 	uint16_t num_present_cpu;
+	struct cpumask numa_mask;
 	uint16_t curr_disp_cpu;
 	struct lpfc_eq_intr_info __percpu *eq_info;
 	uint32_t conf_trunk;
author	James Smart <jsmart2021@gmail.com>	2019-11-04 16:57:06 -0800
committer	Martin K. Petersen <martin.petersen@oracle.com>	2019-11-06 00:04:04 -0500
commit	dcaa213679387e95a315dca05c57dbb15273703c (patch)
tree	841877bd096a0e4d1d633e29b5213395604efceb /drivers/scsi/lpfc/lpfc_sli4.h
parent	93a4d6f40198dffcca35d9a928c409f9290f1fe0 (diff)
download	linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.tar.gz linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.tar.bz2 linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.zip