diff options
author | James Smart <jsmart2021@gmail.com> | 2019-11-04 16:57:06 -0800 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2019-11-06 00:04:04 -0500 |
commit | dcaa213679387e95a315dca05c57dbb15273703c (patch) | |
tree | 841877bd096a0e4d1d633e29b5213395604efceb /drivers/scsi/lpfc/lpfc_sli4.h | |
parent | 93a4d6f40198dffcca35d9a928c409f9290f1fe0 (diff) | |
download | linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.tar.gz linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.tar.bz2 linux-stable-dcaa213679387e95a315dca05c57dbb15273703c.zip |
scsi: lpfc: Change default IRQ model on AMD architectures
The current driver attempts to allocate an interrupt vector per cpu using
the system's managed IRQ allocator (flag PCI_IRQ_AFFINITY). The system IRQ
allocator will either provide the per-cpu vector, or return fewer
vectors. When fewer vectors are returned, they are evenly spread between the numa nodes
on the system. When run on an AMD architecture, if interrupts occur to a
cpu that is not in the same numa node as the adapter generating the
interrupt, there are extreme costs and overheads in performance. Thus, if
1:1 vector allocation is used, or if the "balanced" vectors land in the other
numa nodes, performance can be hit significantly.
A much more performant model is to allocate interrupts only on the cpus
that are in the numa node where the adapter resides. I/O completion is
still performed by the cpu where the I/O was generated. Unfortunately,
there is no flag to request that the managed IRQ subsystem allocate vectors
only for the CPUs in the same numa node as the adapter.
On AMD architecture, revert the irq allocation to the normal style
(non-managed) and then use irq_set_affinity_hint() to set the cpu
affinity and disable user-space rebalancing.
Tie the support into CPU offline/online. If the cpu being offlined owns a
vector, the vector is re-affinitized to one of the other CPUs on the same
numa node. If there are no more CPUs on the numa node, the vector has all
affinity removed, letting the system determine where it is serviced.
Similarly, when the cpu that owned a vector comes online, the vector is
reaffinitized to the cpu.
Link: https://lore.kernel.org/r/20191105005708.7399-10-jsmart2021@gmail.com
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_sli4.h')
-rw-r--r-- | drivers/scsi/lpfc/lpfc_sli4.h | 19 |
1 files changed, 15 insertions, 4 deletions
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index ef32159248d7..d963ca871383 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -41,8 +41,13 @@ /* Multi-queue arrangement for FCP EQ/CQ/WQ tuples */ #define LPFC_HBA_HDWQ_MIN 0 -#define LPFC_HBA_HDWQ_MAX 128 -#define LPFC_HBA_HDWQ_DEF 0 +#define LPFC_HBA_HDWQ_MAX 256 +#define LPFC_HBA_HDWQ_DEF LPFC_HBA_HDWQ_MIN + +/* irq_chann range, values */ +#define LPFC_IRQ_CHANN_MIN 0 +#define LPFC_IRQ_CHANN_MAX 256 +#define LPFC_IRQ_CHANN_DEF LPFC_IRQ_CHANN_MIN /* FCP MQ queue count limiting */ #define LPFC_FCP_MQ_THRESHOLD_MIN 0 @@ -467,11 +472,17 @@ struct lpfc_hba; #define LPFC_SLI4_HANDLER_NAME_SZ 16 struct lpfc_hba_eq_hdl { uint32_t idx; + uint16_t irq; char handler_name[LPFC_SLI4_HANDLER_NAME_SZ]; struct lpfc_hba *phba; struct lpfc_queue *eq; + struct cpumask aff_mask; }; +#define lpfc_get_eq_hdl(eqidx) (&phba->sli4_hba.hba_eq_hdl[eqidx]) +#define lpfc_get_aff_mask(eqidx) (&phba->sli4_hba.hba_eq_hdl[eqidx].aff_mask) +#define lpfc_get_irq(eqidx) (phba->sli4_hba.hba_eq_hdl[eqidx].irq) + /*BB Credit recovery value*/ struct lpfc_bbscn_params { uint32_t word0; @@ -561,11 +572,10 @@ struct lpfc_sli4_lnk_info { #define LPFC_SLI4_HANDLER_CNT (LPFC_HBA_IO_CHAN_MAX+ \ LPFC_FOF_IO_CHAN_NUM) -/* Used for IRQ vector to CPU mapping */ +/* Used for tracking CPU mapping attributes */ struct lpfc_vector_map_info { uint16_t phys_id; uint16_t core_id; - uint16_t irq; uint16_t eq; uint16_t hdwq; uint16_t flag; @@ -908,6 +918,7 @@ struct lpfc_sli4_hba { struct lpfc_vector_map_info *cpu_map; uint16_t num_possible_cpu; uint16_t num_present_cpu; + struct cpumask numa_mask; uint16_t curr_disp_cpu; struct lpfc_eq_intr_info __percpu *eq_info; uint32_t conf_trunk; |