Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 71
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 88
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.h | 55
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 763
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 34
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 743
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 340
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 66
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 98
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 4
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 42
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 157
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 114
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 14
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_tlv.h | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 191
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 8
-rw-r--r--  drivers/infiniband/hw/cxgb4/cq.c | 11
-rw-r--r--  drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 44
-rw-r--r--  drivers/infiniband/hw/cxgb4/provider.c | 67
-rw-r--r--  drivers/infiniband/hw/cxgb4/qp.c | 34
-rw-r--r--  drivers/infiniband/hw/efa/efa.h | 17
-rw-r--r--  drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 60
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.c | 30
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.c | 21
-rw-r--r--  drivers/infiniband/hw/efa/efa_com_cmd.h | 22
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c | 83
-rw-r--r--  drivers/infiniband/hw/efa/efa_verbs.c | 127
-rw-r--r--  drivers/infiniband/hw/erdma/erdma.h | 18
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cmdq.c | 97
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cq.c | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_eq.c | 91
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_hw.h | 65
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_main.c | 42
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_qp.c | 6
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.c | 660
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.h | 59
-rw-r--r--  drivers/infiniband/hw/hfi1/Makefile | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/aspm.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 49
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/chip_registers.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/common.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/debugfs.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/device.c | 74
-rw-r--r--  drivers/infiniband/hw/hfi1/device.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/efivar.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/efivar.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/eprom.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/eprom.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/exp_rcv.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/exp_rcv.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/fault.c | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/fault.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/file_ops.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/firmware.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/intr.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_main.c | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/ipoib_tx.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/mmu_rb.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/mmu_rb.h | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/netdev.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/netdev_rx.c | 9
-rw-r--r--  drivers/infiniband/hw/hfi1/opa_compat.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/pcie.c | 41
-rw-r--r--  drivers/infiniband/hw/hfi1/pin_system.c | 474
-rw-r--r--  drivers/infiniband/hw/hfi1/pinning.h | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c | 17
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/pio_copy.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/platform.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/qp.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/rc.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/ruc.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma_txreq.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c | 25
-rw-r--r--  drivers/infiniband/hw/hfi1/trace.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ctxts.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_dbg.h | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_ibhdrs.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_misc.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_mmu.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rc.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_rx.h | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tid.h | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/trace_tx.h | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/uc.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/ud.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.h | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/user_pages.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.c | 447
-rw-r--r--  drivers/infiniband/hw/hfi1/user_sdma.h | 19
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs_txreq.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs_txreq.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_main.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/vnic_sdma.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/Makefile | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 51
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.c | 19
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cmd.h | 3
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 56
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_debugfs.c | 110
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_debugfs.h | 33
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 125
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 136
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.h | 68
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 788
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 44
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 169
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 391
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_pd.c | 12
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 147
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c | 147
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 105
-rw-r--r--  drivers/infiniband/hw/irdma/cm.c | 95
-rw-r--r--  drivers/infiniband/hw/irdma/cm.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/ctrl.c | 25
-rw-r--r--  drivers/infiniband/hw/irdma/defs.h | 3
-rw-r--r--  drivers/infiniband/hw/irdma/hmc.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/hmc.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 87
-rw-r--r--  drivers/infiniband/hw/irdma/i40iw_hw.c | 3
-rw-r--r--  drivers/infiniband/hw/irdma/i40iw_hw.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/i40iw_if.c | 4
-rw-r--r--  drivers/infiniband/hw/irdma/icrdma_hw.c | 3
-rw-r--r--  drivers/infiniband/hw/irdma/icrdma_hw.h | 3
-rw-r--r--  drivers/infiniband/hw/irdma/irdma.h | 3
-rw-r--r--  drivers/infiniband/hw/irdma/main.c | 4
-rw-r--r--  drivers/infiniband/hw/irdma/main.h | 12
-rw-r--r--  drivers/infiniband/hw/irdma/osdep.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/pble.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/pble.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/protos.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/puda.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/puda.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/trace.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/trace.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/trace_cm.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/type.h | 5
-rw-r--r--  drivers/infiniband/hw/irdma/uda.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/uda.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/uda_d.h | 2
-rw-r--r--  drivers/infiniband/hw/irdma/uk.c | 220
-rw-r--r--  drivers/infiniband/hw/irdma/user.h | 21
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 38
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.c | 551
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.h | 10
-rw-r--r--  drivers/infiniband/hw/irdma/ws.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/ws.h | 2
-rw-r--r--  drivers/infiniband/hw/mana/cq.c | 119
-rw-r--r--  drivers/infiniband/hw/mana/device.c | 81
-rw-r--r--  drivers/infiniband/hw/mana/main.c | 578
-rw-r--r--  drivers/infiniband/hw/mana/mana_ib.h | 351
-rw-r--r--  drivers/infiniband/hw/mana/mr.c | 18
-rw-r--r--  drivers/infiniband/hw/mana/qp.c | 381
-rw-r--r--  drivers/infiniband/hw/mana/wq.c | 31
-rw-r--r--  drivers/infiniband/hw/mlx4/alias_GUID.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 3
-rw-r--r--  drivers/infiniband/hw/mlx4/mad.c | 10
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 265
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 4
-rw-r--r--  drivers/infiniband/hw/mlx4/sysfs.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.c | 33
-rw-r--r--  drivers/infiniband/hw/mlx5/cmd.h | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/cong.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 33
-rw-r--r--  drivers/infiniband/hw/mlx5/data_direct.c | 227
-rw-r--r--  drivers/infiniband/hw/mlx5/data_direct.h | 23
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 8
-rw-r--r--  drivers/infiniband/hw/mlx5/dm.c | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/ib_rep.c | 22
-rw-r--r--  drivers/infiniband/hw/mlx5/macsec.c | 364
-rw-r--r--  drivers/infiniband/hw/mlx5/macsec.h | 29
-rw-r--r--  drivers/infiniband/hw/mlx5/mad.c | 125
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 774
-rw-r--r--  drivers/infiniband/hw/mlx5/mem.c | 200
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 137
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 791
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 411
-rw-r--r--  drivers/infiniband/hw/mlx5/qp.c | 59
-rw-r--r--  drivers/infiniband/hw/mlx5/qpc.c | 13
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c | 29
-rw-r--r--  drivers/infiniband/hw/mlx5/srq.c | 17
-rw-r--r--  drivers/infiniband/hw/mlx5/std_types.c | 76
-rw-r--r--  drivers/infiniband/hw/mlx5/umr.c | 155
-rw-r--r--  drivers/infiniband/hw/mlx5/umr.h | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/wr.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_cmd.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_main.c | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_memfree.h | 2
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_provider.c | 23
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_stats.c | 22
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 5
-rw-r--r--  drivers/infiniband/hw/ocrdma/ocrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_iw_cm.c | 3
-rw-r--r--  drivers/infiniband/hw/qedr/qedr_roce_cm.c | 1
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.c | 16
-rw-r--r--  drivers/infiniband/hw/qedr/verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib.h | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c | 6
-rw-r--r--  drivers/infiniband/hw/qib/qib_file_ops.c | 19
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 6
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 7
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 9
-rw-r--r--  drivers/infiniband/hw/qib/qib_pcie.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.h | 4
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 6
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.h | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c | 5
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 14
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
241 files changed, 10622 insertions, 4218 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index ea81b2497511..e94518b12f86 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -41,6 +41,7 @@
#define __BNXT_RE_H__
#include <rdma/uverbs_ioctl.h>
#include "hw_counters.h"
+#include <linux/hashtable.h>
#define ROCE_DRV_MODULE_NAME "bnxt_re"
#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
@@ -90,6 +91,15 @@ struct bnxt_re_ring_attr {
u8 mode;
};
+/*
+ * Data structure and defines to handle
+ * recovery
+ */
+#define BNXT_RE_PRE_RECOVERY_REMOVE 0x1
+#define BNXT_RE_COMPLETE_REMOVE 0x2
+#define BNXT_RE_POST_RECOVERY_INIT 0x4
+#define BNXT_RE_COMPLETE_INIT 0x8
+
struct bnxt_re_sqp_entries {
struct bnxt_qplib_sge sge;
u64 wrid;
@@ -106,18 +116,51 @@ struct bnxt_re_gsi_context {
struct bnxt_re_sqp_entries *sqp_tbl;
};
-#define BNXT_RE_MIN_MSIX 2
-#define BNXT_RE_MAX_MSIX 9
+struct bnxt_re_en_dev_info {
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
+};
+
#define BNXT_RE_AEQ_IDX 0
#define BNXT_RE_NQ_IDX 1
#define BNXT_RE_GEN_P5_MAX_VF 64
+struct bnxt_re_pacing {
+ u64 dbr_db_fifo_reg_off;
+ void *dbr_page;
+ u64 dbr_bar_addr;
+ u32 pacing_algo_th;
+ u32 do_pacing_save;
+ u32 dbq_pacing_time; /* ms */
+ u32 dbr_def_do_pacing;
+ bool dbr_pacing;
+ struct mutex dbq_lock; /* synchronize db pacing algo */
+};
+
+#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF
+#define BNXT_RE_DBR_PACING_TIME 5 /* ms */
+#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */
+#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */
+/* Default do_pacing value when there is no congestion */
+#define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */
+
+#define BNXT_RE_MAX_FIFO_DEPTH_P5 0x2c00
+#define BNXT_RE_MAX_FIFO_DEPTH_P7 0x8000
+
+#define BNXT_RE_MAX_FIFO_DEPTH(ctx) \
+ (bnxt_qplib_is_chip_gen_p7((ctx)) ? \
+ BNXT_RE_MAX_FIFO_DEPTH_P7 :\
+ BNXT_RE_MAX_FIFO_DEPTH_P5)
+
+#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000
+
+#define MAX_CQ_HASH_BITS (16)
+#define MAX_SRQ_HASH_BITS (16)
struct bnxt_re_dev {
struct ib_device ibdev;
struct list_head list;
unsigned long flags;
#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
-#define BNXT_RE_FLAG_GOT_MSIX 2
#define BNXT_RE_FLAG_HAVE_L2_REF 3
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
#define BNXT_RE_FLAG_QOS_WORK_REG 5
@@ -126,6 +169,7 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_ERR_DEVICE_DETACHED 17
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
+ struct auxiliary_device *adev;
struct notifier_block nb;
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx *chip_ctx;
@@ -144,7 +188,7 @@ struct bnxt_re_dev {
struct bnxt_qplib_rcfw rcfw;
/* NQ */
- struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX];
+ struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX];
/* Device Resources */
struct bnxt_qplib_dev_attr dev_attr;
@@ -152,16 +196,9 @@ struct bnxt_re_dev {
struct bnxt_qplib_res qplib_res;
struct bnxt_qplib_dpi dpi_privileged;
- atomic_t qp_count;
struct mutex qp_lock; /* protect qp list */
struct list_head qp_list;
- atomic_t cq_count;
- atomic_t srq_count;
- atomic_t mr_count;
- atomic_t mw_count;
- atomic_t ah_count;
- atomic_t pd_count;
/* Max of 2 lossless traffic class supported per port */
u16 cosq[2];
@@ -171,6 +208,11 @@ struct bnxt_re_dev {
atomic_t nq_alloc_cnt;
u32 is_virtfn;
u32 num_vfs;
+ struct bnxt_re_pacing pacing;
+ struct work_struct dbq_fifo_check_work;
+ struct delayed_work dbq_pacing_work;
+ DECLARE_HASHTABLE(cq_hash, MAX_CQ_HASH_BITS);
+ DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
};
#define to_bnxt_re_dev(ptr, member) \
@@ -181,6 +223,7 @@ struct bnxt_re_dev {
#define BNXT_RE_ROCEV2_IPV6_PACKET 3
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
+void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
@@ -190,4 +233,10 @@ static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
}
extern const struct uapi_definition bnxt_re_uapi_defs[];
+
+static inline void bnxt_re_set_pacing_dev_state(struct bnxt_re_dev *rdev)
+{
+ rdev->qplib_res.pacing_data->dev_err_state =
+ test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+}
#endif
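
Note: the bnxt_re.h hunks above add two id-keyed hashtables (cq_hash, srq_hash) so that notification-queue events, which carry only a CQ/SRQ id, can be mapped back to the driver object. Below is a minimal sketch of the <linux/hashtable.h> pattern this relies on, assuming a simplified struct bnxt_re_cq and a hypothetical bnxt_re_find_cq() helper (the real code keys on qplib_cq.id):

#include <linux/hashtable.h>

/* Simplified stand-in; the real struct bnxt_re_cq lives in ib_verbs.h */
struct bnxt_re_cq {
	struct hlist_node hash_entry;	/* links into rdev->cq_hash */
	u32 id;				/* stands in for qplib_cq.id */
};

/* At create: hash_add(rdev->cq_hash, &cq->hash_entry, cq->id);
 * At destroy: hash_del(&cq->hash_entry);
 */
static struct bnxt_re_cq *bnxt_re_find_cq(struct bnxt_re_dev *rdev, u32 id)
{
	struct bnxt_re_cq *cq;

	/* Walks only the bucket that 'id' hashes to */
	hash_for_each_possible(rdev->cq_hash, cq, hash_entry, id)
		if (cq->id == id)
			return cq;
	return NULL;
}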
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index 825d512799d9..128651c01595 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -61,15 +61,29 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
[BNXT_RE_ACTIVE_PD].name = "active_pds",
[BNXT_RE_ACTIVE_AH].name = "active_ahs",
[BNXT_RE_ACTIVE_QP].name = "active_qps",
+ [BNXT_RE_ACTIVE_RC_QP].name = "active_rc_qps",
+ [BNXT_RE_ACTIVE_UD_QP].name = "active_ud_qps",
[BNXT_RE_ACTIVE_SRQ].name = "active_srqs",
[BNXT_RE_ACTIVE_CQ].name = "active_cqs",
[BNXT_RE_ACTIVE_MR].name = "active_mrs",
[BNXT_RE_ACTIVE_MW].name = "active_mws",
+ [BNXT_RE_WATERMARK_PD].name = "watermark_pds",
+ [BNXT_RE_WATERMARK_AH].name = "watermark_ahs",
+ [BNXT_RE_WATERMARK_QP].name = "watermark_qps",
+ [BNXT_RE_WATERMARK_RC_QP].name = "watermark_rc_qps",
+ [BNXT_RE_WATERMARK_UD_QP].name = "watermark_ud_qps",
+ [BNXT_RE_WATERMARK_SRQ].name = "watermark_srqs",
+ [BNXT_RE_WATERMARK_CQ].name = "watermark_cqs",
+ [BNXT_RE_WATERMARK_MR].name = "watermark_mrs",
+ [BNXT_RE_WATERMARK_MW].name = "watermark_mws",
+ [BNXT_RE_RESIZE_CQ_CNT].name = "resize_cq_cnt",
[BNXT_RE_RX_PKTS].name = "rx_pkts",
[BNXT_RE_RX_BYTES].name = "rx_bytes",
[BNXT_RE_TX_PKTS].name = "tx_pkts",
[BNXT_RE_TX_BYTES].name = "tx_bytes",
[BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors",
+ [BNXT_RE_TX_ERRORS].name = "tx_roce_errors",
+ [BNXT_RE_TX_DISCARDS].name = "tx_roce_discards",
[BNXT_RE_RX_ERRORS].name = "rx_roce_errors",
[BNXT_RE_RX_DISCARDS].name = "rx_roce_discards",
[BNXT_RE_TO_RETRANSMITS].name = "to_retransmits",
@@ -117,14 +131,25 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = {
[BNXT_RE_TX_READ_RES].name = "tx_read_resp",
[BNXT_RE_TX_WRITE_REQ].name = "tx_write_req",
[BNXT_RE_TX_SEND_REQ].name = "tx_send_req",
+ [BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts",
+ [BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes",
[BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req",
[BNXT_RE_RX_READ_REQ].name = "rx_read_req",
[BNXT_RE_RX_READ_RESP].name = "rx_read_resp",
[BNXT_RE_RX_WRITE_REQ].name = "rx_write_req",
[BNXT_RE_RX_SEND_REQ].name = "rx_send_req",
+ [BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts",
+ [BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes",
[BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts",
[BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes",
- [BNXT_RE_OOB].name = "rx_out_of_buffer"
+ [BNXT_RE_OOB].name = "rx_out_of_buffer",
+ [BNXT_RE_TX_CNP].name = "tx_cnp_pkts",
+ [BNXT_RE_RX_CNP].name = "rx_cnp_pkts",
+ [BNXT_RE_RX_ECN].name = "rx_ecn_marked_pkts",
+ [BNXT_RE_PACING_RESCHED].name = "pacing_reschedule",
+ [BNXT_RE_PACING_CMPL].name = "pacing_complete",
+ [BNXT_RE_PACING_ALERT].name = "pacing_alerts",
+ [BNXT_RE_DB_FIFO_REG].name = "db_fifo_register",
};
static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
@@ -136,14 +161,22 @@ static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev,
stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res;
stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req;
stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req;
+ stats->value[BNXT_RE_TX_ROCE_PKTS] = s->tx_roce_pkts;
+ stats->value[BNXT_RE_TX_ROCE_BYTES] = s->tx_roce_bytes;
stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req;
stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req;
stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res;
stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req;
stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req;
+ stats->value[BNXT_RE_RX_ROCE_PKTS] = s->rx_roce_pkts;
+ stats->value[BNXT_RE_RX_ROCE_BYTES] = s->rx_roce_bytes;
stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts;
stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes;
stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer;
+ stats->value[BNXT_RE_TX_CNP] = s->tx_cnp;
+ stats->value[BNXT_RE_RX_CNP] = s->rx_cnp;
+ stats->value[BNXT_RE_RX_ECN] = s->rx_ecn_marked;
+ stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = s->rx_out_of_sequence;
}
static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev,
@@ -249,30 +282,59 @@ static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev,
err_s->res_oos_drop_count;
}
+static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev,
+ struct rdma_hw_stats *stats)
+{
+ struct bnxt_re_db_pacing_stats *pacing_s = &rdev->stats.pacing;
+
+ stats->value[BNXT_RE_PACING_RESCHED] = pacing_s->resched;
+ stats->value[BNXT_RE_PACING_CMPL] = pacing_s->complete;
+ stats->value[BNXT_RE_PACING_ALERT] = pacing_s->alerts;
+ stats->value[BNXT_RE_DB_FIFO_REG] =
+ readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
+}
+
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port, int index)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- struct ctx_hw_stats *hw_stats = NULL;
+ struct bnxt_re_res_cntrs *res_s = &rdev->stats.res;
struct bnxt_qplib_roce_stats *err_s = NULL;
+ struct ctx_hw_stats *hw_stats = NULL;
int rc = 0;
hw_stats = rdev->qplib_ctx.stats.dma;
if (!port || !stats)
return -EINVAL;
- stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&rdev->qp_count);
- stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&rdev->srq_count);
- stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count);
- stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count);
- stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count);
- stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count);
- stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count);
+ stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&res_s->qp_count);
+ stats->value[BNXT_RE_ACTIVE_RC_QP] = atomic_read(&res_s->rc_qp_count);
+ stats->value[BNXT_RE_ACTIVE_UD_QP] = atomic_read(&res_s->ud_qp_count);
+ stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&res_s->srq_count);
+ stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&res_s->cq_count);
+ stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&res_s->mr_count);
+ stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&res_s->mw_count);
+ stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&res_s->pd_count);
+ stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&res_s->ah_count);
+ stats->value[BNXT_RE_WATERMARK_QP] = res_s->qp_watermark;
+ stats->value[BNXT_RE_WATERMARK_RC_QP] = res_s->rc_qp_watermark;
+ stats->value[BNXT_RE_WATERMARK_UD_QP] = res_s->ud_qp_watermark;
+ stats->value[BNXT_RE_WATERMARK_SRQ] = res_s->srq_watermark;
+ stats->value[BNXT_RE_WATERMARK_CQ] = res_s->cq_watermark;
+ stats->value[BNXT_RE_WATERMARK_MR] = res_s->mr_watermark;
+ stats->value[BNXT_RE_WATERMARK_MW] = res_s->mw_watermark;
+ stats->value[BNXT_RE_WATERMARK_PD] = res_s->pd_watermark;
+ stats->value[BNXT_RE_WATERMARK_AH] = res_s->ah_watermark;
+ stats->value[BNXT_RE_RESIZE_CQ_CNT] = atomic_read(&res_s->resize_count);
if (hw_stats) {
stats->value[BNXT_RE_RECOVERABLE_ERRORS] =
le64_to_cpu(hw_stats->tx_bcast_pkts);
+ stats->value[BNXT_RE_TX_DISCARDS] =
+ le64_to_cpu(hw_stats->tx_discard_pkts);
+ stats->value[BNXT_RE_TX_ERRORS] =
+ le64_to_cpu(hw_stats->tx_error_pkts);
stats->value[BNXT_RE_RX_ERRORS] =
le64_to_cpu(hw_stats->rx_error_pkts);
stats->value[BNXT_RE_RX_DISCARDS] =
@@ -294,6 +356,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
&rdev->flags);
goto done;
}
+ bnxt_re_copy_err_stats(rdev, stats, err_s);
if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) &&
!rdev->is_virtfn) {
rc = bnxt_re_get_ext_stat(rdev, stats);
@@ -303,11 +366,12 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
goto done;
}
}
- bnxt_re_copy_err_stats(rdev, stats, err_s);
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_copy_db_pacing_stats(rdev, stats);
}
done:
- return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+ return bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
BNXT_RE_NUM_EXT_COUNTERS : BNXT_RE_NUM_STD_COUNTERS;
}
@@ -317,7 +381,7 @@ struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
int num_counters = 0;
- if (bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
num_counters = BNXT_RE_NUM_EXT_COUNTERS;
else
num_counters = BNXT_RE_NUM_STD_COUNTERS;
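
Note: every atomic_inc() on a resource count in ib_verbs.c becomes an atomic_inc_return() followed by a watermark update, which is what feeds the new watermark_* descriptors above. A sketch of that repeated pattern, folded into a hypothetical helper (the driver open-codes it per resource type):

/* Hypothetical helper; bnxt_re open-codes this at each call site.
 * The watermark store is racy versus concurrent increments, which is
 * acceptable for a diagnostic high-water mark.
 */
static inline void bnxt_re_count_pd(struct bnxt_re_res_cntrs *res)
{
	u32 active_pds = atomic_inc_return(&res->pd_count);

	if (active_pds > res->pd_watermark)
		res->pd_watermark = active_pds;
}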
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h
index 7943b2c393e4..e541b6f8ca9f 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.h
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h
@@ -44,15 +44,29 @@ enum bnxt_re_hw_stats {
BNXT_RE_ACTIVE_PD,
BNXT_RE_ACTIVE_AH,
BNXT_RE_ACTIVE_QP,
+ BNXT_RE_ACTIVE_RC_QP,
+ BNXT_RE_ACTIVE_UD_QP,
BNXT_RE_ACTIVE_SRQ,
BNXT_RE_ACTIVE_CQ,
BNXT_RE_ACTIVE_MR,
BNXT_RE_ACTIVE_MW,
+ BNXT_RE_WATERMARK_PD,
+ BNXT_RE_WATERMARK_AH,
+ BNXT_RE_WATERMARK_QP,
+ BNXT_RE_WATERMARK_RC_QP,
+ BNXT_RE_WATERMARK_UD_QP,
+ BNXT_RE_WATERMARK_SRQ,
+ BNXT_RE_WATERMARK_CQ,
+ BNXT_RE_WATERMARK_MR,
+ BNXT_RE_WATERMARK_MW,
+ BNXT_RE_RESIZE_CQ_CNT,
BNXT_RE_RX_PKTS,
BNXT_RE_RX_BYTES,
BNXT_RE_TX_PKTS,
BNXT_RE_TX_BYTES,
BNXT_RE_RECOVERABLE_ERRORS,
+ BNXT_RE_TX_ERRORS,
+ BNXT_RE_TX_DISCARDS,
BNXT_RE_RX_ERRORS,
BNXT_RE_RX_DISCARDS,
BNXT_RE_TO_RETRANSMITS,
@@ -100,19 +114,58 @@ enum bnxt_re_hw_stats {
BNXT_RE_TX_READ_RES,
BNXT_RE_TX_WRITE_REQ,
BNXT_RE_TX_SEND_REQ,
+ BNXT_RE_TX_ROCE_PKTS,
+ BNXT_RE_TX_ROCE_BYTES,
BNXT_RE_RX_ATOMIC_REQ,
BNXT_RE_RX_READ_REQ,
BNXT_RE_RX_READ_RESP,
BNXT_RE_RX_WRITE_REQ,
BNXT_RE_RX_SEND_REQ,
+ BNXT_RE_RX_ROCE_PKTS,
+ BNXT_RE_RX_ROCE_BYTES,
BNXT_RE_RX_ROCE_GOOD_PKTS,
BNXT_RE_RX_ROCE_GOOD_BYTES,
BNXT_RE_OOB,
+ BNXT_RE_TX_CNP,
+ BNXT_RE_RX_CNP,
+ BNXT_RE_RX_ECN,
+ BNXT_RE_PACING_RESCHED,
+ BNXT_RE_PACING_CMPL,
+ BNXT_RE_PACING_ALERT,
+ BNXT_RE_DB_FIFO_REG,
BNXT_RE_NUM_EXT_COUNTERS
};
#define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1)
+struct bnxt_re_db_pacing_stats {
+ u64 resched;
+ u64 complete;
+ u64 alerts;
+};
+
+struct bnxt_re_res_cntrs {
+ atomic_t qp_count;
+ atomic_t rc_qp_count;
+ atomic_t ud_qp_count;
+ atomic_t cq_count;
+ atomic_t srq_count;
+ atomic_t mr_count;
+ atomic_t mw_count;
+ atomic_t ah_count;
+ atomic_t pd_count;
+ atomic_t resize_count;
+ u64 qp_watermark;
+ u64 rc_qp_watermark;
+ u64 ud_qp_watermark;
+ u64 cq_watermark;
+ u64 srq_watermark;
+ u64 mr_watermark;
+ u64 mw_watermark;
+ u64 ah_watermark;
+ u64 pd_watermark;
+};
+
struct bnxt_re_rstat {
struct bnxt_qplib_roce_stats errs;
struct bnxt_qplib_ext_stat ext_stat;
@@ -120,6 +173,8 @@ struct bnxt_re_rstat {
struct bnxt_re_stats {
struct bnxt_re_rstat rstat;
+ struct bnxt_re_res_cntrs res;
+ struct bnxt_re_db_pacing_stats pacing;
};
struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
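
Note: BNXT_RE_NUM_STD_COUNTERS is pinned to BNXT_RE_OUT_OF_SEQ_ERR + 1, so each enum value doubles as the index into both bnxt_re_stat_descs[] and stats->value[]; new counters must be appended to all three places in matching order. A sketch of how the alloc hook plausibly ties these together, mirroring the bnxt_re_ib_alloc_hw_port_stats hunk in hw_counters.c (the lifespan constant is an assumption):

struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev,
						     u32 port_num)
{
	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
	int num_counters;

	/* P5/P7 chips expose the extended counter block as well */
	if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
		num_counters = BNXT_RE_NUM_EXT_COUNTERS;
	else
		num_counters = BNXT_RE_NUM_STD_COUNTERS;

	return rdma_alloc_hw_stats_struct(bnxt_re_stat_descs, num_counters,
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}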
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 03cc45a5458d..460f33914825 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -50,6 +50,7 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>
+#include <linux/hashtable.h>
#include "bnxt_ulp.h"
@@ -114,6 +115,14 @@ static enum ib_access_flags __to_ib_access_flags(int qflags)
return iflags;
};
+static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev,
+ struct bnxt_qplib_mrw *qplib_mr)
+{
+ if (_is_relaxed_ordering_supported(rdev->dev_attr.dev_cap_flags2) &&
+ pcie_relaxed_ordering_enabled(rdev->en_dev->pdev))
+ qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO;
+}
+
static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
struct bnxt_qplib_sge *sg_list, int num)
{
@@ -284,7 +293,7 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num,
int index, union ib_gid *gid)
{
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
- int rc = 0;
+ int rc;
/* Ignore port_num */
memset(gid, 0, sizeof(*gid));
@@ -400,6 +409,10 @@ static void bnxt_re_create_fence_wqe(struct bnxt_re_pd *pd)
struct bnxt_re_fence_data *fence = &pd->fence;
struct ib_mr *ib_mr = &fence->mr->ib_mr;
struct bnxt_qplib_swqe *wqe = &fence->bind_wqe;
+ struct bnxt_re_dev *rdev = pd->rdev;
+
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
memset(wqe, 0, sizeof(*wqe));
wqe->type = BNXT_QPLIB_SWQE_TYPE_BIND_MW;
@@ -454,6 +467,9 @@ static void bnxt_re_destroy_fence_mr(struct bnxt_re_pd *pd)
struct device *dev = &rdev->en_dev->pdev->dev;
struct bnxt_re_mr *mr = fence->mr;
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
if (fence->mw) {
bnxt_re_dealloc_mw(fence->mw);
fence->mw = NULL;
@@ -485,6 +501,9 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
struct ib_mw *mw;
int rc;
+ if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES,
DMA_BIDIRECTIONAL);
rc = dma_mapping_error(dev, dma_addr);
@@ -506,15 +525,19 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd)
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
- mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
- rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc) {
- ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n");
- goto fail;
- }
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
+ if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) {
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n");
+ goto fail;
+ }
- /* Register MR */
- mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ /* Register MR */
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ } else {
+ mr->qplib_mr.flags = CMDQ_REGISTER_MR_FLAGS_ALLOC_MR;
+ }
mr->qplib_mr.va = (u64)(unsigned long)fence->va;
mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES;
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL,
@@ -565,6 +588,9 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
break;
case BNXT_RE_MMAP_UC_DB:
case BNXT_RE_MMAP_WC_DB:
+ case BNXT_RE_MMAP_DBR_BAR:
+ case BNXT_RE_MMAP_DBR_PAGE:
+ case BNXT_RE_MMAP_TOGGLE_PAGE:
ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx,
&entry->rdma_entry, PAGE_SIZE);
break;
@@ -600,7 +626,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata)
if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res,
&rdev->qplib_res.pd_tbl,
&pd->qplib_pd))
- atomic_dec(&rdev->pd_count);
+ atomic_dec(&rdev->stats.res.pd_count);
}
return 0;
}
@@ -613,10 +639,11 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
udata, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_user_mmap_entry *entry = NULL;
+ u32 active_pds;
int rc = 0;
pd->rdev = rdev;
- if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
+ if (bnxt_qplib_alloc_pd(&rdev->qplib_res, &pd->qplib_pd)) {
ibdev_err(&rdev->ibdev, "Failed to allocate HW PD");
rc = -ENOMEM;
goto fail;
@@ -663,7 +690,9 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
if (bnxt_re_create_fence_mr(pd))
ibdev_warn(&rdev->ibdev,
"Failed to create Fence-MR\n");
- atomic_inc(&rdev->pd_count);
+ active_pds = atomic_inc_return(&rdev->stats.res.pd_count);
+ if (active_pds > rdev->stats.res.pd_watermark)
+ rdev->stats.res.pd_watermark = active_pds;
return 0;
dbfail:
@@ -679,7 +708,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
bool block = true;
- int rc = 0;
+ int rc;
block = !(flags & RDMA_DESTROY_AH_SLEEPABLE);
rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block);
@@ -689,7 +718,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
else
goto fail;
}
- atomic_dec(&rdev->ah_count);
+ atomic_dec(&rdev->stats.res.ah_count);
fail:
return rc;
}
@@ -723,6 +752,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
const struct ib_gid_attr *sgid_attr;
struct bnxt_re_gid_ctx *ctx;
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+ u32 active_ahs;
u8 nw_type;
int rc;
@@ -775,7 +805,9 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr,
wmb(); /* make sure cache is updated. */
spin_unlock_irqrestore(&uctx->sh_lock, flag);
}
- atomic_inc(&rdev->ah_count);
+ active_ahs = atomic_inc_return(&rdev->stats.res.ah_count);
+ if (active_ahs > rdev->stats.res.ah_watermark)
+ rdev->stats.res.ah_watermark = active_ahs;
return 0;
}
@@ -826,7 +858,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
struct bnxt_re_qp *gsi_sqp;
struct bnxt_re_ah *gsi_sah;
struct bnxt_re_dev *rdev;
- int rc = 0;
+ int rc;
rdev = qp->rdev;
gsi_sqp = rdev->gsi_ctx.gsi_sqp;
@@ -836,7 +868,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
bnxt_qplib_destroy_ah(&rdev->qplib_res,
&gsi_sah->qplib_ah,
true);
- atomic_dec(&rdev->ah_count);
+ atomic_dec(&rdev->stats.res.ah_count);
bnxt_qplib_clean_qp(&qp->qplib_qp);
ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n");
@@ -851,7 +883,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp)
mutex_lock(&rdev->qp_lock);
list_del(&gsi_sqp->list);
mutex_unlock(&rdev->qp_lock);
- atomic_dec(&rdev->qp_count);
+ atomic_dec(&rdev->stats.res.qp_count);
kfree(rdev->gsi_ctx.sqp_tbl);
kfree(gsi_sah);
@@ -901,7 +933,11 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
mutex_lock(&rdev->qp_lock);
list_del(&qp->list);
mutex_unlock(&rdev->qp_lock);
- atomic_dec(&rdev->qp_count);
+ atomic_dec(&rdev->stats.res.qp_count);
+ if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC)
+ atomic_dec(&rdev->stats.res.rc_qp_count);
+ else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD)
+ atomic_dec(&rdev->stats.res.ud_qp_count);
ib_umem_release(qp->rumem);
ib_umem_release(qp->sumem);
@@ -970,59 +1006,58 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp,
align = sizeof(struct sq_send_hdr);
ilsize = ALIGN(init_attr->cap.max_inline_data, align);
- sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge);
- if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges))
- return -EINVAL;
- /* For gen p4 and gen p5 backward compatibility mode
- * wqe size is fixed to 128 bytes
+ /* For gen p4 and gen p5 fixed wqe compatibility mode
+ * wqe size is fixed to 128 bytes - ie 6 SGEs
*/
- if (sq->wqe_size < bnxt_re_get_swqe_size(dev_attr->max_qp_sges) &&
- qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
- sq->wqe_size = bnxt_re_get_swqe_size(dev_attr->max_qp_sges);
+ if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) {
+ sq->wqe_size = bnxt_re_get_swqe_size(BNXT_STATIC_MAX_SGE);
+ sq->max_sge = BNXT_STATIC_MAX_SGE;
+ } else {
+ sq->wqe_size = bnxt_re_get_wqe_size(ilsize, sq->max_sge);
+ if (sq->wqe_size > bnxt_re_get_swqe_size(dev_attr->max_qp_sges))
+ return -EINVAL;
+ }
if (init_attr->cap.max_inline_data) {
qplqp->max_inline_data = sq->wqe_size -
sizeof(struct sq_send_hdr);
init_attr->cap.max_inline_data = qplqp->max_inline_data;
- if (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC)
- sq->max_sge = qplqp->max_inline_data /
- sizeof(struct sq_sge);
}
return 0;
}
static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
- struct bnxt_re_qp *qp, struct ib_udata *udata)
+ struct bnxt_re_qp *qp, struct bnxt_re_ucontext *cntx,
+ struct bnxt_re_qp_req *ureq)
{
struct bnxt_qplib_qp *qplib_qp;
- struct bnxt_re_ucontext *cntx;
- struct bnxt_re_qp_req ureq;
int bytes = 0, psn_sz;
struct ib_umem *umem;
int psn_nume;
qplib_qp = &qp->qplib_qp;
- cntx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
- ib_uctx);
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
- return -EFAULT;
bytes = (qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size);
/* Consider mapping PSN search memory only for RC QPs. */
if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC) {
- psn_sz = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+ psn_sz = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
- psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
- qplib_qp->sq.max_wqe :
- ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
- sizeof(struct bnxt_qplib_sge));
+ if (cntx && bnxt_re_is_var_size_supported(rdev, cntx)) {
+ psn_nume = ureq->sq_slots;
+ } else {
+ psn_nume = (qplib_qp->wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
+ qplib_qp->sq.max_wqe : ((qplib_qp->sq.max_wqe * qplib_qp->sq.wqe_size) /
+ sizeof(struct bnxt_qplib_sge));
+ }
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ psn_nume = roundup_pow_of_two(psn_nume);
bytes += (psn_nume * psn_sz);
}
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(&rdev->ibdev, ureq.qpsva, bytes,
+ umem = ib_umem_get(&rdev->ibdev, ureq->qpsva, bytes,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
return PTR_ERR(umem);
@@ -1031,12 +1066,12 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
qplib_qp->sq.sg_info.umem = umem;
qplib_qp->sq.sg_info.pgsize = PAGE_SIZE;
qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT;
- qplib_qp->qp_handle = ureq.qp_handle;
+ qplib_qp->qp_handle = ureq->qp_handle;
if (!qp->qplib_qp.srq) {
bytes = (qplib_qp->rq.max_wqe * qplib_qp->rq.wqe_size);
bytes = PAGE_ALIGN(bytes);
- umem = ib_umem_get(&rdev->ibdev, ureq.qprva, bytes,
+ umem = ib_umem_get(&rdev->ibdev, ureq->qprva, bytes,
IB_ACCESS_LOCAL_WRITE);
if (IS_ERR(umem))
goto rqfail;
@@ -1095,7 +1130,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
"Failed to allocate HW AH for Shadow QP");
goto fail;
}
- atomic_inc(&rdev->ah_count);
+ atomic_inc(&rdev->stats.res.ah_count);
return ah;
@@ -1132,6 +1167,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
/* Shadow QP SQ depth should be same as QP1 RQ depth */
qp->qplib_qp.sq.wqe_size = bnxt_re_get_wqe_size(0, 6);
qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.sq.max_sw_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.sq.max_sge = 2;
/* Q full delta can be 1 since it is internal QP */
qp->qplib_qp.sq.q_full_delta = 1;
@@ -1143,6 +1179,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
qp->qplib_qp.rq.wqe_size = bnxt_re_get_rwqe_size(6);
qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.rq.max_sw_wqe = qp1_qp->rq.max_wqe;
qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
/* Q full delta can be 1 since it is internal QP */
qp->qplib_qp.rq.q_full_delta = 1;
@@ -1163,7 +1200,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp
INIT_LIST_HEAD(&qp->list);
mutex_lock(&rdev->qp_lock);
list_add_tail(&qp->list, &rdev->qp_list);
- atomic_inc(&rdev->qp_count);
+ atomic_inc(&rdev->stats.res.qp_count);
mutex_unlock(&rdev->qp_lock);
return qp;
fail:
@@ -1172,7 +1209,8 @@ fail:
}
static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
- struct ib_qp_init_attr *init_attr)
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx)
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
@@ -1201,8 +1239,9 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp,
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty.
*/
- entries = roundup_pow_of_two(init_attr->cap.max_recv_wr + 1);
+ entries = bnxt_re_init_depth(init_attr->cap.max_recv_wr + 1, uctx);
rq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ rq->max_sw_wqe = rq->max_wqe;
rq->q_full_delta = 0;
rq->sg_info.pgsize = PAGE_SIZE;
rq->sg_info.pgshft = PAGE_SHIFT;
@@ -1221,7 +1260,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
qplqp = &qp->qplib_qp;
dev_attr = &rdev->dev_attr;
- if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
qplqp->rq.max_sge = dev_attr->max_qp_sges;
if (qplqp->rq.max_sge > dev_attr->max_qp_sges)
qplqp->rq.max_sge = dev_attr->max_qp_sges;
@@ -1231,14 +1270,15 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp)
static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+ struct bnxt_re_ucontext *uctx,
+ struct bnxt_re_qp_req *ureq)
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
struct bnxt_re_dev *rdev;
struct bnxt_qplib_q *sq;
+ int diff = 0;
int entries;
- int diff;
int rc;
rdev = qp->rdev;
@@ -1247,21 +1287,28 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
dev_attr = &rdev->dev_attr;
sq->max_sge = init_attr->cap.max_send_sge;
- if (sq->max_sge > dev_attr->max_qp_sges) {
- sq->max_sge = dev_attr->max_qp_sges;
- init_attr->cap.max_send_sge = sq->max_sge;
- }
+ entries = init_attr->cap.max_send_wr;
+ if (uctx && qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) {
+ sq->max_wqe = ureq->sq_slots;
+ sq->max_sw_wqe = ureq->sq_slots;
+ sq->wqe_size = sizeof(struct sq_sge);
+ } else {
+ if (sq->max_sge > dev_attr->max_qp_sges) {
+ sq->max_sge = dev_attr->max_qp_sges;
+ init_attr->cap.max_send_sge = sq->max_sge;
+ }
- rc = bnxt_re_setup_swqe_size(qp, init_attr);
- if (rc)
- return rc;
+ rc = bnxt_re_setup_swqe_size(qp, init_attr);
+ if (rc)
+ return rc;
- entries = init_attr->cap.max_send_wr;
- /* Allocate 128 + 1 more than what's provided */
- diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
- 0 : BNXT_QPLIB_RESERVED_QP_WRS;
- entries = roundup_pow_of_two(entries + diff + 1);
- sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+ /* Allocate 128 + 1 more than what's provided */
+ diff = (qplqp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE) ?
+ 0 : BNXT_QPLIB_RESERVED_QP_WRS;
+ entries = bnxt_re_init_depth(entries + diff + 1, uctx);
+ sq->max_wqe = min_t(u32, entries, dev_attr->max_qp_wqes + diff + 1);
+ sq->max_sw_wqe = bnxt_qplib_get_depth(sq, qplqp->wqe_mode, true);
+ }
sq->q_full_delta = diff + 1;
/*
* Reserving one slot for Phantom WQE. Application can
@@ -1276,7 +1323,8 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp,
}
static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
- struct ib_qp_init_attr *init_attr)
+ struct ib_qp_init_attr *init_attr,
+ struct bnxt_re_ucontext *uctx)
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
@@ -1287,8 +1335,8 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp,
qplqp = &qp->qplib_qp;
dev_attr = &rdev->dev_attr;
- if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
- entries = roundup_pow_of_two(init_attr->cap.max_send_wr + 1);
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
+ entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx);
qplqp->sq.max_wqe = min_t(u32, entries,
dev_attr->max_qp_wqes + 1);
qplqp->sq.q_full_delta = qplqp->sq.max_wqe -
@@ -1314,7 +1362,7 @@ static int bnxt_re_init_qp_type(struct bnxt_re_dev *rdev,
goto out;
}
- if (bnxt_qplib_is_chip_gen_p5(chip_ctx) &&
+ if (bnxt_qplib_is_chip_gen_p5_p7(chip_ctx) &&
init_attr->qp_type == IB_QPT_GSI)
qptype = CMDQ_CREATE_QP_TYPE_GSI;
out:
@@ -1323,7 +1371,8 @@ out:
static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
struct ib_qp_init_attr *init_attr,
- struct ib_udata *udata)
+ struct bnxt_re_ucontext *uctx,
+ struct bnxt_re_qp_req *ureq)
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_qp *qplqp;
@@ -1340,16 +1389,14 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
qplqp->pd = &pd->qplib_pd;
qplqp->qp_handle = (u64)qplqp;
qplqp->max_inline_data = init_attr->cap.max_inline_data;
- qplqp->sig_type = ((init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ?
- true : false);
+ qplqp->sig_type = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
qptype = bnxt_re_init_qp_type(rdev, init_attr);
if (qptype < 0) {
rc = qptype;
goto out;
}
qplqp->type = (u8)qptype;
- qplqp->wqe_mode = rdev->chip_ctx->modes.wqe_mode;
-
+ qplqp->wqe_mode = bnxt_re_is_var_size_supported(rdev, uctx);
if (init_attr->qp_type == IB_QPT_RC) {
qplqp->max_rd_atomic = dev_attr->max_qp_rd_atom;
qplqp->max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
@@ -1377,21 +1424,21 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
}
/* Setup RQ/SRQ */
- rc = bnxt_re_init_rq_attr(qp, init_attr);
+ rc = bnxt_re_init_rq_attr(qp, init_attr, uctx);
if (rc)
goto out;
if (init_attr->qp_type == IB_QPT_GSI)
bnxt_re_adjust_gsi_rq_attr(qp);
/* Setup SQ */
- rc = bnxt_re_init_sq_attr(qp, init_attr, udata);
+ rc = bnxt_re_init_sq_attr(qp, init_attr, uctx, ureq);
if (rc)
goto out;
if (init_attr->qp_type == IB_QPT_GSI)
- bnxt_re_adjust_gsi_sq_attr(qp, init_attr);
+ bnxt_re_adjust_gsi_sq_attr(qp, init_attr, uctx);
- if (udata) /* This will update DPI and qp_handle */
- rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
+ if (uctx) /* This will update DPI and qp_handle */
+ rc = bnxt_re_init_user_qp(rdev, pd, qp, uctx, ureq);
out:
return rc;
}
@@ -1446,7 +1493,7 @@ static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd,
{
struct bnxt_re_dev *rdev;
struct bnxt_qplib_qp *qplqp;
- int rc = 0;
+ int rc;
rdev = qp->rdev;
qplqp = &qp->qplib_qp;
@@ -1492,13 +1539,27 @@ static bool bnxt_re_test_qp_limits(struct bnxt_re_dev *rdev,
int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
struct ib_udata *udata)
{
- struct ib_pd *ib_pd = ib_qp->pd;
- struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
- struct bnxt_re_dev *rdev = pd->rdev;
- struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_dev_attr *dev_attr;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_re_qp_req ureq;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_pd *pd;
+ struct bnxt_re_qp *qp;
+ struct ib_pd *ib_pd;
+ u32 active_qps;
int rc;
+ ib_pd = ib_qp->pd;
+ pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ rdev = pd->rdev;
+ dev_attr = &rdev->dev_attr;
+ qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ if (udata)
+ if (ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))))
+ return -EFAULT;
+
rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr);
if (!rc) {
rc = -EINVAL;
@@ -1506,12 +1567,12 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
}
qp->rdev = rdev;
- rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, udata);
+ rc = bnxt_re_init_qp_attr(qp, pd, qp_init_attr, uctx, &ureq);
if (rc)
goto fail;
if (qp_init_attr->qp_type == IB_QPT_GSI &&
- !(bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))) {
+ !(bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))) {
rc = bnxt_re_create_gsi_qp(qp, pd, qp_init_attr);
if (rc == -ENODEV)
goto qp_destroy;
@@ -1545,7 +1606,18 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr,
mutex_lock(&rdev->qp_lock);
list_add_tail(&qp->list, &rdev->qp_list);
mutex_unlock(&rdev->qp_lock);
- atomic_inc(&rdev->qp_count);
+ active_qps = atomic_inc_return(&rdev->stats.res.qp_count);
+ if (active_qps > rdev->stats.res.qp_watermark)
+ rdev->stats.res.qp_watermark = active_qps;
+ if (qp_init_attr->qp_type == IB_QPT_RC) {
+ active_qps = atomic_inc_return(&rdev->stats.res.rc_qp_count);
+ if (active_qps > rdev->stats.res.rc_qp_watermark)
+ rdev->stats.res.rc_qp_watermark = active_qps;
+ } else if (qp_init_attr->qp_type == IB_QPT_UD) {
+ active_qps = atomic_inc_return(&rdev->stats.res.ud_qp_count);
+ if (active_qps > rdev->stats.res.ud_qp_watermark)
+ rdev->stats.res.ud_qp_watermark = active_qps;
+ }
return 0;
qp_destroy:
@@ -1646,9 +1718,13 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
if (qplib_srq->cq)
nq = qplib_srq->cq->nq;
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
+ free_page((unsigned long)srq->uctx_srq_page);
+ hash_del(&srq->hash_entry);
+ }
bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
ib_umem_release(srq->umem);
- atomic_dec(&rdev->srq_count);
+ atomic_dec(&rdev->stats.res.srq_count);
if (nq)
nq->budget--;
return 0;
@@ -1692,10 +1768,12 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
{
struct bnxt_qplib_dev_attr *dev_attr;
struct bnxt_qplib_nq *nq = NULL;
+ struct bnxt_re_ucontext *uctx;
struct bnxt_re_dev *rdev;
struct bnxt_re_srq *srq;
struct bnxt_re_pd *pd;
struct ib_pd *ib_pd;
+ u32 active_srqs;
int rc, entries;
ib_pd = ib_srq->pd;
@@ -1715,13 +1793,14 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
goto exit;
}
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
srq->rdev = rdev;
srq->qplib_srq.pd = &pd->qplib_pd;
srq->qplib_srq.dpi = &rdev->dpi_privileged;
/* Allocate 1 more than what's provided so posting max doesn't
* mean empty
*/
- entries = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+ entries = bnxt_re_init_depth(srq_init_attr->attr.max_wr + 1, uctx);
if (entries > dev_attr->max_srq_wqes + 1)
entries = dev_attr->max_srq_wqes + 1;
srq->qplib_srq.max_wqe = entries;
@@ -1747,9 +1826,18 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
}
if (udata) {
- struct bnxt_re_srq_resp resp;
+ struct bnxt_re_srq_resp resp = {};
resp.srqid = srq->qplib_srq.id;
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
+ hash_add(rdev->srq_hash, &srq->hash_entry, srq->qplib_srq.id);
+ srq->uctx_srq_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!srq->uctx_srq_page) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ resp.comp_mask |= BNXT_RE_SRQ_TOGGLE_PAGE_SUPPORT;
+ }
rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
if (rc) {
ibdev_err(&rdev->ibdev, "SRQ copy to udata failed!");
@@ -1760,7 +1848,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
}
if (nq)
nq->budget++;
- atomic_inc(&rdev->srq_count);
+ active_srqs = atomic_inc_return(&rdev->stats.res.srq_count);
+ if (active_srqs > rdev->stats.res.srq_watermark)
+ rdev->stats.res.srq_watermark = active_srqs;
spin_lock_init(&srq->lock);
return 0;
@@ -1783,7 +1873,7 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
switch (srq_attr_mask) {
case IB_SRQ_MAX_WR:
/* SRQ resize is not supported */
- break;
+ return -EINVAL;
case IB_SRQ_LIMIT:
/* Change the SRQ threshold */
if (srq_attr->srq_limit > srq->qplib_srq.max_wqe)
@@ -1798,13 +1888,12 @@ int bnxt_re_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr,
/* On success, update the shadow */
srq->srq_limit = srq_attr->srq_limit;
/* No need to Build and send response back to udata */
- break;
+ return 0;
default:
ibdev_err(&rdev->ibdev,
"Unsupported srq_attr_mask 0x%x", srq_attr_mask);
return -EINVAL;
}
- return 0;
}
int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
@@ -1862,7 +1951,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
int qp_attr_mask)
{
struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp;
- int rc = 0;
+ int rc;
if (qp_attr_mask & IB_QP_STATE) {
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
@@ -2077,6 +2166,9 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
}
if (qp_attr_mask & IB_QP_CAP) {
+ struct bnxt_re_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+
qp->qplib_qp.modify_flags |=
CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE |
CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE |
@@ -2093,7 +2185,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
"Create QP failed - max exceeded");
return -EINVAL;
}
- entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
+ entries = bnxt_re_init_depth(qp_attr->cap.max_send_wr, uctx);
qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
dev_attr->max_qp_wqes + 1);
qp->qplib_qp.sq.q_full_delta = qp->qplib_qp.sq.max_wqe -
@@ -2106,9 +2198,10 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
qp->qplib_qp.sq.q_full_delta -= 1;
qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
if (qp->qplib_qp.rq.max_wqe) {
- entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
+ entries = bnxt_re_init_depth(qp_attr->cap.max_recv_wr, uctx);
qp->qplib_qp.rq.max_wqe =
min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.max_sw_wqe = qp->qplib_qp.rq.max_wqe;
qp->qplib_qp.rq.q_full_delta = qp->qplib_qp.rq.max_wqe -
qp_attr->cap.max_recv_wr;
qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
@@ -2212,7 +2305,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
u8 ip_version = 0;
u16 vlan_id = 0xFFFF;
void *buf;
- int i, rc = 0;
+ int i, rc;
memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
@@ -2250,7 +2343,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
}
is_eth = true;
- is_vlan = (vlan_id && (vlan_id < 0x1000)) ? true : false;
+ is_vlan = vlan_id && (vlan_id < 0x1000);
ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
ip_version, is_udp, 0, &qp->qp1_hdr);
@@ -2433,7 +2526,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
break;
case IB_WR_SEND_WITH_IMM:
wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM;
- wqe->send.imm_data = wr->ex.imm_data;
+ wqe->send.imm_data = be32_to_cpu(wr->ex.imm_data);
break;
case IB_WR_SEND_WITH_INV:
wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV;
@@ -2463,7 +2556,7 @@ static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr,
break;
case IB_WR_RDMA_WRITE_WITH_IMM:
wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM;
- wqe->rdma.imm_data = wr->ex.imm_data;
+ wqe->rdma.imm_data = be32_to_cpu(wr->ex.imm_data);
break;
case IB_WR_RDMA_READ:
wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ;
@@ -2519,11 +2612,6 @@ static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
- /* Need unconditional fence for local invalidate
- * opcode to work as expected.
- */
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
if (wr->send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
if (wr->send_flags & IB_SEND_SOLICITED)
@@ -2546,12 +2634,6 @@ static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
wqe->frmr.levels = qplib_frpl->hwq.level;
wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
- /* Need unconditional fence for reg_mr
- * opcode to function as expected.
- */
-
- wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
-
if (wr->wr.send_flags & IB_SEND_SIGNALED)
wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
@@ -2682,6 +2764,18 @@ bad:
return rc;
}
+static void bnxt_re_legacy_set_uc_fence(struct bnxt_qplib_swqe *wqe)
+{
+ /* Need unconditional fence for non-wire memory opcode
+ * to work as expected.
+ */
+ if (wqe->type == BNXT_QPLIB_SWQE_TYPE_LOCAL_INV ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_REG_MR ||
+ wqe->type == BNXT_QPLIB_SWQE_TYPE_BIND_MW)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+}
+
int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
@@ -2761,8 +2855,11 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
rc = -EINVAL;
goto bad;
}
- if (!rc)
+ if (!rc) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(qp->rdev->chip_ctx))
+ bnxt_re_legacy_set_uc_fence(&wqe);
rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+ }
bad:
if (rc) {
ibdev_err(&qp->rdev->ibdev,
@@ -2787,7 +2884,6 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
struct bnxt_qplib_swqe wqe;
int rc = 0;
- memset(&wqe, 0, sizeof(wqe));
while (wr) {
/* House keeping */
memset(&wqe, 0, sizeof(wqe));
@@ -2875,33 +2971,44 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
/* Completion Queues */
int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
{
- struct bnxt_re_cq *cq;
+ struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_qplib_nq *nq;
struct bnxt_re_dev *rdev;
+ struct bnxt_re_cq *cq;
cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
rdev = cq->rdev;
nq = cq->qplib_cq.nq;
+ cctx = rdev->chip_ctx;
+ if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+ free_page((unsigned long)cq->uctx_cq_page);
+ hash_del(&cq->hash_entry);
+ }
bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
ib_umem_release(cq->umem);
- atomic_dec(&rdev->cq_count);
+ atomic_dec(&rdev->stats.res.cq_count);
nq->budget--;
kfree(cq->cql);
return 0;
}
int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibcq->device, ibdev);
+ struct ib_udata *udata = &attrs->driver_udata;
+ struct bnxt_re_ucontext *uctx =
+ rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
- struct bnxt_re_cq *cq = container_of(ibcq, struct bnxt_re_cq, ib_cq);
- int rc, entries;
- int cqe = attr->cqe;
+ struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_qplib_nq *nq = NULL;
unsigned int nq_alloc_cnt;
+ int cqe = attr->cqe;
+ int rc, entries;
+ u32 active_cqs;
if (attr->flags)
return -EOPNOTSUPP;
@@ -2913,9 +3020,10 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
cq->rdev = rdev;
+ cctx = rdev->chip_ctx;
cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
- entries = roundup_pow_of_two(cqe + 1);
+ entries = bnxt_re_init_depth(cqe + 1, uctx);
if (entries > dev_attr->max_cq_wqes + 1)
entries = dev_attr->max_cq_wqes + 1;
@@ -2923,8 +3031,6 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->qplib_cq.sg_info.pgshft = PAGE_SHIFT;
if (udata) {
struct bnxt_re_cq_req req;
- struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(
- udata, struct bnxt_re_ucontext, ib_uctx);
if (ib_copy_from_udata(&req, udata, sizeof(req))) {
rc = -EFAULT;
goto fail;
@@ -2970,26 +3076,40 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->cq_period = cq->qplib_cq.period;
nq->budget++;
- atomic_inc(&rdev->cq_count);
+ active_cqs = atomic_inc_return(&rdev->stats.res.cq_count);
+ if (active_cqs > rdev->stats.res.cq_watermark)
+ rdev->stats.res.cq_watermark = active_cqs;
spin_lock_init(&cq->cq_lock);
if (udata) {
- struct bnxt_re_cq_resp resp;
+ struct bnxt_re_cq_resp resp = {};
+ if (cctx->modes.toggle_bits & BNXT_QPLIB_CQ_TOGGLE_BIT) {
+ hash_add(rdev->cq_hash, &cq->hash_entry, cq->qplib_cq.id);
+ /* Allocate a page */
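+			/* The page is shared read-only with userspace and
+			 * carries the CQ toggle value.
+			 */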
+ cq->uctx_cq_page = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!cq->uctx_cq_page) {
+ rc = -ENOMEM;
+ goto c2fail;
+ }
+ resp.comp_mask |= BNXT_RE_CQ_TOGGLE_PAGE_SUPPORT;
+ }
resp.cqid = cq->qplib_cq.id;
resp.tail = cq->qplib_cq.hwq.cons;
resp.phase = cq->qplib_cq.period;
resp.rsvd = 0;
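+		/* Copy at most the consumer's buffer length (outlen) so that
+		 * older providers with a shorter response struct still work.
+		 */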
- rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ rc = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
if (rc) {
ibdev_err(&rdev->ibdev, "Failed to copy CQ udata");
bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
- goto c2fail;
+ goto free_mem;
}
}
return 0;
+free_mem:
+ free_page((unsigned long)cq->uctx_cq_page);
c2fail:
ib_umem_release(cq->umem);
fail:
@@ -3044,12 +3164,11 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
return -EINVAL;
}
- entries = roundup_pow_of_two(cqe + 1);
+ uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx);
+ entries = bnxt_re_init_depth(cqe + 1, uctx);
if (entries > dev_attr->max_cq_wqes + 1)
entries = dev_attr->max_cq_wqes + 1;
- uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext,
- ib_uctx);
/* uverbs consumer */
if (ib_copy_from_udata(&req, udata, sizeof(req))) {
rc = -EFAULT;
@@ -3083,6 +3202,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
}
cq->ib_cq.cqe = cq->resize_cqe;
+ atomic_inc(&rdev->stats.res.resize_count);
return 0;
@@ -3319,26 +3439,21 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp,
struct bnxt_re_dev *rdev = gsi_qp->rdev;
struct bnxt_re_sqp_entries *sqp_entry = NULL;
struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp;
+ dma_addr_t shrq_hdr_buf_map;
+ struct ib_sge s_sge[2] = {};
+ struct ib_sge r_sge[2] = {};
struct bnxt_re_ah *gsi_sah;
+ struct ib_recv_wr rwr = {};
+ dma_addr_t rq_hdr_buf_map;
+ struct ib_ud_wr udwr = {};
struct ib_send_wr *swr;
- struct ib_ud_wr udwr;
- struct ib_recv_wr rwr;
+ u32 skip_bytes = 0;
int pkt_type = 0;
- u32 tbl_idx;
void *rq_hdr_buf;
- dma_addr_t rq_hdr_buf_map;
- dma_addr_t shrq_hdr_buf_map;
u32 offset = 0;
- u32 skip_bytes = 0;
- struct ib_sge s_sge[2];
- struct ib_sge r_sge[2];
+ u32 tbl_idx;
int rc;
- memset(&udwr, 0, sizeof(udwr));
- memset(&rwr, 0, sizeof(rwr));
- memset(&s_sge, 0, sizeof(s_sge));
- memset(&r_sge, 0, sizeof(r_sge));
-
swr = &udwr.wr;
tbl_idx = cqe->wr_id;
@@ -3514,7 +3629,7 @@ static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *gsi_sqp,
wc->byte_len = orig_cqe->length;
wc->qp = &gsi_qp->ib_qp;
- wc->ex.imm_data = orig_cqe->immdata;
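+	/* The CQE carries immediate data in little-endian; ib_wc expects
+	 * network byte order, hence the explicit conversion.
+	 */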
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(orig_cqe->immdata));
wc->src_qp = orig_cqe->src_qp;
memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
if (bnxt_re_is_vlan_pkt(orig_cqe, &vlan_id, &sl)) {
@@ -3578,7 +3693,7 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp)
{
struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp;
unsigned long flags;
- int rc = 0;
+ int rc;
spin_lock_irqsave(&qp->sq_lock, flags);
@@ -3659,7 +3774,7 @@ int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
(unsigned long)(cqe->qp_handle),
struct bnxt_re_qp, qplib_qp);
wc->qp = &qp->ib_qp;
- wc->ex.imm_data = cqe->immdata;
+ wc->ex.imm_data = cpu_to_be32(le32_to_cpu(cqe->immdata));
wc->src_qp = cqe->src_qp;
memcpy(wc->smac, cqe->smac, ETH_ALEN);
wc->port_num = 1;
@@ -3768,6 +3883,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr;
+ u32 active_mrs;
int rc;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
@@ -3776,9 +3892,12 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
- mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+ if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING)
+ bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr);
+
/* Allocate and register 0 as the address */
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
if (rc)
@@ -3795,7 +3914,9 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_ATOMIC))
mr->ib_mr.rkey = mr->ib_mr.lkey;
- atomic_inc(&rdev->mr_count);
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
return &mr->ib_mr;
@@ -3828,7 +3949,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
ib_umem_release(mr->ib_umem);
kfree(mr);
- atomic_dec(&rdev->mr_count);
+ atomic_dec(&rdev->stats.res.mr_count);
return rc;
}
@@ -3858,6 +3979,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mr *mr = NULL;
+ u32 active_mrs;
int rc;
if (type != IB_MR_TYPE_MEM_REG) {
@@ -3873,7 +3995,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
- mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR;
+ mr->qplib_mr.access_flags = BNXT_QPLIB_FR_PMR;
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
@@ -3896,7 +4018,9 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
goto fail_mr;
}
- atomic_inc(&rdev->mr_count);
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
return &mr->ib_mr;
fail_mr:
@@ -3914,6 +4038,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
struct bnxt_re_mw *mw;
+ u32 active_mws;
int rc;
mw = kzalloc(sizeof(*mw), GFP_KERNEL);
@@ -3932,7 +4057,9 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type,
}
mw->ib_mw.rkey = mw->qplib_mw.rkey;
- atomic_inc(&rdev->mw_count);
+ active_mws = atomic_inc_return(&rdev->stats.res.mw_count);
+ if (active_mws > rdev->stats.res.mw_watermark)
+ rdev->stats.res.mw_watermark = active_mws;
return &mw->ib_mw;
fail:
@@ -3953,21 +4080,19 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw)
}
kfree(mw);
- atomic_dec(&rdev->mw_count);
+ atomic_dec(&rdev->stats.res.mw_count);
return rc;
}
-/* uverbs */
-struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
- u64 virt_addr, int mr_access_flags,
- struct ib_udata *udata)
+static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 virt_addr,
+ int mr_access_flags, struct ib_umem *umem)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
struct bnxt_re_dev *rdev = pd->rdev;
- struct bnxt_re_mr *mr;
- struct ib_umem *umem;
unsigned long page_size;
+ struct bnxt_re_mr *mr;
int umem_pgs, rc;
+ u32 active_mrs;
if (length > BNXT_RE_MAX_MR_SIZE) {
ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n",
@@ -3975,56 +4100,57 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
return ERR_PTR(-ENOMEM);
}
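+	/* Validate the umem page size up front, before any allocation, so
+	 * that failure needs no unwind.
+	 */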
+ page_size = ib_umem_find_best_pgsz(umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr);
+ if (!page_size) {
+ ibdev_err(&rdev->ibdev, "umem page size unsupported!");
+ return ERR_PTR(-EINVAL);
+ }
+
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mr)
return ERR_PTR(-ENOMEM);
mr->rdev = rdev;
mr->qplib_mr.pd = &pd->qplib_pd;
- mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags);
mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
- rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
- if (rc) {
- ibdev_err(&rdev->ibdev, "Failed to allocate MR");
- goto free_mr;
- }
- /* The fixed portion of the rkey is the same as the lkey */
- mr->ib_mr.rkey = mr->qplib_mr.rkey;
-
- umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
- if (IS_ERR(umem)) {
- ibdev_err(&rdev->ibdev, "Failed to get umem");
- rc = -EFAULT;
- goto free_mrw;
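+	/* Devices without unified alloc+register support need an explicit
+	 * MRW allocation first; unified devices fold the allocation into
+	 * the REGISTER_MR command.
+	 */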
+ if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) {
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc);
+ rc = -EIO;
+ goto free_mr;
+ }
+ /* The fixed portion of the rkey is the same as the lkey */
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+ } else {
+ mr->qplib_mr.flags = CMDQ_REGISTER_MR_FLAGS_ALLOC_MR;
}
mr->ib_umem = umem;
-
mr->qplib_mr.va = virt_addr;
- page_size = ib_umem_find_best_pgsz(
- umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr);
- if (!page_size) {
- ibdev_err(&rdev->ibdev, "umem page size unsupported!");
- rc = -EFAULT;
- goto free_umem;
- }
mr->qplib_mr.total_size = length;
+ if (mr_access_flags & IB_ACCESS_RELAXED_ORDERING)
+ bnxt_re_check_and_set_relaxed_ordering(rdev, &mr->qplib_mr);
+
umem_pgs = ib_umem_num_dma_blocks(umem, page_size);
rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem,
umem_pgs, page_size);
if (rc) {
- ibdev_err(&rdev->ibdev, "Failed to register user MR");
- goto free_umem;
+ ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc);
+ rc = -EIO;
+ goto free_mrw;
}
mr->ib_mr.lkey = mr->qplib_mr.lkey;
mr->ib_mr.rkey = mr->qplib_mr.lkey;
- atomic_inc(&rdev->mr_count);
+ active_mrs = atomic_inc_return(&rdev->stats.res.mr_count);
+ if (active_mrs > rdev->stats.res.mr_watermark)
+ rdev->stats.res.mr_watermark = active_mrs;
return &mr->ib_mr;
-free_umem:
- ib_umem_release(umem);
+
free_mrw:
bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
free_mr:
@@ -4032,6 +4158,49 @@ free_mr:
return ERR_PTR(rc);
}
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct ib_umem *umem;
+ struct ib_mr *ib_mr;
+
+ umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+
+ ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem);
+ if (IS_ERR(ib_mr))
+ ib_umem_release(umem);
+ return ib_mr;
+}
+
+struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
+ u64 length, u64 virt_addr, int fd,
+ int mr_access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct ib_umem *umem;
+ struct ib_mr *ib_mr;
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(&rdev->ibdev, start, length,
+ fd, mr_access_flags);
+ if (IS_ERR(umem_dmabuf))
+ return ERR_CAST(umem_dmabuf);
+
+ umem = &umem_dmabuf->umem;
+
+ ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem);
+ if (IS_ERR(ib_mr))
+ ib_umem_release(umem);
+ return ib_mr;
+}
+
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
{
struct ib_device *ibdev = ctx->device;
@@ -4041,6 +4210,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
struct bnxt_re_user_mmap_entry *entry;
struct bnxt_re_uctx_resp resp = {};
+ struct bnxt_re_uctx_req ureq = {};
u32 chip_met_rev_num = 0;
int rc;
@@ -4075,9 +4245,6 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
resp.cqe_sz = sizeof(struct cq_base);
resp.max_cqd = dev_attr->max_cq_wqes;
- resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
- resp.mode = rdev->chip_ctx->modes.wqe_mode;
-
if (rdev->chip_ctx->modes.db_push)
resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED;
@@ -4087,6 +4254,27 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
goto cfail;
}
uctx->shpage_mmap = &entry->rdma_entry;
+ if (rdev->pacing.dbr_pacing)
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED;
+
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED;
+
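+	/* Consume the optional request struct from newer providers and
+	 * negotiate the pow2 and variable-WQE capabilities from it.
+	 */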
+ if (udata->inlen >= sizeof(ureq)) {
+ rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq)));
+ if (rc)
+ goto cfail;
+ if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_POW2_SUPPORT) {
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_POW2_DISABLED;
+ uctx->cmask |= BNXT_RE_UCNTX_CAP_POW2_DISABLED;
+ }
+ if (ureq.comp_mask & BNXT_RE_COMP_MASK_REQ_UCNTX_VAR_WQE_SUPPORT) {
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
+ resp.mode = rdev->chip_ctx->modes.wqe_mode;
+ if (resp.mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
+ uctx->cmask |= BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+ }
+ }
rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
if (rc) {
@@ -4125,6 +4313,32 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
}
}
+static struct bnxt_re_cq *bnxt_re_search_for_cq(struct bnxt_re_dev *rdev, u32 cq_id)
+{
+ struct bnxt_re_cq *cq = NULL, *tmp_cq;
+
+ hash_for_each_possible(rdev->cq_hash, tmp_cq, hash_entry, cq_id) {
+ if (tmp_cq->qplib_cq.id == cq_id) {
+ cq = tmp_cq;
+ break;
+ }
+ }
+ return cq;
+}
+
+static struct bnxt_re_srq *bnxt_re_search_for_srq(struct bnxt_re_dev *rdev, u32 srq_id)
+{
+ struct bnxt_re_srq *srq = NULL, *tmp_srq;
+
+ hash_for_each_possible(rdev->srq_hash, tmp_srq, hash_entry, srq_id) {
+ if (tmp_srq->qplib_srq.id == srq_id) {
+ srq = tmp_srq;
+ break;
+ }
+ }
+ return srq;
+}
+
/* Helper function to mmap the virtual memory from user app */
int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
{
@@ -4159,6 +4373,20 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
case BNXT_RE_MMAP_SH_PAGE:
ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg));
break;
+ case BNXT_RE_MMAP_DBR_BAR:
+ pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+ ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+ pgprot_noncached(vma->vm_page_prot),
+ rdma_entry);
+ break;
+ case BNXT_RE_MMAP_DBR_PAGE:
+ case BNXT_RE_MMAP_TOGGLE_PAGE:
+		/* These pages are read-only for userspace; reject writable mappings */
+ if (vma->vm_flags & VM_WRITE)
+ return -EFAULT;
+ ret = vm_insert_page(vma, vma->vm_start,
+ virt_to_page((void *)bnxt_entry->mem_offset));
+ break;
default:
ret = -EINVAL;
break;
@@ -4178,6 +4406,15 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
kfree(bnxt_entry);
}
+static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_ucontext *uctx;
+
+ uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
+ bnxt_re_pacing_alert(uctx->rdev);
+ return 0;
+}
+
static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
@@ -4190,7 +4427,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *
u64 mmap_offset;
u32 length;
u32 dpi;
- u64 dbr;
+ u64 addr;
int err;
uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx);
@@ -4212,19 +4449,30 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *
return -ENOMEM;
length = PAGE_SIZE;
dpi = uctx->wcdpi.dpi;
- dbr = (u64)uctx->wcdpi.umdbr;
+ addr = (u64)uctx->wcdpi.umdbr;
mmap_flag = BNXT_RE_MMAP_WC_DB;
} else {
return -EINVAL;
}
break;
+ case BNXT_RE_ALLOC_DBR_BAR_PAGE:
+ length = PAGE_SIZE;
+ addr = (u64)rdev->pacing.dbr_bar_addr;
+ mmap_flag = BNXT_RE_MMAP_DBR_BAR;
+ break;
+
+ case BNXT_RE_ALLOC_DBR_PAGE:
+ length = PAGE_SIZE;
+ addr = (u64)rdev->pacing.dbr_page;
+ mmap_flag = BNXT_RE_MMAP_DBR_PAGE;
+ break;
default:
return -EOPNOTSUPP;
}
- entry = bnxt_re_mmap_entry_insert(uctx, dbr, mmap_flag, &mmap_offset);
+ entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mmap_offset);
if (!entry)
return -ENOMEM;
@@ -4264,6 +4512,9 @@ static int alloc_page_obj_cleanup(struct ib_uobject *uobject,
uctx->wcdpi.dbr = NULL;
}
break;
+ case BNXT_RE_MMAP_DBR_BAR:
+ case BNXT_RE_MMAP_DBR_PAGE:
+ break;
default:
goto exit;
}
@@ -4301,7 +4552,133 @@ DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE,
&UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE),
&UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE));
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV);
+
+DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV,
+ &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV));
+
+/* Toggle MEM */
+static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_TOGGLE_MEM_HANDLE);
+ enum bnxt_re_mmap_flag mmap_flag = BNXT_RE_MMAP_TOGGLE_PAGE;
+ enum bnxt_re_get_toggle_mem_type res_type;
+ struct bnxt_re_user_mmap_entry *entry;
+ struct bnxt_re_ucontext *uctx;
+ struct ib_ucontext *ib_uctx;
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_srq *srq;
+ u32 length = PAGE_SIZE;
+ struct bnxt_re_cq *cq;
+ u64 mem_offset;
+ u32 offset = 0;
+ u64 addr = 0;
+ u32 res_id;
+ int err;
+
+ ib_uctx = ib_uverbs_get_ucontext(attrs);
+ if (IS_ERR(ib_uctx))
+ return PTR_ERR(ib_uctx);
+
+ err = uverbs_get_const(&res_type, attrs, BNXT_RE_TOGGLE_MEM_TYPE);
+ if (err)
+ return err;
+
+ uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+ rdev = uctx->rdev;
+ err = uverbs_copy_from(&res_id, attrs, BNXT_RE_TOGGLE_MEM_RES_ID);
+ if (err)
+ return err;
+
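+	/* Map the userspace-supplied resource type and id to the kernel
+	 * page that carries that resource's toggle value.
+	 */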
+ switch (res_type) {
+ case BNXT_RE_CQ_TOGGLE_MEM:
+ cq = bnxt_re_search_for_cq(rdev, res_id);
+ if (!cq)
+ return -EINVAL;
+
+ addr = (u64)cq->uctx_cq_page;
+ break;
+ case BNXT_RE_SRQ_TOGGLE_MEM:
+ srq = bnxt_re_search_for_srq(rdev, res_id);
+ if (!srq)
+ return -EINVAL;
+
+ addr = (u64)srq->uctx_srq_page;
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mem_offset);
+ if (!entry)
+ return -ENOMEM;
+
+ uobj->object = entry;
+ uverbs_finalize_uobj_create(attrs, BNXT_RE_TOGGLE_MEM_HANDLE);
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+ &mem_offset, sizeof(mem_offset));
+ if (err)
+ return err;
+
+ err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+ &length, sizeof(length));
+ if (err)
+ return err;
+
+	err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+			     &offset, sizeof(offset));
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int get_toggle_mem_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct bnxt_re_user_mmap_entry *entry = uobject->object;
+
+ rdma_user_mmap_entry_remove(&entry->rdma_entry);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM,
+ UVERBS_ATTR_IDR(BNXT_RE_TOGGLE_MEM_HANDLE,
+ BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(BNXT_RE_TOGGLE_MEM_TYPE,
+ enum bnxt_re_get_toggle_mem_type,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(BNXT_RE_TOGGLE_MEM_RES_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_PAGE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(BNXT_RE_TOGGLE_MEM_MMAP_LENGTH,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM,
+ UVERBS_ATTR_IDR(BNXT_RE_RELEASE_TOGGLE_MEM_HANDLE,
+ BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_GET_TOGGLE_MEM,
+ UVERBS_TYPE_ALLOC_IDR(get_toggle_mem_obj_cleanup),
+ &UVERBS_METHOD(BNXT_RE_METHOD_GET_TOGGLE_MEM),
+ &UVERBS_METHOD(BNXT_RE_METHOD_RELEASE_TOGGLE_MEM));
+
const struct uapi_definition bnxt_re_uapi_defs[] = {
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_GET_TOGGLE_MEM),
{}
};
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 32d9e9d09791..b789e47ec97a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -77,6 +77,8 @@ struct bnxt_re_srq {
struct bnxt_qplib_srq qplib_srq;
struct ib_umem *umem;
spinlock_t lock; /* protect srq */
+ void *uctx_srq_page;
+ struct hlist_node hash_entry;
};
struct bnxt_re_qp {
@@ -108,6 +110,8 @@ struct bnxt_re_cq {
struct ib_umem *umem;
struct ib_umem *resize_umem;
int resize_cqe;
+ void *uctx_cq_page;
+ struct hlist_node hash_entry;
};
struct bnxt_re_mr {
@@ -140,12 +144,16 @@ struct bnxt_re_ucontext {
void *shpg;
spinlock_t sh_lock; /* protect shpg */
struct rdma_user_mmap_entry *shpage_mmap;
+ u64 cmask;
};
enum bnxt_re_mmap_flag {
BNXT_RE_MMAP_SH_PAGE,
BNXT_RE_MMAP_UC_DB,
BNXT_RE_MMAP_WC_DB,
+ BNXT_RE_MMAP_DBR_PAGE,
+ BNXT_RE_MMAP_DBR_BAR,
+ BNXT_RE_MMAP_TOGGLE_PAGE,
};
struct bnxt_re_user_mmap_entry {
@@ -165,6 +173,26 @@ static inline u16 bnxt_re_get_rwqe_size(int nsge)
return sizeof(struct rq_wqe_hdr) + (nsge * sizeof(struct sq_sge));
}
+enum {
+ BNXT_RE_UCNTX_CAP_POW2_DISABLED = 0x1ULL,
+ BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED = 0x2ULL,
+};
+
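+/* Round queue depths up to a power of two unless the user context
+ * negotiated pow2-disabled behaviour.
+ */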
+static inline u32 bnxt_re_init_depth(u32 ent, struct bnxt_re_ucontext *uctx)
+{
+	return uctx ? (uctx->cmask & BNXT_RE_UCNTX_CAP_POW2_DISABLED) ?
+		ent : roundup_pow_of_two(ent) : roundup_pow_of_two(ent);
+}
+
+static inline bool bnxt_re_is_var_size_supported(struct bnxt_re_dev *rdev,
+ struct bnxt_re_ucontext *uctx)
+{
+ if (uctx)
+ return uctx->cmask & BNXT_RE_UCNTX_CAP_VAR_WQE_ENABLED;
+ else
+ return rdev->chip_ctx->modes.wqe_mode;
+}
+
int bnxt_re_query_device(struct ib_device *ibdev,
struct ib_device_attr *ib_attr,
struct ib_udata *udata);
@@ -209,7 +237,7 @@ int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
@@ -227,6 +255,10 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw);
struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int mr_access_flags,
struct ib_udata *udata);
+struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start,
+ u64 length, u64 virt_addr,
+ int fd, int mr_access_flags,
+ struct uverbs_attr_bundle *attrs);
int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata);
void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 63e98e2d3596..777068de4bbc 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -54,6 +54,7 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
+#include <linux/hashtable.h>
#include "bnxt_ulp.h"
#include "roce_hsi.h"
@@ -71,7 +72,7 @@ static char version[] =
BNXT_RE_DESC "\n";
MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
-MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_DESCRIPTION(BNXT_RE_DESC);
MODULE_LICENSE("Dual BSD/GPL");
/* globals */
@@ -82,11 +83,12 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
static int bnxt_re_netdev_event(struct notifier_block *notifier,
unsigned long event, void *ptr);
static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
-static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev);
+static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type);
static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
u32 *offset);
+static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable);
static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *cctx;
@@ -107,9 +109,14 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
dev_info(rdev_to_dev(rdev),
"Couldn't get DB bar size, Low latency framework is disabled\n");
/* set register offsets for both UC and WC */
- res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
- BNXT_QPLIB_DBR_PF_DB_OFFSET;
- res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
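+	/* Gen P7 uses the UC offset reported by HWRM qcfg and places the
+	 * WC region after the L2 doorbell area; older chips use fixed
+	 * PF/VF offsets.
+	 */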
+ if (bnxt_qplib_is_chip_gen_p7(cctx)) {
+ res->dpi_tbl.ucreg.offset = offset;
+ res->dpi_tbl.wcreg.offset = en_dev->l2_db_size;
+ } else {
+ res->dpi_tbl.ucreg.offset = res->is_vf ? BNXT_QPLIB_DBR_VF_DB_OFFSET :
+ BNXT_QPLIB_DBR_PF_DB_OFFSET;
+ res->dpi_tbl.wcreg.offset = res->dpi_tbl.ucreg.offset;
+ }
/* If WC mapping is disabled by L2 driver then en_dev->l2_db_size
* is equal to the DB-Bar actual size. This indicates that L2
@@ -123,16 +130,20 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
}
}
-static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode)
+static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *cctx;
cctx = rdev->chip_ctx;
- cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
- mode : BNXT_QPLIB_WQE_MODE_STATIC;
+ cctx->modes.wqe_mode = bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx) ?
+ BNXT_QPLIB_WQE_MODE_VARIABLE : BNXT_QPLIB_WQE_MODE_STATIC;
if (bnxt_re_hwrm_qcaps(rdev))
dev_err(rdev_to_dev(rdev),
"Failed to query hwrm qcaps\n");
+ if (bnxt_qplib_is_chip_gen_p7(rdev->chip_ctx)) {
+ cctx->modes.toggle_bits |= BNXT_QPLIB_CQ_TOGGLE_BIT;
+ cctx->modes.toggle_bits |= BNXT_QPLIB_SRQ_TOGGLE_BIT;
+ }
}
static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
@@ -150,7 +161,7 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev)
kfree(chip_ctx);
}
-static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
+static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
@@ -158,6 +169,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
en_dev = rdev->en_dev;
+ rdev->qplib_res.pdev = en_dev->pdev;
chip_ctx = kzalloc(sizeof(*chip_ctx), GFP_KERNEL);
if (!chip_ctx)
return -ENOMEM;
@@ -172,7 +184,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode)
rdev->qplib_res.dattr = &rdev->dev_attr;
rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev);
- bnxt_re_set_drv_mode(rdev, wqe_mode);
+ bnxt_re_set_drv_mode(rdev);
bnxt_re_set_db_offset(rdev);
rc = bnxt_qplib_map_db_bar(&rdev->qplib_res);
@@ -215,7 +227,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev)
ctx->srqc_count = min_t(u32, BNXT_RE_MAX_SRQC_COUNT,
attr->max_srq);
ctx->cq_count = min_t(u32, BNXT_RE_MAX_CQ_COUNT, attr->max_cq);
- if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
rdev->qplib_ctx.tqm_ctx.qcount[i] =
rdev->dev_attr.tqm_alloc_reqs[i];
@@ -264,7 +276,7 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
memset(&rdev->qplib_ctx.vf_res, 0, sizeof(struct bnxt_qplib_vf_res));
bnxt_re_limit_pf_res(rdev);
- num_vfs = bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
+ num_vfs = bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx) ?
BNXT_RE_GEN_P5_MAX_VF : rdev->num_vfs;
if (num_vfs)
bnxt_re_limit_vf_res(&rdev->qplib_ctx, num_vfs);
@@ -272,11 +284,8 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
{
-
- if (test_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags))
- return;
rdev->num_vfs = pci_sriov_get_totalvfs(rdev->en_dev->pdev);
- if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) {
bnxt_re_set_resource_limits(rdev);
bnxt_qplib_set_func_resources(&rdev->qplib_res, &rdev->rcfw,
&rdev->qplib_ctx);
@@ -285,21 +294,31 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
static void bnxt_re_shutdown(struct auxiliary_device *adev)
{
- struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
- if (!rdev)
+ if (!en_info)
return;
+
+ rdev = en_info->rdev;
ib_unregister_device(&rdev->ibdev);
- bnxt_re_dev_uninit(rdev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
}
static void bnxt_re_stop_irq(void *handle)
{
- struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
- struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_re_dev *rdev;
struct bnxt_qplib_nq *nq;
int indx;
+ if (!en_info)
+ return;
+
+ rdev = en_info->rdev;
+ rcfw = &rdev->rcfw;
+
for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) {
nq = &rdev->nq[indx - 1];
bnxt_qplib_nq_stop_irq(nq, false);
@@ -310,12 +329,19 @@ static void bnxt_re_stop_irq(void *handle)
static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
{
- struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle;
- struct bnxt_msix_entry *msix_ent = rdev->en_dev->msix_entries;
- struct bnxt_qplib_rcfw *rcfw = &rdev->rcfw;
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_msix_entry *msix_ent;
+ struct bnxt_qplib_rcfw *rcfw;
+ struct bnxt_re_dev *rdev;
struct bnxt_qplib_nq *nq;
int indx, rc;
+ if (!en_info)
+ return;
+
+ rdev = en_info->rdev;
+ msix_ent = rdev->en_dev->msix_entries;
+ rcfw = &rdev->rcfw;
if (!ent) {
/* Not setting the f/w timeout bit in rcfw.
* During the driver unload the first command
@@ -360,14 +386,9 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev;
- int rc = 0;
en_dev = rdev->en_dev;
-
- rc = bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev);
- if (!rc)
- rdev->qplib_res.pdev = rdev->en_dev->pdev;
- return rc;
+ return bnxt_register_dev(en_dev, &bnxt_re_ulp_ops, rdev->adev);
}
static void bnxt_re_init_hwrm_hdr(struct input *hdr, u16 opcd)
@@ -395,10 +416,9 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
struct bnxt_en_dev *en_dev = rdev->en_dev;
struct hwrm_func_qcfg_output resp = {0};
struct hwrm_func_qcfg_input req = {0};
- struct bnxt_fw_msg fw_msg;
+ struct bnxt_fw_msg fw_msg = {};
int rc;
- memset(&fw_msg, 0, sizeof(fw_msg));
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG);
req.fid = cpu_to_le16(0xffff);
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
@@ -419,6 +439,7 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
struct hwrm_func_qcaps_input req = {};
struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_fw_msg fw_msg = {};
+ u32 flags_ext2;
int rc;
cctx = rdev->chip_ctx;
@@ -432,9 +453,252 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev)
return rc;
cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE;
+ flags_ext2 = le32_to_cpu(resp.flags_ext2);
+	cctx->modes.dbr_pacing =
+		(flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED) ||
+		(flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_V0_SUPPORTED);
+ return 0;
+}
+
+static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ struct hwrm_func_dbr_pacing_qcfg_output resp = {};
+ struct hwrm_func_dbr_pacing_qcfg_input req = {};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_qplib_chip_ctx *cctx;
+ struct bnxt_fw_msg fw_msg = {};
+ int rc;
+
+ cctx = rdev->chip_ctx;
+ bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = bnxt_send_msg(en_dev, &fw_msg);
+ if (rc)
+ return rc;
+
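+	/* Use the DB FIFO register only when FW reports it in GRC address
+	 * space; mask off the address-space bits to get the GRC offset.
+	 */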
+ if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
+ FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) ==
+ FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC)
+ cctx->dbr_stat_db_fifo =
+ le32_to_cpu(resp.dbr_stat_db_fifo_reg) &
+ ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK;
+
+ pacing_data->fifo_max_depth = le32_to_cpu(resp.dbr_stat_db_max_fifo_depth);
+ if (!pacing_data->fifo_max_depth)
+ pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH(cctx);
+ pacing_data->fifo_room_mask = le32_to_cpu(resp.dbr_stat_db_fifo_reg_fifo_room_mask);
+ pacing_data->fifo_room_shift = resp.dbr_stat_db_fifo_reg_fifo_room_shift;
+
+ return 0;
+}
+
+/* Update the pacing tunable parameters to the default values */
+static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+
+ pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing;
+ pacing_data->pacing_th = rdev->pacing.pacing_algo_th;
+ pacing_data->alarm_th =
+ pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
+}
+
+static u32 __get_fifo_occupancy(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ u32 read_val, fifo_occup;
+
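+	/* Occupancy is the max depth minus the free room advertised by the
+	 * FIFO register.
+	 */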
+ read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
+ fifo_occup = pacing_data->fifo_max_depth -
+ ((read_val & pacing_data->fifo_room_mask) >>
+ pacing_data->fifo_room_shift);
+ return fifo_occup;
+}
+
+static bool is_dbr_fifo_full(struct bnxt_re_dev *rdev)
+{
+ u32 max_occup, fifo_occup;
+
+ fifo_occup = __get_fifo_occupancy(rdev);
+ max_occup = BNXT_RE_MAX_FIFO_DEPTH(rdev->chip_ctx) - 1;
+	return fifo_occup == max_occup;
+}
+
+static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data;
+ u32 fifo_occup;
+
+	/* The loop shouldn't run infinitely: occupancy usually drops below
+	 * the pacing algorithm threshold soon after pacing kicks in.
+	 */
+ while (1) {
+ fifo_occup = __get_fifo_occupancy(rdev);
+		/* FIFO occupancy cannot be greater than the max FIFO depth */
+ if (fifo_occup > pacing_data->fifo_max_depth)
+ break;
+
+ if (fifo_occup < pacing_data->pacing_th)
+ break;
+ }
+}
+
+static void bnxt_re_db_fifo_check(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ dbq_fifo_check_work);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+ u32 pacing_save;
+
+ if (!mutex_trylock(&rdev->pacing.dbq_lock))
+ return;
+ pacing_data = rdev->qplib_res.pacing_data;
+ pacing_save = rdev->pacing.do_pacing_save;
+ __wait_for_fifo_occupancy_below_th(rdev);
+ cancel_delayed_work_sync(&rdev->dbq_pacing_work);
+ if (pacing_save > rdev->pacing.dbr_def_do_pacing) {
+ /* Double the do_pacing value during the congestion */
+ pacing_save = pacing_save << 1;
+ } else {
+		/*
+		 * When new congestion is detected, increase do_pacing 8x and
+		 * pacing_th 4x. The larger threshold gives the queue room to
+		 * drain without going empty and to grow without immediately
+		 * raising another alarm.
+		 */
+ pacing_save = pacing_save << 3;
+ pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4;
+ }
+
+ if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING)
+ pacing_save = BNXT_RE_MAX_DBR_DO_PACING;
+
+ pacing_data->do_pacing = pacing_save;
+ rdev->pacing.do_pacing_save = pacing_data->do_pacing;
+ pacing_data->alarm_th =
+ pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE;
+ schedule_delayed_work(&rdev->dbq_pacing_work,
+ msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
+ rdev->stats.pacing.alerts++;
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
+
+static void bnxt_re_pacing_timer_exp(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ dbq_pacing_work.work);
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+ u32 fifo_occup;
+
+ if (!mutex_trylock(&rdev->pacing.dbq_lock))
+ return;
+
+ pacing_data = rdev->qplib_res.pacing_data;
+ fifo_occup = __get_fifo_occupancy(rdev);
+
+ if (fifo_occup > pacing_data->pacing_th)
+ goto restart_timer;
+
+ /*
+	 * Instead of immediately reverting to the default do_pacing,
+	 * reduce it by 1/8 and restart the timer.
+ */
+ pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3);
+ pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing);
+ if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) {
+ bnxt_re_set_default_pacing_data(rdev);
+ rdev->stats.pacing.complete++;
+ goto dbq_unlock;
+ }
+
+restart_timer:
+ schedule_delayed_work(&rdev->dbq_pacing_work,
+ msecs_to_jiffies(rdev->pacing.dbq_pacing_time));
+ rdev->stats.pacing.resched++;
+dbq_unlock:
+ rdev->pacing.do_pacing_save = pacing_data->do_pacing;
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
+
+void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_qplib_db_pacing_data *pacing_data;
+
+ if (!rdev->pacing.dbr_pacing)
+ return;
+ mutex_lock(&rdev->pacing.dbq_lock);
+ pacing_data = rdev->qplib_res.pacing_data;
+
+ /*
+ * Increase the alarm_th to max so that other user lib instances do not
+ * keep alerting the driver.
+ */
+ pacing_data->alarm_th = pacing_data->fifo_max_depth;
+ pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING;
+ cancel_work_sync(&rdev->dbq_fifo_check_work);
+ schedule_work(&rdev->dbq_fifo_check_work);
+ mutex_unlock(&rdev->pacing.dbq_lock);
+}
+
+static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev)
+{
+	/* Allocate a page shared with user applications for the pacing data */
+ rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL);
+ if (!rdev->pacing.dbr_page)
+ return -ENOMEM;
+
+ memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE);
+ rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page;
+
+ if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) {
+ free_page((u64)rdev->pacing.dbr_page);
+ rdev->pacing.dbr_page = NULL;
+ return -EIO;
+ }
+
+ /* MAP HW window 2 for reading db fifo depth */
+ writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK,
+ rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4);
+ rdev->pacing.dbr_db_fifo_reg_off =
+ (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) +
+ BNXT_RE_GRC_FIFO_REG_BASE;
+ rdev->pacing.dbr_bar_addr =
+ pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off;
+
+ if (is_dbr_fifo_full(rdev)) {
+ free_page((u64)rdev->pacing.dbr_page);
+ rdev->pacing.dbr_page = NULL;
+ return -EIO;
+ }
+
+ rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD;
+ rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME;
+ rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION;
+ rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing;
+ rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off;
+ bnxt_re_set_default_pacing_data(rdev);
+ /* Initialize worker for DBR Pacing */
+ INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check);
+ INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp);
return 0;
}
+static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev)
+{
+ cancel_work_sync(&rdev->dbq_fifo_check_work);
+ cancel_delayed_work_sync(&rdev->dbq_pacing_work);
+ if (rdev->pacing.dbr_page)
+ free_page((u64)rdev->pacing.dbr_page);
+
+ rdev->pacing.dbr_page = NULL;
+ rdev->pacing.dbr_pacing = false;
+}
+
static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev,
u16 fw_ring_id, int type)
{
@@ -652,6 +916,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.query_qp = bnxt_re_query_qp,
.query_srq = bnxt_re_query_srq,
.reg_user_mr = bnxt_re_reg_user_mr,
+ .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf,
.req_notify_cq = bnxt_re_req_notify_cq,
.resize_cq = bnxt_re_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah),
@@ -711,13 +976,14 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv,
rdev->id = rdev->en_dev->pdev->devfn;
INIT_LIST_HEAD(&rdev->qp_list);
mutex_init(&rdev->qp_lock);
- atomic_set(&rdev->qp_count, 0);
- atomic_set(&rdev->cq_count, 0);
- atomic_set(&rdev->srq_count, 0);
- atomic_set(&rdev->mr_count, 0);
- atomic_set(&rdev->mw_count, 0);
- atomic_set(&rdev->ah_count, 0);
- atomic_set(&rdev->pd_count, 0);
+ mutex_init(&rdev->pacing.dbq_lock);
+ atomic_set(&rdev->stats.res.qp_count, 0);
+ atomic_set(&rdev->stats.res.cq_count, 0);
+ atomic_set(&rdev->stats.res.srq_count, 0);
+ atomic_set(&rdev->stats.res.mr_count, 0);
+ atomic_set(&rdev->stats.res.mw_count, 0);
+ atomic_set(&rdev->stats.res.ah_count, 0);
+ atomic_set(&rdev->stats.res.pd_count, 0);
rdev->cosq[0] = 0xFFFF;
rdev->cosq[1] = 0xFFFF;
@@ -759,7 +1025,10 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
struct bnxt_re_qp *qp)
{
- struct ib_event event;
+	/* NULL when the QP has no SRQ attached */
+	struct bnxt_re_srq *srq = qp->qplib_qp.srq ?
+		container_of(qp->qplib_qp.srq, struct bnxt_re_srq, qplib_srq) :
+		NULL;
+ struct creq_qp_error_notification *err_event;
+ struct ib_event event = {};
unsigned int flags;
if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR &&
@@ -769,15 +1038,147 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
bnxt_re_unlock_cqs(qp, flags);
}
- memset(&event, 0, sizeof(event));
- if (qp->qplib_qp.srq) {
- event.device = &qp->rdev->ibdev;
- event.element.qp = &qp->ib_qp;
- event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ event.device = &qp->rdev->ibdev;
+ event.element.qp = &qp->ib_qp;
+ event.event = IB_EVENT_QP_FATAL;
+
+ err_event = (struct creq_qp_error_notification *)qp_event;
+
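+	/* Map the firmware requester/responder error reasons to the closest
+	 * ib_event type; unrecognized reasons stay IB_EVENT_QP_FATAL.
+	 */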
+ switch (err_event->req_err_state_reason) {
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+
+ default:
+ break;
}
- if (event.device && qp->ib_qp.event_handler)
+ switch (err_event->res_err_state_reason) {
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR:
+ event.event = IB_EVENT_QP_ACCESS_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
+ event.event = IB_EVENT_QP_REQ_ERR;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
+ event.event = IB_EVENT_QP_FATAL;
+ break;
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
+ case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
+ if (srq)
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ break;
+ }
+
+ if (err_event->res_err_state_reason || err_event->req_err_state_reason) {
+ ibdev_dbg(&qp->rdev->ibdev,
+ "%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
+ __func__, rdma_is_kernel_res(&qp->ib_qp.res) ? "kernel" : "user",
+ qp->qplib_qp.id,
+ err_event->sq_cons_idx,
+ err_event->rq_cons_idx,
+ err_event->req_slow_path_state,
+ err_event->req_err_state_reason,
+ err_event->res_slow_path_state,
+ err_event->res_err_state_reason);
+ } else {
+ if (srq)
+ event.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ }
+
+ if (event.event == IB_EVENT_SRQ_ERR && srq->ib_srq.event_handler) {
+ (*srq->ib_srq.event_handler)(&event,
+ srq->ib_srq.srq_context);
+ } else if (event.device && qp->ib_qp.event_handler) {
qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
+ }
+
+ return 0;
+}
+
+static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
+{
+ struct creq_cq_error_notification *cqerr;
+ struct ib_event ibevent = {};
+
+ cqerr = event;
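+	/* Every recognized FW CQ error maps to the single IB_EVENT_CQ_ERR. */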
+ switch (cqerr->cq_err_reason) {
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
+ case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
+ ibevent.event = IB_EVENT_CQ_ERR;
+ break;
+ default:
+ break;
+ }
+
+ if (ibevent.event == IB_EVENT_CQ_ERR && cq->ib_cq.event_handler) {
+ ibevent.element.cq = &cq->ib_cq;
+ ibevent.device = &cq->rdev->ibdev;
+
+ ibdev_dbg(&cq->rdev->ibdev,
+ "%s err reason %d\n", __func__, cqerr->cq_err_reason);
+ cq->ib_cq.event_handler(&ibevent, cq->ib_cq.cq_context);
+ }
return 0;
}
@@ -785,6 +1186,10 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
void *obj)
{
+ struct bnxt_qplib_qp *lib_qp;
+ struct bnxt_qplib_cq *lib_cq;
+ struct bnxt_re_qp *qp;
+ struct bnxt_re_cq *cq;
int rc = 0;
u8 event;
@@ -792,11 +1197,19 @@ static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
return rc; /* QP was already dead, still return success */
event = affi_async->event;
- if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
- struct bnxt_qplib_qp *lib_qp = obj;
- struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
- qplib_qp);
+ switch (event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ lib_qp = obj;
+ qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp);
rc = bnxt_re_handle_qp_async_event(affi_async, qp);
+ break;
+ case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
+ lib_cq = obj;
+ cq = container_of(lib_cq, struct bnxt_re_cq, qplib_cq);
+ rc = bnxt_re_handle_cq_async_error(affi_async, cq);
+ break;
+ default:
+ rc = -EINVAL;
}
return rc;
}
@@ -830,13 +1243,10 @@ static int bnxt_re_srqn_handler(struct bnxt_qplib_nq *nq,
ib_event.device = &srq->rdev->ibdev;
ib_event.element.srq = &srq->ib_srq;
- if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
- ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
- else
- ib_event.event = IB_EVENT_SRQ_ERR;
if (srq->ib_srq.event_handler) {
- /* Lock event_handler? */
+ if (event == NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT)
+ ib_event.event = IB_EVENT_SRQ_LIMIT_REACHED;
(*srq->ib_srq.event_handler)(&ib_event,
srq->ib_srq.srq_context);
}
@@ -848,25 +1258,19 @@ static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
{
struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
qplib_cq);
+ u32 *cq_ptr;
if (cq->ib_cq.comp_handler) {
- /* Lock comp_handler? */
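+		/* Publish the latest toggle value to the page shared with
+		 * userspace before invoking the completion handler.
+		 */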
+ if (cq->uctx_cq_page) {
+ cq_ptr = (u32 *)cq->uctx_cq_page;
+ *cq_ptr = cq->qplib_cq.toggle;
+ }
(*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
}
return 0;
}
-#define BNXT_RE_GEN_P5_PF_NQ_DB 0x10000
-#define BNXT_RE_GEN_P5_VF_NQ_DB 0x4000
-static u32 bnxt_re_get_nqdb_offset(struct bnxt_re_dev *rdev, u16 indx)
-{
- return bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) ?
- (rdev->is_virtfn ? BNXT_RE_GEN_P5_VF_NQ_DB :
- BNXT_RE_GEN_P5_PF_NQ_DB) :
- rdev->en_dev->msix_entries[indx].db_offset;
-}
-
static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
{
int i;
@@ -887,7 +1291,7 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
bnxt_qplib_init_res(&rdev->qplib_res);
for (i = 1; i < rdev->num_msix ; i++) {
- db_offt = bnxt_re_get_nqdb_offset(rdev, i);
+ db_offt = rdev->en_dev->msix_entries[i].db_offset;
rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1],
i - 1, rdev->en_dev->msix_entries[i].vector,
db_offt, &bnxt_re_cqn_handler,
@@ -937,13 +1341,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
{
struct bnxt_re_ring_attr rattr = {};
int num_vec_created = 0;
- int rc = 0, i;
+ int rc, i;
u8 type;
/* Configure and allocate resources for qplib */
rdev->qplib_res.rcfw = &rdev->rcfw;
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
- rdev->is_virtfn);
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
if (rc)
goto fail;
@@ -1090,11 +1493,10 @@ static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
{
u32 prio_map = 0, tmp_map = 0;
struct net_device *netdev;
- struct dcb_app app;
+ struct dcb_app app = {};
netdev = rdev->netdev;
- memset(&app, 0, sizeof(app));
app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
app.protocol = ETH_P_IBOE;
tmp_map = dcb_ieee_getapp_mask(netdev, &app);
@@ -1123,8 +1525,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
*/
if ((prio_map == 0 && rdev->qplib_res.prio) ||
(prio_map != 0 && !rdev->qplib_res.prio)) {
- rdev->qplib_res.prio = prio_map ? true : false;
-
+ rdev->qplib_res.prio = prio_map;
bnxt_re_update_gid(rdev);
}
@@ -1138,7 +1539,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
struct hwrm_ver_get_input req = {};
struct bnxt_qplib_chip_ctx *cctx;
struct bnxt_fw_msg fw_msg = {};
- int rc = 0;
+ int rc;
bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET);
req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
@@ -1168,7 +1569,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
{
- int rc = 0;
+ int rc;
u32 event;
/* Register ib dev */
@@ -1188,7 +1589,7 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
return rc;
}
-static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
+static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
{
u8 type;
int rc;
@@ -1214,12 +1615,17 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev)
bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type);
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
}
- if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags))
- rdev->num_msix = 0;
+
+ rdev->num_msix = 0;
+
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_deinitialize_dbr_pacing(rdev);
bnxt_re_destroy_chip_ctx(rdev);
- if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
- bnxt_unregister_dev(rdev->en_dev);
+ if (op_type == BNXT_RE_COMPLETE_REMOVE) {
+ if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags))
+ bnxt_unregister_dev(rdev->en_dev);
+ }
}
/* worker thread for polling periodic events. Now used for QoS programming*/
@@ -1232,26 +1638,27 @@ static void bnxt_re_worker(struct work_struct *work)
schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
}
-static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
+static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
{
+ struct bnxt_re_ring_attr rattr = {};
struct bnxt_qplib_creq_ctx *creq;
- struct bnxt_re_ring_attr rattr;
u32 db_offt;
int vid;
u8 type;
int rc;
- /* Registered a new RoCE device instance to netdev */
- memset(&rattr, 0, sizeof(rattr));
- rc = bnxt_re_register_netdev(rdev);
- if (rc) {
- ibdev_err(&rdev->ibdev,
- "Failed to register with netedev: %#x\n", rc);
- return -EINVAL;
+ if (op_type == BNXT_RE_COMPLETE_INIT) {
+		/* Register a new RoCE device instance with the netdev */
+ rc = bnxt_re_register_netdev(rdev);
+ if (rc) {
+ ibdev_err(&rdev->ibdev,
+ "Failed to register with netedev: %#x\n", rc);
+ return -EINVAL;
+ }
}
set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
- rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
+ rc = bnxt_re_setup_chip_ctx(rdev);
if (rc) {
bnxt_unregister_dev(rdev->en_dev);
clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
@@ -1271,7 +1678,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n",
rdev->en_dev->ulp_tbl->msix_requested);
rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested;
- set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
bnxt_re_query_hwrm_intf_version(rdev);
@@ -1300,7 +1706,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc);
goto free_rcfw;
}
- db_offt = bnxt_re_get_nqdb_offset(rdev, BNXT_RE_AEQ_IDX);
+ db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset;
vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector;
rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw,
vid, db_offt,
@@ -1311,15 +1717,24 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
goto free_ring;
}
- rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr,
- rdev->is_virtfn);
+ if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) {
+ rc = bnxt_re_initialize_dbr_pacing(rdev);
+ if (!rc) {
+ rdev->pacing.dbr_pacing = true;
+ } else {
+ ibdev_err(&rdev->ibdev,
+ "DBR pacing disabled with error : %d\n", rc);
+ rdev->pacing.dbr_pacing = false;
+ }
+ }
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
if (rc)
goto disable_rcfw;
bnxt_re_set_resource_limits(rdev);
rc = bnxt_qplib_alloc_ctx(&rdev->qplib_res, &rdev->qplib_ctx, 0,
- bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx));
+ bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx));
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate QPLIB context: %#x\n", rc);
@@ -1375,6 +1790,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
*/
bnxt_re_vf_res_config(rdev);
}
+ hash_init(rdev->cq_hash);
+ if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT)
+ hash_init(rdev->srq_hash);
return 0;
free_sctx:
@@ -1389,21 +1807,38 @@ free_ring:
free_rcfw:
bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
fail:
- bnxt_re_dev_uninit(rdev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
return rc;
}
-static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode)
+static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
+ struct bnxt_re_en_dev_info *en_info,
+ struct auxiliary_device *adev)
+{
+ /* Before updating the rdev pointer in bnxt_re_en_dev_info structure,
+ * take the rtnl lock to avoid accessing invalid rdev pointer from
+ * L2 ULP callbacks. This is applicable in all the places where rdev
+ * pointer is updated in bnxt_re_en_dev_info.
+ */
+ rtnl_lock();
+ en_info->rdev = rdev;
+ rdev->adev = adev;
+ rtnl_unlock();
+}
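
The comment above states the key invariant: the L2 ULP callbacks read the pointer under the same lock, so they see either the old rdev or the new one, never a torn update or a freed pointer. A userspace analogue of publishing a shared pointer under a lock, with a pthread mutex standing in for rtnl_lock (illustrative names only):

    #include <pthread.h>
    #include <stdio.h>

    struct rdev { int id; };
    struct en_dev_info {
        pthread_mutex_t lock;   /* stands in for rtnl_lock */
        struct rdev *rdev;      /* pointer published to callbacks */
    };

    /* writer: publish (or clear) the device pointer under the lock */
    static void update_rdev(struct en_dev_info *info, struct rdev *rdev)
    {
        pthread_mutex_lock(&info->lock);
        info->rdev = rdev;
        pthread_mutex_unlock(&info->lock);
    }

    /* reader (the "ULP callback") takes the same lock before use */
    static void ulp_callback(struct en_dev_info *info)
    {
        pthread_mutex_lock(&info->lock);
        if (info->rdev)
            printf("rdev %d is valid here\n", info->rdev->id);
        pthread_mutex_unlock(&info->lock);
    }

    int main(void)
    {
        struct en_dev_info info = { PTHREAD_MUTEX_INITIALIZER, NULL };
        struct rdev r = { 1 };

        update_rdev(&info, &r);
        ulp_callback(&info);
        update_rdev(&info, NULL);   /* teardown clears it the same way */
        ulp_callback(&info);        /* sees NULL, not a stale pointer */
        return 0;
    }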
+
+static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
{
struct bnxt_aux_priv *aux_priv =
container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxt_re_en_dev_info *en_info;
struct bnxt_en_dev *en_dev;
struct bnxt_re_dev *rdev;
- int rc = 0;
+ int rc;
+
+ en_info = auxiliary_get_drvdata(adev);
+ en_dev = en_info->en_dev;
- /* en_dev should never be NULL as long as adev and aux_dev are valid. */
- en_dev = aux_priv->edev;
rdev = bnxt_re_dev_add(aux_priv, en_dev);
if (!rdev || !rdev_to_dev(rdev)) {
@@ -1411,7 +1846,9 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode)
goto exit;
}
- rc = bnxt_re_dev_init(rdev, wqe_mode);
+ bnxt_re_update_en_info_rdev(rdev, en_info, adev);
+
+ rc = bnxt_re_dev_init(rdev, op_type);
if (rc)
goto re_dev_dealloc;
@@ -1421,12 +1858,22 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode)
aux_priv->aux_dev.name);
goto re_dev_uninit;
}
- auxiliary_set_drvdata(adev, rdev);
+
+ rdev->nb.notifier_call = bnxt_re_netdev_event;
+ rc = register_netdevice_notifier(&rdev->nb);
+ if (rc) {
+ rdev->nb.notifier_call = NULL;
+ pr_err("%s: Cannot register with netdevice_notifier",
+ ROCE_DRV_MODULE_NAME);
+ return rc;
+ }
+ bnxt_re_setup_cc(rdev, true);
return 0;
re_dev_uninit:
- bnxt_re_dev_uninit(rdev);
+ bnxt_re_update_en_info_rdev(NULL, en_info, adev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
re_dev_dealloc:
ib_dealloc_device(&rdev->ibdev);
exit:
@@ -1442,7 +1889,7 @@ static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
return;
- /* Currently enabling only for GenP5 adapters */
+ /* Currently enabling only for GenP5 and P7 adapters */
- if (!bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx))
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
return;
if (enable) {
@@ -1509,14 +1956,9 @@ exit:
#define BNXT_ADEV_NAME "bnxt_en"
-static void bnxt_re_remove(struct auxiliary_device *adev)
+static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
+ struct auxiliary_device *aux_dev)
{
- struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
-
- if (!rdev)
- return;
-
- mutex_lock(&bnxt_re_mutex);
if (rdev->nb.notifier_call) {
unregister_netdevice_notifier(&rdev->nb);
rdev->nb.notifier_call = NULL;
@@ -1524,41 +1966,56 @@ static void bnxt_re_remove(struct auxiliary_device *adev)
/* If notifier is null, we should have already done a
* clean up before coming here.
*/
- goto skip_remove;
+ return;
}
bnxt_re_setup_cc(rdev, false);
ib_unregister_device(&rdev->ibdev);
- bnxt_re_dev_uninit(rdev);
+ bnxt_re_dev_uninit(rdev, op_type);
ib_dealloc_device(&rdev->ibdev);
-skip_remove:
+}
+
+static void bnxt_re_remove(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
+
+ mutex_lock(&bnxt_re_mutex);
+ if (!en_info) {
+ mutex_unlock(&bnxt_re_mutex);
+ return;
+ }
+ rdev = en_info->rdev;
+
+ if (rdev)
+ bnxt_re_remove_device(rdev, BNXT_RE_COMPLETE_REMOVE, adev);
+ kfree(en_info);
mutex_unlock(&bnxt_re_mutex);
}
static int bnxt_re_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
- struct bnxt_re_dev *rdev;
+ struct bnxt_aux_priv *aux_priv =
+ container_of(adev, struct bnxt_aux_priv, aux_dev);
+ struct bnxt_re_en_dev_info *en_info;
+ struct bnxt_en_dev *en_dev;
int rc;
+ en_dev = aux_priv->edev;
+
mutex_lock(&bnxt_re_mutex);
- rc = bnxt_re_add_device(adev, BNXT_QPLIB_WQE_MODE_STATIC);
- if (rc) {
+ en_info = kzalloc(sizeof(*en_info), GFP_KERNEL);
+ if (!en_info) {
mutex_unlock(&bnxt_re_mutex);
- return rc;
+ return -ENOMEM;
}
+ en_info->en_dev = en_dev;
- rdev = auxiliary_get_drvdata(adev);
+ auxiliary_set_drvdata(adev, en_info);
- rdev->nb.notifier_call = bnxt_re_netdev_event;
- rc = register_netdevice_notifier(&rdev->nb);
- if (rc) {
- rdev->nb.notifier_call = NULL;
- pr_err("%s: Cannot register to netdevice_notifier",
- ROCE_DRV_MODULE_NAME);
+ rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
+ if (rc)
goto err;
- }
-
- bnxt_re_setup_cc(rdev, true);
mutex_unlock(&bnxt_re_mutex);
return 0;
@@ -1571,11 +2028,15 @@ err:
static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
{
- struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_re_dev *rdev;
- if (!rdev)
+ if (!en_info)
return 0;
+ rdev = en_info->rdev;
+ en_dev = en_info->en_dev;
mutex_lock(&bnxt_re_mutex);
/* L2 driver may invoke this callback during device error/crash or device
* reset. Current RoCE driver doesn't recover the device in case of
@@ -1594,13 +2055,20 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
bnxt_re_dev_stop(rdev);
- bnxt_re_stop_irq(rdev);
+ bnxt_re_stop_irq(adev);
/* Move the device states to detached and avoid sending any more
* commands to HW
*/
set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
wake_up_all(&rdev->rcfw.cmdq.waitq);
+
+ if (rdev->pacing.dbr_pacing)
+ bnxt_re_set_pacing_dev_state(rdev);
+
+ ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx",
+ __func__, en_dev->en_state);
+ bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev);
mutex_unlock(&bnxt_re_mutex);
return 0;
@@ -1608,9 +2076,10 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state)
static int bnxt_re_resume(struct auxiliary_device *adev)
{
- struct bnxt_re_dev *rdev = auxiliary_get_drvdata(adev);
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
- if (!rdev)
+ if (!en_info)
return 0;
mutex_lock(&bnxt_re_mutex);
@@ -1621,7 +2090,9 @@ static int bnxt_re_resume(struct auxiliary_device *adev)
- * L2 driver want to modify the MSIx table.
+ * L2 driver wants to modify the MSIx table.
*/
- ibdev_info(&rdev->ibdev, "Handle device resume call");
+ bnxt_re_add_device(adev, BNXT_RE_POST_RECOVERY_INIT);
+ rdev = en_info->rdev;
+ ibdev_info(&rdev->ibdev, "Device resume completed");
mutex_unlock(&bnxt_re_mutex);
return 0;
@@ -1646,7 +2117,7 @@ static struct auxiliary_driver bnxt_re_driver = {
static int __init bnxt_re_mod_init(void)
{
- int rc = 0;
+ int rc;
pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
rc = auxiliary_driver_register(&bnxt_re_driver);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index a42555623aed..42e98e5f94cb 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -54,6 +54,10 @@
#include "qplib_rcfw.h"
#include "qplib_sp.h"
#include "qplib_fp.h"
+#include <rdma/ib_addr.h>
+#include "bnxt_ulp.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp);
@@ -237,18 +241,15 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
struct bnxt_qplib_hwq *hwq = &nq->hwq;
struct nq_base *nqe, **nq_ptr;
int budget = nq->budget;
- u32 sw_cons, raw_cons;
uintptr_t q_handle;
u16 type;
spin_lock_bh(&hwq->lock);
/* Service the NQ until empty */
- raw_cons = hwq->cons;
while (budget--) {
- sw_cons = HWQ_CMP(raw_cons, hwq);
nq_ptr = (struct nq_base **)hwq->pbl_ptr;
- nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
- if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+ nqe = &nq_ptr[NQE_PG(hwq->cons)][NQE_IDX(hwq->cons)];
+ if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
break;
/*
@@ -276,7 +277,8 @@ static void clean_nq(struct bnxt_qplib_nq *nq, struct bnxt_qplib_cq *cq)
default:
break;
}
- raw_cons++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &nq->nq_db.dbinfo.flags);
}
spin_unlock_bh(&hwq->lock);
}
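
The hunks in this file all follow the same new pattern: instead of deriving validity from a raw consumer count and a wrap ("cp") bit, the driver keeps an epoch bit in dbinfo.flags, flips it whenever the consumer index wraps, and treats an entry as valid while its hardware toggle matches the current pass. A self-contained sketch of the scheme, with illustrative names:

    #include <stdint.h>
    #include <stdio.h>

    #define DEPTH 8
    #define EPOCH_CONS_MASK 0x1u

    struct entry { uint8_t toggle; };   /* hardware writes 0/1 per pass */

    static struct entry ring[DEPTH];
    static uint32_t cons, flags;        /* flags bit 0 = consumer epoch */

    /* valid while the entry toggle matches the expected pass */
    static int entry_valid(const struct entry *e)
    {
        return !!e->toggle == !(flags & EPOCH_CONS_MASK);
    }

    static void incr_cons(uint32_t cnt)
    {
        cons += cnt;
        if (cons >= DEPTH) {
            cons %= DEPTH;
            flags ^= EPOCH_CONS_MASK;   /* wrap: flip the expected pass */
        }
    }

    int main(void)
    {
        for (int i = 0; i < DEPTH; i++)
            ring[i].toggle = 1;         /* first producer pass */
        while (entry_valid(&ring[cons]))
            incr_cons(1);
        printf("consumed one pass: cons=%u epoch=%u\n",
               cons, flags & EPOCH_CONS_MASK);
        return 0;
    }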
@@ -302,18 +304,16 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
struct bnxt_qplib_hwq *hwq = &nq->hwq;
struct bnxt_qplib_cq *cq;
int budget = nq->budget;
- u32 sw_cons, raw_cons;
struct nq_base *nqe;
uintptr_t q_handle;
+ u32 hw_polled = 0;
u16 type;
spin_lock_bh(&hwq->lock);
/* Service the NQ until empty */
- raw_cons = hwq->cons;
while (budget--) {
- sw_cons = HWQ_CMP(raw_cons, hwq);
- nqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
- if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+ nqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+ if (!NQE_CMP_VALID(nqe, nq->nq_db.dbinfo.flags))
break;
/*
@@ -334,6 +334,9 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
cq = (struct bnxt_qplib_cq *)(unsigned long)q_handle;
if (!cq)
break;
+ cq->toggle = (le16_to_cpu(nqe->info10_type) &
+ NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT;
+ cq->dbinfo.toggle = cq->toggle;
bnxt_qplib_armen_db(&cq->dbinfo,
DBC_DBC_TYPE_CQ_ARMENA);
spin_lock_bh(&cq->compl_lock);
@@ -348,6 +351,7 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
case NQ_BASE_TYPE_SRQ_EVENT:
{
struct bnxt_qplib_srq *srq;
+ struct bnxt_re_srq *srq_p;
struct nq_srq_event *nqsrqe =
(struct nq_srq_event *)nqe;
@@ -355,6 +359,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
q_handle |= (u64)le32_to_cpu(nqsrqe->srq_handle_high)
<< 32;
srq = (struct bnxt_qplib_srq *)q_handle;
+ srq->toggle = (le16_to_cpu(nqe->info10_type) & NQ_CN_TOGGLE_MASK)
+ >> NQ_CN_TOGGLE_SFT;
+ srq->dbinfo.toggle = srq->toggle;
+ srq_p = container_of(srq, struct bnxt_re_srq, qplib_srq);
+ if (srq_p->uctx_srq_page)
+ *((u32 *)srq_p->uctx_srq_page) = srq->toggle;
bnxt_qplib_armen_db(&srq->dbinfo,
DBC_DBC_TYPE_SRQ_ARMENA);
if (nq->srqn_handler(nq,
@@ -372,12 +382,12 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
"nqe with type = 0x%x not handled\n", type);
break;
}
- raw_cons++;
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &nq->nq_db.dbinfo.flags);
}
- if (hwq->cons != raw_cons) {
- hwq->cons = raw_cons;
+ if (hw_polled)
bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, true);
- }
spin_unlock_bh(&hwq->lock);
}
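
Besides epoch tracking, the NQ handler now pulls a per-queue toggle out of the event's info10_type field, caches it in dbinfo for later ARM doorbells, and for SRQs mirrors it into a page shared with userspace so user-mode doorbells carry the matching toggle. A sketch of just the extraction and mirroring; the mask and shift values below are illustrative, the real ones come from roce_hsi.h:

    #include <stdint.h>
    #include <stdio.h>

    #define NQ_CN_TOGGLE_MASK 0x6000u   /* illustrative layout */
    #define NQ_CN_TOGGLE_SFT  13

    int main(void)
    {
        uint16_t info10_type = 0x2005;  /* pretend NQE field, already LE-swapped */
        uint8_t toggle = (info10_type & NQ_CN_TOGGLE_MASK) >> NQ_CN_TOGGLE_SFT;

        uint8_t dbinfo_toggle = toggle; /* cached for the next ARM doorbell */

        uint32_t uctx_srq_page[1];      /* stands in for the mmap()ed page */
        uctx_srq_page[0] = toggle;      /* userspace reads it before ringing */

        printf("toggle=%u db=%u shared=%u\n",
               toggle, dbinfo_toggle, uctx_srq_page[0]);
        return 0;
    }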
@@ -505,6 +515,7 @@ static int bnxt_qplib_map_nq_db(struct bnxt_qplib_nq *nq, u32 reg_offt)
pdev = nq->pdev;
nq_db = &nq->nq_db;
+ nq_db->dbinfo.flags = 0;
nq_db->reg.bar_id = NQ_CONS_PCI_BAR_REGION;
nq_db->reg.bar_base = pci_resource_start(pdev, nq_db->reg.bar_id);
if (!nq_db->reg.bar_base) {
@@ -535,7 +546,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
cqn_handler_t cqn_handler,
srqn_handler_t srqn_handler)
{
- int rc = -1;
+ int rc;
nq->pdev = pdev;
nq->cqn_handler = cqn_handler;
@@ -649,7 +660,7 @@ int bnxt_qplib_create_srq(struct bnxt_qplib_res *res,
rc = -ENOMEM;
goto fail;
}
-
+ srq->dbinfo.flags = 0;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_CREATE_SRQ,
sizeof(req));
@@ -703,13 +714,9 @@ int bnxt_qplib_modify_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_srq *srq)
{
struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
- u32 sw_prod, sw_cons, count = 0;
+ u32 count;
- sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
-
- count = sw_prod > sw_cons ? sw_prod - sw_cons :
- srq_hwq->max_elements - sw_cons + sw_prod;
+ count = __bnxt_qplib_get_avail(srq_hwq);
if (count > srq->threshold) {
srq->arm_req = false;
bnxt_qplib_srq_arm_db(&srq->dbinfo, srq->threshold);
@@ -727,27 +734,31 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct creq_query_srq_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
struct creq_query_srq_resp_sb *sb;
struct cmdq_query_srq req = {};
- int rc = 0;
+ int rc;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_SRQ,
sizeof(req));
/* Configure the request */
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf)
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
req.srq_cid = cpu_to_le32(srq->id);
- sb = sbuf->sb;
- bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req),
+ sb = sbuf.sb;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
- srq->threshold = le16_to_cpu(sb->srq_limit);
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ if (!rc)
+ srq->threshold = le16_to_cpu(sb->srq_limit);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
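
query_srq (and query_qp further down) drop the heap-allocated sbuf wrapper: the descriptor now lives on the stack and only its data buffer is DMA-coherent, padded to a whole number of command units, with the response copied out only on success. A hedged model of the flow, using calloc in place of dma_alloc_coherent:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define CMDQE_UNITS 16u
    #define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

    struct sbuf {
        void  *sb;     /* DMA-coherent in the driver */
        size_t size;   /* padded to whole command units */
    };

    struct resp_sb { uint16_t srq_limit; };

    /* stand-in for the firmware round trip */
    static int send_message(struct sbuf *sbuf)
    {
        ((struct resp_sb *)sbuf->sb)->srq_limit = 42;
        return 0;
    }

    static int query_srq(uint16_t *threshold)
    {
        struct sbuf sbuf;   /* descriptor on the stack, no kmalloc */
        int rc;

        sbuf.size = ALIGN_UP(sizeof(struct resp_sb), CMDQE_UNITS);
        sbuf.sb = calloc(1, sbuf.size);   /* dma_alloc_coherent() here */
        if (!sbuf.sb)
            return -1;

        rc = send_message(&sbuf);
        if (!rc)   /* copy out only when the command succeeded */
            *threshold = ((struct resp_sb *)sbuf.sb)->srq_limit;

        free(sbuf.sb);   /* dma_free_coherent() here */
        return rc;
    }

    int main(void)
    {
        uint16_t th = 0;

        if (!query_srq(&th))
            printf("srq_limit=%u\n", th);
        return 0;
    }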
@@ -758,7 +769,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
struct bnxt_qplib_hwq *srq_hwq = &srq->hwq;
struct rq_wqe *srqe;
struct sq_sge *hw_sge;
- u32 sw_prod, sw_cons, count = 0;
+ u32 count = 0;
int i, next;
spin_lock(&srq_hwq->lock);
@@ -772,8 +783,7 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
srq->start_idx = srq->swq[next].next_idx;
spin_unlock(&srq_hwq->lock);
- sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- srqe = bnxt_qplib_get_qe(srq_hwq, sw_prod, NULL);
+ srqe = bnxt_qplib_get_qe(srq_hwq, srq_hwq->prod, NULL);
memset(srqe, 0, srq->wqe_size);
/* Calculate wqe_size16 and data_len */
for (i = 0, hw_sge = (struct sq_sge *)srqe->data;
@@ -789,17 +799,10 @@ int bnxt_qplib_post_srq_recv(struct bnxt_qplib_srq *srq,
srqe->wr_id[0] = cpu_to_le32((u32)next);
srq->swq[next].wr_id = wqe->wr_id;
- srq_hwq->prod++;
+ bnxt_qplib_hwq_incr_prod(&srq->dbinfo, srq_hwq, srq->dbinfo.max_slot);
spin_lock(&srq_hwq->lock);
- sw_prod = HWQ_CMP(srq_hwq->prod, srq_hwq);
- /* retaining srq_hwq->cons for this logic
- * actually the lock is only required to
- * read srq_hwq->cons.
- */
- sw_cons = HWQ_CMP(srq_hwq->cons, srq_hwq);
- count = sw_prod > sw_cons ? sw_prod - sw_cons :
- srq_hwq->max_elements - sw_cons + sw_prod;
+ count = __bnxt_qplib_get_avail(srq_hwq);
spin_unlock(&srq_hwq->lock);
/* Ring DB */
bnxt_qplib_ring_prod_db(&srq->dbinfo, DBC_DBC_TYPE_SRQ);
@@ -817,13 +820,13 @@ static int bnxt_qplib_alloc_init_swq(struct bnxt_qplib_q *que)
{
int indx;
- que->swq = kcalloc(que->max_wqe, sizeof(*que->swq), GFP_KERNEL);
+ que->swq = kcalloc(que->max_sw_wqe, sizeof(*que->swq), GFP_KERNEL);
if (!que->swq)
return -ENOMEM;
que->swq_start = 0;
- que->swq_last = que->max_wqe - 1;
- for (indx = 0; indx < que->max_wqe; indx++)
+ que->swq_last = que->max_sw_wqe - 1;
+ for (indx = 0; indx < que->max_sw_wqe; indx++)
que->swq[indx].next_idx = indx + 1;
que->swq[que->swq_last].next_idx = 0; /* Make it circular */
que->swq_last = 0;
@@ -846,6 +849,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
u32 tbl_indx;
int rc;
+ sq->dbinfo.flags = 0;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_CREATE_QP1,
sizeof(req));
@@ -858,7 +862,7 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
hwq_attr.stride = sizeof(struct sq_sge);
- hwq_attr.depth = bnxt_qplib_get_depth(sq);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, false);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
@@ -882,10 +886,11 @@ int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* RQ */
if (rq->max_wqe) {
+ rq->dbinfo.flags = 0;
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
hwq_attr.stride = sizeof(struct sq_sge);
- hwq_attr.depth = bnxt_qplib_get_depth(rq);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false);
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&rq->hwq, &hwq_attr);
if (rc)
@@ -989,6 +994,10 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
u32 tbl_indx;
u16 nsge;
+ if (res->dattr)
+ qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2);
+
+ sq->dbinfo.flags = 0;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_CREATE_QP,
sizeof(req));
@@ -1000,17 +1009,30 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* SQ */
if (qp->type == CMDQ_CREATE_QP_TYPE_RC) {
- psn_sz = bnxt_qplib_is_chip_gen_p5(res->cctx) ?
+ psn_sz = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ?
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
+
+ if (qp->is_host_msn_tbl) {
+ psn_sz = sizeof(struct sq_msn_search);
+ qp->msn = 0;
+ }
}
hwq_attr.res = res;
hwq_attr.sginfo = &sq->sg_info;
hwq_attr.stride = sizeof(struct sq_sge);
- hwq_attr.depth = bnxt_qplib_get_depth(sq);
+ hwq_attr.depth = bnxt_qplib_get_depth(sq, qp->wqe_mode, true);
hwq_attr.aux_stride = psn_sz;
- hwq_attr.aux_depth = bnxt_qplib_set_sq_size(sq, qp->wqe_mode);
+ hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode)
+ : 0;
+ /* Update msn tbl size */
+ if (qp->is_host_msn_tbl && psn_sz) {
+ hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
+ qp->msn_tbl_sz = hwq_attr.aux_depth;
+ qp->msn = 0;
+ }
+
hwq_attr.type = HWQ_TYPE_QUEUE;
rc = bnxt_qplib_alloc_init_hwq(&sq->hwq, &hwq_attr);
if (rc)
@@ -1037,10 +1059,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
/* RQ */
if (!qp->srq) {
+ rq->dbinfo.flags = 0;
hwq_attr.res = res;
hwq_attr.sginfo = &rq->sg_info;
hwq_attr.stride = sizeof(struct sq_sge);
- hwq_attr.depth = bnxt_qplib_get_depth(rq);
+ hwq_attr.depth = bnxt_qplib_get_depth(rq, qp->wqe_mode, false);
hwq_attr.aux_stride = 0;
hwq_attr.aux_depth = 0;
hwq_attr.type = HWQ_TYPE_QUEUE;
@@ -1365,24 +1388,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct creq_query_qp_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
struct creq_query_qp_resp_sb *sb;
struct cmdq_query_qp req = {};
u32 temp32[4];
- int i, rc = 0;
+ int i, rc;
+
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+ sb = sbuf.sb;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_QP,
sizeof(req));
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf)
- return -ENOMEM;
- sb = sbuf->sb;
-
req.qp_cid = cpu_to_le32(qp->id);
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req),
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
@@ -1391,8 +1416,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp->state = sb->en_sqd_async_notify_state &
CREQ_QUERY_QP_RESP_SB_STATE_MASK;
qp->en_sqd_async_notify = sb->en_sqd_async_notify_state &
- CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ?
- true : false;
+ CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY;
qp->access = sb->access;
qp->pkey_index = le16_to_cpu(sb->pkey);
qp->qkey = le32_to_cpu(sb->qkey);
@@ -1442,19 +1466,23 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
memcpy(qp->smac, sb->src_mac, 6);
qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
{
struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+ u32 peek_flags, peek_cons;
struct cq_base *hw_cqe;
int i;
+ peek_flags = cq->dbinfo.flags;
+ peek_cons = cq_hwq->cons;
for (i = 0; i < cq_hwq->max_elements; i++) {
- hw_cqe = bnxt_qplib_get_qe(cq_hwq, i, NULL);
- if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+ hw_cqe = bnxt_qplib_get_qe(cq_hwq, peek_cons, NULL);
+ if (!CQE_CMP_VALID(hw_cqe, peek_flags))
continue;
/*
* The valid test of the entry must be done first before
@@ -1484,6 +1512,8 @@ static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
default:
break;
}
+ bnxt_qplib_hwq_incr_cons(cq_hwq->max_elements, &peek_cons,
+ 1, &peek_flags);
}
}
@@ -1585,6 +1615,27 @@ void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
return NULL;
}
+/* Fill the MSN table into the next psn row */
+static void bnxt_qplib_fill_msn_search(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe,
+ struct bnxt_qplib_swq *swq)
+{
+ struct sq_msn_search *msns;
+ u32 start_psn, next_psn;
+ u16 start_idx;
+
+ msns = (struct sq_msn_search *)swq->psn_search;
+ msns->start_idx_next_psn_start_psn = 0;
+
+ start_psn = swq->start_psn;
+ next_psn = swq->next_psn;
+ start_idx = swq->slot_idx;
+ msns->start_idx_next_psn_start_psn |=
+ bnxt_re_update_msn_tbl(start_idx, next_psn, start_psn);
+ qp->msn++;
+ qp->msn %= qp->msn_tbl_sz;
+}
+
static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swqe *wqe,
struct bnxt_qplib_swq *swq)
@@ -1596,6 +1647,12 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
if (!swq->psn_search)
return;
+ /* Handle the MSN table differently based on cap flags */
+ if (qp->is_host_msn_tbl) {
+ bnxt_qplib_fill_msn_search(qp, wqe, swq);
+ return;
+ }
psns = swq->psn_search;
psns_ext = swq->psn_ext;
@@ -1606,7 +1663,7 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
flg_npsn = ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
SQ_PSN_SEARCH_NEXT_PSN_MASK);
- if (bnxt_qplib_is_chip_gen_p5(qp->cctx)) {
+ if (bnxt_qplib_is_chip_gen_p5_p7(qp->cctx)) {
psns_ext->opcode_start_psn = cpu_to_le32(op_spsn);
psns_ext->flags_next_psn = cpu_to_le32(flg_npsn);
psns_ext->start_slot_idx = cpu_to_le16(swq->slot_idx);
@@ -1704,8 +1761,8 @@ static u16 bnxt_qplib_required_slots(struct bnxt_qplib_qp *qp,
return slot;
}
-static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq,
- struct bnxt_qplib_swq *swq)
+static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_qp *qp, struct bnxt_qplib_q *sq,
+ struct bnxt_qplib_swq *swq, bool hw_retx)
{
struct bnxt_qplib_hwq *hwq;
u32 pg_num, pg_indx;
@@ -1716,6 +1773,11 @@ static void bnxt_qplib_pull_psn_buff(struct bnxt_qplib_q *sq,
if (!hwq->pad_pg)
return;
tail = swq->slot_idx / sq->dbinfo.max_slot;
+ if (hw_retx) {
+ /* For HW retx use qp msn index */
+ tail = qp->msn;
+ tail %= qp->msn_tbl_sz;
+ }
pg_num = (tail + hwq->pad_pgofft) / (PAGE_SIZE / hwq->pad_stride);
pg_indx = (tail + hwq->pad_pgofft) % (PAGE_SIZE / hwq->pad_stride);
buff = (void *)(hwq->pad_pg[pg_num] + pg_indx * hwq->pad_stride);
@@ -1740,6 +1802,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
struct bnxt_qplib_swq *swq;
bool sch_handler = false;
u16 wqe_sz, qdf = 0;
+ bool msn_update;
void *base_hdr;
void *ext_hdr;
__le32 temp32;
@@ -1767,7 +1830,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
swq = bnxt_qplib_get_swqe(sq, &wqe_idx);
- bnxt_qplib_pull_psn_buff(sq, swq);
+ bnxt_qplib_pull_psn_buff(qp, sq, swq, qp->is_host_msn_tbl);
idx = 0;
swq->slot_idx = hwq->prod;
@@ -1799,6 +1862,8 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
&idx);
if (data_len < 0)
goto queue_err;
+ /* Make sure we update MSN table only for wired wqes */
+ msn_update = true;
/* Specifics */
switch (wqe->type) {
case BNXT_QPLIB_SWQE_TYPE_SEND:
@@ -1839,6 +1904,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
SQ_SEND_DST_QP_MASK);
ext_sqe->avid = cpu_to_le32(wqe->send.avid &
SQ_SEND_AVID_MASK);
+ msn_update = false;
} else {
sqe->length = cpu_to_le32(data_len);
if (qp->mtu)
@@ -1896,7 +1962,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
sqe->wqe_type = wqe->type;
sqe->flags = wqe->flags;
sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key);
-
+ msn_update = false;
break;
}
case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
@@ -1928,6 +1994,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
PTU_PTE_VALID);
ext_sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
ext_sqe->va = cpu_to_le64(wqe->frmr.va);
+ msn_update = false;
break;
}
@@ -1945,6 +2012,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
sqe->l_key = cpu_to_le32(wqe->bind.r_key);
ext_sqe->va = cpu_to_le64(wqe->bind.va);
ext_sqe->length_lo = cpu_to_le32(wqe->bind.length);
+ msn_update = false;
break;
}
default:
@@ -1952,11 +2020,13 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
rc = -EINVAL;
goto done;
}
- swq->next_psn = sq->psn & BTH_PSN_MASK;
- bnxt_qplib_fill_psn_search(qp, wqe, swq);
+ if (!qp->is_host_msn_tbl || msn_update) {
+ swq->next_psn = sq->psn & BTH_PSN_MASK;
+ bnxt_qplib_fill_psn_search(qp, wqe, swq);
+ }
queue_err:
bnxt_qplib_swq_mod_start(sq, wqe_idx);
- bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+ bnxt_qplib_hwq_incr_prod(&sq->dbinfo, hwq, swq->slots);
qp->wqe_cnt++;
done:
if (sch_handler) {
@@ -2044,7 +2114,7 @@ int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
base_hdr->wr_id[0] = cpu_to_le32(wqe_idx);
queue_err:
bnxt_qplib_swq_mod_start(rq, wqe_idx);
- bnxt_qplib_hwq_incr_prod(hwq, swq->slots);
+ bnxt_qplib_hwq_incr_prod(&rq->dbinfo, hwq, swq->slots);
done:
if (sch_handler) {
nq_work = kzalloc(sizeof(*nq_work), GFP_ATOMIC);
@@ -2081,6 +2151,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
return -EINVAL;
}
+ cq->dbinfo.flags = 0;
hwq_attr.res = res;
hwq_attr.depth = cq->max_wqe;
hwq_attr.stride = sizeof(struct cq_base);
@@ -2096,7 +2167,7 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
req.dpi = cpu_to_le32(cq->dpi->dpi);
req.cq_handle = cpu_to_le64(cq->cq_handle);
- req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+ req.cq_size = cpu_to_le32(cq->max_wqe);
pbl = &cq->hwq.pbl[PBL_LVL_0];
pg_sz_lvl = (bnxt_qplib_base_pg_size(&cq->hwq) <<
CMDQ_CREATE_CQ_PG_SIZE_SFT);
@@ -2124,6 +2195,8 @@ int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
cq->dbinfo.xid = cq->id;
cq->dbinfo.db = cq->dpi->dbr;
cq->dbinfo.priv_db = res->dpi_tbl.priv_db;
+ cq->dbinfo.flags = 0;
+ cq->dbinfo.toggle = 0;
bnxt_qplib_armen_db(&cq->dbinfo, DBC_DBC_TYPE_CQ_ARMENA);
@@ -2139,6 +2212,8 @@ void bnxt_qplib_resize_cq_complete(struct bnxt_qplib_res *res,
{
bnxt_qplib_free_hwq(res, &cq->hwq);
memcpy(&cq->hwq, &cq->resize_hwq, sizeof(cq->hwq));
+ /* Reset only the cons bit in the flags */
+ cq->dbinfo.flags &= ~(1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT);
}
int bnxt_qplib_resize_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq,
@@ -2235,7 +2310,8 @@ static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
cqe++;
(*budget)--;
skip_compl:
- bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[last].slots);
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ sq->swq[last].slots, &sq->dbinfo.flags);
sq->swq_last = sq->swq[last].next_idx;
}
*pcqe = cqe;
@@ -2282,7 +2358,8 @@ static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
cqe->wr_id = rq->swq[last].wr_id;
cqe++;
(*budget)--;
- bnxt_qplib_hwq_incr_cons(&rq->hwq, rq->swq[last].slots);
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ rq->swq[last].slots, &rq->dbinfo.flags);
rq->swq_last = rq->swq[last].next_idx;
}
*pcqe = cqe;
@@ -2311,7 +2388,7 @@ void bnxt_qplib_mark_qp_error(void *qp_handle)
static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
u32 cq_cons, u32 swq_last, u32 cqe_sq_cons)
{
- u32 peek_sw_cq_cons, peek_raw_cq_cons, peek_sq_cons_idx;
+ u32 peek_sw_cq_cons, peek_sq_cons_idx, peek_flags;
struct bnxt_qplib_q *sq = &qp->sq;
struct cq_req *peek_req_hwcqe;
struct bnxt_qplib_qp *peek_qp;
@@ -2342,16 +2419,14 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
}
if (sq->condition) {
/* Peek at the completions */
- peek_raw_cq_cons = cq->hwq.cons;
+ peek_flags = cq->dbinfo.flags;
peek_sw_cq_cons = cq_cons;
i = cq->hwq.max_elements;
while (i--) {
- peek_sw_cq_cons = HWQ_CMP((peek_sw_cq_cons), &cq->hwq);
peek_hwcqe = bnxt_qplib_get_qe(&cq->hwq,
peek_sw_cq_cons, NULL);
/* If the next hwcqe is VALID */
- if (CQE_CMP_VALID(peek_hwcqe, peek_raw_cq_cons,
- cq->hwq.max_elements)) {
+ if (CQE_CMP_VALID(peek_hwcqe, peek_flags)) {
/*
* The valid test of the entry must be done first before
* reading any further.
@@ -2394,8 +2469,9 @@ static int do_wa9060(struct bnxt_qplib_qp *qp, struct bnxt_qplib_cq *cq,
rc = -EINVAL;
goto out;
}
- peek_sw_cq_cons++;
- peek_raw_cq_cons++;
+ bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements,
+ &peek_sw_cq_cons,
+ 1, &peek_flags);
}
dev_err(&cq->hwq.pdev->dev,
"Should not have come here! cq_cons=0x%x qp=0x%x sq cons sw=0x%x hw=0x%x\n",
@@ -2406,6 +2482,32 @@ out:
return rc;
}
+static int bnxt_qplib_get_cqe_sq_cons(struct bnxt_qplib_q *sq, u32 cqe_slot)
+{
+ struct bnxt_qplib_hwq *sq_hwq;
+ struct bnxt_qplib_swq *swq;
+ int cqe_sq_cons = -1;
+ u32 start, last;
+
+ sq_hwq = &sq->hwq;
+
+ start = sq->swq_start;
+ last = sq->swq_last;
+
+ while (last != start) {
+ swq = &sq->swq[last];
+ if (swq->slot_idx == cqe_slot) {
+ cqe_sq_cons = swq->next_idx;
+ dev_err(&sq_hwq->pdev->dev, "%s: Found cons wqe = %d slot = %d\n",
+ __func__, cqe_sq_cons, cqe_slot);
+ break;
+ }
+
+ last = swq->next_idx;
+ }
+ return cqe_sq_cons;
+}
+
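
With variable-size WQEs an error CQE reports a slot index rather than a WQE index, so the helper above chases the software queue from swq_last toward swq_start until a WQE's slot_idx matches, then hands back that WQE's successor as the new consumer. A compact model of the walk over a hand-built four-entry ring:

    #include <stdio.h>

    struct swq { int slot_idx; int next_idx; };

    /* WQEs linked by next_idx; each starts at some slot offset */
    static struct swq swq[4] = {
        { 0, 1 }, { 3, 2 }, { 5, 3 }, { 8, 0 },
    };

    static int cqe_sq_cons(int start, int last, int cqe_slot)
    {
        while (last != start) {
            if (swq[last].slot_idx == cqe_slot)
                return swq[last].next_idx;   /* consumer moves past it */
            last = swq[last].next_idx;
        }
        return -1;   /* reported slot not found */
    }

    int main(void)
    {
        /* an error CQE reporting slot 5 lands on WQE 2; new cons is 3 */
        printf("cons=%d\n", cqe_sq_cons(0, 1, 5));
        return 0;
    }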
static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
struct cq_req *hwcqe,
struct bnxt_qplib_cqe **pcqe, int *budget,
@@ -2413,9 +2515,10 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
{
struct bnxt_qplib_swq *swq;
struct bnxt_qplib_cqe *cqe;
+ u32 cqe_sq_cons, slot_num;
struct bnxt_qplib_qp *qp;
struct bnxt_qplib_q *sq;
- u32 cqe_sq_cons;
+ int cqe_cons;
int rc = 0;
qp = (struct bnxt_qplib_qp *)((unsigned long)
@@ -2427,12 +2530,26 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
sq = &qp->sq;
- cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_wqe;
+ cqe_sq_cons = le16_to_cpu(hwcqe->sq_cons_idx) % sq->max_sw_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
"%s: QP in Flush QP = %p\n", __func__, qp);
goto done;
}
+
+ if (__is_err_cqe_for_var_wqe(qp, hwcqe->status)) {
+ slot_num = le16_to_cpu(hwcqe->sq_cons_idx);
+ cqe_cons = bnxt_qplib_get_cqe_sq_cons(sq, slot_num);
+ if (cqe_cons < 0) {
+ dev_err(&cq->hwq.pdev->dev, "%s: Wrong SQ cons cqe_slot_indx = %d\n",
+ __func__, slot_num);
+ goto done;
+ }
+ cqe_sq_cons = cqe_cons;
+ dev_err(&cq->hwq.pdev->dev, "%s: cqe_sq_cons = %d swq_last = %d swq_start = %d\n",
+ __func__, cqe_sq_cons, sq->swq_last, sq->swq_start);
+ }
+
/* Require to walk the sq's swq to fabricate CQEs for all previously
* signaled SWQEs due to CQE aggregation from the current sq cons
* to the cqe_sq_cons
@@ -2482,7 +2599,8 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
}
}
skip:
- bnxt_qplib_hwq_incr_cons(&sq->hwq, swq->slots);
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ swq->slots, &sq->dbinfo.flags);
sq->swq_last = swq->next_idx;
if (sq->single)
break;
@@ -2509,7 +2627,8 @@ static void bnxt_qplib_release_srqe(struct bnxt_qplib_srq *srq, u32 tag)
srq->swq[srq->last_idx].next_idx = (int)tag;
srq->last_idx = (int)tag;
srq->swq[srq->last_idx].next_idx = -1;
- srq->hwq.cons++; /* Support for SRQE counter */
+ bnxt_qplib_hwq_incr_cons(srq->hwq.max_elements, &srq->hwq.cons,
+ srq->dbinfo.max_slot, &srq->dbinfo.flags);
spin_unlock(&srq->hwq.lock);
}
@@ -2578,7 +2697,8 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
rq->swq_last = swq->next_idx;
*pcqe = cqe;
@@ -2664,7 +2784,8 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
rq->swq_last = swq->next_idx;
*pcqe = cqe;
@@ -2681,14 +2802,11 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
bool bnxt_qplib_is_cq_empty(struct bnxt_qplib_cq *cq)
{
struct cq_base *hw_cqe;
- u32 sw_cons, raw_cons;
bool rc = true;
- raw_cons = cq->hwq.cons;
- sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
- hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
/* Check for Valid bit. If the CQE is valid, return false */
- rc = !CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements);
+ rc = !CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags);
return rc;
}
@@ -2770,7 +2888,8 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
cqe->wr_id = swq->wr_id;
cqe++;
(*budget)--;
- bnxt_qplib_hwq_incr_cons(&rq->hwq, swq->slots);
+ bnxt_qplib_hwq_incr_cons(rq->hwq.max_elements, &rq->hwq.cons,
+ swq->slots, &rq->dbinfo.flags);
rq->swq_last = swq->next_idx;
*pcqe = cqe;
@@ -2815,7 +2934,7 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
if (cqe_cons == 0xFFFF)
goto do_rq;
- cqe_cons %= sq->max_wqe;
+ cqe_cons %= sq->max_sw_wqe;
if (qp->sq.flushed) {
dev_dbg(&cq->hwq.pdev->dev,
@@ -2843,7 +2962,8 @@ static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
cqe++;
(*budget)--;
}
- bnxt_qplib_hwq_incr_cons(&sq->hwq, sq->swq[swq_last].slots);
+ bnxt_qplib_hwq_incr_cons(sq->hwq.max_elements, &sq->hwq.cons,
+ sq->swq[swq_last].slots, &sq->dbinfo.flags);
sq->swq_last = sq->swq[swq_last].next_idx;
}
*pcqe = cqe;
@@ -2928,19 +3048,17 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
int num_cqes, struct bnxt_qplib_qp **lib_qp)
{
struct cq_base *hw_cqe;
- u32 sw_cons, raw_cons;
int budget, rc = 0;
+ u32 hw_polled = 0;
u8 type;
- raw_cons = cq->hwq.cons;
budget = num_cqes;
while (budget) {
- sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
- hw_cqe = bnxt_qplib_get_qe(&cq->hwq, sw_cons, NULL);
+ hw_cqe = bnxt_qplib_get_qe(&cq->hwq, cq->hwq.cons, NULL);
/* Check for Valid bit */
- if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+ if (!CQE_CMP_VALID(hw_cqe, cq->dbinfo.flags))
break;
/*
@@ -2955,7 +3073,7 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
rc = bnxt_qplib_cq_process_req(cq,
(struct cq_req *)hw_cqe,
&cqe, &budget,
- sw_cons, lib_qp);
+ cq->hwq.cons, lib_qp);
break;
case CQ_BASE_CQE_TYPE_RES_RC:
rc = bnxt_qplib_cq_process_res_rc(cq,
@@ -3001,18 +3119,20 @@ int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
dev_err(&cq->hwq.pdev->dev,
"process_cqe error rc = 0x%x\n", rc);
}
- raw_cons++;
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(cq->hwq.max_elements, &cq->hwq.cons,
+ 1, &cq->dbinfo.flags);
+
}
- if (cq->hwq.cons != raw_cons) {
- cq->hwq.cons = raw_cons;
+ if (hw_polled)
bnxt_qplib_ring_db(&cq->dbinfo, DBC_DBC_TYPE_CQ);
- }
exit:
return num_cqes - budget;
}
void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
{
+ cq->dbinfo.toggle = cq->toggle;
if (arm_type)
bnxt_qplib_ring_db(&cq->dbinfo, arm_type);
/* Using cq->arm_state variable to track whether to issue cq handler */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 404b851091ca..b62df8701950 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -105,6 +105,7 @@ struct bnxt_qplib_srq {
struct bnxt_qplib_sg_info sg_info;
u16 eventq_hw_ring_id;
spinlock_t lock; /* protect SRQE link list */
+ u8 toggle;
};
struct bnxt_qplib_sge {
@@ -164,7 +165,7 @@ struct bnxt_qplib_swqe {
/* Send, with imm, inval key */
struct {
union {
- __be32 imm_data;
+ u32 imm_data;
u32 inv_key;
};
u32 q_key;
@@ -182,7 +183,7 @@ struct bnxt_qplib_swqe {
/* RDMA write, with imm, read */
struct {
union {
- __be32 imm_data;
+ u32 imm_data;
u32 inv_key;
};
u64 remote_va;
@@ -251,6 +252,7 @@ struct bnxt_qplib_q {
struct bnxt_qplib_db_info dbinfo;
struct bnxt_qplib_sg_info sg_info;
u32 max_wqe;
+ u32 max_sw_wqe;
u16 wqe_size;
u16 q_full_delta;
u16 max_sge;
@@ -338,6 +340,9 @@ struct bnxt_qplib_qp {
dma_addr_t rq_hdr_buf_map;
struct list_head sq_flush;
struct list_head rq_flush;
+ u32 msn;
+ u32 msn_tbl_sz;
+ bool is_host_msn_tbl;
};
#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
@@ -348,9 +353,21 @@ struct bnxt_qplib_qp {
#define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG)
#define ROCE_CQE_CMP_V 0
-#define CQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+#define CQE_CMP_VALID(hdr, pass) \
(!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
- !((raw_cons) & (cp_bit)))
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
+
+static inline u32 __bnxt_qplib_get_avail(struct bnxt_qplib_hwq *hwq)
+{
+ int cons, prod, avail;
+
+ cons = hwq->cons;
+ prod = hwq->prod;
+ avail = cons - prod;
+ if (cons <= prod)
+ avail += hwq->depth;
+ return avail;
+}
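
The helper measures the distance from prod around the ring back to cons; once prod has wrapped past cons (cons <= prod) the difference gains a full depth. For depth 8, prod 5, cons 2 that is 2 - 5 + 8 = 5 slots. A quick check of the arithmetic:

    #include <assert.h>
    #include <stdio.h>

    static unsigned int get_avail(unsigned int cons, unsigned int prod,
                                  unsigned int depth)
    {
        int avail = (int)cons - (int)prod;

        if (cons <= prod)
            avail += depth;   /* prod has wrapped past cons */
        return (unsigned int)avail;
    }

    int main(void)
    {
        assert(get_avail(2, 5, 8) == 5);   /* 2 - 5 + 8 */
        assert(get_avail(5, 2, 8) == 3);   /* no wrap term */
        assert(get_avail(0, 0, 8) == 8);   /* cons == prod */
        printf("ok\n");
        return 0;
    }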
static inline bool bnxt_qplib_queue_full(struct bnxt_qplib_q *que,
u8 slots)
@@ -374,7 +391,7 @@ struct bnxt_qplib_cqe {
u16 cfa_meta;
u64 wr_id;
union {
- __be32 immdata;
+ __le32 immdata;
u32 invrkey;
};
u64 qp_handle;
@@ -406,6 +423,7 @@ struct bnxt_qplib_cq {
bool resize_in_progress;
struct bnxt_qplib_sg_info sg_info;
u64 cq_handle;
+ u8 toggle;
#define CQ_RESIZE_WAIT_TIME_MS 500
unsigned long flags;
@@ -443,9 +461,9 @@ struct bnxt_qplib_cq {
#define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
#define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG)
-#define NQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+#define NQE_CMP_VALID(hdr, pass) \
(!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \
- !((raw_cons) & (cp_bit)))
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
#define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024)
@@ -570,15 +588,22 @@ static inline void bnxt_qplib_swq_mod_start(struct bnxt_qplib_q *que, u32 idx)
que->swq_start = que->swq[idx].next_idx;
}
-static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que)
+static inline u32 bnxt_qplib_get_depth(struct bnxt_qplib_q *que, u8 wqe_mode, bool is_sq)
{
- return (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge);
+ u32 slots;
+
+ /* Queue depth is the number of slots. */
+ slots = (que->wqe_size * que->max_wqe) / sizeof(struct sq_sge);
+ /* For variable WQE mode, need to align the slots to 256 */
+ if (wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE && is_sq)
+ slots = ALIGN(slots, BNXT_VAR_MAX_SLOT_ALIGN);
+ return slots;
}
static inline u32 bnxt_qplib_set_sq_size(struct bnxt_qplib_q *que, u8 wqe_mode)
{
return (wqe_mode == BNXT_QPLIB_WQE_MODE_STATIC) ?
- que->max_wqe : bnxt_qplib_get_depth(que);
+ que->max_wqe : bnxt_qplib_get_depth(que, wqe_mode, true);
}
static inline u32 bnxt_qplib_set_sq_max_slot(u8 wqe_mode)
@@ -614,4 +639,25 @@ static inline u16 bnxt_qplib_calc_ilsize(struct bnxt_qplib_swqe *wqe, u16 max)
return size;
}
+
+/* MSN table update inline */
+static inline __le64 bnxt_re_update_msn_tbl(u32 st_idx, u32 npsn, u32 start_psn)
+{
+ return cpu_to_le64((((u64)(st_idx) << SQ_MSN_SEARCH_START_IDX_SFT) &
+ SQ_MSN_SEARCH_START_IDX_MASK) |
+ (((u64)(npsn) << SQ_MSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_MSN_SEARCH_NEXT_PSN_MASK) |
+ (((start_psn) << SQ_MSN_SEARCH_START_PSN_SFT) &
+ SQ_MSN_SEARCH_START_PSN_MASK));
+}
+
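
Each MSN-table row packs the WQE's starting slot index, its next PSN, and its start PSN into one little-endian 64-bit word via bnxt_re_update_msn_tbl. A self-contained sketch of the packing; the field layout below is illustrative, the authoritative shifts and masks are the SQ_MSN_SEARCH_* definitions in roce_hsi.h:

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative: [63:48] start_idx, [47:24] next_psn, [23:0] start_psn */
    #define START_IDX_SFT  48
    #define START_IDX_MASK 0xffff000000000000ULL
    #define NEXT_PSN_SFT   24
    #define NEXT_PSN_MASK  0x0000ffffff000000ULL
    #define START_PSN_MASK 0x0000000000ffffffULL

    static uint64_t update_msn_tbl(uint32_t st_idx, uint32_t npsn, uint32_t spsn)
    {
        return (((uint64_t)st_idx << START_IDX_SFT) & START_IDX_MASK) |
               (((uint64_t)npsn << NEXT_PSN_SFT) & NEXT_PSN_MASK) |
               ((uint64_t)spsn & START_PSN_MASK);
    }

    int main(void)
    {
        uint64_t row = update_msn_tbl(3, 0x101, 0x100);

        printf("row=%#llx\n", (unsigned long long)row); /* 0x3000101000100 */
        return 0;
    }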
+static inline bool __is_var_wqe(struct bnxt_qplib_qp *qp)
+{
+ return (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE);
+}
+
+static inline bool __is_err_cqe_for_var_wqe(struct bnxt_qplib_qp *qp, u8 status)
+{
+ return (status != CQ_REQ_STATUS_OK) && __is_var_wqe(qp);
+}
#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index bc3aea4592b9..3ffaef0c2651 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -55,7 +55,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t);
/**
* bnxt_qplib_map_rc - map return type based on opcode
- * @opcode - roce slow path opcode
+ * @opcode: roce slow path opcode
*
* case #1
* Firmware initiated error recovery is a safe state machine and
@@ -98,8 +98,8 @@ static int bnxt_qplib_map_rc(u8 opcode)
/**
* bnxt_re_is_fw_stalled - Check firmware health
- * @rcfw - rcfw channel instance of rdev
- * @cookie - cookie to track the command
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
*
* If firmware has not responded any rcfw command within
* rcfw->max_timeout, consider firmware as stalled.
@@ -133,8 +133,8 @@ static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
/**
* __wait_for_resp - Don't hold the cpu context and wait for response
- * @rcfw - rcfw channel instance of rdev
- * @cookie - cookie to track the command
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
*
* Wait for command completion in sleepable context.
*
@@ -179,8 +179,8 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
/**
* __block_for_resp - hold the cpu context and wait for response
- * @rcfw - rcfw channel instance of rdev
- * @cookie - cookie to track the command
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
*
* This function will hold the cpu (non-sleepable context) and
* wait for command completion. Maximum holding interval is 8 second.
@@ -216,8 +216,8 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
};
/* __send_message_no_waiter - get cookie and post the message.
- * @rcfw - rcfw channel instance of rdev
- * @msg - qplib message internal
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
*
- * This function will just post and don't bother about completion.
+ * This function will just post and not wait for completion.
* Current design of this function is -
@@ -335,7 +335,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
cpu_to_le64(sbuf->dma_addr));
__set_cmdq_base_resp_size(msg->req, msg->req_sz,
ALIGN(sbuf->size,
- BNXT_QPLIB_CMDQE_UNITS));
+ BNXT_QPLIB_CMDQE_UNITS) /
+ BNXT_QPLIB_CMDQE_UNITS);
}
preq = (u8 *)msg->req;
@@ -373,8 +374,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw,
/**
* __poll_for_resp - self poll completion for rcfw command
- * @rcfw - rcfw channel instance of rdev
- * @cookie - cookie to track the command
+ * @rcfw: rcfw channel instance of rdev
+ * @cookie: cookie to track the command
*
- * It works same as __wait_for_resp except this function will
- * do self polling in sort interval since interrupt is disabled.
+ * It works the same as __wait_for_resp except this function will
+ * do self polling in short intervals since the interrupt is disabled.
@@ -470,8 +471,8 @@ static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw,
/**
* __bnxt_qplib_rcfw_send_message - qplib interface to send
* and complete rcfw command.
- * @rcfw - rcfw channel instance of rdev
- * @msg - qplib message internal
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
*
- * This function does not account shadow queue depth. It will send
- * all the command unconditionally as long as send queue is not full.
+ * This function does not account for shadow queue depth. It will send
+ * all the commands unconditionally as long as the send queue is not full.
@@ -487,7 +488,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_crsqe *crsqe;
unsigned long flags;
u16 cookie;
- int rc = 0;
+ int rc;
u8 opcode;
opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz);
@@ -533,8 +534,8 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
/**
* bnxt_qplib_rcfw_send_message - qplib interface to send
* and complete rcfw command.
- * @rcfw - rcfw channel instance of rdev
- * @msg - qplib message internal
+ * @rcfw: rcfw channel instance of rdev
+ * @msg: qplib message internal
*
- * Driver interact with Firmware through rcfw channel/slow path in two ways.
+ * Driver interacts with Firmware through rcfw channel/slow path in two ways.
* a. Blocking rcfw command send. In this path, driver cannot hold
@@ -664,7 +665,6 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
crsqe = &rcfw->crsqe_tbl[cookie];
- crsqe->is_in_used = false;
if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED,
&rcfw->cmdq.flags),
@@ -680,8 +680,14 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
atomic_dec(&rcfw->timeout_send);
if (crsqe->is_waiter_alive) {
- if (crsqe->resp)
+ if (crsqe->resp) {
memcpy(crsqe->resp, qp_event, sizeof(*qp_event));
+ /* Insert write memory barrier to ensure that
+ * response data is copied before clearing the
+ * flags
+ */
+ smp_wmb();
+ }
if (!blocked)
wait_cmds++;
}
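
The smp_wmb() added above orders the response memcpy before the later clearing of crsqe->is_in_used, so a waiter that observes the slot released is guaranteed to see the completed copy; the read side needs the pairing barrier or an acquire load. A userspace analogue using C11 fences as stand-ins for smp_wmb()/smp_rmb():

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static char resp[16];
    static atomic_bool in_use = true;

    /* completion side: copy first, then publish */
    static void complete(const char *data)
    {
        strncpy(resp, data, sizeof(resp) - 1);
        atomic_thread_fence(memory_order_release);   /* ~ smp_wmb() */
        atomic_store_explicit(&in_use, false, memory_order_relaxed);
    }

    /* waiter side: observe the flag, then read the response */
    static bool poll_done(char *out, size_t len)
    {
        if (atomic_load_explicit(&in_use, memory_order_relaxed))
            return false;
        atomic_thread_fence(memory_order_acquire);   /* pairs with release */
        memcpy(out, resp, len);
        return true;
    }

    int main(void)
    {
        char out[16] = "";

        complete("done");
        if (poll_done(out, sizeof(out)))
            printf("resp=%s\n", out);
        return 0;
    }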
@@ -693,6 +699,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
if (!is_waiter_alive)
crsqe->resp = NULL;
+ crsqe->is_in_used = false;
+
hwq->cons += req_size;
/* This is a case to handle below scenario -
@@ -726,17 +734,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
u32 type, budget = CREQ_ENTRY_POLL_BUDGET;
struct bnxt_qplib_hwq *hwq = &creq->hwq;
struct creq_base *creqe;
- u32 sw_cons, raw_cons;
unsigned long flags;
u32 num_wakeup = 0;
+ u32 hw_polled = 0;
/* Service the CREQ until budget is over */
spin_lock_irqsave(&hwq->lock, flags);
- raw_cons = hwq->cons;
while (budget > 0) {
- sw_cons = HWQ_CMP(raw_cons, hwq);
- creqe = bnxt_qplib_get_qe(hwq, sw_cons, NULL);
- if (!CREQ_CMP_VALID(creqe, raw_cons, hwq->max_elements))
+ creqe = bnxt_qplib_get_qe(hwq, hwq->cons, NULL);
+ if (!CREQ_CMP_VALID(creqe, creq->creq_db.dbinfo.flags))
break;
/* The valid test of the entry must be done first before
* reading any further.
@@ -767,15 +773,15 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t)
type);
break;
}
- raw_cons++;
budget--;
+ hw_polled++;
+ bnxt_qplib_hwq_incr_cons(hwq->max_elements, &hwq->cons,
+ 1, &creq->creq_db.dbinfo.flags);
}
- if (hwq->cons != raw_cons) {
- hwq->cons = raw_cons;
+ if (hw_polled)
bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo,
rcfw->res->cctx, true);
- }
spin_unlock_irqrestore(&hwq->lock, flags);
if (num_wakeup)
wake_up_nr(&rcfw->cmdq.waitq, num_wakeup);
@@ -846,7 +852,7 @@ int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
*/
if (is_virtfn)
goto skip_ctx_setup;
- if (bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+ if (bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
goto config_vf_res;
lvl = ctx->qpc_tbl.level;
@@ -899,6 +905,8 @@ config_vf_res:
req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
skip_ctx_setup:
+ if (BNXT_RE_HW_RETX(rcfw->res->dattr->dev_cap_flags))
+ req.flags |= cpu_to_le16(CMDQ_INITIALIZE_FW_FLAGS_HW_REQUESTER_RETX_SUPPORTED);
req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req), sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
@@ -1105,6 +1113,7 @@ static int bnxt_qplib_map_creq_db(struct bnxt_qplib_rcfw *rcfw, u32 reg_offt)
pdev = rcfw->pdev;
creq_db = &rcfw->creq.creq_db;
+ creq_db->dbinfo.flags = 0;
creq_db->reg.bar_id = RCFW_COMM_CONS_PCI_BAR_REGION;
creq_db->reg.bar_base = pci_resource_start(pdev, creq_db->reg.bar_id);
if (!creq_db->reg.bar_id)
@@ -1195,34 +1204,3 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw,
return 0;
}
-
-struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf(
- struct bnxt_qplib_rcfw *rcfw,
- u32 size)
-{
- struct bnxt_qplib_rcfw_sbuf *sbuf;
-
- sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL);
- if (!sbuf)
- return NULL;
-
- sbuf->size = size;
- sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size,
- &sbuf->dma_addr, GFP_KERNEL);
- if (!sbuf->sb)
- goto bail;
-
- return sbuf;
-bail:
- kfree(sbuf);
- return NULL;
-}
-
-void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_rcfw_sbuf *sbuf)
-{
- if (sbuf->sb)
- dma_free_coherent(&rcfw->pdev->dev, sbuf->size,
- sbuf->sb, sbuf->dma_addr);
- kfree(sbuf);
-}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 7b31bee3e000..45996e60a0d0 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -141,9 +141,9 @@ struct bnxt_qplib_crsbe {
/* Allocate 1 per QP for async error notification for now */
#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
#define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */
-#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \
+#define CREQ_CMP_VALID(hdr, pass) \
(!!((hdr)->v & CREQ_BASE_V) == \
- !((raw_cons) & (cp_bit)))
+ !((pass) & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK))
#define CREQ_ENTRY_POLL_BUDGET 0x100
/* HWQ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 739d942761d1..dfc943fab87b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -118,11 +118,11 @@ static int __alloc_pbl(struct bnxt_qplib_res *res,
else
pages = sginfo->npages;
/* page ptr arrays */
- pbl->pg_arr = vmalloc(pages * sizeof(void *));
+ pbl->pg_arr = vmalloc_array(pages, sizeof(void *));
if (!pbl->pg_arr)
return -ENOMEM;
- pbl->pg_map_arr = vmalloc(pages * sizeof(dma_addr_t));
+ pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t));
if (!pbl->pg_map_arr) {
vfree(pbl->pg_arr);
pbl->pg_arr = NULL;
@@ -343,7 +343,7 @@ done:
hwq->cons = 0;
hwq->pdev = pdev;
hwq->depth = hwq_attr->depth;
- hwq->max_elements = depth;
+ hwq->max_elements = hwq->depth;
hwq->element_size = stride;
hwq->qe_ppg = pg_size / stride;
/* For direct access to the elements */
@@ -385,7 +385,7 @@ static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res,
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
struct bnxt_qplib_tqm_ctx *tqmctx;
- int rc = 0;
+ int rc;
int i;
tqmctx = &ctx->tqm_ctx;
@@ -463,7 +463,7 @@ static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx)
static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res,
struct bnxt_qplib_ctx *ctx)
{
- int rc = 0;
+ int rc;
rc = bnxt_qplib_alloc_tqm_rings(res, ctx);
if (rc)
@@ -501,7 +501,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res,
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
- int rc = 0;
+ int rc;
if (virt_fn || is_p5)
goto stats_alloc;
@@ -642,31 +642,44 @@ static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
}
/* PDs */
-int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd)
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd)
{
+ struct bnxt_qplib_pd_tbl *pdt = &res->pd_tbl;
u32 bit_num;
+ int rc = 0;
+ mutex_lock(&res->pd_tbl_lock);
bit_num = find_first_bit(pdt->tbl, pdt->max);
- if (bit_num == pdt->max)
- return -ENOMEM;
+ if (bit_num == pdt->max) {
+ rc = -ENOMEM;
+ goto exit;
+ }
/* Found unused PD */
clear_bit(bit_num, pdt->tbl);
pd->id = bit_num;
- return 0;
+exit:
+ mutex_unlock(&res->pd_tbl_lock);
+ return rc;
}
int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd_tbl *pdt,
struct bnxt_qplib_pd *pd)
{
+ int rc = 0;
+
+ mutex_lock(&res->pd_tbl_lock);
if (test_and_set_bit(pd->id, pdt->tbl)) {
dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d\n",
pd->id);
- return -EINVAL;
+ rc = -EINVAL;
+ goto exit;
}
pd->id = 0;
- return 0;
+exit:
+ mutex_unlock(&res->pd_tbl_lock);
+ return rc;
}
static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
@@ -691,6 +704,7 @@ static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
pdt->max = max;
memset((u8 *)pdt->tbl, 0xFF, bytes);
+ mutex_init(&res->pd_tbl_lock);
return 0;
}
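
The PD table keeps one bit set per free ID; allocation finds and clears the first set bit, free sets it back, and the new pd_tbl_lock keeps the two from racing. A userspace model of the same allocator, with the bitmap shrunk to 64 IDs and a pthread mutex in place of the driver's:

    #include <pthread.h>
    #include <stdio.h>

    #define MAX_PD 64

    static pthread_mutex_t pd_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long long pd_tbl = ~0ULL;   /* bit set = PD id free */

    static int alloc_pd(void)
    {
        int id = -1;

        pthread_mutex_lock(&pd_lock);
        for (int i = 0; i < MAX_PD; i++) {      /* find_first_bit() */
            if (pd_tbl & (1ULL << i)) {
                pd_tbl &= ~(1ULL << i);         /* clear_bit(): in use */
                id = i;
                break;
            }
        }
        pthread_mutex_unlock(&pd_lock);
        return id;   /* -1 means the table is exhausted */
    }

    static int dealloc_pd(int id)
    {
        int rc = 0;

        pthread_mutex_lock(&pd_lock);
        if (pd_tbl & (1ULL << id))   /* test_and_set_bit() case */
            rc = -1;                 /* freeing an unused PD */
        else
            pd_tbl |= 1ULL << id;
        pthread_mutex_unlock(&pd_lock);
        return rc;
    }

    int main(void)
    {
        int pd = alloc_pd();
        int rc1 = dealloc_pd(pd);
        int rc2 = dealloc_pd(pd);   /* double free is detected */

        printf("pd=%d free=%d double-free=%d\n", pd, rc1, rc2);
        return 0;
    }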
@@ -791,7 +805,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
dpit = &res->dpi_tbl;
reg = &dpit->wcreg;
- if (!bnxt_qplib_is_chip_gen_p5(res->cctx)) {
+ if (!bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
- /* Offest should come from L2 driver */
+ /* Offset should come from L2 driver */
dbr_offset = dev_attr->l2_db_size;
dpit->ucreg.offset = dbr_offset;
@@ -877,7 +891,7 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
struct net_device *netdev,
struct bnxt_qplib_dev_attr *dev_attr)
{
- int rc = 0;
+ int rc;
res->pdev = pdev;
res->netdev = netdev;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index d850a553821e..c2f710364e0f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -44,10 +44,23 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
#define CHIP_NUM_57508 0x1750
#define CHIP_NUM_57504 0x1751
#define CHIP_NUM_57502 0x1752
+#define CHIP_NUM_58818 0xd818
+#define CHIP_NUM_57608 0x1760
+
+#define BNXT_QPLIB_DBR_VALID (0x1UL << 26)
+#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24
+#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25
struct bnxt_qplib_drv_modes {
u8 wqe_mode;
bool db_push;
+ bool dbr_pacing;
+ u32 toggle_bits;
+};
+
+enum bnxt_re_toggle_modes {
+ BNXT_QPLIB_CQ_TOGGLE_BIT = 0x1,
+ BNXT_QPLIB_SRQ_TOGGLE_BIT = 0x2,
};
struct bnxt_qplib_chip_ctx {
@@ -58,6 +71,18 @@ struct bnxt_qplib_chip_ctx {
u16 hwrm_cmd_max_timeout;
struct bnxt_qplib_drv_modes modes;
u64 hwrm_intf_ver;
+ u32 dbr_stat_db_fifo;
+};
+
+struct bnxt_qplib_db_pacing_data {
+ u32 do_pacing;
+ u32 pacing_th;
+ u32 alarm_th;
+ u32 fifo_max_depth;
+ u32 fifo_room_mask;
+ u32 fifo_room_shift;
+ u32 grc_reg_offset;
+ u32 dev_err_state;
};
#define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000
@@ -174,6 +199,20 @@ struct bnxt_qplib_db_info {
struct bnxt_qplib_hwq *hwq;
u32 xid;
u32 max_slot;
+ u32 flags;
+ u8 toggle;
+};
+
+enum bnxt_qplib_db_info_flags_mask {
+ BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT = 0x0UL,
+ BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT = 0x1UL,
+ BNXT_QPLIB_FLAG_EPOCH_CONS_MASK = 0x1UL,
+ BNXT_QPLIB_FLAG_EPOCH_PROD_MASK = 0x2UL,
+};
+
+enum bnxt_qplib_db_epoch_flag_shift {
+ BNXT_QPLIB_DB_EPOCH_CONS_SHIFT = BNXT_QPLIB_DBR_EPOCH_SHIFT,
+ BNXT_QPLIB_DB_EPOCH_PROD_SHIFT = (BNXT_QPLIB_DBR_EPOCH_SHIFT - 1),
};
/* Tables */
@@ -265,14 +304,23 @@ struct bnxt_qplib_res {
struct net_device *netdev;
struct bnxt_qplib_rcfw *rcfw;
struct bnxt_qplib_pd_tbl pd_tbl;
+ /* To protect the pd table bit map */
+ struct mutex pd_tbl_lock;
struct bnxt_qplib_sgid_tbl sgid_tbl;
struct bnxt_qplib_dpi_tbl dpi_tbl;
/* To protect the dpi table bit map */
struct mutex dpi_tbl_lock;
bool prio;
bool is_vf;
+ struct bnxt_qplib_db_pacing_data *pacing_data;
};
+static inline bool bnxt_qplib_is_chip_gen_p7(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return (cctx->chip_num == CHIP_NUM_58818 ||
+ cctx->chip_num == CHIP_NUM_57608);
+}
+
static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
{
return (cctx->chip_num == CHIP_NUM_57508 ||
@@ -280,15 +328,20 @@ static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx)
cctx->chip_num == CHIP_NUM_57502);
}
+static inline bool bnxt_qplib_is_chip_gen_p5_p7(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return bnxt_qplib_is_chip_gen_p5(cctx) || bnxt_qplib_is_chip_gen_p7(cctx);
+}
+
static inline u8 bnxt_qplib_get_hwq_type(struct bnxt_qplib_res *res)
{
- return bnxt_qplib_is_chip_gen_p5(res->cctx) ?
+ return bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ?
HWQ_TYPE_QUEUE : HWQ_TYPE_L2_CMPL;
}
static inline u8 bnxt_qplib_get_ring_type(struct bnxt_qplib_chip_ctx *cctx)
{
- return bnxt_qplib_is_chip_gen_p5(cctx) ?
+ return bnxt_qplib_is_chip_gen_p5_p7(cctx) ?
RING_ALLOC_REQ_RING_TYPE_NQ :
RING_ALLOC_REQ_RING_TYPE_ROCE_CMPL;
}
@@ -355,7 +408,7 @@ void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res,
struct bnxt_qplib_hwq *hwq);
int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq,
struct bnxt_qplib_hwq_attr *hwq_attr);
-int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd *pd);
int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
struct bnxt_qplib_pd_tbl *pd_tbl,
@@ -381,39 +434,61 @@ void bnxt_qplib_unmap_db_bar(struct bnxt_qplib_res *res);
int bnxt_qplib_determine_atomics(struct pci_dev *dev);
-static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_hwq *hwq, u32 cnt)
+static inline void bnxt_qplib_hwq_incr_prod(struct bnxt_qplib_db_info *dbinfo,
+ struct bnxt_qplib_hwq *hwq, u32 cnt)
{
- hwq->prod = (hwq->prod + cnt) % hwq->depth;
+	/* move prod and update toggle/epoch on wrap-around */
+ hwq->prod += cnt;
+ if (hwq->prod >= hwq->depth) {
+ hwq->prod %= hwq->depth;
+ dbinfo->flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_PROD_SHIFT;
+ }
}
-static inline void bnxt_qplib_hwq_incr_cons(struct bnxt_qplib_hwq *hwq,
- u32 cnt)
+static inline void bnxt_qplib_hwq_incr_cons(u32 max_elements, u32 *cons, u32 cnt,
+ u32 *dbinfo_flags)
{
- hwq->cons = (hwq->cons + cnt) % hwq->depth;
+	/* move cons and update toggle/epoch on wrap-around */
+ *cons += cnt;
+ if (*cons >= max_elements) {
+ *cons %= max_elements;
+ *dbinfo_flags ^= 1UL << BNXT_QPLIB_FLAG_EPOCH_CONS_SHIFT;
+ }
}
static inline void bnxt_qplib_ring_db32(struct bnxt_qplib_db_info *info,
bool arm)
{
- u32 key;
+ u32 key = 0;
- key = info->hwq->cons & (info->hwq->max_elements - 1);
- key |= (CMPL_DOORBELL_IDX_VALID |
+ key |= info->hwq->cons | (CMPL_DOORBELL_IDX_VALID |
(CMPL_DOORBELL_KEY_CMPL & CMPL_DOORBELL_KEY_MASK));
if (!arm)
key |= CMPL_DOORBELL_MASK;
writel(key, info->db);
}
+#define BNXT_QPLIB_INIT_DBHDR(xid, type, indx, toggle) \
+ (((u64)(((xid) & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | \
+ (type) | BNXT_QPLIB_DBR_VALID) << 32) | (indx) | \
+ (((u32)(toggle)) << (BNXT_QPLIB_DBR_TOGGLE_SHIFT)))
+
static inline void bnxt_qplib_ring_db(struct bnxt_qplib_db_info *info,
u32 type)
{
u64 key = 0;
+ u32 indx;
+ u8 toggle = 0;
+
+ if (type == DBC_DBC_TYPE_CQ_ARMALL ||
+ type == DBC_DBC_TYPE_CQ_ARMSE)
+ toggle = info->toggle;
- key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
- key <<= 32;
- key |= (info->hwq->cons & (info->hwq->max_elements - 1)) &
- DBC_DBC_INDEX_MASK;
+ indx = (info->hwq->cons & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNXT_QPLIB_FLAG_EPOCH_CONS_MASK) <<
+ BNXT_QPLIB_DB_EPOCH_CONS_SHIFT);
+
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, toggle);
writeq(key, info->db);
}
@@ -421,10 +496,12 @@ static inline void bnxt_qplib_ring_prod_db(struct bnxt_qplib_db_info *info,
u32 type)
{
u64 key = 0;
+ u32 indx;
- key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
- key <<= 32;
- key |= ((info->hwq->prod / info->max_slot)) & DBC_DBC_INDEX_MASK;
+ indx = (((info->hwq->prod / info->max_slot) & DBC_DBC_INDEX_MASK) |
+ ((info->flags & BNXT_QPLIB_FLAG_EPOCH_PROD_MASK) <<
+ BNXT_QPLIB_DB_EPOCH_PROD_SHIFT));
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, indx, 0);
writeq(key, info->db);
}
@@ -432,9 +509,12 @@ static inline void bnxt_qplib_armen_db(struct bnxt_qplib_db_info *info,
u32 type)
{
u64 key = 0;
+ u8 toggle = 0;
- key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | type;
- key <<= 32;
+ if (type == DBC_DBC_TYPE_CQ_ARMENA || type == DBC_DBC_TYPE_SRQ_ARMENA)
+ toggle = info->toggle;
+ /* Index always at 0 */
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, type, 0, toggle);
writeq(key, info->priv_db);
}
@@ -443,9 +523,7 @@ static inline void bnxt_qplib_srq_arm_db(struct bnxt_qplib_db_info *info,
{
u64 key = 0;
- key = (info->xid & DBC_DBC_XID_MASK) | DBC_DBC_PATH_ROCE | th;
- key <<= 32;
- key |= th & DBC_DBC_INDEX_MASK;
+ key = BNXT_QPLIB_INIT_DBHDR(info->xid, DBC_DBC_TYPE_SRQ_ARM, th, info->toggle);
writeq(key, info->priv_db);
}
@@ -456,7 +534,7 @@ static inline void bnxt_qplib_ring_nq_db(struct bnxt_qplib_db_info *info,
u32 type;
type = arm ? DBC_DBC_TYPE_NQ_ARM : DBC_DBC_TYPE_NQ;
- if (bnxt_qplib_is_chip_gen_p5(cctx))
+ if (bnxt_qplib_is_chip_gen_p5_p7(cctx))
bnxt_qplib_ring_db(info, type);
else
bnxt_qplib_ring_db32(info, arm);
@@ -467,4 +545,35 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags)
return dev_cap_flags &
CREQ_QUERY_FUNC_RESP_SB_EXT_STATS;
}
+
+static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
+{
+ return dev_cap_flags &
+ (CREQ_QUERY_FUNC_RESP_SB_HW_REQUESTER_RETX_ENABLED |
+ CREQ_QUERY_FUNC_RESP_SB_HW_RESPONDER_RETX_ENABLED);
+}
+
+#define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a))
+
+static inline bool _is_host_msn_table(u16 dev_cap_ext_flags2)
+{
+ return (dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK) ==
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE;
+}
+
+static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx)
+{
+ return cctx->modes.dbr_pacing;
+}
+
+static inline bool _is_alloc_mr_unified(u16 dev_cap_flags)
+{
+ return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_MR_REGISTER_ALLOC;
+}
+
+static inline bool _is_relaxed_ordering_supported(u16 dev_cap_ext_flags2)
+{
+ return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED;
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
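For illustration, a minimal standalone sketch (not driver code) of the 64-bit doorbell word that BNXT_QPLIB_INIT_DBHDR assembles above: xid, type and the valid bit sit in the high 32 bits, while the ring index carries the epoch and toggle bits in the low word. The mask widths below are assumed, and DBC_DBC_PATH_ROCE is omitted for brevity.

#include <stdint.h>
#include <stdio.h>

#define DBR_VALID        (0x1u << 26)
#define DBR_TOGGLE_SHIFT 25
#define DBR_EPOCH_SHIFT  24
#define XID_MASK         0xfffffu   /* assumed width of DBC_DBC_XID_MASK */
#define INDEX_MASK       0xffffffu  /* assumed width of DBC_DBC_INDEX_MASK */

static uint64_t init_dbhdr(uint32_t xid, uint32_t type, uint32_t indx,
			   uint8_t toggle)
{
	/* mirrors BNXT_QPLIB_INIT_DBHDR: header in bits 63..32, index below */
	return (((uint64_t)((xid & XID_MASK) | type | DBR_VALID)) << 32) |
	       indx | ((uint32_t)toggle << DBR_TOGGLE_SHIFT);
}

int main(void)
{
	uint32_t cons = 5, epoch = 1;
	/* fold the epoch flag into the index, as bnxt_qplib_ring_db() does */
	uint32_t indx = (cons & INDEX_MASK) | (epoch << DBR_EPOCH_SHIFT);

	printf("db=%#llx\n", (unsigned long long)init_dbhdr(0x12, 0, indx, 1));
	return 0;
}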
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index ab45f9d4bb02..4f75e7e5bcf7 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -59,7 +59,7 @@ static bool bnxt_qplib_is_atomic_cap(struct bnxt_qplib_rcfw *rcfw)
{
u16 pcie_ctl2 = 0;
- if (!bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx))
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rcfw->res->cctx))
return false;
pcie_capability_read_word(rcfw->pdev, PCI_EXP_DEVCTL2, &pcie_ctl2);
@@ -72,7 +72,7 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
struct creq_query_version_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct cmdq_query_version req = {};
- int rc = 0;
+ int rc;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_VERSION,
@@ -89,31 +89,31 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw,
}
int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_dev_attr *attr, bool vf)
+ struct bnxt_qplib_dev_attr *attr)
{
struct creq_query_func_resp resp = {};
struct bnxt_qplib_cmdqmsg msg = {};
struct creq_query_func_resp_sb *sb;
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ struct bnxt_qplib_chip_ctx *cctx;
struct cmdq_query_func req = {};
u8 *tqm_alloc;
- int i, rc = 0;
+ int i, rc;
u32 temp;
+ cctx = rcfw->res->cctx;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_FUNC,
sizeof(req));
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf) {
- dev_err(&rcfw->pdev->dev,
- "SP: QUERY_FUNC alloc side buffer failed\n");
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- }
-
- sb = sbuf->sb;
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req),
+ sb = sbuf.sb;
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
@@ -121,9 +121,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
/* Extract the context from the side buffer */
attr->max_qp = le32_to_cpu(sb->max_qp);
- /* max_qp value reported by FW for PF doesn't include the QP1 for PF */
- if (!vf)
- attr->max_qp += 1;
+ /* max_qp value reported by FW doesn't include the QP1 */
+ attr->max_qp += 1;
attr->max_qp_rd_atom =
sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
@@ -136,8 +135,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
* reporting the max number
*/
attr->max_qp_wqes -= BNXT_QPLIB_RESERVED_QP_WRS + 1;
- attr->max_qp_sges = bnxt_qplib_is_chip_gen_p5(rcfw->res->cctx) ?
- 6 : sb->max_sge;
+
+ attr->max_qp_sges = cctx->modes.wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE ?
+ min_t(u32, sb->max_sge_var_wqe, BNXT_VAR_MAX_SGE) : 6;
attr->max_cq = le32_to_cpu(sb->max_cq);
attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
attr->max_cq_sges = attr->max_qp_sges;
@@ -154,10 +154,12 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->max_srq_sges = sb->max_srq_sge;
attr->max_pkey = 1;
attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
- attr->l2_db_size = (sb->l2_db_space_size + 1) *
- (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
+ if (!bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx))
+ attr->l2_db_size = (sb->l2_db_space_size + 1) *
+ (0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED;
attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
+ attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
bnxt_qplib_query_version(rcfw, attr->fw_ver);
@@ -175,7 +177,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw);
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
@@ -186,7 +189,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct creq_set_func_resources_resp resp = {};
struct cmdq_set_func_resources req = {};
struct bnxt_qplib_cmdqmsg msg = {};
- int rc = 0;
+ int rc;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES,
@@ -541,7 +544,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
req.pd_id = cpu_to_le32(mrw->pd->id);
req.mrw_flags = mrw->type;
if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
- mrw->flags & BNXT_QPLIB_FR_PMR) ||
+ mrw->access_flags & BNXT_QPLIB_FR_PMR) ||
mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
@@ -653,9 +656,12 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
req.log2_pbl_pg_size = cpu_to_le16(((ilog2(PAGE_SIZE) <<
CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_SFT) &
CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_MASK));
- req.access = (mr->flags & 0xFFFF);
+ req.access = (mr->access_flags & 0xFFFF);
req.va = cpu_to_le64(mr->va);
req.key = cpu_to_le32(mr->lkey);
+ if (_is_alloc_mr_unified(res->dattr->dev_cap_flags))
+ req.key = cpu_to_le32(mr->pd->id);
+ req.flags = cpu_to_le16(mr->flags);
req.mr_size = cpu_to_le64(mr->total_size);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, NULL, sizeof(req),
@@ -664,6 +670,11 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
if (rc)
goto fail;
+ if (_is_alloc_mr_unified(res->dattr->dev_cap_flags)) {
+ mr->lkey = le32_to_cpu(resp.xid);
+ mr->rkey = mr->lkey;
+ }
+
return 0;
fail:
@@ -718,23 +729,22 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
struct creq_query_roce_stats_resp_sb *sb;
struct cmdq_query_roce_stats req = {};
struct bnxt_qplib_cmdqmsg msg = {};
- struct bnxt_qplib_rcfw_sbuf *sbuf;
- int rc = 0;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ int rc;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_BASE_OPCODE_QUERY_ROCE_STATS,
sizeof(req));
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf) {
- dev_err(&rcfw->pdev->dev,
- "SP: QUERY_ROCE_STATS alloc side buffer failed\n");
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- }
+ sb = sbuf.sb;
- sb = sbuf->sb;
- req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
- bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req),
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
@@ -790,7 +800,8 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw,
}
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
@@ -801,49 +812,56 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
struct creq_query_roce_stats_ext_resp_sb *sb;
struct cmdq_query_roce_stats_ext req = {};
struct bnxt_qplib_cmdqmsg msg = {};
- struct bnxt_qplib_rcfw_sbuf *sbuf;
+ struct bnxt_qplib_rcfw_sbuf sbuf;
int rc;
- sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb));
- if (!sbuf) {
- dev_err(&rcfw->pdev->dev,
- "SP: QUERY_ROCE_STATS_EXT alloc sb failed");
+ sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
return -ENOMEM;
- }
+ sb = sbuf.sb;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS,
sizeof(req));
- req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
- req.resp_addr = cpu_to_le64(sbuf->dma_addr);
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ req.resp_addr = cpu_to_le64(sbuf.dma_addr);
req.function_id = cpu_to_le32(fid);
req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID);
- bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req),
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
if (rc)
goto bail;
- sb = sbuf->sb;
estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts);
estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts);
estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts);
estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts);
estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts);
+ estat->tx_roce_pkts = le64_to_cpu(sb->tx_roce_pkts);
+ estat->tx_roce_bytes = le64_to_cpu(sb->tx_roce_bytes);
estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts);
estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts);
estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts);
estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts);
estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts);
+ estat->rx_roce_pkts = le64_to_cpu(sb->rx_roce_pkts);
+ estat->rx_roce_bytes = le64_to_cpu(sb->rx_roce_bytes);
estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts);
estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes);
estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts);
estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts);
+ estat->tx_cnp = le64_to_cpu(sb->tx_cnp_pkts);
+ estat->rx_cnp = le64_to_cpu(sb->rx_cnp_pkts);
+ estat->rx_ecn_marked = le64_to_cpu(sb->rx_ecn_marked_pkts);
bail:
- bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf);
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size,
+ sbuf.sb, sbuf.dma_addr);
return rc;
}
@@ -929,7 +947,7 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
req->inactivity_th = cpu_to_le16(cc_param->inact_th);
/* For chip gen P5 onwards fill extended cmd and header */
- if (bnxt_qplib_is_chip_gen_p5(res->cctx)) {
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
struct roce_tlv *hdr;
u32 payload;
u32 chunks;
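The query/stats hunks above all apply the same conversion: the side-buffer descriptor moves from a heap-allocated helper object to the stack, and only the DMA buffer itself is allocated. A hedged sketch of the resulting lifecycle, with a hypothetical helper name:

#include <linux/dma-mapping.h>

/* example_query() is hypothetical; it only demonstrates the on-stack
 * sbuf pattern used by the converted functions above.
 */
static int example_query(struct bnxt_qplib_rcfw *rcfw, size_t sb_size)
{
	struct bnxt_qplib_rcfw_sbuf sbuf;
	int rc = 0;

	sbuf.size = ALIGN(sb_size, BNXT_QPLIB_CMDQE_UNITS);
	sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
				     &sbuf.dma_addr, GFP_KERNEL);
	if (!sbuf.sb)
		return -ENOMEM;

	/* ... fill the request, set resp_size = sbuf.size / CMDQE_UNITS,
	 * send via bnxt_qplib_rcfw_send_message(), then parse sbuf.sb ...
	 */

	dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
	return rc;
}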
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 264ef3cedc45..acd9c14a31c4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -40,6 +40,7 @@
#ifndef __BNXT_QPLIB_SP_H__
#define __BNXT_QPLIB_SP_H__
+#include <rdma/bnxt_re-abi.h>
#define BNXT_QPLIB_RESERVED_QP_WRS 128
struct bnxt_qplib_dev_attr {
@@ -72,6 +73,7 @@ struct bnxt_qplib_dev_attr {
u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
bool is_atomic;
u16 dev_cap_flags;
+ u16 dev_cap_flags2;
u32 max_dpi;
};
@@ -107,7 +109,7 @@ struct bnxt_qplib_ah {
struct bnxt_qplib_mrw {
struct bnxt_qplib_pd *pd;
int type;
- u32 flags;
+ u32 access_flags;
#define BNXT_QPLIB_FR_PMR 0x80000000
u32 lkey;
u32 rkey;
@@ -115,6 +117,7 @@ struct bnxt_qplib_mrw {
u64 va;
u64 total_size;
u32 npages;
+ u16 flags;
u64 mr_handle;
struct bnxt_qplib_hwq hwq;
};
@@ -322,7 +325,7 @@ int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
struct bnxt_qplib_gid *gid, u16 gid_idx,
const u8 *smac);
int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_dev_attr *attr, bool vf);
+ struct bnxt_qplib_dev_attr *attr);
int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx);
@@ -350,4 +353,11 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
struct bnxt_qplib_cc_param *cc_param);
+#define BNXT_VAR_MAX_WQE 4352
+#define BNXT_VAR_MAX_SLOT_ALIGN 256
+#define BNXT_VAR_MAX_SGE 13
+#define BNXT_RE_MAX_RQ_WQES 65536
+
+#define BNXT_STATIC_MAX_SGE 6
+
#endif /* __BNXT_QPLIB_SP_H__*/
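A short sketch of the SGE sizing rule these constants support, matching the bnxt_qplib_get_dev_attr() change earlier in this patch: variable-WQE mode advertises up to BNXT_VAR_MAX_SGE capped by the firmware-reported max_sge_var_wqe, while fixed-size WQE mode stays at BNXT_STATIC_MAX_SGE. The helper name is hypothetical.

#include <linux/types.h>
#include <linux/minmax.h>

/* pick_max_qp_sges() is illustrative only, not a driver function */
static u32 pick_max_qp_sges(bool var_wqe_mode, u32 fw_max_sge_var_wqe)
{
	if (var_wqe_mode)
		return min_t(u32, fw_max_sge_var_wqe, BNXT_VAR_MAX_SGE);
	return BNXT_STATIC_MAX_SGE;
}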
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_tlv.h b/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
index 402c220734f6..ae96a75d7f31 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_tlv.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
#ifndef __QPLIB_TLV_H__
#define __QPLIB_TLV_H__
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 4a10303e0392..3ec895284e49 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -409,7 +409,7 @@ struct creq_deinitialize_fw_resp {
u8 reserved48[6];
};
-/* cmdq_create_qp (size:768b/96B) */
+/* cmdq_create_qp (size:832b/104B) */
struct cmdq_create_qp {
u8 opcode;
#define CMDQ_CREATE_QP_OPCODE_CREATE_QP 0x1UL
@@ -430,8 +430,11 @@ struct cmdq_create_qp {
#define CMDQ_CREATE_QP_QP_FLAGS_OPTIMIZED_TRANSMIT_ENABLED 0x20UL
#define CMDQ_CREATE_QP_QP_FLAGS_RESPONDER_UD_CQE_WITH_CFA 0x40UL
#define CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED 0x80UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_EXPRESS_MODE_ENABLED 0x100UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_STEERING_TAG_VALID 0x200UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RDMA_READ_OR_ATOMICS_USED 0x400UL
#define CMDQ_CREATE_QP_QP_FLAGS_LAST \
- CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED
+ CMDQ_CREATE_QP_QP_FLAGS_RDMA_READ_OR_ATOMICS_USED
u8 type;
#define CMDQ_CREATE_QP_TYPE_RC 0x2UL
#define CMDQ_CREATE_QP_TYPE_UD 0x4UL
@@ -492,6 +495,9 @@ struct cmdq_create_qp {
__le64 rq_pbl;
__le64 irrq_addr;
__le64 orrq_addr;
+ __le32 request_xid;
+ __le16 steering_tag;
+ __le16 reserved16;
};
/* creq_create_qp_resp (size:128b/16B) */
@@ -555,7 +561,12 @@ struct cmdq_modify_qp {
__le16 flags;
__le16 cookie;
u8 resp_size;
- u8 reserved8;
+ u8 qp_type;
+ #define CMDQ_MODIFY_QP_QP_TYPE_RC 0x2UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_UD 0x4UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_GSI 0x7UL
+ #define CMDQ_MODIFY_QP_QP_TYPE_LAST CMDQ_MODIFY_QP_QP_TYPE_GSI
__le64 resp_addr;
__le32 modify_mask;
#define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL
@@ -611,14 +622,12 @@ struct cmdq_modify_qp {
#define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6)
#define CMDQ_MODIFY_QP_NETWORK_TYPE_LAST CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6
u8 access;
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK \
- 0xffUL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT \
- 0
- #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
- #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_MASK 0xffUL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC_REMOTE_READ_REMOTE_WRITE_LOCAL_WRITE_SFT 0
+ #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
__le16 pkey;
__le32 qkey;
__le32 dgid[4];
@@ -673,6 +682,13 @@ struct cmdq_modify_qp {
#define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13
__le64 irrq_addr;
__le64 orrq_addr;
+ __le32 ext_modify_mask;
+ #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_EXT_STATS_CTX 0x1UL
+ #define CMDQ_MODIFY_QP_EXT_MODIFY_MASK_SCHQ_ID_VALID 0x2UL
+ __le32 ext_stats_ctx_id;
+ __le16 schq_id;
+ __le16 unused_0;
+ __le32 reserved32;
};
/* creq_modify_qp_resp (size:128b/16B) */
@@ -962,13 +978,14 @@ struct creq_query_qp_extend_resp_sb_tlv {
__le16 reserved_16;
};
-/* cmdq_create_srq (size:384b/48B) */
+/* cmdq_create_srq (size:448b/56B) */
struct cmdq_create_srq {
u8 opcode;
#define CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ 0x5UL
#define CMDQ_CREATE_SRQ_OPCODE_LAST CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ
u8 cmd_size;
__le16 flags;
+ #define CMDQ_CREATE_SRQ_FLAGS_STEERING_TAG_VALID 0x1UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -1002,6 +1019,8 @@ struct cmdq_create_srq {
__le32 dpi;
__le32 pd_id;
__le64 pbl;
+ __le16 steering_tag;
+ u8 reserved48[6];
};
/* creq_create_srq_resp (size:128b/16B) */
@@ -1108,7 +1127,7 @@ struct creq_query_srq_resp_sb {
__le32 data[4];
};
-/* cmdq_create_cq (size:384b/48B) */
+/* cmdq_create_cq (size:448b/56B) */
struct cmdq_create_cq {
u8 opcode;
#define CMDQ_CREATE_CQ_OPCODE_CREATE_CQ 0x9UL
@@ -1116,6 +1135,8 @@ struct cmdq_create_cq {
u8 cmd_size;
__le16 flags;
#define CMDQ_CREATE_CQ_FLAGS_DISABLE_CQ_OVERFLOW_DETECTION 0x1UL
+ #define CMDQ_CREATE_CQ_FLAGS_STEERING_TAG_VALID 0x2UL
+ #define CMDQ_CREATE_CQ_FLAGS_INFINITE_CQ_MODE 0x4UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -1147,6 +1168,8 @@ struct cmdq_create_cq {
__le32 dpi;
__le32 cq_size;
__le64 pbl;
+ __le16 steering_tag;
+ u8 reserved48[6];
};
/* creq_create_cq_resp (size:128b/16B) */
@@ -1278,11 +1301,12 @@ struct cmdq_allocate_mrw {
#define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A 0x3UL
#define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B 0x4UL
#define CMDQ_ALLOCATE_MRW_MRW_FLAGS_LAST CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B
- #define CMDQ_ALLOCATE_MRW_UNUSED4_MASK 0xf0UL
- #define CMDQ_ALLOCATE_MRW_UNUSED4_SFT 4
+ #define CMDQ_ALLOCATE_MRW_STEERING_TAG_VALID 0x10UL
+ #define CMDQ_ALLOCATE_MRW_UNUSED4_MASK 0xe0UL
+ #define CMDQ_ALLOCATE_MRW_UNUSED4_SFT 5
u8 access;
#define CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY 0x20UL
- __le16 unused16;
+ __le16 steering_tag;
__le32 pd_id;
};
@@ -1349,14 +1373,16 @@ struct creq_deallocate_key_resp {
__le32 bound_window_info;
};
-/* cmdq_register_mr (size:384b/48B) */
+/* cmdq_register_mr (size:448b/56B) */
struct cmdq_register_mr {
u8 opcode;
#define CMDQ_REGISTER_MR_OPCODE_REGISTER_MR 0xfUL
#define CMDQ_REGISTER_MR_OPCODE_LAST CMDQ_REGISTER_MR_OPCODE_REGISTER_MR
u8 cmd_size;
__le16 flags;
- #define CMDQ_REGISTER_MR_FLAGS_ALLOC_MR 0x1UL
+ #define CMDQ_REGISTER_MR_FLAGS_ALLOC_MR 0x1UL
+ #define CMDQ_REGISTER_MR_FLAGS_STEERING_TAG_VALID 0x2UL
+ #define CMDQ_REGISTER_MR_FLAGS_ENABLE_RO 0x4UL
__le16 cookie;
u8 resp_size;
u8 reserved8;
@@ -1405,6 +1431,8 @@ struct cmdq_register_mr {
__le64 pbl;
__le64 va;
__le64 mr_size;
+ __le16 steering_tag;
+ u8 reserved48[6];
};
/* creq_register_mr_resp (size:128b/16B) */
@@ -2147,8 +2175,36 @@ struct creq_query_func_resp_sb {
__le32 tqm_alloc_reqs[12];
__le32 max_dpi;
u8 max_sge_var_wqe;
- u8 reserved_8;
+ u8 dev_cap_ext_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_ATOMIC_OPS_NOT_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DRV_VERSION_RGTR_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_QP_BATCH_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DESTROY_QP_BATCH_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_ROCE_STATS_EXT_CTX_SUPPORTED 0x10UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_SRQ_SGE_SUPPORTED 0x20UL
+ #define CREQ_QUERY_FUNC_RESP_SB_FIXED_SIZE_WQE_DISABLED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DCN_SUPPORTED 0x80UL
__le16 max_inline_data_var_wqe;
+ __le32 start_qid;
+ u8 max_msn_table_size;
+ u8 reserved8_1;
+ __le16 dev_cap_ext_flags_2;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CHANGE_UDP_SRC_PORT_WQE_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK 0x30UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_SFT 4
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_PSN_TABLE (0x0UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE (0x1UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ __le16 max_xp_qp_size;
+ __le16 create_qp_batch_size;
+ __le16 destroy_qp_batch_size;
+ __le16 reserved16;
+ __le64 reserved64;
};
/* cmdq_set_func_resources (size:448b/56B) */
@@ -2919,6 +2975,35 @@ struct creq_qp_error_notification {
u8 status;
u8 req_slow_path_state;
u8 req_err_state_reason;
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_NO_ERROR 0x0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT 0x2UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT 0x3UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1 0x4UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2 0x5UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3 0x6UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4 0x7UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR 0x8UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR 0x9UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH 0xaUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP 0xbUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND 0xcUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG 0xdUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE 0xeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR 0xfUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR 0x10UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR 0x11UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR 0x12UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR 0x13UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR 0x14UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR 0x15UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR 0x16UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR 0x17UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR 0x18UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR 0x19UL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR 0x1aUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR 0x1bUL
+ #define CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR 0x1cUL
__le32 xid;
u8 v;
#define CREQ_QP_ERROR_NOTIFICATION_V 0x1UL
@@ -2928,6 +3013,35 @@ struct creq_qp_error_notification {
CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION
u8 res_slow_path_state;
u8 res_err_state_reason;
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_NO_ERROR 0x0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH 0x2UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE 0x3UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR 0x4UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT 0x5UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY 0x6UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR 0x7UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION 0x8UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR 0x9UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY 0xaUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR 0xbUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION 0xcUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR 0xdUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW 0xeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE 0xfUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC 0x10UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE 0x11UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR 0x12UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR 0x13UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR 0x14UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY 0x15UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR 0x16UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR 0x17UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR 0x18UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR 0x19UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR 0x1bUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR 0x1cUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND 0x1dUL
__le16 sq_cons_idx;
__le16 rq_cons_idx;
};
@@ -3017,6 +3131,17 @@ struct sq_psn_search_ext {
__le32 reserved32;
};
+/* sq_msn_search (size:64b/8B) */
+struct sq_msn_search {
+ __le64 start_idx_next_psn_start_psn;
+ #define SQ_MSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_MSN_SEARCH_START_PSN_SFT 0
+ #define SQ_MSN_SEARCH_NEXT_PSN_MASK 0xffffff000000ULL
+ #define SQ_MSN_SEARCH_NEXT_PSN_SFT 24
+ #define SQ_MSN_SEARCH_START_IDX_MASK 0xffff000000000000ULL
+ #define SQ_MSN_SEARCH_START_IDX_SFT 48
+};
+
/* sq_send (size:1024b/128B) */
struct sq_send {
u8 wqe_type;
@@ -3705,13 +3830,35 @@ struct cq_base {
#define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1)
#define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
#define CQ_BASE_CQE_TYPE_RES_UD_CFA (0x4UL << 1)
+ #define CQ_BASE_CQE_TYPE_REQ_V3 (0x8UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC_V3 (0x9UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_V3 (0xaUL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1_V3 (0xbUL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD_CFA_V3 (0xcUL << 1)
#define CQ_BASE_CQE_TYPE_NO_OP (0xdUL << 1)
#define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1)
#define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1)
#define CQ_BASE_CQE_TYPE_LAST CQ_BASE_CQE_TYPE_CUT_OFF
u8 status;
+ #define CQ_BASE_STATUS_OK 0x0UL
+ #define CQ_BASE_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_BASE_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_BASE_STATUS_HW_LOCAL_LENGTH_ERR 0x3UL
+ #define CQ_BASE_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_BASE_STATUS_LOCAL_PROTECTION_ERR 0x5UL
+ #define CQ_BASE_STATUS_LOCAL_ACCESS_ERROR 0x6UL
+ #define CQ_BASE_STATUS_MEMORY_MGT_OPERATION_ERR 0x7UL
+ #define CQ_BASE_STATUS_REMOTE_INVALID_REQUEST_ERR 0x8UL
+ #define CQ_BASE_STATUS_REMOTE_ACCESS_ERR 0x9UL
+ #define CQ_BASE_STATUS_REMOTE_OPERATION_ERR 0xaUL
+ #define CQ_BASE_STATUS_RNR_NAK_RETRY_CNT_ERR 0xbUL
+ #define CQ_BASE_STATUS_TRANSPORT_RETRY_CNT_ERR 0xcUL
+ #define CQ_BASE_STATUS_WORK_REQUEST_FLUSHED_ERR 0xdUL
+ #define CQ_BASE_STATUS_HW_FLUSH_ERR 0xeUL
+ #define CQ_BASE_STATUS_OVERFLOW_ERR 0xfUL
+ #define CQ_BASE_STATUS_LAST CQ_BASE_STATUS_OVERFLOW_ERR
__le16 reserved16;
- __le32 reserved32;
+ __le32 opaque;
};
/* cq_req (size:256b/32B) */
@@ -4326,6 +4473,8 @@ struct cq_cutoff {
#define CQ_CUTOFF_CQE_TYPE_SFT 1
#define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1)
#define CQ_CUTOFF_CQE_TYPE_LAST CQ_CUTOFF_CQE_TYPE_CUT_OFF
+ #define CQ_CUTOFF_RESIZE_TOGGLE_MASK 0x60UL
+ #define CQ_CUTOFF_RESIZE_TOGGLE_SFT 5
u8 status;
#define CQ_CUTOFF_STATUS_OK 0x0UL
#define CQ_CUTOFF_STATUS_LAST CQ_CUTOFF_STATUS_OK
@@ -4377,6 +4526,8 @@ struct nq_srq_event {
#define NQ_SRQ_EVENT_TYPE_SFT 0
#define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL
#define NQ_SRQ_EVENT_TYPE_LAST NQ_SRQ_EVENT_TYPE_SRQ_EVENT
+ #define NQ_SRQ_EVENT_TOGGLE_MASK 0xc0UL
+ #define NQ_SRQ_EVENT_TOGGLE_SFT 6
u8 event;
#define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL
#define NQ_SRQ_EVENT_EVENT_LAST NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT
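To illustrate the sq_msn_search layout added above, here is a standalone sketch (not driver code) that packs and unpacks the 24-bit start/next PSNs and the 16-bit start index from the single 64-bit word, using the same masks and shifts:

#include <stdint.h>
#include <stdio.h>

#define START_PSN_MASK 0xffffffULL	/* bits 23..0 */
#define NEXT_PSN_SFT   24		/* bits 47..24 */
#define START_IDX_SFT  48		/* bits 63..48 */

static uint64_t msn_pack(uint32_t start_psn, uint32_t next_psn, uint16_t idx)
{
	return (start_psn & START_PSN_MASK) |
	       ((uint64_t)(next_psn & 0xffffff) << NEXT_PSN_SFT) |
	       ((uint64_t)idx << START_IDX_SFT);
}

int main(void)
{
	uint64_t e = msn_pack(0x100, 0x104, 7);

	printf("start_psn=%#llx next_psn=%#llx idx=%llu\n",
	       (unsigned long long)(e & START_PSN_MASK),
	       (unsigned long long)((e >> NEXT_PSN_SFT) & 0xffffff),
	       (unsigned long long)(e >> START_IDX_SFT));
	return 0;
}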
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index ced615b5ea09..b3757c6a0457 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1222,6 +1222,8 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
int ret;
ep = lookup_atid(t, atid);
+ if (!ep)
+ return -EINVAL;
pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
@@ -1965,6 +1967,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
int win;
skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
req = __skb_put_zero(skb, sizeof(*req));
req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
@@ -2276,6 +2281,9 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
int ret = 0;
ep = lookup_atid(t, atid);
+ if (!ep)
+ return -EINVAL;
+
la = (struct sockaddr_in *)&ep->com.local_addr;
ra = (struct sockaddr_in *)&ep->com.remote_addr;
la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 7e2835dcbc1c..14ced7b667fa 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -995,8 +995,9 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
}
int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
@@ -1125,13 +1126,19 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
goto err_free_mm2;
mm->key = uresp.key;
- mm->addr = virt_to_phys(chp->cq.queue);
+ mm->addr = 0;
+ mm->vaddr = chp->cq.queue;
+ mm->dma_addr = chp->cq.dma_addr;
mm->len = chp->cq.memsize;
+ insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
insert_mmap(ucontext, mm);
mm2->key = uresp.gts_key;
mm2->addr = chp->cq.bar2_pa;
mm2->len = PAGE_SIZE;
+ mm2->vaddr = NULL;
+ mm2->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, mm2, mm2->addr);
insert_mmap(ucontext, mm2);
}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 50cb2259bf87..5b3007acaa1f 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -532,11 +532,21 @@ static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
return container_of(c, struct c4iw_ucontext, ibucontext);
}
+enum {
+ CXGB4_MMAP_BAR,
+ CXGB4_MMAP_BAR_WC,
+ CXGB4_MMAP_CONTIG,
+ CXGB4_MMAP_NON_CONTIG,
+};
+
struct c4iw_mm_entry {
struct list_head entry;
u64 addr;
u32 key;
+ void *vaddr;
+ dma_addr_t dma_addr;
unsigned len;
+ u8 mmap_flag;
};
static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
@@ -561,6 +571,32 @@ static inline struct c4iw_mm_entry *remove_mmap(struct c4iw_ucontext *ucontext,
return NULL;
}
+static inline void insert_flag_to_mmap(struct c4iw_rdev *rdev,
+ struct c4iw_mm_entry *mm, u64 addr)
+{
+ if (addr >= pci_resource_start(rdev->lldi.pdev, 0) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
+ pci_resource_len(rdev->lldi.pdev, 0))))
+ mm->mmap_flag = CXGB4_MMAP_BAR;
+ else if (addr >= pci_resource_start(rdev->lldi.pdev, 2) &&
+ (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
+ pci_resource_len(rdev->lldi.pdev, 2)))) {
+ if (addr >= rdev->oc_mw_pa) {
+ mm->mmap_flag = CXGB4_MMAP_BAR_WC;
+ } else {
+ if (is_t4(rdev->lldi.adapter_type))
+ mm->mmap_flag = CXGB4_MMAP_BAR;
+ else
+ mm->mmap_flag = CXGB4_MMAP_BAR_WC;
+ }
+ } else {
+ if (addr)
+ mm->mmap_flag = CXGB4_MMAP_CONTIG;
+ else
+ mm->mmap_flag = CXGB4_MMAP_NON_CONTIG;
+ }
+}
+
static inline void insert_mmap(struct c4iw_ucontext *ucontext,
struct c4iw_mm_entry *mm)
{
@@ -930,15 +966,12 @@ void c4iw_id_table_free(struct c4iw_id_table *alloc);
typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
-int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
- struct l2t_entry *l2t);
void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
struct c4iw_dev_ucontext *uctx);
u32 c4iw_get_resource(struct c4iw_id_table *id_table);
void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
u32 nr_pdid, u32 nr_srqt);
-int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
int c4iw_pblpool_create(struct c4iw_rdev *rdev);
int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
int c4iw_ocqp_pool_create(struct c4iw_rdev *rdev);
@@ -946,7 +979,6 @@ void c4iw_pblpool_destroy(struct c4iw_rdev *rdev);
void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev);
void c4iw_ocqp_pool_destroy(struct c4iw_rdev *rdev);
void c4iw_destroy_resource(struct c4iw_resource *rscp);
-int c4iw_destroy_ctrl_qp(struct c4iw_rdev *rdev);
void c4iw_register_device(struct work_struct *work);
void c4iw_unregister_device(struct c4iw_dev *dev);
int __init c4iw_cm_init(void);
@@ -980,7 +1012,7 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata);
int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
void c4iw_cq_rem_ref(struct c4iw_cq *chp);
int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask srq_attr_mask,
@@ -1008,8 +1040,6 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
int c4iw_flush_sq(struct c4iw_qp *qhp);
int c4iw_ev_handler(struct c4iw_dev *rnicp, u32 qid);
-u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
-int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);
u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx);
void c4iw_put_cqid(struct c4iw_rdev *rdev, u32 qid,
struct c4iw_dev_ucontext *uctx);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 246b739ddb2b..10a4c738b59f 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -113,6 +113,9 @@ static int c4iw_alloc_ucontext(struct ib_ucontext *ucontext,
mm->key = uresp.status_page_key;
mm->addr = virt_to_phys(rhp->rdev.status_page);
mm->len = PAGE_SIZE;
+ mm->vaddr = NULL;
+ mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, mm, mm->addr);
insert_mmap(context, mm);
}
return 0;
@@ -131,6 +134,11 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
struct c4iw_mm_entry *mm;
struct c4iw_ucontext *ucontext;
u64 addr;
+ u8 mmap_flag;
+ size_t size;
+ void *vaddr;
+ unsigned long vm_pgoff;
+ dma_addr_t dma_addr;
pr_debug("pgoff 0x%lx key 0x%x len %d\n", vma->vm_pgoff,
key, len);
@@ -145,47 +153,38 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if (!mm)
return -EINVAL;
addr = mm->addr;
+ vaddr = mm->vaddr;
+ dma_addr = mm->dma_addr;
+ size = mm->len;
+ mmap_flag = mm->mmap_flag;
kfree(mm);
- if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) &&
- (addr < (pci_resource_start(rdev->lldi.pdev, 0) +
- pci_resource_len(rdev->lldi.pdev, 0)))) {
-
- /*
- * MA_SYNC register...
- */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ switch (mmap_flag) {
+ case CXGB4_MMAP_BAR:
+ ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT,
+ len,
+ pgprot_noncached(vma->vm_page_prot));
+ break;
+ case CXGB4_MMAP_BAR_WC:
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
- } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) &&
- (addr < (pci_resource_start(rdev->lldi.pdev, 2) +
- pci_resource_len(rdev->lldi.pdev, 2)))) {
-
- /*
- * Map user DB or OCQP memory...
- */
- if (addr >= rdev->oc_mw_pa)
- vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
- else {
- if (!is_t4(rdev->lldi.adapter_type))
- vma->vm_page_prot =
- t4_pgprot_wc(vma->vm_page_prot);
- else
- vma->vm_page_prot =
- pgprot_noncached(vma->vm_page_prot);
- }
+ len, t4_pgprot_wc(vma->vm_page_prot));
+ break;
+ case CXGB4_MMAP_CONTIG:
ret = io_remap_pfn_range(vma, vma->vm_start,
addr >> PAGE_SHIFT,
len, vma->vm_page_prot);
- } else {
-
- /*
- * Map WQ or CQ contig dma memory...
- */
- ret = remap_pfn_range(vma, vma->vm_start,
- addr >> PAGE_SHIFT,
- len, vma->vm_page_prot);
+ break;
+ case CXGB4_MMAP_NON_CONTIG:
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+ ret = dma_mmap_coherent(&rdev->lldi.pdev->dev, vma,
+ vaddr, dma_addr, size);
+ vma->vm_pgoff = vm_pgoff;
+ break;
+ default:
+ ret = -EINVAL;
+ break;
}
return ret;
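The CXGB4_MMAP_NON_CONTIG branch above relies on dma_mmap_coherent(), which interprets vma->vm_pgoff as an offset into the coherent buffer rather than as the driver's lookup key; hence the save/zero/restore of vm_pgoff around the call. A minimal sketch of that pattern (helper name hypothetical):

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* map_coherent_queue() is illustrative only */
static int map_coherent_queue(struct device *dev, struct vm_area_struct *vma,
			      void *vaddr, dma_addr_t dma_addr, size_t size)
{
	unsigned long saved = vma->vm_pgoff;
	int ret;

	vma->vm_pgoff = 0;	/* dma_mmap_coherent() wants a buffer offset */
	ret = dma_mmap_coherent(dev, vma, vaddr, dma_addr, size);
	vma->vm_pgoff = saved;	/* restore the key-based pgoff */
	return ret;
}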
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index ffbd9a89981e..7b5c4522b426 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2281,24 +2281,39 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
if (ret)
goto err_free_ma_sync_key;
sq_key_mm->key = uresp.sq_key;
- sq_key_mm->addr = qhp->wq.sq.phys_addr;
+ sq_key_mm->addr = 0;
+ sq_key_mm->vaddr = qhp->wq.sq.queue;
+ sq_key_mm->dma_addr = qhp->wq.sq.dma_addr;
sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
+ insert_flag_to_mmap(&rhp->rdev, sq_key_mm, sq_key_mm->addr);
insert_mmap(ucontext, sq_key_mm);
if (!attrs->srq) {
rq_key_mm->key = uresp.rq_key;
- rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+ rq_key_mm->addr = 0;
+ rq_key_mm->vaddr = qhp->wq.rq.queue;
+ rq_key_mm->dma_addr = qhp->wq.rq.dma_addr;
rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+ insert_flag_to_mmap(&rhp->rdev, rq_key_mm,
+ rq_key_mm->addr);
insert_mmap(ucontext, rq_key_mm);
}
sq_db_key_mm->key = uresp.sq_db_gts_key;
sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
+ sq_db_key_mm->vaddr = NULL;
+ sq_db_key_mm->dma_addr = 0;
sq_db_key_mm->len = PAGE_SIZE;
+ insert_flag_to_mmap(&rhp->rdev, sq_db_key_mm,
+ sq_db_key_mm->addr);
insert_mmap(ucontext, sq_db_key_mm);
if (!attrs->srq) {
rq_db_key_mm->key = uresp.rq_db_gts_key;
rq_db_key_mm->addr =
(u64)(unsigned long)qhp->wq.rq.bar2_pa;
rq_db_key_mm->len = PAGE_SIZE;
+ rq_db_key_mm->vaddr = NULL;
+ rq_db_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, rq_db_key_mm,
+ rq_db_key_mm->addr);
insert_mmap(ucontext, rq_db_key_mm);
}
if (ma_sync_key_mm) {
@@ -2307,6 +2322,10 @@ int c4iw_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
PCIE_MA_SYNC_A) & PAGE_MASK;
ma_sync_key_mm->len = PAGE_SIZE;
+ ma_sync_key_mm->vaddr = NULL;
+ ma_sync_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, ma_sync_key_mm,
+ ma_sync_key_mm->addr);
insert_mmap(ucontext, ma_sync_key_mm);
}
@@ -2466,7 +2485,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
init_attr->cap.max_send_sge = qhp->attr.sq_max_sges;
init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges;
init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE;
- init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
+ init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
return 0;
}
@@ -2761,12 +2780,19 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs,
if (ret)
goto err_free_srq_db_key_mm;
srq_key_mm->key = uresp.srq_key;
- srq_key_mm->addr = virt_to_phys(srq->wq.queue);
+ srq_key_mm->addr = 0;
srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+ srq_key_mm->vaddr = srq->wq.queue;
+ srq_key_mm->dma_addr = srq->wq.dma_addr;
+ insert_flag_to_mmap(&rhp->rdev, srq_key_mm, srq_key_mm->addr);
insert_mmap(ucontext, srq_key_mm);
srq_db_key_mm->key = uresp.srq_db_gts_key;
srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
srq_db_key_mm->len = PAGE_SIZE;
+ srq_db_key_mm->vaddr = NULL;
+ srq_db_key_mm->dma_addr = 0;
+ insert_flag_to_mmap(&rhp->rdev, srq_db_key_mm,
+ srq_db_key_mm->addr);
insert_mmap(ucontext, srq_db_key_mm);
}
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index 7352a1f5d811..d7fc9d5eeefd 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
@@ -57,6 +57,7 @@ struct efa_dev {
u64 db_bar_addr;
u64 db_bar_len;
+ unsigned int num_irq_vectors;
int admin_msix_vector_idx;
struct efa_irq admin_irq;
@@ -80,9 +81,19 @@ struct efa_pd {
u16 pdn;
};
+struct efa_mr_interconnect_info {
+ u16 recv_ic_id;
+ u16 rdma_read_ic_id;
+ u16 rdma_recv_ic_id;
+ u8 recv_ic_id_valid : 1;
+ u8 rdma_read_ic_id_valid : 1;
+ u8 rdma_recv_ic_id_valid : 1;
+};
+
struct efa_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
+ struct efa_mr_interconnect_info ic_info;
};
struct efa_cq {
@@ -150,14 +161,14 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable);
diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
index 4e93ef7f84ee..cd03a5429beb 100644
--- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
+++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_ADMIN_CMDS_H_
@@ -66,6 +66,7 @@ enum efa_admin_get_stats_type {
EFA_ADMIN_GET_STATS_TYPE_BASIC = 0,
EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1,
EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2,
+ EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE = 3,
};
enum efa_admin_get_stats_scope {
@@ -109,7 +110,10 @@ struct efa_admin_create_qp_cmd {
* virtual (IOVA returned by MR registration)
* 1 : rq_virt - If set, RQ ring base address is
* virtual (IOVA returned by MR registration)
- * 7:2 : reserved - MBZ
+ * 2 : unsolicited_write_recv - If set, work requests
+ * will not be consumed for incoming RDMA write with
+ * immediate
+ * 7:3 : reserved - MBZ
*/
u8 flags;
@@ -414,6 +418,32 @@ struct efa_admin_reg_mr_resp {
* memory region
*/
u32 r_key;
+
+ /*
+ * Mask indicating which fields have valid values
+ * 0 : recv_ic_id
+ * 1 : rdma_read_ic_id
+ * 2 : rdma_recv_ic_id
+ */
+ u8 validity;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for receive
+ * operation
+ */
+ u8 recv_ic_id;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for RDMA
+ * read operation
+ */
+ u8 rdma_read_ic_id;
+
+ /*
+ * Physical interconnect used by the device to reach the MR for RDMA
+ * write receive
+ */
+ u8 rdma_recv_ic_id;
};
struct efa_admin_dereg_mr_cmd {
@@ -570,6 +600,16 @@ struct efa_admin_rdma_read_stats {
u64 read_resp_bytes;
};
+struct efa_admin_rdma_write_stats {
+ u64 write_wrs;
+
+ u64 write_bytes;
+
+ u64 write_wr_err;
+
+ u64 write_recv_bytes;
+};
+
struct efa_admin_acq_get_stats_resp {
struct efa_admin_acq_common_desc acq_common_desc;
@@ -579,6 +619,8 @@ struct efa_admin_acq_get_stats_resp {
struct efa_admin_messages_stats messages_stats;
struct efa_admin_rdma_read_stats rdma_read_stats;
+
+ struct efa_admin_rdma_write_stats rdma_write_stats;
} u;
};
@@ -624,12 +666,17 @@ struct efa_admin_feature_device_attr_desc {
* polling is supported
* 3 : rdma_write - If set, RDMA Write is supported
* on TX queues
- * 31:4 : reserved - MBZ
+ * 4 : unsolicited_write_recv - If set, unsolicited
+ * write with imm. receive is supported
+ * 31:5 : reserved - MBZ
*/
u32 device_caps;
/* Max RDMA transfer size in bytes */
u32 max_rdma_size;
+
+ /* Unique global ID for an EFA device */
+ u64 guid;
};
struct efa_admin_feature_queue_attr_desc {
@@ -970,6 +1017,7 @@ struct efa_admin_host_info {
/* create_qp_cmd */
#define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0)
#define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1)
+#define EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV_MASK BIT(2)
/* modify_qp_cmd */
#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0)
@@ -986,6 +1034,11 @@ struct efa_admin_host_info {
#define EFA_ADMIN_REG_MR_CMD_REMOTE_WRITE_ENABLE_MASK BIT(1)
#define EFA_ADMIN_REG_MR_CMD_REMOTE_READ_ENABLE_MASK BIT(2)
+/* reg_mr_resp */
+#define EFA_ADMIN_REG_MR_RESP_RECV_IC_ID_MASK BIT(0)
+#define EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID_MASK BIT(1)
+#define EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID_MASK BIT(2)
+
/* create_cq_cmd */
#define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5)
#define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6)
@@ -1000,6 +1053,7 @@ struct efa_admin_host_info {
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_DATA_POLLING_128_MASK BIT(2)
#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_WRITE_MASK BIT(3)
+#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_UNSOLICITED_WRITE_RECV_MASK BIT(4)
/* create_eq_cmd */
#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0)
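A standalone sketch (not driver code) of decoding the new reg_mr_resp validity byte, where bits 0-2 flag recv_ic_id, rdma_read_ic_id and rdma_recv_ic_id respectively:

#include <stdint.h>
#include <stdio.h>

#define RECV_IC_ID_VALID      (1u << 0)
#define RDMA_READ_IC_ID_VALID (1u << 1)
#define RDMA_RECV_IC_ID_VALID (1u << 2)

int main(void)
{
	/* sample response byte: recv and rdma_recv ids are valid */
	uint8_t validity = RECV_IC_ID_VALID | RDMA_RECV_IC_ID_VALID;

	printf("recv: %d, rdma_read: %d, rdma_recv: %d\n",
	       !!(validity & RECV_IC_ID_VALID),
	       !!(validity & RDMA_READ_IC_ID_VALID),
	       !!(validity & RDMA_RECV_IC_ID_VALID));
	return 0;
}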
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 16a24a05fc2a..bafd210dd43e 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -406,8 +406,8 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
return comp_ctx;
}
-static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
- struct efa_admin_acq_entry *cqe)
+static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+ struct efa_admin_acq_entry *cqe)
{
struct efa_comp_ctx *comp_ctx;
u16 cmd_id;
@@ -416,11 +416,11 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
- if (!comp_ctx) {
+ if (comp_ctx->status != EFA_CMD_SUBMITTED) {
ibdev_err(aq->efa_dev,
- "comp_ctx is NULL. Changing the admin queue running state\n");
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- return;
+ "Received completion with unexpected command id[%d], sq producer: %d, sq consumer: %d, cq consumer: %d\n",
+ cmd_id, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ return -EINVAL;
}
comp_ctx->status = EFA_CMD_COMPLETED;
@@ -428,14 +428,17 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
complete(&comp_ctx->wait_event);
+
+ return 0;
}
static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
{
struct efa_admin_acq_entry *cqe;
u16 queue_size_mask;
- u16 comp_num = 0;
+ u16 comp_cmds = 0;
u8 phase;
+ int err;
u16 ci;
queue_size_mask = aq->depth - 1;
@@ -453,10 +456,12 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
* phase bit was validated
*/
dma_rmb();
- efa_com_handle_single_admin_completion(aq, cqe);
+ err = efa_com_handle_single_admin_completion(aq, cqe);
+ if (!err)
+ comp_cmds++;
+ aq->cq.cc++;
ci++;
- comp_num++;
if (ci == aq->depth) {
ci = 0;
phase = !phase;
@@ -465,10 +470,9 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
cqe = &aq->cq.entries[ci];
}
- aq->cq.cc += comp_num;
aq->cq.phase = phase;
- aq->sq.cc += comp_num;
- atomic64_add(comp_num, &aq->stats.completed_cmd);
+ aq->sq.cc += comp_cmds;
+ atomic64_add(comp_cmds, &aq->stats.completed_cmd);
}
static int efa_com_comp_status_to_errno(u8 comp_status)
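The accounting change above decouples the two consumer counters: cq.cc now advances for every polled CQE, while sq.cc is credited only for completions that matched a submitted command, so a stale or corrupted CQE no longer returns a submission credit. A toy model of the effect:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t cq_cc = 0, sq_cc = 0;
	int matched[4] = { 1, 1, 0, 1 };	/* third CQE is unexpected */

	for (int i = 0; i < 4; i++) {
		cq_cc++;			/* always consume the CQE */
		if (matched[i])
			sq_cc++;		/* only credit real commands */
	}
	printf("cq.cc=%u sq.cc=%u\n", cq_cc, sq_cc);
	return 0;
}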
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c
index 8f8885e002ba..5a774925cdea 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.c
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -32,6 +32,9 @@ int efa_com_create_qp(struct efa_com_dev *edev,
params->rq_depth;
create_qp_cmd.uar = params->uarn;
+ if (params->unsolicited_write_recv)
+ EFA_SET(&create_qp_cmd.flags, EFA_ADMIN_CREATE_QP_CMD_UNSOLICITED_WRITE_RECV, 1);
+
err = efa_com_cmd_exec(aq,
(struct efa_admin_aq_entry *)&create_qp_cmd,
sizeof(create_qp_cmd),
@@ -270,6 +273,15 @@ int efa_com_register_mr(struct efa_com_dev *edev,
result->l_key = cmd_completion.l_key;
result->r_key = cmd_completion.r_key;
+ result->ic_info.recv_ic_id = cmd_completion.recv_ic_id;
+ result->ic_info.rdma_read_ic_id = cmd_completion.rdma_read_ic_id;
+ result->ic_info.rdma_recv_ic_id = cmd_completion.rdma_recv_ic_id;
+ result->ic_info.recv_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RECV_IC_ID);
+ result->ic_info.rdma_read_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RDMA_READ_IC_ID);
+ result->ic_info.rdma_recv_ic_id_valid = EFA_GET(&cmd_completion.validity,
+ EFA_ADMIN_REG_MR_RESP_RDMA_RECV_IC_ID);
return 0;
}
@@ -453,6 +465,7 @@ int efa_com_get_device_attr(struct efa_com_dev *edev,
result->db_bar = resp.u.device_attr.db_bar;
result->max_rdma_size = resp.u.device_attr.max_rdma_size;
result->device_caps = resp.u.device_attr.device_caps;
+ result->guid = resp.u.device_attr.guid;
if (result->admin_api_version < 1) {
ibdev_err_ratelimited(
@@ -794,6 +807,12 @@ int efa_com_get_stats(struct efa_com_dev *edev,
result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err;
result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes;
break;
+ case EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE:
+ result->rdma_write_stats.write_wrs = resp.u.rdma_write_stats.write_wrs;
+ result->rdma_write_stats.write_bytes = resp.u.rdma_write_stats.write_bytes;
+ result->rdma_write_stats.write_wr_err = resp.u.rdma_write_stats.write_wr_err;
+ result->rdma_write_stats.write_recv_bytes = resp.u.rdma_write_stats.write_recv_bytes;
+ break;
}
return 0;
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h
index 0898ad5bc340..668d033f7477 100644
--- a/drivers/infiniband/hw/efa/efa_com_cmd.h
+++ b/drivers/infiniband/hw/efa/efa_com_cmd.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_CMD_H_
@@ -27,6 +27,7 @@ struct efa_com_create_qp_params {
u16 pd;
u16 uarn;
u8 qp_type;
+ u8 unsolicited_write_recv : 1;
};
struct efa_com_create_qp_result {
@@ -111,6 +112,7 @@ struct efa_com_get_device_attr_result {
u8 addr[EFA_GID_SIZE];
u64 page_size_cap;
u64 max_mr_pages;
+ u64 guid;
u32 mtu;
u32 fw_version;
u32 admin_api_version;
@@ -199,6 +201,15 @@ struct efa_com_reg_mr_params {
u8 indirect;
};
+struct efa_com_mr_interconnect_info {
+ u16 recv_ic_id;
+ u16 rdma_read_ic_id;
+ u16 rdma_recv_ic_id;
+ u8 recv_ic_id_valid : 1;
+ u8 rdma_read_ic_id_valid : 1;
+ u8 rdma_recv_ic_id_valid : 1;
+};
+
struct efa_com_reg_mr_result {
/*
* To be used in conjunction with local buffers references in SQ and
@@ -210,6 +221,7 @@ struct efa_com_reg_mr_result {
* accessed memory region
*/
u32 r_key;
+ struct efa_com_mr_interconnect_info ic_info;
};
struct efa_com_dereg_mr_params {
@@ -262,10 +274,18 @@ struct efa_com_rdma_read_stats {
u64 read_resp_bytes;
};
+struct efa_com_rdma_write_stats {
+ u64 write_wrs;
+ u64 write_bytes;
+ u64 write_wr_err;
+ u64 write_recv_bytes;
+};
+
union efa_com_get_stats_result {
struct efa_com_basic_stats basic_stats;
struct efa_com_messages_stats messages_stats;
struct efa_com_rdma_read_stats rdma_read_stats;
+ struct efa_com_rdma_write_stats rdma_write_stats;
};
int efa_com_create_qp(struct efa_com_dev *edev,
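Each interconnect ID in efa_com_mr_interconnect_info is paired with a 1-bit validity flag, so a consumer has to gate on the flag before trusting the value. A hypothetical helper, only to illustrate the intended access pattern:

	static void efa_dump_ic_ids(const struct efa_com_reg_mr_result *res)
	{
		/* Only *_valid-gated reads are meaningful. */
		if (res->ic_info.recv_ic_id_valid)
			pr_debug("recv ic id %u\n", res->ic_info.recv_ic_id);
		if (res->ic_info.rdma_read_ic_id_valid)
			pr_debug("rdma read ic id %u\n",
				 res->ic_info.rdma_read_ic_id);
		if (res->ic_info.rdma_recv_ic_id_valid)
			pr_debug("rdma recv ic id %u\n",
				 res->ic_info.rdma_recv_ic_id);
	}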
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 15ee92081118..ad225823e6f2 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
@@ -9,17 +9,20 @@
#include <linux/version.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_ioctl.h>
#include "efa.h"
#define PCI_DEV_ID_EFA0_VF 0xefa0
#define PCI_DEV_ID_EFA1_VF 0xefa1
#define PCI_DEV_ID_EFA2_VF 0xefa2
+#define PCI_DEV_ID_EFA3_VF 0xefa3
static const struct pci_device_id efa_pci_tbl[] = {
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA0_VF) },
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA1_VF) },
{ PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA2_VF) },
+ { PCI_VDEVICE(AMAZON, PCI_DEV_ID_EFA3_VF) },
{ }
};
@@ -36,6 +39,8 @@ MODULE_DEVICE_TABLE(pci, efa_pci_tbl);
(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
+extern const struct uapi_definition efa_uapi_defs[];
+
/* This handler will be called for an unknown event group or unimplemented handlers */
static void unimplemented_aenq_handler(void *data,
struct efa_admin_aenq_entry *aenq_e)
@@ -187,15 +192,23 @@ static int efa_request_doorbell_bar(struct efa_dev *dev)
{
u8 db_bar_idx = dev->dev_attr.db_bar;
struct pci_dev *pdev = dev->pdev;
- int bars;
+ int pci_mem_bars;
+ int db_bar;
int err;
- if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) {
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx);
+ db_bar = BIT(db_bar_idx);
+ if (!(db_bar & EFA_BASE_BAR_MASK)) {
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (db_bar & ~pci_mem_bars) {
+ dev_err(&pdev->dev,
+ "Doorbells BAR unavailable. Requested %#x, available %#x\n",
+ db_bar, pci_mem_bars);
+ return -ENODEV;
+ }
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ err = pci_request_selected_regions(pdev, db_bar, DRV_MODULE_NAME);
if (err) {
- dev_err(&dev->pdev->dev,
+ dev_err(&pdev->dev,
"pci_request_selected_regions for bar %d failed %d\n",
db_bar_idx, err);
return err;
@@ -319,7 +332,9 @@ static int efa_create_eqs(struct efa_dev *dev)
int err;
int i;
- neqs = min_t(unsigned int, neqs, num_online_cpus());
+ neqs = min_t(unsigned int, neqs,
+ dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+
dev->neqs = neqs;
dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
if (!dev->eqs)
@@ -426,12 +441,15 @@ static int efa_ib_device_add(struct efa_dev *dev)
efa_set_host_info(dev);
dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED;
+ dev->ibdev.node_guid = dev->dev_attr.guid;
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->neqs ?: 1;
dev->ibdev.dev.parent = &pdev->dev;
ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
+ dev->ibdev.driver_def = efa_uapi_defs;
+
err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev);
if (err)
goto err_destroy_eqs;
@@ -463,34 +481,30 @@ static void efa_disable_msix(struct efa_dev *dev)
static int efa_enable_msix(struct efa_dev *dev)
{
- int msix_vecs, irq_num;
+ int max_vecs, num_vecs;
/*
* Reserve the max MSI-X vectors we might need; one vector is reserved
* for the admin queue.
*/
- msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
- num_online_cpus() + 1);
+ max_vecs = min_t(int, pci_msix_vec_count(dev->pdev),
+ num_online_cpus() + 1);
dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n",
- msix_vecs);
+ max_vecs);
dev->admin_msix_vector_idx = EFA_MGMNT_MSIX_VEC_IDX;
- irq_num = pci_alloc_irq_vectors(dev->pdev, msix_vecs,
- msix_vecs, PCI_IRQ_MSIX);
+ num_vecs = pci_alloc_irq_vectors(dev->pdev, 1,
+ max_vecs, PCI_IRQ_MSIX);
- if (irq_num < 0) {
- dev_err(&dev->pdev->dev, "Failed to enable MSI-X. irq_num %d\n",
- irq_num);
+ if (num_vecs < 0) {
+ dev_err(&dev->pdev->dev, "Failed to enable MSI-X. error %d\n",
+ num_vecs);
return -ENOSPC;
}
- if (irq_num != msix_vecs) {
- efa_disable_msix(dev);
- dev_err(&dev->pdev->dev,
- "Allocated %d MSI-X (out of %d requested)\n",
- irq_num, msix_vecs);
- return -ENOSPC;
- }
+ dev_dbg(&dev->pdev->dev, "Allocated %d MSI-X vectors\n", num_vecs);
+
+ dev->num_irq_vectors = num_vecs;
return 0;
}
@@ -528,7 +542,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
{
struct efa_com_dev *edev;
struct efa_dev *dev;
- int bars;
+ int pci_mem_bars;
int err;
err = pci_enable_device_mem(pdev);
@@ -553,8 +567,14 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
dev->pdev = pdev;
xa_init(&dev->cqs_xa);
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK;
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
+ dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n",
+ (int)EFA_BASE_BAR_MASK, pci_mem_bars);
+ err = -ENODEV;
+ goto err_ibdev_destroy;
+ }
+ err = pci_request_selected_regions(pdev, EFA_BASE_BAR_MASK, DRV_MODULE_NAME);
if (err) {
dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
err);
@@ -668,11 +688,22 @@ static void efa_remove(struct pci_dev *pdev)
efa_remove_device(pdev);
}
+static void efa_shutdown(struct pci_dev *pdev)
+{
+ struct efa_dev *dev = pci_get_drvdata(pdev);
+
+ efa_destroy_eqs(dev);
+ efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_SHUTDOWN);
+ efa_free_irq(dev, &dev->admin_irq);
+ efa_disable_msix(dev);
+}
+
static struct pci_driver efa_pci_driver = {
.name = DRV_MODULE_NAME,
.id_table = efa_pci_tbl,
.probe = efa_probe,
.remove = efa_remove,
+ .shutdown = efa_shutdown,
};
module_pci_driver(efa_pci_driver);
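With the (min, max) form of pci_alloc_irq_vectors() the driver no longer fails when fewer than num_online_cpus() + 1 vectors are available: anything from one vector (admin only) up to max_vecs succeeds, the granted count lands in dev->num_irq_vectors, and efa_create_eqs() sizes the completion EQs from what was actually allocated. A condensed sketch of the pattern (EFA_COMP_EQS_VEC_BASE marks the first completion vector after the admin one):

	int num_vecs = pci_alloc_irq_vectors(pdev, 1, max_vecs, PCI_IRQ_MSIX);

	if (num_vecs < 0)
		return num_vecs;	/* not even one vector available */

	dev->num_irq_vectors = num_vecs;
	/* Completion EQs only get the vectors left after the admin one. */
	neqs = min_t(unsigned int, neqs, num_vecs - EFA_COMP_EQS_VEC_BASE);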
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index 2a195c4b0f17..cc13415ff7e7 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/dma-buf.h>
@@ -13,6 +13,9 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>
+#define UVERBS_MODULE_NAME efa_ib
+#include <rdma/uverbs_named_ioctl.h>
+#include <rdma/ib_user_ioctl_cmds.h>
#include "efa.h"
#include "efa_io_defs.h"
@@ -23,10 +26,6 @@ enum {
EFA_MMAP_IO_NC,
};
-#define EFA_AENQ_ENABLED_GROUPS \
- (BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
- BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
-
struct efa_user_mmap_entry {
struct rdma_user_mmap_entry rdma_entry;
u64 address;
@@ -61,6 +60,10 @@ struct efa_user_mmap_entry {
op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
+ op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
+ op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
+ op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
+ op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \
#define EFA_STATS_ENUM(ename, name) ename,
#define EFA_STATS_STR(ename, nam) \
@@ -256,6 +259,9 @@ int efa_query_device(struct ib_device *ibdev,
if (EFA_DEV_CAP(dev, RDMA_WRITE))
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ resp.device_caps |= EFA_QUERY_DEVICE_CAPS_UNSOLICITED_WRITE_RECV;
+
if (dev->neqs)
resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
@@ -449,12 +455,12 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
- efa_qp_user_mmap_entries_remove(qp);
-
err = efa_destroy_qp_handle(dev, qp->qp_handle);
if (err)
return err;
+ efa_qp_user_mmap_entries_remove(qp);
+
if (qp->rq_cpu_addr) {
ibdev_dbg(&dev->ibdev,
"qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
@@ -514,7 +520,7 @@ static int qp_mmap_entries_setup(struct efa_qp *qp,
address = dev->mem_bar_addr + resp->llq_desc_offset;
length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
- (resp->llq_desc_offset & ~PAGE_MASK));
+ offset_in_page(resp->llq_desc_offset));
qp->llq_desc_mmap_entry =
efa_user_mmap_entry_insert(&ucontext->ibucontext,
@@ -632,6 +638,7 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
struct efa_ibv_create_qp cmd = {};
struct efa_qp *qp = to_eqp(ibqp);
struct efa_ucontext *ucontext;
+ u16 supported_efa_flags = 0;
int err;
ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
@@ -669,13 +676,23 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
goto err_out;
}
- if (cmd.comp_mask) {
+ if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_90)) {
ibdev_dbg(&dev->ibdev,
"Incompatible ABI params, unknown fields in udata\n");
err = -EINVAL;
goto err_out;
}
+ if (EFA_DEV_CAP(dev, UNSOLICITED_WRITE_RECV))
+ supported_efa_flags |= EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV;
+
+ if (cmd.flags & ~supported_efa_flags) {
+ ibdev_dbg(&dev->ibdev, "Unsupported EFA QP create flags[%#x], supported[%#x]\n",
+ cmd.flags, supported_efa_flags);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
create_qp_params.uarn = ucontext->uarn;
create_qp_params.pd = to_epd(ibqp->pd)->pdn;
@@ -715,6 +732,9 @@ int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
create_qp_params.rq_base_addr = qp->rq_dma_addr;
}
+ if (cmd.flags & EFA_CREATE_QP_WITH_UNSOLICITED_WRITE_RECV)
+ create_qp_params.unsolicited_write_recv = true;
+
err = efa_com_create_qp(&dev->edev, &create_qp_params,
&create_qp_resp);
if (err)
@@ -1013,8 +1033,8 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
"Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
- efa_cq_user_mmap_entries_remove(cq);
efa_destroy_cq_idx(dev, cq->cq_idx);
+ efa_cq_user_mmap_entries_remove(cq);
if (cq->eq) {
xa_erase(&dev->cqs_xa, cq->cq_idx);
synchronize_irq(cq->eq->irq.irqn);
@@ -1060,8 +1080,9 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
}
int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
udata, struct efa_ucontext, ibucontext);
struct efa_com_create_cq_params params = {};
@@ -1649,6 +1670,12 @@ static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
mr->ibmr.lkey = result.l_key;
mr->ibmr.rkey = result.r_key;
mr->ibmr.length = length;
+ mr->ic_info.recv_ic_id = result.ic_info.recv_ic_id;
+ mr->ic_info.rdma_read_ic_id = result.ic_info.rdma_read_ic_id;
+ mr->ic_info.rdma_recv_ic_id = result.ic_info.rdma_recv_ic_id;
+ mr->ic_info.recv_ic_id_valid = result.ic_info.recv_ic_id_valid;
+ mr->ic_info.rdma_read_ic_id_valid = result.ic_info.rdma_read_ic_id_valid;
+ mr->ic_info.rdma_recv_ic_id_valid = result.ic_info.rdma_recv_ic_id_valid;
ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
return 0;
@@ -1657,14 +1684,14 @@ static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
struct efa_dev *dev = to_edev(ibpd->device);
struct ib_umem_dmabuf *umem_dmabuf;
struct efa_mr *mr;
int err;
- mr = efa_alloc_mr(ibpd, access_flags, udata);
+ mr = efa_alloc_mr(ibpd, access_flags, &attrs->driver_udata);
if (IS_ERR(mr)) {
err = PTR_ERR(mr);
goto err_out;
@@ -1731,6 +1758,39 @@ err_out:
return ERR_PTR(err);
}
+static int UVERBS_HANDLER(EFA_IB_METHOD_MR_QUERY)(struct uverbs_attr_bundle *attrs)
+{
+ struct ib_mr *ibmr = uverbs_attr_get_obj(attrs, EFA_IB_ATTR_QUERY_MR_HANDLE);
+ struct efa_mr *mr = to_emr(ibmr);
+ u16 ic_id_validity = 0;
+ int ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+ &mr->ic_info.recv_ic_id, sizeof(mr->ic_info.recv_ic_id));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+ &mr->ic_info.rdma_read_ic_id, sizeof(mr->ic_info.rdma_read_ic_id));
+ if (ret)
+ return ret;
+
+ ret = uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+ &mr->ic_info.rdma_recv_ic_id, sizeof(mr->ic_info.rdma_recv_ic_id));
+ if (ret)
+ return ret;
+
+ if (mr->ic_info.recv_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RECV_IC_ID;
+ if (mr->ic_info.rdma_read_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_READ_IC_ID;
+ if (mr->ic_info.rdma_recv_ic_id_valid)
+ ic_id_validity |= EFA_QUERY_MR_VALIDITY_RDMA_RECV_IC_ID;
+
+ return uverbs_copy_to(attrs, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+ &ic_id_validity, sizeof(ic_id_validity));
+}
+
int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct efa_dev *dev = to_edev(ibmr->device);
@@ -2080,6 +2140,7 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
{
struct efa_com_get_stats_params params = {};
union efa_com_get_stats_result result;
+ struct efa_com_rdma_write_stats *rws;
struct efa_com_rdma_read_stats *rrs;
struct efa_com_messages_stats *ms;
struct efa_com_basic_stats *bs;
@@ -2121,6 +2182,19 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;
+ if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
+ params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
+ err = efa_com_get_stats(&dev->edev, &params, &result);
+ if (err)
+ return err;
+
+ rws = &result.rdma_write_stats;
+ stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
+ stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
+ stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
+ stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
+ }
+
return ARRAY_SIZE(efa_port_stats_descs);
}
@@ -2139,3 +2213,30 @@ enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_UNSPECIFIED;
}
+DECLARE_UVERBS_NAMED_METHOD(EFA_IB_METHOD_MR_QUERY,
+ UVERBS_ATTR_IDR(EFA_IB_ATTR_QUERY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY));
+
+ADD_UVERBS_METHODS(efa_mr,
+ UVERBS_OBJECT_MR,
+ &UVERBS_METHOD(EFA_IB_METHOD_MR_QUERY));
+
+const struct uapi_definition efa_uapi_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR,
+ &efa_mr),
+ {},
+};
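Userspace reaches EFA_IB_METHOD_MR_QUERY through the generic uverbs ioctl channel rather than a write() command. A hedged sketch of a provider-side call using rdma-core's command-buffer helpers (the helper names are rdma-core internals assumed here, not part of this patch):

	DECLARE_COMMAND_BUFFER(cmd, UVERBS_OBJECT_MR,
			       EFA_IB_METHOD_MR_QUERY, 5);
	uint16_t validity, recv_ic_id, rdma_read_ic_id, rdma_recv_ic_id;
	int err;

	fill_attr_in_obj(cmd, EFA_IB_ATTR_QUERY_MR_HANDLE, mr->handle);
	fill_attr_out_ptr(cmd, EFA_IB_ATTR_QUERY_MR_RESP_IC_ID_VALIDITY,
			  &validity);
	fill_attr_out_ptr(cmd, EFA_IB_ATTR_QUERY_MR_RESP_RECV_IC_ID,
			  &recv_ic_id);
	fill_attr_out_ptr(cmd, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_READ_IC_ID,
			  &rdma_read_ic_id);
	fill_attr_out_ptr(cmd, EFA_IB_ATTR_QUERY_MR_RESP_RDMA_RECV_IC_ID,
			  &rdma_recv_ic_id);
	err = execute_ioctl(mr->context, cmd);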
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index f190111840e9..3c166359448d 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -33,7 +33,8 @@ struct erdma_eq {
atomic64_t notify_num;
void __iomem *db;
- u64 *db_record;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
};
struct erdma_cmdq_sq {
@@ -48,7 +49,8 @@ struct erdma_cmdq_sq {
u16 wqebb_cnt;
- u64 *db_record;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
};
struct erdma_cmdq_cq {
@@ -61,7 +63,8 @@ struct erdma_cmdq_cq {
u32 ci;
u32 cmdsn;
- u64 *db_record;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
atomic64_t armed_num;
};
@@ -177,9 +180,6 @@ enum {
ERDMA_RES_CNT = 2,
};
-#define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE
-#define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE)
-
struct erdma_dev {
struct ib_device ibdev;
struct net_device *netdev;
@@ -212,6 +212,9 @@ struct erdma_dev {
atomic_t num_ctx;
struct list_head cep_list;
+
+ struct dma_pool *db_pool;
+ struct dma_pool *resp_pool;
};
static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
@@ -271,7 +274,8 @@ void notify_eq(struct erdma_eq *eq);
void *get_next_valid_eqe(struct erdma_eq *eq);
int erdma_aeq_init(struct erdma_dev *dev);
-void erdma_aeq_destroy(struct erdma_dev *dev);
+int erdma_eq_common_init(struct erdma_dev *dev, struct erdma_eq *eq, u32 depth);
+void erdma_eq_destroy(struct erdma_dev *dev, struct erdma_eq *eq);
void erdma_aeq_event_handler(struct erdma_dev *dev);
void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb);
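The dbrec fields replace the old scheme of over-allocating every queue buffer by ERDMA_DB_SIZE and pointing the doorbell record at its tail. Each 8-byte record now comes from a per-device dma_pool, which returns a CPU pointer and the matching bus address in one call; a minimal sketch of the lifecycle used throughout this series:

	/* One pool per device: ERDMA_DB_SIZE-sized, ERDMA_DB_SIZE-aligned. */
	dev->db_pool = dma_pool_create("erdma_db_pool", &pdev->dev,
				       ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0);

	/* Per queue: a zeroed record plus its DMA address for the device. */
	eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma);
	if (!eq->dbrec)
		return -ENOMEM;

	/* Teardown pairs each zalloc with a free before the pool goes away. */
	dma_pool_free(dev->db_pool, eq->dbrec, eq->dbrec_dma);
	dma_pool_destroy(dev->db_pool);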
diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c
index a151a7bdd504..a3d8922d1ad1 100644
--- a/drivers/infiniband/hw/erdma/erdma_cmdq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c
@@ -14,7 +14,7 @@ static void arm_cmdq_cq(struct erdma_cmdq *cmdq)
FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cmdq->cq.cmdsn) |
FIELD_PREP(ERDMA_CQDB_IDX_MASK, cmdq->cq.cmdsn);
- *cmdq->cq.db_record = db_data;
+ *cmdq->cq.dbrec = db_data;
writeq(db_data, dev->func_bar + ERDMA_CMDQ_CQDB_REG);
atomic64_inc(&cmdq->cq.armed_num);
@@ -25,7 +25,7 @@ static void kick_cmdq_db(struct erdma_cmdq *cmdq)
struct erdma_dev *dev = container_of(cmdq, struct erdma_dev, cmdq);
u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, cmdq->sq.pi);
- *cmdq->sq.db_record = db_data;
+ *cmdq->sq.dbrec = db_data;
writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);
}
@@ -89,20 +89,18 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
struct erdma_cmdq_sq *sq = &cmdq->sq;
- u32 buf_size;
sq->wqebb_cnt = SQEBB_COUNT(ERDMA_CMDQ_SQE_SIZE);
sq->depth = cmdq->max_outstandings * sq->wqebb_cnt;
- buf_size = sq->depth << SQEBB_SHIFT;
-
- sq->qbuf =
- dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
- &sq->qbuf_dma_addr, GFP_KERNEL);
+ sq->qbuf = dma_alloc_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
+ &sq->qbuf_dma_addr, GFP_KERNEL);
if (!sq->qbuf)
return -ENOMEM;
- sq->db_record = (u64 *)(sq->qbuf + buf_size);
+ sq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &sq->dbrec_dma);
+ if (!sq->dbrec)
+ goto err_out;
spin_lock_init(&sq->lock);
@@ -111,30 +109,33 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev)
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_SQ_ADDR_L_REG,
lower_32_bits(sq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_DEPTH_REG, sq->depth);
- erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG,
- sq->qbuf_dma_addr + buf_size);
+ erdma_reg_write64(dev, ERDMA_CMDQ_SQ_DB_HOST_ADDR_REG, sq->dbrec_dma);
return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, sq->depth << SQEBB_SHIFT,
+ sq->qbuf, sq->qbuf_dma_addr);
+
+ return -ENOMEM;
}
static int erdma_cmdq_cq_init(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
struct erdma_cmdq_cq *cq = &cmdq->cq;
- u32 buf_size;
cq->depth = cmdq->sq.depth;
- buf_size = cq->depth << CQE_SHIFT;
-
- cq->qbuf =
- dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
- &cq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
+ cq->qbuf = dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ &cq->qbuf_dma_addr, GFP_KERNEL);
if (!cq->qbuf)
return -ENOMEM;
spin_lock_init(&cq->lock);
- cq->db_record = (u64 *)(cq->qbuf + buf_size);
+ cq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &cq->dbrec_dma);
+ if (!cq->dbrec)
+ goto err_out;
atomic64_set(&cq->armed_num, 0);
@@ -142,40 +143,35 @@ static int erdma_cmdq_cq_init(struct erdma_dev *dev)
upper_32_bits(cq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_CQ_ADDR_L_REG,
lower_32_bits(cq->qbuf_dma_addr));
- erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG,
- cq->qbuf_dma_addr + buf_size);
+ erdma_reg_write64(dev, ERDMA_CMDQ_CQ_DB_HOST_ADDR_REG, cq->dbrec_dma);
return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT, cq->qbuf,
+ cq->qbuf_dma_addr);
+
+ return -ENOMEM;
}
static int erdma_cmdq_eq_init(struct erdma_dev *dev)
{
struct erdma_cmdq *cmdq = &dev->cmdq;
struct erdma_eq *eq = &cmdq->eq;
- u32 buf_size;
-
- eq->depth = cmdq->max_outstandings;
- buf_size = eq->depth << EQE_SHIFT;
-
- eq->qbuf =
- dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
- &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
- if (!eq->qbuf)
- return -ENOMEM;
+ int ret;
- spin_lock_init(&eq->lock);
- atomic64_set(&eq->event_num, 0);
+ ret = erdma_eq_common_init(dev, eq, cmdq->max_outstandings);
+ if (ret)
+ return ret;
eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG;
- eq->db_record = (u64 *)(eq->qbuf + buf_size);
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG,
upper_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_L_REG,
lower_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_DEPTH_REG, eq->depth);
- erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG,
- eq->qbuf_dma_addr + buf_size);
+ erdma_reg_write64(dev, ERDMA_CMDQ_EQ_DB_HOST_ADDR_REG, eq->dbrec_dma);
return 0;
}
@@ -211,17 +207,17 @@ int erdma_cmdq_init(struct erdma_dev *dev)
return 0;
err_destroy_cq:
- dma_free_coherent(&dev->pdev->dev,
- (cmdq->cq.depth << CQE_SHIFT) +
- ERDMA_EXTRA_BUFFER_SIZE,
+ dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
+
err_destroy_sq:
- dma_free_coherent(&dev->pdev->dev,
- (cmdq->sq.depth << SQEBB_SHIFT) +
- ERDMA_EXTRA_BUFFER_SIZE,
+ dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);
+
return err;
}
@@ -238,18 +234,17 @@ void erdma_cmdq_destroy(struct erdma_dev *dev)
clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state);
- dma_free_coherent(&dev->pdev->dev,
- (cmdq->eq.depth << EQE_SHIFT) +
- ERDMA_EXTRA_BUFFER_SIZE,
- cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr);
- dma_free_coherent(&dev->pdev->dev,
- (cmdq->sq.depth << SQEBB_SHIFT) +
- ERDMA_EXTRA_BUFFER_SIZE,
+ erdma_eq_destroy(dev, &cmdq->eq);
+
+ dma_free_coherent(&dev->pdev->dev, cmdq->sq.depth << SQEBB_SHIFT,
cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr);
- dma_free_coherent(&dev->pdev->dev,
- (cmdq->cq.depth << CQE_SHIFT) +
- ERDMA_EXTRA_BUFFER_SIZE,
+
+ dma_pool_free(dev->db_pool, cmdq->sq.dbrec, cmdq->sq.dbrec_dma);
+
+ dma_free_coherent(&dev->pdev->dev, cmdq->cq.depth << CQE_SHIFT,
cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr);
+
+ dma_pool_free(dev->db_pool, cmdq->cq.dbrec, cmdq->cq.dbrec_dma);
}
static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq)
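Every doorbell kick writes the value twice: once into the DMA-visible record (dbrec) and once into the MMIO register, so the hardware always has a host-memory copy of the most recent producer index to fall back on. The pattern, as in kick_cmdq_db() above:

	u64 db_data = FIELD_PREP(ERDMA_CMD_HDR_WQEBB_INDEX_MASK, sq->pi);

	*sq->dbrec = db_data;		/* shadow copy in host memory */
	writeq(db_data, dev->func_bar + ERDMA_CMDQ_SQDB_REG);	/* ring HW */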
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
index c1cb5568eab2..70f89f0162aa 100644
--- a/drivers/infiniband/hw/erdma/erdma_cq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -26,7 +26,7 @@ static void notify_cq(struct erdma_cq *cq, u8 solcitied)
FIELD_PREP(ERDMA_CQDB_CMDSN_MASK, cq->kern_cq.cmdsn) |
FIELD_PREP(ERDMA_CQDB_CI_MASK, cq->kern_cq.ci);
- *cq->kern_cq.db_record = db_data;
+ *cq->kern_cq.dbrec = db_data;
writeq(db_data, cq->kern_cq.db);
}
diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
index ea47cb21fdb8..9a72fec6d5cc 100644
--- a/drivers/infiniband/hw/erdma/erdma_eq.c
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -13,7 +13,7 @@ void notify_eq(struct erdma_eq *eq)
u64 db_data = FIELD_PREP(ERDMA_EQDB_CI_MASK, eq->ci) |
FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1);
- *eq->db_record = db_data;
+ *eq->dbrec = db_data;
writeq(db_data, eq->db);
atomic64_inc(&eq->notify_num);
@@ -80,47 +80,62 @@ void erdma_aeq_event_handler(struct erdma_dev *dev)
notify_eq(&dev->aeq);
}
-int erdma_aeq_init(struct erdma_dev *dev)
+int erdma_eq_common_init(struct erdma_dev *dev, struct erdma_eq *eq, u32 depth)
{
- struct erdma_eq *eq = &dev->aeq;
- u32 buf_size;
-
- eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
- buf_size = eq->depth << EQE_SHIFT;
+ u32 buf_size = depth << EQE_SHIFT;
- eq->qbuf =
- dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
- &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
+ eq->qbuf = dma_alloc_coherent(&dev->pdev->dev, buf_size,
+ &eq->qbuf_dma_addr, GFP_KERNEL);
if (!eq->qbuf)
return -ENOMEM;
+ eq->dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &eq->dbrec_dma);
+ if (!eq->dbrec)
+ goto err_free_qbuf;
+
spin_lock_init(&eq->lock);
atomic64_set(&eq->event_num, 0);
atomic64_set(&eq->notify_num, 0);
+ eq->ci = 0;
+ eq->depth = depth;
+
+ return 0;
+
+err_free_qbuf:
+ dma_free_coherent(&dev->pdev->dev, buf_size, eq->qbuf,
+ eq->qbuf_dma_addr);
+
+ return -ENOMEM;
+}
+
+void erdma_eq_destroy(struct erdma_dev *dev, struct erdma_eq *eq)
+{
+ dma_pool_free(dev->db_pool, eq->dbrec, eq->dbrec_dma);
+ dma_free_coherent(&dev->pdev->dev, eq->depth << EQE_SHIFT, eq->qbuf,
+ eq->qbuf_dma_addr);
+}
+
+int erdma_aeq_init(struct erdma_dev *dev)
+{
+ struct erdma_eq *eq = &dev->aeq;
+ int ret;
+
+ ret = erdma_eq_common_init(dev, &dev->aeq, ERDMA_DEFAULT_EQ_DEPTH);
+ if (ret)
+ return ret;
eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG;
- eq->db_record = (u64 *)(eq->qbuf + buf_size);
erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG,
upper_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_L_REG,
lower_32_bits(eq->qbuf_dma_addr));
erdma_reg_write32(dev, ERDMA_REGS_AEQ_DEPTH_REG, eq->depth);
- erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG,
- eq->qbuf_dma_addr + buf_size);
+ erdma_reg_write64(dev, ERDMA_AEQ_DB_HOST_ADDR_REG, eq->dbrec_dma);
return 0;
}
-void erdma_aeq_destroy(struct erdma_dev *dev)
-{
- struct erdma_eq *eq = &dev->aeq;
-
- dma_free_coherent(&dev->pdev->dev,
- WARPPED_BUFSIZE(eq->depth << EQE_SHIFT), eq->qbuf,
- eq->qbuf_dma_addr);
-}
-
void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb)
{
struct erdma_dev *dev = ceq_cb->dev;
@@ -209,7 +224,6 @@ static void erdma_free_ceq_irq(struct erdma_dev *dev, u16 ceqn)
static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
{
struct erdma_cmdq_create_eq_req req;
- dma_addr_t db_info_dma_addr;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_CREATE_EQ);
@@ -219,9 +233,8 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
req.qtype = ERDMA_EQ_TYPE_CEQ;
/* Vector index is the same as EQN. */
req.vector_idx = eqn;
- db_info_dma_addr = eq->qbuf_dma_addr + (eq->depth << EQE_SHIFT);
- req.db_dma_addr_l = lower_32_bits(db_info_dma_addr);
- req.db_dma_addr_h = upper_32_bits(db_info_dma_addr);
+ req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma);
+ req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma);
return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
}
@@ -229,29 +242,23 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
{
struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
- u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
int ret;
- eq->qbuf =
- dma_alloc_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size),
- &eq->qbuf_dma_addr, GFP_KERNEL | __GFP_ZERO);
- if (!eq->qbuf)
- return -ENOMEM;
+ ret = erdma_eq_common_init(dev, eq, ERDMA_DEFAULT_EQ_DEPTH);
+ if (ret)
+ return ret;
- spin_lock_init(&eq->lock);
- atomic64_set(&eq->event_num, 0);
- atomic64_set(&eq->notify_num, 0);
-
- eq->depth = ERDMA_DEFAULT_EQ_DEPTH;
eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG +
(ceqn + 1) * ERDMA_DB_SIZE;
- eq->db_record = (u64 *)(eq->qbuf + buf_size);
- eq->ci = 0;
dev->ceqs[ceqn].dev = dev;
+ dev->ceqs[ceqn].ready = true;
/* CEQs are indexed from 1; index 0 is reserved for the CMDQ-EQ. */
ret = create_eq_cmd(dev, ceqn + 1, eq);
- dev->ceqs[ceqn].ready = ret ? false : true;
+ if (ret) {
+ erdma_eq_destroy(dev, eq);
+ dev->ceqs[ceqn].ready = false;
+ }
return ret;
}
@@ -259,7 +266,6 @@ static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
{
struct erdma_eq *eq = &dev->ceqs[ceqn].eq;
- u32 buf_size = ERDMA_DEFAULT_EQ_DEPTH << EQE_SHIFT;
struct erdma_cmdq_destroy_eq_req req;
int err;
@@ -276,8 +282,7 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
if (err)
return;
- dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(buf_size), eq->qbuf,
- eq->qbuf_dma_addr);
+ erdma_eq_destroy(dev, eq);
}
int erdma_ceqs_init(struct erdma_dev *dev)
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index a882b57aa118..05978f3b1475 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -11,8 +11,6 @@
#include <linux/types.h>
/* PCIe device-related definitions. */
-#define PCI_VENDOR_ID_ALIBABA 0x1ded
-
#define ERDMA_PCI_WIDTH 64
#define ERDMA_FUNC_BAR 0
#define ERDMA_MISX_BAR 2
@@ -146,6 +144,7 @@ enum CMDQ_COMMON_OPCODE {
CMDQ_OPCODE_DESTROY_EQ = 1,
CMDQ_OPCODE_QUERY_FW_INFO = 2,
CMDQ_OPCODE_CONF_MTU = 3,
+ CMDQ_OPCODE_GET_STATS = 4,
CMDQ_OPCODE_CONF_DEVICE = 5,
CMDQ_OPCODE_ALLOC_DB = 8,
CMDQ_OPCODE_FREE_DB = 9,
@@ -228,7 +227,7 @@ struct erdma_cmdq_ext_db_req {
/* create_cq cfg1 */
#define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16)
-#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15)
+#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15)
#define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11)
#define ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0)
@@ -241,13 +240,14 @@ struct erdma_cmdq_create_cq_req {
u32 qbuf_addr_l;
u32 qbuf_addr_h;
u32 cfg1;
- u64 cq_db_info_addr;
+ u64 cq_dbrec_dma;
u32 first_page_offset;
u32 cfg2;
};
/* regmr/deregmr cfg0 */
#define ERDMA_CMD_MR_VALID_MASK BIT(31)
+#define ERDMA_CMD_MR_VERSION_MASK GENMASK(30, 28)
#define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20)
#define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0)
@@ -258,7 +258,8 @@ struct erdma_cmdq_create_cq_req {
/* regmr cfg2 */
#define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
-#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20)
+#define ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK GENMASK(26, 24)
+#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20)
#define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0)
struct erdma_cmdq_reg_mr_req {
@@ -268,7 +269,14 @@ struct erdma_cmdq_reg_mr_req {
u64 start_va;
u32 size;
u32 cfg2;
- u64 phy_addr[4];
+ union {
+ u64 phy_addr[4];
+ struct {
+ u64 rsvd;
+ u32 size_h;
+ u32 mtt_cnt_h;
+ };
+ };
};
struct erdma_cmdq_dereg_mr_req {
@@ -309,7 +317,7 @@ struct erdma_cmdq_modify_qp_req {
/* create qp mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12)
#define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1)
-#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0)
+#define ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK BIT(0)
/* create qp db cfg */
#define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16)
@@ -327,8 +335,8 @@ struct erdma_cmdq_create_qp_req {
u64 rq_buf_addr;
u32 sq_mtt_cfg;
u32 rq_mtt_cfg;
- u64 sq_db_info_dma_addr;
- u64 rq_db_info_dma_addr;
+ u64 sq_dbrec_dma;
+ u64 rq_dbrec_dma;
u64 sq_mtt_entry[3];
u64 rq_mtt_entry[3];
@@ -348,6 +356,44 @@ struct erdma_cmdq_reflush_req {
u32 rq_pi;
};
+#define ERDMA_HW_RESP_SIZE 256
+
+struct erdma_cmdq_query_req {
+ u64 hdr;
+ u32 rsvd;
+ u32 index;
+
+ u64 target_addr;
+ u32 target_length;
+};
+
+#define ERDMA_HW_RESP_MAGIC 0x5566
+
+struct erdma_cmdq_query_resp_hdr {
+ u16 magic;
+ u8 ver;
+ u8 length;
+
+ u32 index;
+ u32 rsvd[2];
+};
+
+struct erdma_cmdq_query_stats_resp {
+ struct erdma_cmdq_query_resp_hdr hdr;
+
+ u64 tx_req_cnt;
+ u64 tx_packets_cnt;
+ u64 tx_bytes_cnt;
+ u64 tx_drop_packets_cnt;
+ u64 tx_bps_meter_drop_packets_cnt;
+ u64 tx_pps_meter_drop_packets_cnt;
+ u64 rx_packets_cnt;
+ u64 rx_bytes_cnt;
+ u64 rx_drop_packets_cnt;
+ u64 rx_bps_meter_drop_packets_cnt;
+ u64 rx_pps_meter_drop_packets_cnt;
+};
+
/* cap qword 0 definition */
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
@@ -364,6 +410,7 @@ struct erdma_cmdq_reflush_req {
enum {
ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
+ ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5,
ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3,
};
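CMDQ_OPCODE_GET_STATS returns its payload by DMA rather than inline in the completion: the request carries target_addr/target_length for a 256-byte response buffer (naturally drawn from the new resp_pool), and the ERDMA_HW_RESP_MAGIC in the header should be checked before the counters are trusted. A hedged caller sketch built only from helpers visible in this series:

	struct erdma_cmdq_query_stats_resp *resp;
	struct erdma_cmdq_query_req req;
	dma_addr_t resp_dma;
	int err;

	resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &resp_dma);
	if (!resp)
		return -ENOMEM;

	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
				CMDQ_OPCODE_GET_STATS);
	req.target_addr = resp_dma;
	req.target_length = ERDMA_HW_RESP_SIZE;

	err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
	if (!err && resp->hdr.magic != ERDMA_HW_RESP_MAGIC)
		err = -EINVAL;

	dma_pool_free(dev->resp_pool, resp, resp_dma);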
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 0880c79a978c..62f497a71004 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -172,14 +172,41 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
int ret;
+ dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
+ ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
+ 0);
+ if (!dev->resp_pool)
+ return -ENOMEM;
+
+ dev->db_pool = dma_pool_create("erdma_db_pool", &pdev->dev,
+ ERDMA_DB_SIZE, ERDMA_DB_SIZE, 0);
+ if (!dev->db_pool) {
+ ret = -ENOMEM;
+ goto destroy_resp_pool;
+ }
+
ret = dma_set_mask_and_coherent(&pdev->dev,
DMA_BIT_MASK(ERDMA_PCI_WIDTH));
if (ret)
- return ret;
+ goto destroy_db_pool;
dma_set_max_seg_size(&pdev->dev, UINT_MAX);
return 0;
+
+destroy_db_pool:
+ dma_pool_destroy(dev->db_pool);
+
+destroy_resp_pool:
+ dma_pool_destroy(dev->resp_pool);
+
+ return ret;
+}
+
+static void erdma_device_uninit(struct erdma_dev *dev)
+{
+ dma_pool_destroy(dev->db_pool);
+ dma_pool_destroy(dev->resp_pool);
}
static void erdma_hw_reset(struct erdma_dev *dev)
@@ -273,7 +300,7 @@ static int erdma_probe_dev(struct pci_dev *pdev)
err = erdma_request_vectors(dev);
if (err)
- goto err_iounmap_func_bar;
+ goto err_uninit_device;
err = erdma_comm_irq_init(dev);
if (err)
@@ -306,7 +333,7 @@ err_uninit_cmdq:
erdma_cmdq_destroy(dev);
err_uninit_aeq:
- erdma_aeq_destroy(dev);
+ erdma_eq_destroy(dev, &dev->aeq);
err_uninit_comm_irq:
erdma_comm_irq_uninit(dev);
@@ -314,6 +341,9 @@ err_uninit_comm_irq:
err_free_vectors:
pci_free_irq_vectors(dev->pdev);
+err_uninit_device:
+ erdma_device_uninit(dev);
+
err_iounmap_func_bar:
devm_iounmap(&pdev->dev, dev->func_bar);
@@ -336,9 +366,10 @@ static void erdma_remove_dev(struct pci_dev *pdev)
erdma_ceqs_uninit(dev);
erdma_hw_reset(dev);
erdma_cmdq_destroy(dev);
- erdma_aeq_destroy(dev);
+ erdma_eq_destroy(dev, &dev->aeq);
erdma_comm_irq_uninit(dev);
pci_free_irq_vectors(dev->pdev);
+ erdma_device_uninit(dev);
devm_iounmap(&pdev->dev, dev->func_bar);
pci_release_selected_regions(pdev, ERDMA_BAR_MASK);
@@ -448,6 +479,7 @@ static const struct ib_device_ops erdma_device_ops = {
.driver_id = RDMA_DRIVER_ERDMA,
.uverbs_abi_ver = ERDMA_ABI_VERSION,
+ .alloc_hw_port_stats = erdma_alloc_hw_port_stats,
.alloc_mr = erdma_ib_alloc_mr,
.alloc_pd = erdma_alloc_pd,
.alloc_ucontext = erdma_alloc_ucontext,
@@ -458,7 +490,9 @@ static const struct ib_device_ops erdma_device_ops = {
.dereg_mr = erdma_dereg_mr,
.destroy_cq = erdma_destroy_cq,
.destroy_qp = erdma_destroy_qp,
+ .disassociate_ucontext = erdma_disassociate_ucontext,
.get_dma_mr = erdma_get_dma_mr,
+ .get_hw_stats = erdma_get_hw_stats,
.get_port_immutable = erdma_get_port_immutable,
.iw_accept = erdma_accept,
.iw_add_ref = erdma_qp_get_ref,
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 44923c51a01b..4d1f9114cd97 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -410,7 +410,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
/* Copy MTT entries into the SQE content to speed up posting */
memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
qp->attrs.sq_size, SQEBB_SHIFT),
- mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents));
+ mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents));
wqe_size = sizeof(struct erdma_reg_mr_sqe) +
MTT_SIZE(mr->mem.mtt_nents);
} else {
@@ -492,7 +492,7 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi)
u64 db_data = FIELD_PREP(ERDMA_SQE_HDR_QPN_MASK, QP_ID(qp)) |
FIELD_PREP(ERDMA_SQE_HDR_WQEBB_INDEX_MASK, pi);
- *(u64 *)qp->kern_qp.sq_db_info = db_data;
+ *(u64 *)qp->kern_qp.sq_dbrec = db_data;
writeq(db_data, qp->kern_qp.hw_sq_db);
}
@@ -557,7 +557,7 @@ static int erdma_post_recv_one(struct erdma_qp *qp,
return -EINVAL;
}
- *(u64 *)qp->kern_qp.rq_db_info = *(u64 *)rqe;
+ *(u64 *)qp->kern_qp.rq_dbrec = *(u64 *)rqe;
writeq(*(u64 *)rqe, qp->kern_qp.hw_rq_db);
qp->kern_qp.rwr_tbl[qp->kern_qp.rq_pi & (qp->attrs.rq_size - 1)] =
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 517676fbb8b1..51d619edb6c5 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -19,6 +19,23 @@
#include "erdma_cm.h"
#include "erdma_verbs.h"
+static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg,
+ u64 *addr0, u64 *addr1)
+{
+ struct erdma_mtt *mtt = mem->mtt;
+
+ if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ *addr0 = mtt->buf_dma;
+ *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_1LEVEL);
+ } else {
+ *addr0 = mtt->buf[0];
+ memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1));
+ *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
+ }
+}
+
static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
{
struct erdma_dev *dev = to_edev(qp->ibqp.device);
@@ -53,47 +70,43 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
req.sq_mtt_cfg =
FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) |
FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) |
- FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
- ERDMA_MR_INLINE_MTT);
+ FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
req.rq_mtt_cfg = req.sq_mtt_cfg;
req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr;
req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr;
- req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr +
- (qp->attrs.sq_size << SQEBB_SHIFT);
- req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr +
- (qp->attrs.rq_size << RQE_SHIFT);
+ req.sq_dbrec_dma = qp->kern_qp.sq_dbrec_dma;
+ req.rq_dbrec_dma = qp->kern_qp.rq_dbrec_dma;
} else {
user_qp = &qp->user_qp;
req.sq_cqn_mtt_cfg = FIELD_PREP(
ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
- ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT);
+ ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
req.sq_cqn_mtt_cfg |=
FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn);
req.rq_cqn_mtt_cfg = FIELD_PREP(
ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK,
- ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT);
+ ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT);
req.rq_cqn_mtt_cfg |=
FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn);
- req.sq_mtt_cfg = user_qp->sq_mtt.page_offset;
+ req.sq_mtt_cfg = user_qp->sq_mem.page_offset;
req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
- user_qp->sq_mtt.mtt_nents) |
- FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
- user_qp->sq_mtt.mtt_type);
+ user_qp->sq_mem.mtt_nents);
- req.rq_mtt_cfg = user_qp->rq_mtt.page_offset;
+ req.rq_mtt_cfg = user_qp->rq_mem.page_offset;
req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK,
- user_qp->rq_mtt.mtt_nents) |
- FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK,
- user_qp->rq_mtt.mtt_type);
+ user_qp->rq_mem.mtt_nents);
- req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0];
- req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0];
+ assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg,
+ &req.sq_buf_addr, req.sq_mtt_entry);
+ assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg,
+ &req.rq_buf_addr, req.rq_mtt_entry);
- req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr;
- req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr;
+ req.sq_dbrec_dma = user_qp->sq_dbrec_dma;
+ req.rq_dbrec_dma = user_qp->rq_dbrec_dma;
if (uctx->ext_db.enable) {
req.sq_cqn_mtt_cfg |=
@@ -117,13 +130,26 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
{
- struct erdma_cmdq_reg_mr_req req;
struct erdma_pd *pd = to_epd(mr->ibmr.pd);
- u64 *phy_addr;
- int i;
+ u32 mtt_level = ERDMA_MR_MTT_0LEVEL;
+ struct erdma_cmdq_reg_mr_req req;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR);
+ if (mr->type == ERDMA_MR_TYPE_FRMR ||
+ mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) {
+ if (mr->mem.mtt->continuous) {
+ req.phy_addr[0] = mr->mem.mtt->buf_dma;
+ mtt_level = ERDMA_MR_MTT_1LEVEL;
+ } else {
+ req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist);
+ mtt_level = mr->mem.mtt->level;
+ }
+ } else if (mr->type != ERDMA_MR_TYPE_DMA) {
+ memcpy(req.phy_addr, mr->mem.mtt->buf,
+ MTT_SIZE(mr->mem.page_cnt));
+ }
+
req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) |
FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) |
FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
@@ -132,7 +158,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
ilog2(mr->mem.page_size)) |
- FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
+ FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) |
FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt);
if (mr->type == ERDMA_MR_TYPE_DMA)
@@ -143,14 +169,12 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
req.size = mr->mem.len;
}
- if (mr->type == ERDMA_MR_TYPE_FRMR ||
- mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) {
- phy_addr = req.phy_addr;
- *phy_addr = mr->mem.mtt_entry[0];
- } else {
- phy_addr = req.phy_addr;
- for (i = 0; i < mr->mem.mtt_nents; i++)
- *phy_addr++ = mr->mem.mtt_entry[i];
+ if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) {
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1);
+ req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK,
+ PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT);
+ req.size_h = upper_32_bits(mr->mem.len);
+ req.mtt_cnt_h = mr->mem.page_cnt >> 20;
}
post_cmd:
@@ -161,7 +185,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
{
struct erdma_dev *dev = to_edev(cq->ibcq.device);
struct erdma_cmdq_create_cq_req req;
- struct erdma_mem *mtt;
+ struct erdma_mem *mem;
u32 page_size;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
@@ -179,31 +203,34 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr);
req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) |
- FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
- ERDMA_MR_INLINE_MTT);
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
req.first_page_offset = 0;
- req.cq_db_info_addr =
- cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT);
+ req.cq_dbrec_dma = cq->kern_cq.dbrec_dma;
} else {
- mtt = &cq->user_cq.qbuf_mtt;
+ mem = &cq->user_cq.qbuf_mem;
req.cfg0 |=
FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK,
- ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT);
- if (mtt->mtt_nents == 1) {
- req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf);
- req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf);
+ ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT);
+ if (mem->mtt_nents == 1) {
+ req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]);
+ req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]);
+ req.cfg1 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_0LEVEL);
} else {
- req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]);
- req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]);
+ req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma);
+ req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma);
+ req.cfg1 |=
+ FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK,
+ ERDMA_MR_MTT_1LEVEL);
}
req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK,
- mtt->mtt_nents);
- req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK,
- mtt->mtt_type);
+ mem->mtt_nents);
- req.first_page_offset = mtt->page_offset;
- req.cq_db_info_addr = cq->user_cq.db_info_dma_addr;
+ req.first_page_offset = mem->page_offset;
+ req.cq_dbrec_dma = cq->user_cq.dbrec_dma;
if (uctx->ext_db.enable) {
req.cfg1 |= FIELD_PREP(
@@ -452,16 +479,24 @@ static void free_kernel_qp(struct erdma_qp *qp)
vfree(qp->kern_qp.rwr_tbl);
if (qp->kern_qp.sq_buf)
- dma_free_coherent(
- &dev->pdev->dev,
- WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
- qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
+ dma_free_coherent(&dev->pdev->dev,
+ qp->attrs.sq_size << SQEBB_SHIFT,
+ qp->kern_qp.sq_buf,
+ qp->kern_qp.sq_buf_dma_addr);
+
+ if (qp->kern_qp.sq_dbrec)
+ dma_pool_free(dev->db_pool, qp->kern_qp.sq_dbrec,
+ qp->kern_qp.sq_dbrec_dma);
if (qp->kern_qp.rq_buf)
- dma_free_coherent(
- &dev->pdev->dev,
- WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
- qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
+ dma_free_coherent(&dev->pdev->dev,
+ qp->attrs.rq_size << RQE_SHIFT,
+ qp->kern_qp.rq_buf,
+ qp->kern_qp.rq_buf_dma_addr);
+
+ if (qp->kern_qp.rq_dbrec)
+ dma_pool_free(dev->db_pool, qp->kern_qp.rq_dbrec,
+ qp->kern_qp.rq_dbrec_dma);
}
static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
@@ -481,25 +516,32 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp,
dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT);
kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET;
- kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64));
- kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64));
+ kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64));
+ kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64));
if (!kqp->swr_tbl || !kqp->rwr_tbl)
goto err_out;
- size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
+ size = qp->attrs.sq_size << SQEBB_SHIFT;
kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
&kqp->sq_buf_dma_addr, GFP_KERNEL);
if (!kqp->sq_buf)
goto err_out;
- size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE;
+ kqp->sq_dbrec =
+ dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->sq_dbrec_dma);
+ if (!kqp->sq_dbrec)
+ goto err_out;
+
+ size = qp->attrs.rq_size << RQE_SHIFT;
kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size,
&kqp->rq_buf_dma_addr, GFP_KERNEL);
if (!kqp->rq_buf)
goto err_out;
- kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT);
- kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT);
+ kqp->rq_dbrec =
+ dma_pool_zalloc(dev->db_pool, GFP_KERNEL, &kqp->rq_dbrec_dma);
+ if (!kqp->rq_dbrec)
+ goto err_out;
return 0;
@@ -508,12 +550,223 @@ err_out:
return -ENOMEM;
}
+static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem)
+{
+ struct erdma_mtt *mtt = mem->mtt;
+ struct ib_block_iter biter;
+ u32 idx = 0;
+
+ while (mtt->low_level)
+ mtt = mtt->low_level;
+
+ rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size)
+ mtt->buf[idx++] = rdma_block_iter_dma_address(&biter);
+}
+
+static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev,
+ size_t size)
+{
+ struct erdma_mtt *mtt;
+
+ mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->size = size;
+ mtt->buf = kzalloc(mtt->size, GFP_KERNEL);
+ if (!mtt->buf)
+ goto err_free_mtt;
+
+ mtt->continuous = true;
+ mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma))
+ goto err_free_mtt_buf;
+
+ return mtt;
+
+err_free_mtt_buf:
+ kfree(mtt->buf);
+
+err_free_mtt:
+ kfree(mtt);
+
+ return ERR_PTR(-ENOMEM);
+}
+
+static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE);
+ vfree(mtt->sglist);
+}
+
+static void erdma_destroy_scatter_mtt(struct erdma_dev *dev,
+ struct erdma_mtt *mtt)
+{
+ erdma_destroy_mtt_buf_sg(dev, mtt);
+ vfree(mtt->buf);
+ kfree(mtt);
+}
+
+static void erdma_init_middle_mtt(struct erdma_mtt *mtt,
+ struct erdma_mtt *low_mtt)
+{
+ struct scatterlist *sg;
+ u32 idx = 0, i;
+
+ for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i)
+ mtt->buf[idx++] = sg_dma_address(sg);
+}
+
+static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt)
+{
+ struct scatterlist *sglist;
+ void *buf = mtt->buf;
+ u32 npages, i, nsg;
+ struct page *pg;
+
+ /* Fail if buf is not page aligned */
+ if ((uintptr_t)buf & ~PAGE_MASK)
+ return -EINVAL;
+
+ npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE);
+ sglist = vzalloc(npages * sizeof(*sglist));
+ if (!sglist)
+ return -ENOMEM;
+
+ sg_init_table(sglist, npages);
+ for (i = 0; i < npages; i++) {
+ pg = vmalloc_to_page(buf);
+ if (!pg)
+ goto err;
+ sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
+ buf += PAGE_SIZE;
+ }
+
+ nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE);
+ if (!nsg)
+ goto err;
+
+ mtt->sglist = sglist;
+ mtt->nsg = nsg;
+
+ return 0;
+err:
+ vfree(sglist);
+
+ return -ENOMEM;
+}
+
+static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev,
+ size_t size)
+{
+ struct erdma_mtt *mtt;
+ int ret = -ENOMEM;
+
+ mtt = kzalloc(sizeof(*mtt), GFP_KERNEL);
+ if (!mtt)
+ return ERR_PTR(-ENOMEM);
+
+ mtt->size = ALIGN(size, PAGE_SIZE);
+ mtt->buf = vzalloc(mtt->size);
+ mtt->continuous = false;
+ if (!mtt->buf)
+ goto err_free_mtt;
+
+ ret = erdma_create_mtt_buf_sg(dev, mtt);
+ if (ret)
+ goto err_free_mtt_buf;
+
+ ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n",
+ mtt->size, mtt->nsg);
+
+ return mtt;
+
+err_free_mtt_buf:
+ vfree(mtt->buf);
+
+err_free_mtt:
+ kfree(mtt);
+
+ return ERR_PTR(ret);
+}
+
+static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size,
+ bool force_continuous)
+{
+ struct erdma_mtt *mtt, *tmp_mtt;
+ int ret, level = 0;
+
+ ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size,
+ force_continuous);
+
+ if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA))
+ force_continuous = true;
+
+ if (force_continuous)
+ return erdma_create_cont_mtt(dev, size);
+
+ mtt = erdma_create_scatter_mtt(dev, size);
+ if (IS_ERR(mtt))
+ return mtt;
+ level = 1;
+
+ /* Converge the MTT table until the top level fits in a single page. */
+ while (mtt->nsg != 1 && level <= 3) {
+ tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg));
+ if (IS_ERR(tmp_mtt)) {
+ ret = PTR_ERR(tmp_mtt);
+ goto err_free_mtt;
+ }
+ erdma_init_middle_mtt(tmp_mtt, mtt);
+ tmp_mtt->low_level = mtt;
+ mtt = tmp_mtt;
+ level++;
+ }
+
+ if (level > 3) {
+ ret = -ENOMEM;
+ goto err_free_mtt;
+ }
+
+ mtt->level = level;
+ ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n",
+ mtt->level, mtt->sglist[0].dma_address);
+
+ return mtt;
+err_free_mtt:
+ while (mtt) {
+ tmp_mtt = mtt->low_level;
+ erdma_destroy_scatter_mtt(dev, mtt);
+ mtt = tmp_mtt;
+ }
+
+ return ERR_PTR(ret);
+}
+
+static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt)
+{
+ struct erdma_mtt *tmp_mtt;
+
+ if (mtt->continuous) {
+ dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size,
+ DMA_TO_DEVICE);
+ kfree(mtt->buf);
+ kfree(mtt);
+ } else {
+ while (mtt) {
+ tmp_mtt = mtt->low_level;
+ erdma_destroy_scatter_mtt(dev, mtt);
+ mtt = tmp_mtt;
+ }
+ }
+}
+
static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
u64 start, u64 len, int access, u64 virt,
- unsigned long req_page_size, u8 force_indirect_mtt)
+ unsigned long req_page_size, bool force_continuous)
{
- struct ib_block_iter biter;
- uint64_t *phy_addr = NULL;
int ret = 0;
mem->umem = ib_umem_get(&dev->ibdev, start, len, access);
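For scale, the level arithmetic in erdma_create_mtt() works out as follows (MTT_SIZE(n) is n * 8 bytes; 4 KiB pages assumed): registering 4 GiB at 4 KiB granularity needs 2^20 bottom-level entries, i.e. 8 MiB of MTT spread over 2048 pages. If those map to 2048 scatter entries, the middle MTT needs 2048 * 8 = 16 KiB (4 pages), and the next level needs 4 * 8 = 32 bytes, which fits in one page, so the loop settles at level 3 and hands the hardware a single top-level address.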
@@ -529,38 +782,14 @@ static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem,
mem->page_offset = start & (mem->page_size - 1);
mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size);
mem->page_cnt = mem->mtt_nents;
-
- if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES ||
- force_indirect_mtt) {
- mem->mtt_type = ERDMA_MR_INDIRECT_MTT;
- mem->mtt_buf =
- alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL);
- if (!mem->mtt_buf) {
- ret = -ENOMEM;
- goto error_ret;
- }
- phy_addr = mem->mtt_buf;
- } else {
- mem->mtt_type = ERDMA_MR_INLINE_MTT;
- phy_addr = mem->mtt_entry;
- }
-
- rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) {
- *phy_addr = rdma_block_iter_dma_address(&biter);
- phy_addr++;
+ mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt),
+ force_continuous);
+ if (IS_ERR(mem->mtt)) {
+ ret = PTR_ERR(mem->mtt);
+ goto error_ret;
}
- if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) {
- mem->mtt_entry[0] =
- dma_map_single(&dev->pdev->dev, mem->mtt_buf,
- MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
- if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) {
- free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
- mem->mtt_buf = NULL;
- ret = -ENOMEM;
- goto error_ret;
- }
- }
+ erdma_fill_bottom_mtt(dev, mem);
return 0;
@@ -575,11 +804,8 @@ error_ret:
static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem)
{
- if (mem->mtt_buf) {
- dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0],
- MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE);
- free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt));
- }
+ if (mem->mtt)
+ erdma_destroy_mtt(dev, mem->mtt);
if (mem->umem) {
ib_umem_release(mem->umem);
@@ -650,9 +876,9 @@ erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx,
}
static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
- u64 va, u32 len, u64 db_info_va)
+ u64 va, u32 len, u64 dbrec_va)
{
- dma_addr_t db_info_dma_addr;
+ dma_addr_t dbrec_dma;
u32 rq_offset;
int ret;
@@ -660,45 +886,45 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx,
qp->attrs.rq_size * RQE_SIZE))
return -EINVAL;
- ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va,
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va,
qp->attrs.sq_size << SQEBB_SHIFT, 0, va,
- (SZ_1M - SZ_4K), 1);
+ (SZ_1M - SZ_4K), true);
if (ret)
return ret;
rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE);
qp->user_qp.rq_offset = rq_offset;
- ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset,
+ ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset,
qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset,
- (SZ_1M - SZ_4K), 1);
+ (SZ_1M - SZ_4K), true);
if (ret)
goto put_sq_mtt;
- ret = erdma_map_user_dbrecords(uctx, db_info_va,
+ ret = erdma_map_user_dbrecords(uctx, dbrec_va,
&qp->user_qp.user_dbr_page,
- &db_info_dma_addr);
+ &dbrec_dma);
if (ret)
goto put_rq_mtt;
- qp->user_qp.sq_db_info_dma_addr = db_info_dma_addr;
- qp->user_qp.rq_db_info_dma_addr = db_info_dma_addr + ERDMA_DB_SIZE;
+ qp->user_qp.sq_dbrec_dma = dbrec_dma;
+ qp->user_qp.rq_dbrec_dma = dbrec_dma + ERDMA_DB_SIZE;
return 0;
put_rq_mtt:
- put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
put_sq_mtt:
- put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
return ret;
}
static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx)
{
- put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt);
- put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt);
+ put_mtt_entries(qp->dev, &qp->user_qp.sq_mem);
+ put_mtt_entries(qp->dev, &qp->user_qp.rq_mem);
erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page);
}
@@ -875,33 +1101,20 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
mr->mem.page_size = PAGE_SIZE; /* update it later. */
mr->mem.page_cnt = max_num_sg;
- mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT;
- mr->mem.mtt_buf =
- alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL);
- if (!mr->mem.mtt_buf) {
- ret = -ENOMEM;
+ mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true);
+ if (IS_ERR(mr->mem.mtt)) {
+ ret = PTR_ERR(mr->mem.mtt);
goto out_remove_stag;
}
- mr->mem.mtt_entry[0] =
- dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf,
- MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
- if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) {
- ret = -ENOMEM;
- goto out_free_mtt;
- }
-
ret = regmr_cmd(dev, mr);
if (ret)
- goto out_dma_unmap;
+ goto out_destroy_mtt;
return &mr->ibmr;
-out_dma_unmap:
- dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0],
- MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE);
-out_free_mtt:
- free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt));
+out_destroy_mtt:
+ erdma_destroy_mtt(dev, mr->mem.mtt);
out_remove_stag:
erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX],
@@ -920,7 +1133,7 @@ static int erdma_set_page(struct ib_mr *ibmr, u64 addr)
if (mr->mem.mtt_nents >= mr->mem.page_cnt)
return -1;
- *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr;
+ mr->mem.mtt->buf[mr->mem.mtt_nents] = addr;
mr->mem.mtt_nents++;
return 0;
@@ -956,7 +1169,7 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
return ERR_PTR(-ENOMEM);
ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt,
- SZ_2G - SZ_4K, 0);
+ SZ_2G - SZ_4K, false);
if (ret)
goto err_out_free;
@@ -1036,12 +1249,13 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
return err;
if (rdma_is_kernel_res(&cq->ibcq.res)) {
- dma_free_coherent(&dev->pdev->dev,
- WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+ cq->kern_cq.dbrec_dma);
} else {
erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
- put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
}
xa_erase(&dev->cq_xa, cq->cqn);
@@ -1078,19 +1292,10 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
wait_for_completion(&qp->safe_free);
if (rdma_is_kernel_res(&qp->ibqp.res)) {
- vfree(qp->kern_qp.swr_tbl);
- vfree(qp->kern_qp.rwr_tbl);
- dma_free_coherent(
- &dev->pdev->dev,
- WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT),
- qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr);
- dma_free_coherent(
- &dev->pdev->dev,
- WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT),
- qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr);
+ free_kernel_qp(qp);
} else {
- put_mtt_entries(dev, &qp->user_qp.sq_mtt);
- put_mtt_entries(dev, &qp->user_qp.rq_mtt);
+ put_mtt_entries(dev, &qp->user_qp.sq_mem);
+ put_mtt_entries(dev, &qp->user_qp.rq_mem);
erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page);
}
@@ -1339,11 +1544,31 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
return ret;
}
+static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
+{
+ switch (qp->attrs.state) {
+ case ERDMA_QP_STATE_IDLE:
+ return IB_QPS_INIT;
+ case ERDMA_QP_STATE_RTR:
+ return IB_QPS_RTR;
+ case ERDMA_QP_STATE_RTS:
+ return IB_QPS_RTS;
+ case ERDMA_QP_STATE_CLOSING:
+ return IB_QPS_ERR;
+ case ERDMA_QP_STATE_TERMINATE:
+ return IB_QPS_ERR;
+ case ERDMA_QP_STATE_ERROR:
+ return IB_QPS_ERR;
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
- struct erdma_qp *qp;
struct erdma_dev *dev;
+ struct erdma_qp *qp;
if (ibqp && qp_attr && qp_init_attr) {
qp = to_eqp(ibqp);
@@ -1370,6 +1595,9 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
+ qp_attr->qp_state = query_qp_state(qp);
+ qp_attr->cur_qp_state = query_qp_state(qp);
+
return 0;
}
@@ -1379,17 +1607,17 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq,
int ret;
struct erdma_dev *dev = to_edev(cq->ibcq.device);
- ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va,
+ ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va,
ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K,
- 1);
+ true);
if (ret)
return ret;
ret = erdma_map_user_dbrecords(ctx, ureq->db_record_va,
&cq->user_cq.user_dbr_page,
- &cq->user_cq.db_info_dma_addr);
+ &cq->user_cq.dbrec_dma);
if (ret)
- put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
return ret;
}
@@ -1399,24 +1627,33 @@ static int erdma_init_kernel_cq(struct erdma_cq *cq)
struct erdma_dev *dev = to_edev(cq->ibcq.device);
cq->kern_cq.qbuf =
- dma_alloc_coherent(&dev->pdev->dev,
- WARPPED_BUFSIZE(cq->depth << CQE_SHIFT),
+ dma_alloc_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
&cq->kern_cq.qbuf_dma_addr, GFP_KERNEL);
if (!cq->kern_cq.qbuf)
return -ENOMEM;
- cq->kern_cq.db_record =
- (u64 *)(cq->kern_cq.qbuf + (cq->depth << CQE_SHIFT));
+ cq->kern_cq.dbrec = dma_pool_zalloc(dev->db_pool, GFP_KERNEL,
+ &cq->kern_cq.dbrec_dma);
+ if (!cq->kern_cq.dbrec)
+ goto err_out;
+
spin_lock_init(&cq->kern_cq.lock);
/* use default cqdb addr */
cq->kern_cq.db = dev->func_bar + ERDMA_BAR_CQDB_SPACE_OFFSET;
return 0;
+
+err_out:
+ dma_free_coherent(&dev->pdev->dev, cq->depth << CQE_SHIFT,
+ cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+
+ return -ENOMEM;
}
int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct erdma_cq *cq = to_ecq(ibcq);
struct erdma_dev *dev = to_edev(ibcq->device);
unsigned int depth = attr->cqe;
@@ -1473,11 +1710,12 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
err_free_res:
if (!rdma_is_kernel_res(&ibcq->res)) {
erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page);
- put_mtt_entries(dev, &cq->user_cq.qbuf_mtt);
+ put_mtt_entries(dev, &cq->user_cq.qbuf_mem);
} else {
- dma_free_coherent(&dev->pdev->dev,
- WARPPED_BUFSIZE(depth << CQE_SHIFT),
+ dma_free_coherent(&dev->pdev->dev, depth << CQE_SHIFT,
cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr);
+ dma_pool_free(dev->db_pool, cq->kern_cq.dbrec,
+ cq->kern_cq.dbrec_dma);
}
err_out_xa:
@@ -1486,6 +1724,10 @@ err_out_xa:
return ret;
}
+void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
+
void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
{
struct erdma_cmdq_config_mtu_req req;
@@ -1507,3 +1749,93 @@ void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
ib_dispatch_event(&event);
}
+
+enum counters {
+ ERDMA_STATS_TX_REQS_CNT,
+ ERDMA_STATS_TX_PACKETS_CNT,
+ ERDMA_STATS_TX_BYTES_CNT,
+ ERDMA_STATS_TX_DISABLE_DROP_CNT,
+ ERDMA_STATS_TX_BPS_METER_DROP_CNT,
+ ERDMA_STATS_TX_PPS_METER_DROP_CNT,
+
+ ERDMA_STATS_RX_PACKETS_CNT,
+ ERDMA_STATS_RX_BYTES_CNT,
+ ERDMA_STATS_RX_DISABLE_DROP_CNT,
+ ERDMA_STATS_RX_BPS_METER_DROP_CNT,
+ ERDMA_STATS_RX_PPS_METER_DROP_CNT,
+
+ ERDMA_STATS_MAX
+};
+
+static const struct rdma_stat_desc erdma_descs[] = {
+ [ERDMA_STATS_TX_REQS_CNT].name = "tx_reqs_cnt",
+ [ERDMA_STATS_TX_PACKETS_CNT].name = "tx_packets_cnt",
+ [ERDMA_STATS_TX_BYTES_CNT].name = "tx_bytes_cnt",
+ [ERDMA_STATS_TX_DISABLE_DROP_CNT].name = "tx_disable_drop_cnt",
+ [ERDMA_STATS_TX_BPS_METER_DROP_CNT].name = "tx_bps_limit_drop_cnt",
+ [ERDMA_STATS_TX_PPS_METER_DROP_CNT].name = "tx_pps_limit_drop_cnt",
+ [ERDMA_STATS_RX_PACKETS_CNT].name = "rx_packets_cnt",
+ [ERDMA_STATS_RX_BYTES_CNT].name = "rx_bytes_cnt",
+ [ERDMA_STATS_RX_DISABLE_DROP_CNT].name = "rx_disable_drop_cnt",
+ [ERDMA_STATS_RX_BPS_METER_DROP_CNT].name = "rx_bps_limit_drop_cnt",
+ [ERDMA_STATS_RX_PPS_METER_DROP_CNT].name = "rx_pps_limit_drop_cnt",
+};
+
+struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(erdma_descs, ERDMA_STATS_MAX,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int erdma_query_hw_stats(struct erdma_dev *dev,
+ struct rdma_hw_stats *stats)
+{
+ struct erdma_cmdq_query_stats_resp *resp;
+ struct erdma_cmdq_query_req req;
+ dma_addr_t dma_addr;
+ int err;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON,
+ CMDQ_OPCODE_GET_STATS);
+
+ resp = dma_pool_zalloc(dev->resp_pool, GFP_KERNEL, &dma_addr);
+ if (!resp)
+ return -ENOMEM;
+
+ req.target_addr = dma_addr;
+ req.target_length = ERDMA_HW_RESP_SIZE;
+
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ if (err)
+ goto out;
+
+ if (resp->hdr.magic != ERDMA_HW_RESP_MAGIC) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&stats->value[0], &resp->tx_req_cnt,
+ sizeof(u64) * stats->num_counters);
+
+out:
+ dma_pool_free(dev->resp_pool, resp, dma_addr);
+
+ return err;
+}
+
+int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port, int index)
+{
+ struct erdma_dev *dev = to_edev(ibdev);
+ int ret;
+
+ if (port == 0)
+ return 0;
+
+ ret = erdma_query_hw_stats(dev, stats);
+ if (ret)
+ return ret;
+
+ return stats->num_counters;
+}
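For reference, these two callbacks only take effect once plugged into the driver's ib_device_ops (the hookup itself belongs in erdma_main.c, whose changes appear elsewhere in this series); a minimal sketch of that wiring, using only the functions declared above — erdma_stats_ops_sketch is a hypothetical name:

static const struct ib_device_ops erdma_stats_ops_sketch = {
	.alloc_hw_port_stats = erdma_alloc_hw_port_stats,
	.get_hw_stats = erdma_get_hw_stats,
};

/* registered with: ib_set_device_ops(&dev->ibdev, &erdma_stats_ops_sketch); */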
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index 429fc3063f98..c998acd39a78 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -65,7 +65,7 @@ struct erdma_pd {
* MemoryRegion definition.
*/
#define ERDMA_MAX_INLINE_MTT_ENTRIES 4
-#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */
+#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* each MTT entry takes 8 bytes. */
#define ERDMA_MR_MAX_MTT_CNT 524288
#define ERDMA_MTT_ENTRY_SIZE 8
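The added parentheses guard against macro arguments containing operators that bind more loosely than <<; a short illustration:

/* Unparenthesized: MTT_SIZE(cnt & mask) expands to (cnt & mask << 3),
 * which C precedence groups as (cnt & (mask << 3)) -- not the intent.
 * Parenthesized:   MTT_SIZE(cnt & mask) expands to ((cnt & mask) << 3),
 * the intended size of cnt & mask entries. */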
@@ -73,8 +73,8 @@ struct erdma_pd {
#define ERDMA_MR_TYPE_FRMR 1
#define ERDMA_MR_TYPE_DMA 2
-#define ERDMA_MR_INLINE_MTT 0
-#define ERDMA_MR_INDIRECT_MTT 1
+#define ERDMA_MR_MTT_0LEVEL 0
+#define ERDMA_MR_MTT_1LEVEL 1
#define ERDMA_MR_ACC_RA BIT(0)
#define ERDMA_MR_ACC_LR BIT(1)
@@ -90,10 +90,28 @@ static inline u8 to_erdma_access_flags(int access)
(access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
}
+/* Hierarchical storage structure for MTT entries */
+struct erdma_mtt {
+ u64 *buf;
+ size_t size;
+
+ bool continuous;
+ union {
+ dma_addr_t buf_dma;
+ struct {
+ struct scatterlist *sglist;
+ u32 nsg;
+ u32 level;
+ };
+ };
+
+ struct erdma_mtt *low_level;
+};
+
struct erdma_mem {
struct ib_umem *umem;
- void *mtt_buf;
- u32 mtt_type;
+ struct erdma_mtt *mtt;
+
u32 page_size;
u32 page_offset;
u32 page_cnt;
@@ -101,8 +119,6 @@ struct erdma_mem {
u64 va;
u64 len;
-
- u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES];
};
struct erdma_mr {
@@ -121,11 +137,11 @@ struct erdma_user_dbrecords_page {
};
struct erdma_uqp {
- struct erdma_mem sq_mtt;
- struct erdma_mem rq_mtt;
+ struct erdma_mem sq_mem;
+ struct erdma_mem rq_mem;
- dma_addr_t sq_db_info_dma_addr;
- dma_addr_t rq_db_info_dma_addr;
+ dma_addr_t sq_dbrec_dma;
+ dma_addr_t rq_dbrec_dma;
struct erdma_user_dbrecords_page *user_dbr_page;
@@ -151,8 +167,11 @@ struct erdma_kqp {
void *rq_buf;
dma_addr_t rq_buf_dma_addr;
- void *sq_db_info;
- void *rq_db_info;
+ void *sq_dbrec;
+ void *rq_dbrec;
+
+ dma_addr_t sq_dbrec_dma;
+ dma_addr_t rq_dbrec_dma;
u8 sig_all;
};
@@ -230,13 +249,14 @@ struct erdma_kcq_info {
spinlock_t lock;
u8 __iomem *db;
- u64 *db_record;
+ u64 *dbrec;
+ dma_addr_t dbrec_dma;
};
struct erdma_ucq_info {
- struct erdma_mem qbuf_mtt;
+ struct erdma_mem qbuf_mem;
struct erdma_user_dbrecords_page *user_dbr_page;
- dma_addr_t db_info_dma_addr;
+ dma_addr_t dbrec_dma;
};
struct erdma_cq {
@@ -309,7 +329,7 @@ int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr,
int erdma_get_port_immutable(struct ib_device *dev, u32 port,
struct ib_port_immutable *ib_port_immutable);
int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *data);
+ struct uverbs_attr_bundle *attrs);
int erdma_query_port(struct ib_device *dev, u32 port,
struct ib_port_attr *attr);
int erdma_query_gid(struct ib_device *dev, u32 port, int idx,
@@ -324,6 +344,7 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask,
struct ib_udata *data);
int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext);
int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
u64 virt, int access, struct ib_udata *udata);
@@ -345,5 +366,9 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset);
void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason);
void erdma_set_mtu(struct erdma_dev *dev, u32 mtu);
+struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
+ u32 port_num);
+int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port, int index);
#endif
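A minimal sketch of how the continuous flag selects between the two union views of struct erdma_mtt introduced above (mtt_first_dma is a hypothetical helper for illustration, not driver code):

static dma_addr_t mtt_first_dma(struct erdma_mtt *mtt)
{
	/* continuous: a single DMA-mapped buffer, address in buf_dma */
	if (mtt->continuous)
		return mtt->buf_dma;

	/* scattered: the buffer is described by sglist/nsg, and level
	 * counts the indirections down to the page-address entries */
	return sg_dma_address(mtt->sglist);
}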
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 2e89ec10efed..5d977f363684 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -31,6 +31,7 @@ hfi1-y := \
netdev_rx.o \
opfn.o \
pcie.o \
+ pin_system.o \
pio.o \
pio_copy.o \
platform.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 77ee77d4000f..7ead8746b79b 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
@@ -230,11 +230,9 @@ static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
- struct list_head *pos;
struct hfi1_affinity_node *entry;
- list_for_each(pos, &node_affinity.list) {
- entry = list_entry(pos, struct hfi1_affinity_node, list);
+ list_for_each_entry(entry, &node_affinity.list, list) {
if (entry->node == node)
return entry;
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 00854f21787f..ffdd0d571c7a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h
index df295f47b315..c8d92dc13daa 100644
--- a/drivers/infiniband/hw/hfi1/aspm.h
+++ b/drivers/infiniband/hw/hfi1/aspm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015-2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index baaa4406d5e6..c52e6b2c9914 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
* Copyright(c) 2021 Cornelis Networks.
@@ -251,7 +251,7 @@ struct flag_table {
/*
* CCE Error flags.
*/
-static struct flag_table cce_err_status_flags[] = {
+static const struct flag_table cce_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("CceCsrParityErr",
CCE_ERR_STATUS_CCE_CSR_PARITY_ERR_SMASK),
/* 1*/ FLAG_ENTRY0("CceCsrReadBadAddrErr",
@@ -341,7 +341,7 @@ static struct flag_table cce_err_status_flags[] = {
* Misc Error flags
*/
#define MES(text) MISC_ERR_STATUS_MISC_##text##_ERR_SMASK
-static struct flag_table misc_err_status_flags[] = {
+static const struct flag_table misc_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("CSR_PARITY", MES(CSR_PARITY)),
/* 1*/ FLAG_ENTRY0("CSR_READ_BAD_ADDR", MES(CSR_READ_BAD_ADDR)),
/* 2*/ FLAG_ENTRY0("CSR_WRITE_BAD_ADDR", MES(CSR_WRITE_BAD_ADDR)),
@@ -360,7 +360,7 @@ static struct flag_table misc_err_status_flags[] = {
/*
* TXE PIO Error flags and consequences
*/
-static struct flag_table pio_err_status_flags[] = {
+static const struct flag_table pio_err_status_flags[] = {
/* 0*/ FLAG_ENTRY("PioWriteBadCtxt",
SEC_WRITE_DROPPED,
SEND_PIO_ERR_STATUS_PIO_WRITE_BAD_CTXT_ERR_SMASK),
@@ -502,7 +502,7 @@ static struct flag_table pio_err_status_flags[] = {
/*
* TXE SDMA Error flags
*/
-static struct flag_table sdma_err_status_flags[] = {
+static const struct flag_table sdma_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SDmaRpyTagErr",
SEND_DMA_ERR_STATUS_SDMA_RPY_TAG_ERR_SMASK),
/* 1*/ FLAG_ENTRY0("SDmaCsrParityErr",
@@ -530,7 +530,7 @@ static struct flag_table sdma_err_status_flags[] = {
* TXE Egress Error flags
*/
#define SEES(text) SEND_EGRESS_ERR_STATUS_##text##_ERR_SMASK
-static struct flag_table egress_err_status_flags[] = {
+static const struct flag_table egress_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("TxPktIntegrityMemCorErr", SEES(TX_PKT_INTEGRITY_MEM_COR)),
/* 1*/ FLAG_ENTRY0("TxPktIntegrityMemUncErr", SEES(TX_PKT_INTEGRITY_MEM_UNC)),
/* 2 reserved */
@@ -631,7 +631,7 @@ static struct flag_table egress_err_status_flags[] = {
* TXE Egress Error Info flags
*/
#define SEEI(text) SEND_EGRESS_ERR_INFO_##text##_ERR_SMASK
-static struct flag_table egress_err_info_flags[] = {
+static const struct flag_table egress_err_info_flags[] = {
/* 0*/ FLAG_ENTRY0("Reserved", 0ull),
/* 1*/ FLAG_ENTRY0("VLErr", SEEI(VL)),
/* 2*/ FLAG_ENTRY0("JobKeyErr", SEEI(JOB_KEY)),
@@ -680,7 +680,7 @@ static struct flag_table egress_err_info_flags[] = {
* TXE Send error flags
*/
#define SES(name) SEND_ERR_STATUS_SEND_##name##_ERR_SMASK
-static struct flag_table send_err_status_flags[] = {
+static const struct flag_table send_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("SendCsrParityErr", SES(CSR_PARITY)),
/* 1*/ FLAG_ENTRY0("SendCsrReadBadAddrErr", SES(CSR_READ_BAD_ADDR)),
/* 2*/ FLAG_ENTRY0("SendCsrWriteBadAddrErr", SES(CSR_WRITE_BAD_ADDR))
@@ -689,7 +689,7 @@ static struct flag_table send_err_status_flags[] = {
/*
* TXE Send Context Error flags and consequences
*/
-static struct flag_table sc_err_status_flags[] = {
+static const struct flag_table sc_err_status_flags[] = {
/* 0*/ FLAG_ENTRY("InconsistentSop",
SEC_PACKET_DROPPED | SEC_SC_HALTED,
SEND_CTXT_ERR_STATUS_PIO_INCONSISTENT_SOP_ERR_SMASK),
@@ -712,7 +712,7 @@ static struct flag_table sc_err_status_flags[] = {
* RXE Receive Error flags
*/
#define RXES(name) RCV_ERR_STATUS_RX_##name##_ERR_SMASK
-static struct flag_table rxe_err_status_flags[] = {
+static const struct flag_table rxe_err_status_flags[] = {
/* 0*/ FLAG_ENTRY0("RxDmaCsrCorErr", RXES(DMA_CSR_COR)),
/* 1*/ FLAG_ENTRY0("RxDcIntfParityErr", RXES(DC_INTF_PARITY)),
/* 2*/ FLAG_ENTRY0("RxRcvHdrUncErr", RXES(RCV_HDR_UNC)),
@@ -847,7 +847,7 @@ static struct flag_table rxe_err_status_flags[] = {
* DCC Error Flags
*/
#define DCCE(name) DCC_ERR_FLG_##name##_SMASK
-static struct flag_table dcc_err_flags[] = {
+static const struct flag_table dcc_err_flags[] = {
FLAG_ENTRY0("bad_l2_err", DCCE(BAD_L2_ERR)),
FLAG_ENTRY0("bad_sc_err", DCCE(BAD_SC_ERR)),
FLAG_ENTRY0("bad_mid_tail_err", DCCE(BAD_MID_TAIL_ERR)),
@@ -900,7 +900,7 @@ static struct flag_table dcc_err_flags[] = {
* LCB error flags
*/
#define LCBE(name) DC_LCB_ERR_FLG_##name##_SMASK
-static struct flag_table lcb_err_flags[] = {
+static const struct flag_table lcb_err_flags[] = {
/* 0*/ FLAG_ENTRY0("CSR_PARITY_ERR", LCBE(CSR_PARITY_ERR)),
/* 1*/ FLAG_ENTRY0("INVALID_CSR_ADDR", LCBE(INVALID_CSR_ADDR)),
/* 2*/ FLAG_ENTRY0("RST_FOR_FAILED_DESKEW", LCBE(RST_FOR_FAILED_DESKEW)),
@@ -943,7 +943,7 @@ static struct flag_table lcb_err_flags[] = {
* DC8051 Error Flags
*/
#define D8E(name) DC_DC8051_ERR_FLG_##name##_SMASK
-static struct flag_table dc8051_err_flags[] = {
+static const struct flag_table dc8051_err_flags[] = {
FLAG_ENTRY0("SET_BY_8051", D8E(SET_BY_8051)),
FLAG_ENTRY0("LOST_8051_HEART_BEAT", D8E(LOST_8051_HEART_BEAT)),
FLAG_ENTRY0("CRAM_MBE", D8E(CRAM_MBE)),
@@ -962,7 +962,7 @@ static struct flag_table dc8051_err_flags[] = {
*
* Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.ERROR field.
*/
-static struct flag_table dc8051_info_err_flags[] = {
+static const struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Spico ROM check failed", SPICO_ROM_FAILED),
FLAG_ENTRY0("Unknown frame received", UNKNOWN_FRAME),
FLAG_ENTRY0("Target BER not met", TARGET_BER_NOT_MET),
@@ -986,7 +986,7 @@ static struct flag_table dc8051_info_err_flags[] = {
*
* Flags in DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG field.
*/
-static struct flag_table dc8051_info_host_msg_flags[] = {
+static const struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Host request done", 0x0001),
FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
FLAG_ENTRY0("BC SMA message", 0x0004),
@@ -1461,7 +1461,8 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context,
ret = write_lcb_csr(dd, csr, data);
if (ret) {
- dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
+ if (!(dd->flags & HFI1_SHUTDOWN))
+ dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr);
return 0;
}
@@ -5274,7 +5275,7 @@ done:
* the buffer. End in '*' if the buffer is too short.
*/
static char *flag_string(char *buf, int buf_len, u64 flags,
- struct flag_table *table, int table_size)
+ const struct flag_table *table, int table_size)
{
char extra[32];
char *p = buf;
@@ -5333,7 +5334,7 @@ static const char * const cce_misc_names[] = {
static char *is_misc_err_name(char *buf, size_t bsize, unsigned int source)
{
if (source < ARRAY_SIZE(cce_misc_names))
- strncpy(buf, cce_misc_names[source], bsize);
+ strscpy_pad(buf, cce_misc_names[source], bsize);
else
snprintf(buf, bsize, "Reserved%u",
source + IS_GENERAL_ERR_START);
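Unlike strncpy(), strscpy_pad() guarantees NUL termination and zero-fills the remainder of the destination; a sketch (buffer size and string are illustrative):

char buf[12];

/* buf becomes "CceIntMap\0\0\0": terminated and fully initialized,
 * even when the source is shorter than the buffer. */
strscpy_pad(buf, "CceIntMap", sizeof(buf));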
@@ -5373,7 +5374,7 @@ static const char * const various_names[] = {
static char *is_various_name(char *buf, size_t bsize, unsigned int source)
{
if (source < ARRAY_SIZE(various_names))
- strncpy(buf, various_names[source], bsize);
+ strscpy_pad(buf, various_names[source], bsize);
else
snprintf(buf, bsize, "Reserved%u", source + IS_VARIOUS_START);
return buf;
@@ -6160,7 +6161,7 @@ static int request_host_lcb_access(struct hfi1_devdata *dd)
ret = do_8051_command(dd, HCMD_MISC,
(u64)HCMD_MISC_REQUEST_LCB_ACCESS <<
LOAD_DATA_FIELD_ID_SHIFT, NULL);
- if (ret != HCMD_SUCCESS) {
+ if (ret != HCMD_SUCCESS && !(dd->flags & HFI1_SHUTDOWN)) {
dd_dev_err(dd, "%s: command failed with error %d\n",
__func__, ret);
}
@@ -6241,7 +6242,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok)
if (dd->lcb_access_count == 0) {
ret = request_host_lcb_access(dd);
if (ret) {
- dd_dev_err(dd,
+ if (!(dd->flags & HFI1_SHUTDOWN))
+ dd_dev_err(dd,
"%s: unable to acquire LCB access, err %d\n",
__func__, ret);
goto done;
@@ -13183,15 +13185,16 @@ static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
{
u64 reg;
u16 idx = src / BITS_PER_REGISTER;
+ unsigned long flags;
- spin_lock(&dd->irq_src_lock);
+ spin_lock_irqsave(&dd->irq_src_lock, flags);
reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
if (set)
reg |= bits;
else
reg &= ~bits;
write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
- spin_unlock(&dd->irq_src_lock);
+ spin_unlock_irqrestore(&dd->irq_src_lock, flags);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index b2d53713da58..d861aa8fc640 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 95a8d530d554..d79e25d20fb8 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 166ad6b828dc..8abc902b96f3 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 80ba1e53c068..a1e01b447265 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 29a5a8de2c41..54d952a4016c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016, 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index 05be0d119f79..4250d077b06f 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
@@ -10,8 +10,29 @@
#include "hfi.h"
#include "device.h"
-static struct class *class;
-static struct class *user_class;
+static char *hfi1_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0600;
+ return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+}
+
+static const struct class class = {
+ .name = "hfi1",
+ .devnode = hfi1_devnode,
+};
+
+static char *hfi1_user_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0666;
+ return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
+}
+
+static const struct class user_class = {
+ .name = "hfi1_user",
+ .devnode = hfi1_user_devnode,
+};
static dev_t hfi1_dev;
int hfi1_cdev_init(int minor, const char *name,
@@ -37,9 +58,9 @@ int hfi1_cdev_init(int minor, const char *name,
}
if (user_accessible)
- device = device_create(user_class, NULL, dev, NULL, "%s", name);
+ device = device_create(&user_class, NULL, dev, NULL, "%s", name);
else
- device = device_create(class, NULL, dev, NULL, "%s", name);
+ device = device_create(&class, NULL, dev, NULL, "%s", name);
if (IS_ERR(device)) {
ret = PTR_ERR(device);
@@ -72,26 +93,6 @@ const char *class_name(void)
return hfi1_class_name;
}
-static char *hfi1_devnode(const struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0600;
- return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
-}
-
-static const char *hfi1_class_name_user = "hfi1_user";
-static const char *class_name_user(void)
-{
- return hfi1_class_name_user;
-}
-
-static char *hfi1_user_devnode(const struct device *dev, umode_t *mode)
-{
- if (mode)
- *mode = 0666;
- return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
-}
-
int __init dev_init(void)
{
int ret;
@@ -102,27 +103,21 @@ int __init dev_init(void)
goto done;
}
- class = class_create(class_name());
- if (IS_ERR(class)) {
- ret = PTR_ERR(class);
+ ret = class_register(&class);
+ if (ret) {
pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
goto done;
}
- class->devnode = hfi1_devnode;
- user_class = class_create(class_name_user());
- if (IS_ERR(user_class)) {
- ret = PTR_ERR(user_class);
+ ret = class_register(&user_class);
+ if (ret) {
pr_err("Could not create device class for user accessible files (err %d)\n",
-ret);
- class_destroy(class);
- class = NULL;
- user_class = NULL;
+ class_unregister(&class);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
goto done;
}
- user_class->devnode = hfi1_user_devnode;
done:
return ret;
@@ -130,11 +125,8 @@ done:
void dev_cleanup(void)
{
- class_destroy(class);
- class = NULL;
-
- class_destroy(user_class);
- user_class = NULL;
+ class_unregister(&class);
+ class_unregister(&user_class);
unregister_chrdev_region(hfi1_dev, HFI1_NMINORS);
}
diff --git a/drivers/infiniband/hw/hfi1/device.h b/drivers/infiniband/hw/hfi1/device.h
index c371b5612b6b..a91bea426ba5 100644
--- a/drivers/infiniband/hw/hfi1/device.h
+++ b/drivers/infiniband/hw/hfi1/device.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index f4492fa407e0..37a6794885d3 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2020 Intel Corporation.
* Copyright(c) 2021 Cornelis Networks.
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 7741a1d69097..9ed05e10020e 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
@@ -112,7 +112,7 @@ int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind,
unsigned long *size, void **return_data)
{
char prefix_name[64];
- char name[64];
+ char name[128];
int result;
/* create a common prefix */
diff --git a/drivers/infiniband/hw/hfi1/efivar.h b/drivers/infiniband/hw/hfi1/efivar.h
index 5ebc2f07bbef..882240929a4b 100644
--- a/drivers/infiniband/hw/hfi1/efivar.h
+++ b/drivers/infiniband/hw/hfi1/efivar.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index fbe958107457..f93a160d8d05 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index 772c516366ce..51648d1afcf1 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index b86f697c7956..879a66edbded 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index 41f7fe5d1839..141413d9fbc7 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
index 3af77a0840ab..ec9ee59fcf0c 100644
--- a/drivers/infiniband/hw/hfi1/fault.c
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2018 Intel Corporation.
*/
@@ -203,7 +203,6 @@ static const struct file_operations __fault_opcodes_fops = {
.open = fault_opcodes_open,
.read = fault_opcodes_read,
.write = fault_opcodes_write,
- .llseek = no_llseek
};
void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h
index 7fe7f47219db..51adafe240d7 100644
--- a/drivers/infiniband/hw/hfi1/fault.h
+++ b/drivers/infiniband/hw/hfi1/fault.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index a5ab22cedd41..503abec709c9 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2015-2020 Intel Corporation.
@@ -267,7 +267,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
if (!HFI1_CAP_IS_KSET(SDMA))
return -EINVAL;
- if (!from->user_backed)
+ if (!user_backed_iter(from))
return -EINVAL;
idx = srcu_read_lock(&fd->pq_srcu);
pq = srcu_dereference(fd->pq, &fd->pq_srcu);
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 0c0cef5b1e0e..3c228aeaaf81 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 7fa9cd39254f..eb38f81aeeb1 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,6 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2020 Cornelis Networks, Inc.
+ * Copyright(c) 2020-2023 Cornelis Networks, Inc.
* Copyright(c) 2015-2020 Intel Corporation.
*/
@@ -1378,8 +1378,6 @@ struct hfi1_devdata {
#define PT_INVALID 3
struct tid_rb_node;
-struct mmu_rb_node;
-struct mmu_rb_handler;
/* Private data for file operations */
struct hfi1_filedata {
@@ -2427,7 +2425,7 @@ static inline bool hfi1_need_drop(struct hfi1_devdata *dd)
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
-#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
+#define DD_DEV_ASSIGN(dd) __assign_str(dev)
static inline void hfi1_update_ah_attr(struct ib_device *ibdev,
struct rdma_ah_attr *attr)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 6de37c5d7d27..cbac4a442d9e 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
* Copyright(c) 2021 Cornelis Networks.
@@ -1027,7 +1027,6 @@ static void shutdown_device(struct hfi1_devdata *dd)
msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
- ppd = dd->pport + pidx;
for (i = 0; i < dd->num_rcv_contexts; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
hfi1_rcvctrl(dd, HFI1_RCVCTRL_TAILUPD_DIS |
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 70376e6dba33..3737f632d62a 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 4df0700cbaba..49805a24bb0a 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
index 5d814afdf7f3..7c9d5203002b 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -21,36 +21,25 @@ static int hfi1_ipoib_dev_init(struct net_device *dev)
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
int ret;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
ret = priv->netdev_ops->ndo_init(dev);
if (ret)
- goto out_ret;
+ return ret;
ret = hfi1_netdev_add_data(priv->dd,
qpn_from_mac(priv->netdev->dev_addr),
dev);
if (ret < 0) {
priv->netdev_ops->ndo_uninit(dev);
- goto out_ret;
+ return ret;
}
return 0;
-out_ret:
- free_percpu(dev->tstats);
- dev->tstats = NULL;
- return ret;
}
static void hfi1_ipoib_dev_uninit(struct net_device *dev)
{
struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
- free_percpu(dev->tstats);
- dev->tstats = NULL;
-
hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr));
priv->netdev_ops->ndo_uninit(dev);
@@ -107,7 +96,6 @@ static const struct net_device_ops hfi1_ipoib_netdev_ops = {
.ndo_uninit = hfi1_ipoib_dev_uninit,
.ndo_open = hfi1_ipoib_dev_open,
.ndo_stop = hfi1_ipoib_dev_stop,
- .ndo_get_stats64 = dev_get_tstats64,
};
static int hfi1_ipoib_mcast_attach(struct net_device *dev,
@@ -173,9 +161,6 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
hfi1_ipoib_txreq_deinit(priv);
hfi1_ipoib_rxq_deinit(priv->netdev);
-
- free_percpu(dev->tstats);
- dev->tstats = NULL;
}
static void hfi1_ipoib_set_id(struct net_device *dev, int id)
@@ -234,6 +219,7 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
netdev->needs_free_netdev = true;
+ netdev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
return 0;
}
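Setting pcpu_stat_type hands tstats management to the net core, which is why the manual netdev_alloc_pcpu_stats()/free_percpu() calls and the .ndo_get_stats64 hook disappear above; a sketch of the remaining driver obligation (assuming core-managed stats semantics):

static void example_setup(struct net_device *netdev)
{
	/* the core allocates netdev->tstats at register time, frees it
	 * on unregister, and folds the per-CPU counters inside
	 * dev_get_stats() without a driver callback */
	netdev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
}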
diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c
index e7d831330278..8b9cc55db59d 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c
@@ -217,7 +217,7 @@ static int hfi1_ipoib_build_ulp_payload(struct ipoib_txreq *tx,
ret = sdma_txadd_page(dd,
txreq,
skb_frag_page(frag),
- frag->bv_offset,
+ skb_frag_off(frag),
skb_frag_size(frag),
NULL, NULL, NULL);
if (unlikely(ret))
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index e5e783c45810..a9883295f4af 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 1d45a008fa7f..b6e3141253c4 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 7a51f7d73b61..67a5c410fb5e 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 - 2017 Intel Corporation.
@@ -40,7 +40,7 @@ static unsigned long mmu_node_last(struct mmu_rb_node *node)
}
int hfi1_mmu_rb_register(void *ops_arg,
- struct mmu_rb_ops *ops,
+ const struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler)
{
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 751dc3fe1e02..3fa50dd64db6 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2016 Intel Corporation.
@@ -42,7 +42,7 @@ struct mmu_rb_handler {
/* Begin on a new cacheline boundary here */
struct rb_root_cached root ____cacheline_aligned_in_smp;
void *ops_arg;
- struct mmu_rb_ops *ops;
+ const struct mmu_rb_ops *ops;
struct list_head lru_list;
struct work_struct del_work;
struct list_head del_list;
@@ -51,7 +51,7 @@ struct mmu_rb_handler {
};
int hfi1_mmu_rb_register(void *ops_arg,
- struct mmu_rb_ops *ops,
+ const struct mmu_rb_ops *ops,
struct workqueue_struct *wq,
struct mmu_rb_handler **handler);
void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler);
diff --git a/drivers/infiniband/hw/hfi1/netdev.h b/drivers/infiniband/hw/hfi1/netdev.h
index 8aa074670a9c..07c8f77c9181 100644
--- a/drivers/infiniband/hw/hfi1/netdev.h
+++ b/drivers/infiniband/hw/hfi1/netdev.h
@@ -49,7 +49,7 @@ struct hfi1_netdev_rxq {
* When 0 receive queues will be freed.
*/
struct hfi1_netdev_rx {
- struct net_device rx_napi;
+ struct net_device *rx_napi;
struct hfi1_devdata *dd;
struct hfi1_netdev_rxq *rxq;
int num_rx_q;
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 720d4c85c9c9..8608044203bb 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -188,7 +188,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
int i;
int rc;
struct hfi1_devdata *dd = rx->dd;
- struct net_device *dev = &rx->rx_napi;
+ struct net_device *dev = rx->rx_napi;
rx->num_rx_q = dd->num_netdev_contexts;
rx->rxq = kcalloc_node(rx->num_rx_q, sizeof(*rx->rxq),
@@ -360,7 +360,11 @@ int hfi1_alloc_rx(struct hfi1_devdata *dd)
if (!rx)
return -ENOMEM;
rx->dd = dd;
- init_dummy_netdev(&rx->rx_napi);
+ rx->rx_napi = alloc_netdev_dummy(0);
+ if (!rx->rx_napi) {
+ kfree(rx);
+ return -ENOMEM;
+ }
xa_init(&rx->dev_tbl);
atomic_set(&rx->enabled, 0);
@@ -374,6 +378,7 @@ void hfi1_free_rx(struct hfi1_devdata *dd)
{
if (dd->netdev_rx) {
dd_dev_info(dd, "hfi1 rx freed\n");
+ free_netdev(dd->netdev_rx->rx_napi);
kfree(dd->netdev_rx);
dd->netdev_rx = NULL;
}
diff --git a/drivers/infiniband/hw/hfi1/opa_compat.h b/drivers/infiniband/hw/hfi1/opa_compat.h
index 31570b0cfd18..49f2da677b03 100644
--- a/drivers/infiniband/hw/hfi1/opa_compat.h
+++ b/drivers/infiniband/hw/hfi1/opa_compat.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 08732e1ac966..7133964749f8 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,8 +1,9 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2019 Intel Corporation.
*/
+#include <linux/bitfield.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
@@ -210,12 +211,6 @@ static u32 extract_speed(u16 linkstat)
return speed;
}
-/* return the PCIe link speed from the given link status */
-static u32 extract_width(u16 linkstat)
-{
- return (linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT;
-}
-
/* read the link status and set dd->{lbus_width,lbus_speed,lbus_info} */
static void update_lbus_info(struct hfi1_devdata *dd)
{
@@ -228,7 +223,7 @@ static void update_lbus_info(struct hfi1_devdata *dd)
return;
}
- dd->lbus_width = extract_width(linkstat);
+ dd->lbus_width = FIELD_GET(PCI_EXP_LNKSTA_NLW, linkstat);
dd->lbus_speed = extract_speed(linkstat);
snprintf(dd->lbus_info, sizeof(dd->lbus_info),
"PCIe,%uMHz,x%u", dd->lbus_speed, dd->lbus_width);
@@ -1212,14 +1207,11 @@ retry:
(u32)lnkctl2);
/* only write to parent if target is not as high as ours */
if ((lnkctl2 & PCI_EXP_LNKCTL2_TLS) < target_vector) {
- lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
- lnkctl2 |= target_vector;
- dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- ret = pcie_capability_write_word(parent,
- PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_clear_and_set_word(parent, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS,
+ target_vector);
if (ret) {
- dd_dev_err(dd, "Unable to write to PCI config\n");
+ dd_dev_err(dd, "Unable to change parent PCI target speed\n");
return_error = 1;
goto done;
}
@@ -1228,22 +1220,11 @@ retry:
}
dd_dev_info(dd, "%s: setting target link speed\n", __func__);
- ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKCTL2, &lnkctl2);
- if (ret) {
- dd_dev_err(dd, "Unable to read from PCI config\n");
- return_error = 1;
- goto done;
- }
-
- dd_dev_info(dd, "%s: ..old link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
- lnkctl2 |= target_vector;
- dd_dev_info(dd, "%s: ..new link control2: 0x%x\n", __func__,
- (u32)lnkctl2);
- ret = pcie_capability_write_word(dd->pcidev, PCI_EXP_LNKCTL2, lnkctl2);
+ ret = pcie_capability_clear_and_set_word(dd->pcidev, PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS,
+ target_vector);
if (ret) {
- dd_dev_err(dd, "Unable to write to PCI config\n");
+ dd_dev_err(dd, "Unable to change device PCI target speed\n");
return_error = 1;
goto done;
}
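pcie_capability_clear_and_set_word() collapses the read-modify-write sequence the removed lines open-coded; an equivalent sketch for comparison (set_target_link_speed is a hypothetical wrapper):

static int set_target_link_speed(struct pci_dev *pdev, u16 target_vector)
{
	u16 lnkctl2;
	int ret;

	ret = pcie_capability_read_word(pdev, PCI_EXP_LNKCTL2, &lnkctl2);
	if (ret)
		return ret;

	lnkctl2 &= ~PCI_EXP_LNKCTL2_TLS;
	lnkctl2 |= target_vector;
	return pcie_capability_write_word(pdev, PCI_EXP_LNKCTL2, lnkctl2);
}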
diff --git a/drivers/infiniband/hw/hfi1/pin_system.c b/drivers/infiniband/hw/hfi1/pin_system.c
new file mode 100644
index 000000000000..cce56134519b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/pin_system.c
@@ -0,0 +1,474 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
+ */
+
+#include <linux/types.h>
+
+#include "hfi.h"
+#include "common.h"
+#include "device.h"
+#include "pinning.h"
+#include "mmu_rb.h"
+#include "user_sdma.h"
+#include "trace.h"
+
+struct sdma_mmu_node {
+ struct mmu_rb_node rb;
+ struct hfi1_user_sdma_pkt_q *pq;
+ struct page **pages;
+ unsigned int npages;
+};
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len);
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2,
+ bool *stop);
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
+
+static const struct mmu_rb_ops sdma_rb_ops = {
+ .filter = sdma_rb_filter,
+ .evict = sdma_rb_evict,
+ .remove = sdma_rb_remove,
+};
+
+int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq)
+{
+ struct hfi1_devdata *dd = pq->dd;
+ int ret;
+
+ ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
+ &pq->handler);
+ if (ret)
+ dd_dev_err(dd,
+ "[%u:%u] Failed to register system memory DMA support with MMU: %d\n",
+ pq->ctxt, pq->subctxt, ret);
+ return ret;
+}
+
+void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq)
+{
+ if (pq->handler)
+ hfi1_mmu_rb_unregister(pq->handler);
+}
+
+static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
+{
+ struct evict_data evict_data;
+
+ evict_data.cleared = 0;
+ evict_data.target = npages;
+ hfi1_mmu_rb_evict(pq->handler, &evict_data);
+ return evict_data.cleared;
+}
+
+static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
+ unsigned int start, unsigned int npages)
+{
+ hfi1_release_user_pages(mm, pages + start, npages, false);
+ kfree(pages);
+}
+
+static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
+{
+ return node->rb.handler->mn.mm;
+}
+
+static void free_system_node(struct sdma_mmu_node *node)
+{
+ if (node->npages) {
+ unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
+ node->npages);
+ atomic_sub(node->npages, &node->pq->n_locked);
+ }
+ kfree(node);
+}
+
+/*
+ * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
+ * from being released until after rb_node is assigned to an SDMA descriptor
+ * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
+ * virtual address range for rb_node is invalidated between now and then.
+ */
+static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
+ unsigned long start,
+ unsigned long end)
+{
+ struct mmu_rb_node *rb_node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&handler->lock, flags);
+ rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start));
+ if (!rb_node) {
+ spin_unlock_irqrestore(&handler->lock, flags);
+ return NULL;
+ }
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&rb_node->refcount);
+ spin_unlock_irqrestore(&handler->lock, flags);
+
+ return container_of(rb_node, struct sdma_mmu_node, rb);
+}
+
+static int pin_system_pages(struct user_sdma_request *req,
+ uintptr_t start_address, size_t length,
+ struct sdma_mmu_node *node, int npages)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ int pinned, cleared;
+ struct page **pages;
+
+ pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return -ENOMEM;
+
+retry:
+ if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked),
+ npages)) {
+ SDMA_DBG(req, "Evicting: nlocked %u npages %u",
+ atomic_read(&pq->n_locked), npages);
+ cleared = sdma_cache_evict(pq, npages);
+ if (cleared >= npages)
+ goto retry;
+ }
+
+ SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u",
+ start_address, node->npages, npages);
+ pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0,
+ pages);
+
+ if (pinned < 0) {
+ kfree(pages);
+ SDMA_DBG(req, "pinned %d", pinned);
+ return pinned;
+ }
+ if (pinned != npages) {
+ unpin_vector_pages(current->mm, pages, node->npages, pinned);
+ SDMA_DBG(req, "npages %u pinned %d", npages, pinned);
+ return -EFAULT;
+ }
+ node->rb.addr = start_address;
+ node->rb.len = length;
+ node->pages = pages;
+ node->npages = npages;
+ atomic_add(pinned, &pq->n_locked);
+ SDMA_DBG(req, "done. pinned %d", pinned);
+ return 0;
+}
+
+/*
+ * kref refcount on *node_p will be 2 on successful addition: one kref from
+ * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
+ * released until after *node_p is assigned to an SDMA descriptor (struct
+ * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
+ * address range for *node_p is invalidated between now and then.
+ */
+static int add_system_pinning(struct user_sdma_request *req,
+ struct sdma_mmu_node **node_p,
+ unsigned long start, unsigned long len)
+
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ struct sdma_mmu_node *node;
+ int ret;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return -ENOMEM;
+
+ /* First kref "moves" to mmu_rb_handler */
+ kref_init(&node->rb.refcount);
+
+ /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
+ kref_get(&node->rb.refcount);
+
+ node->pq = pq;
+ ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
+ if (ret == 0) {
+ ret = hfi1_mmu_rb_insert(pq->handler, &node->rb);
+ if (ret)
+ free_system_node(node);
+ else
+ *node_p = node;
+
+ return ret;
+ }
+
+ kfree(node);
+ return ret;
+}
+
+static int get_system_cache_entry(struct user_sdma_request *req,
+ struct sdma_mmu_node **node_p,
+ size_t req_start, size_t req_len)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);
+ u64 end = PFN_ALIGN(req_start + req_len);
+ int ret;
+
+ if ((end - start) == 0) {
+ SDMA_DBG(req,
+ "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx",
+ req_start, req_len, start, end);
+ return -EINVAL;
+ }
+
+ SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len);
+
+ while (1) {
+ struct sdma_mmu_node *node =
+ find_system_node(pq->handler, start, end);
+ u64 prepend_len = 0;
+
+ SDMA_DBG(req, "node %p start %llx end %llx", node, start, end);
+ if (!node) {
+ ret = add_system_pinning(req, node_p, start,
+ end - start);
+ if (ret == -EEXIST) {
+ /*
+ * Another execution context has inserted a
+ * conflicting entry first.
+ */
+ continue;
+ }
+ return ret;
+ }
+
+ if (node->rb.addr <= start) {
+ /*
+ * This entry covers at least part of the region. If it doesn't extend
+ * to the end, then this will be called again for the next segment.
+ */
+ *node_p = node;
+ return 0;
+ }
+
+ SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
+ node->rb.addr, kref_read(&node->rb.refcount));
+ prepend_len = node->rb.addr - start;
+
+ /*
+ * This node will not be returned, instead a new node
+ * will be. So release the reference.
+ */
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+
+ /* Prepend a node to cover the beginning of the allocation */
+ ret = add_system_pinning(req, node_p, start, prepend_len);
+ if (ret == -EEXIST) {
+ /* Another execution context has inserted a conflicting entry first. */
+ continue;
+ }
+ return ret;
+ }
+}
+
+static void sdma_mmu_rb_node_get(void *ctx)
+{
+ struct mmu_rb_node *node = ctx;
+
+ kref_get(&node->refcount);
+}
+
+static void sdma_mmu_rb_node_put(void *ctx)
+{
+ struct sdma_mmu_node *node = ctx;
+
+ kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
+}
+
+static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct sdma_mmu_node *cache_entry,
+ size_t start,
+ size_t from_this_cache_entry)
+{
+ struct hfi1_user_sdma_pkt_q *pq = req->pq;
+ unsigned int page_offset;
+ unsigned int from_this_page;
+ size_t page_index;
+ void *ctx;
+ int ret;
+
+ /*
+ * Because the cache may be more fragmented than the memory that is being accessed,
+ * it's not strictly necessary to have a descriptor per cache entry.
+ */
+
+ while (from_this_cache_entry) {
+ page_index = PFN_DOWN(start - cache_entry->rb.addr);
+
+ if (page_index >= cache_entry->npages) {
+ SDMA_DBG(req,
+ "Request for page_index %zu >= cache_entry->npages %u",
+ page_index, cache_entry->npages);
+ return -EINVAL;
+ }
+
+ page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
+ from_this_page = PAGE_SIZE - page_offset;
+
+ if (from_this_page < from_this_cache_entry) {
+ ctx = NULL;
+ } else {
+ /*
+ * In the case they are equal the next line has no practical effect,
+ * but it's better to do a register to register copy than a conditional
+ * branch.
+ */
+ from_this_page = from_this_cache_entry;
+ ctx = cache_entry;
+ }
+
+ ret = sdma_txadd_page(pq->dd, &tx->txreq,
+ cache_entry->pages[page_index],
+ page_offset, from_this_page,
+ ctx,
+ sdma_mmu_rb_node_get,
+ sdma_mmu_rb_node_put);
+ if (ret) {
+ /*
+ * When there's a failure, the entire request is freed by
+ * user_sdma_send_pkts().
+ */
+ SDMA_DBG(req,
+ "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u",
+ ret, page_index, page_offset, from_this_page);
+ return ret;
+ }
+ start += from_this_page;
+ from_this_cache_entry -= from_this_page;
+ }
+ return 0;
+}
+
+static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ size_t from_this_iovec)
+{
+ while (from_this_iovec > 0) {
+ struct sdma_mmu_node *cache_entry;
+ size_t from_this_cache_entry;
+ size_t start;
+ int ret;
+
+ start = (uintptr_t)iovec->iov.iov_base + iovec->offset;
+ ret = get_system_cache_entry(req, &cache_entry, start,
+ from_this_iovec);
+ if (ret) {
+ SDMA_DBG(req, "pin system segment failed %d", ret);
+ return ret;
+ }
+
+ from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr);
+ if (from_this_cache_entry > from_this_iovec)
+ from_this_cache_entry = from_this_iovec;
+
+ ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
+ from_this_cache_entry);
+
+ /*
+ * Done adding cache_entry to zero or more sdma_desc. Can
+ * kref_put() the "safety" kref taken under
+ * get_system_cache_entry().
+ */
+ kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
+
+ if (ret) {
+ SDMA_DBG(req, "add system segment failed %d", ret);
+ return ret;
+ }
+
+ iovec->offset += from_this_cache_entry;
+ from_this_iovec -= from_this_cache_entry;
+ }
+
+ return 0;
+}
+
+/*
+ * Add up to pkt_data_remaining bytes to the txreq, starting at the current
+ * offset in the given iovec entry and continuing until all data has been added
+ * to the packet or the iovec entry type changes.
+ *
+ * On success, prior to returning, adjust pkt_data_remaining, req->iov_idx, and
+ * the offset value in req->iov[req->iov_idx] to reflect the data that has been
+ * consumed.
+ */
+int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ u32 *pkt_data_remaining)
+{
+ size_t remaining_to_add = *pkt_data_remaining;
+ /*
+ * Walk through iovec entries, ensure the associated pages
+ * are pinned and mapped, add data to the packet until no more
+ * data remains to be added or the iovec entry type changes.
+ */
+ while (remaining_to_add > 0) {
+ struct user_sdma_iovec *cur_iovec;
+ size_t from_this_iovec;
+ int ret;
+
+ cur_iovec = iovec;
+ from_this_iovec = iovec->iov.iov_len - iovec->offset;
+
+ if (from_this_iovec > remaining_to_add) {
+ from_this_iovec = remaining_to_add;
+ } else {
+ /* The current iovec entry will be consumed by this pass. */
+ req->iov_idx++;
+ iovec++;
+ }
+
+ ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
+ from_this_iovec);
+ if (ret)
+ return ret;
+
+ remaining_to_add -= from_this_iovec;
+ }
+ *pkt_data_remaining = remaining_to_add;
+
+ return 0;
+}
+
+static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
+ unsigned long len)
+{
+ return node->addr == addr;
+}
+
+/*
+ * Return 1 to remove the node from the rb tree and call the remove op.
+ *
+ * Called with the rb tree lock held.
+ */
+static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
+ void *evict_arg, bool *stop)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+ struct evict_data *evict_data = evict_arg;
+
+ /* this node will be evicted, add its pages to our count */
+ evict_data->cleared += node->npages;
+
+ /* have enough pages been cleared? */
+ if (evict_data->cleared >= evict_data->target)
+ *stop = true;
+
+ return 1; /* remove this node */
+}
+
+static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
+{
+ struct sdma_mmu_node *node =
+ container_of(mnode, struct sdma_mmu_node, rb);
+
+ free_system_node(node);
+}
diff --git a/drivers/infiniband/hw/hfi1/pinning.h b/drivers/infiniband/hw/hfi1/pinning.h
new file mode 100644
index 000000000000..a814a3aa9654
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/pinning.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
+ * Copyright(c) 2023 Cornelis Networks, Inc.
+ */
+#ifndef _HFI1_PINNING_H
+#define _HFI1_PINNING_H
+
+struct hfi1_user_sdma_pkt_q;
+struct user_sdma_request;
+struct user_sdma_txreq;
+struct user_sdma_iovec;
+
+int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
+void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq);
+int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req,
+ struct user_sdma_txreq *tx,
+ struct user_sdma_iovec *iovec,
+ u32 *pkt_data_remaining);
+
+#endif /* _HFI1_PINNING_H */
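Taken together, the header exposes a three-call lifecycle, and the
user_sdma.c hunks later in this diff wire it up: hfi1_init_system_pinning()
replaces the open-coded hfi1_mmu_rb_register() call at packet-queue
allocation, hfi1_add_pages_to_sdma_packet() replaces
add_system_pages_to_sdma_packet() in the send path, and
hfi1_free_system_pinning() replaces the conditional
hfi1_mmu_rb_unregister() at teardown.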
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 62e7dc9bea7b..5a91cbda4aee 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2018 Intel Corporation.
*/
@@ -1893,9 +1893,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0);
}
/* build new map */
- newmap = kzalloc(sizeof(*newmap) +
- roundup_pow_of_two(num_vls) *
- sizeof(struct pio_map_elem *),
+ newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)),
GFP_KERNEL);
if (!newmap)
goto bail;
@@ -1910,9 +1908,8 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
int sz = roundup_pow_of_two(vl_scontexts[i]);
/* only allocate once */
- newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) +
- sz * sizeof(struct
- send_context *),
+ newmap->map[i] = kzalloc(struct_size(newmap->map[i],
+ ksc, sz),
GFP_KERNEL);
if (!newmap->map[i])
goto bail;
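Both kzalloc() hunks above swap open-coded sizeof(*p) + n * sizeof(elem)
arithmetic for struct_size() from <linux/overflow.h>, which saturates to
SIZE_MAX on multiply overflow so the allocation fails cleanly instead of
being undersized. The idiom, sketched with a hypothetical struct (assumes
<linux/slab.h> and <linux/overflow.h>):

        struct send_context;                    /* opaque here */

        struct pio_map_like {
                u32 mask;
                struct send_context *ksc[];     /* flexible array member */
        };

        static struct pio_map_like *alloc_map(size_t n)
        {
                struct pio_map_like *m;

                /* struct_size(m, ksc, n) == sizeof(*m) + n * sizeof(m->ksc[0]),
                 * except an overflowing n yields SIZE_MAX and kzalloc() fails.
                 */
                m = kzalloc(struct_size(m, ksc, n), GFP_KERNEL);
                return m;
        }
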
@@ -2089,7 +2086,7 @@ int init_credit_return(struct hfi1_devdata *dd)
"Unable to allocate credit return DMA range for NUMA %d\n",
i);
ret = -ENOMEM;
- goto done;
+ goto free_cr_base;
}
}
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2097,6 +2094,10 @@ int init_credit_return(struct hfi1_devdata *dd)
ret = 0;
done:
return ret;
+
+free_cr_base:
+ free_credit_return(dd);
+ goto done;
}
void free_credit_return(struct hfi1_devdata *dd)
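This hunk fixes a leak: on a mid-loop allocation failure,
init_credit_return() used to return through done with the credit-return
buffers of earlier NUMA nodes still allocated; it now unwinds them via
free_credit_return() first. The underlying idiom is the usual kernel goto
ladder; a hedged sketch with hypothetical resources:

        static int setup(void)
        {
                int ret;

                ret = alloc_a();
                if (ret)
                        goto out;       /* nothing to undo yet */

                ret = alloc_b();
                if (ret)
                        goto free_a;    /* undo in reverse order */

                return 0;

        free_a:
                free_a();
        out:
                return ret;
        }

The patch's variant frees and then jumps backward to the existing done
label, which is equivalent; keeping the unwind labels after the success
return keeps the fast path straight-line.
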
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index ea714008f261..d07cc6ea7c63 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015-2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 7690f996d5e3..80fee812a930 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 54cbd8f1a6c1..7bd0e9b6cb50 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index 1d51dca1bc30..0631f9bf3a89 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 6193d48b2c1f..f3d8c0c193ac 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index cdf87bc6ad94..870ff1a6e5c4 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 19d7887a4f10..52cce1c8b76a 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index 8f14111eaa47..df1389bad86b 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index acd2b273ea7d..b36242c9d42c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index b0151b7293f5..aafa4e03b179 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 26c62162759b..b67d23b1f286 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
@@ -3158,7 +3158,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
int rval = 0;
- if ((unlikely(tx->num_desc + 1 == tx->desc_limit))) {
+	if (unlikely(tx->num_desc == tx->desc_limit)) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
__sdma_txclean(dd, tx);
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 7fdebab202c4..d77246b48434 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/sdma_txreq.h b/drivers/infiniband/hw/hfi1/sdma_txreq.h
index 85ae7293c274..5782166d984c 100644
--- a/drivers/infiniband/hw/hfi1/sdma_txreq.h
+++ b/drivers/infiniband/hw/hfi1/sdma_txreq.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 3b3407dc7c21..d62ba5fdd80c 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index 18b05ffb415a..c465966a1d9c 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -315,7 +315,7 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit)
* This routine returns the receive context associated
* with a qp's qpn.
*
- * Returns the context.
+ * Return: the context.
*/
static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
struct rvt_qp *qp)
@@ -710,7 +710,7 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp)
* The exp_lock must be held.
*
* Return:
- * On success: a value postive value between 0 and RXE_NUM_TID_FLOWS - 1
+ * On success: a value between 0 and RXE_NUM_TID_FLOWS - 1
* On failure: -EAGAIN
*/
static int kern_reserve_flow(struct hfi1_ctxtdata *rcd, int last)
@@ -1007,7 +1007,7 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list,
* pages are tested two at a time: i and i + 1 for contiguous
* pages, and i - 1 and i for contiguous pages.
*
- * If any condition is false, any accumlated pages are flushed and
+ * If any condition is false, any accumulated pages are flushed and
* v0,v1 are emitted as separate PAGE_SIZE pagesets
*
* Otherwise, the current 8k is totaled for a future flush.
@@ -1434,7 +1434,7 @@ static void kern_program_rcvarray(struct tid_rdma_flow *flow)
* (5) computes a tidarray with formatted TID entries which can be sent
* to the sender
* (6) Reserves and programs HW flows.
- * (7) It also manages queing the QP when TID/flow resources are not
+ * (7) It also manages queueing the QP when TID/flow resources are not
* available.
*
* @req points to struct tid_rdma_request of which the segments are a part. The
@@ -1604,7 +1604,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req)
}
/**
- * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information
+ * hfi1_kern_exp_rcv_free_flows - free previously allocated flow information
* @req: the tid rdma request to be cleaned
*/
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req)
@@ -2055,7 +2055,7 @@ static int tid_rdma_rcv_error(struct hfi1_packet *packet,
* req->clear_tail is advanced). However, when an earlier
* request is received, this request will not be complete any
* more (qp->s_tail_ack_queue is moved back, see below).
- * Consequently, we need to update the TID flow info everytime
+ * Consequently, we need to update the TID flow info every time
* a duplicate request is received.
*/
bth0 = be32_to_cpu(ohdr->bth[0]);
@@ -2219,7 +2219,7 @@ void hfi1_rc_rcv_tid_rdma_read_req(struct hfi1_packet *packet)
/*
* 1. Verify TID RDMA READ REQ as per IB_OPCODE_RC_RDMA_READ
* (see hfi1_rc_rcv())
- * 2. Put TID RDMA READ REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA READ REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Initialize struct tid_rdma_flow info;
* - Copy TID entries;
@@ -2439,7 +2439,7 @@ find_tid_request(struct rvt_qp *qp, u32 psn, enum ib_wr_opcode opcode)
void hfi1_rc_rcv_tid_rdma_read_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA READ RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA READ RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -3649,7 +3649,7 @@ void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet)
* 1. Verify TID RDMA WRITE REQ as per IB_OPCODE_RC_RDMA_WRITE_FIRST
* (see hfi1_rc_rcv())
* - Don't allow 0-length requests.
- * 2. Put TID RDMA WRITE REQ into the response queueu (s_ack_queue)
+ * 2. Put TID RDMA WRITE REQ into the response queue (s_ack_queue)
* - Setup struct tid_rdma_req with request info
* - Prepare struct tid_rdma_flow array?
* 3. Set the qp->s_ack_state as state diagram in design doc.
@@ -4026,7 +4026,7 @@ unlock_r_lock:
void hfi1_rc_rcv_tid_rdma_write_resp(struct hfi1_packet *packet)
{
- /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requestor side */
+ /* HANDLER FOR TID RDMA WRITE RESPONSE packet (Requester side) */
/*
* 1. Find matching SWQE
@@ -5440,8 +5440,9 @@ static bool _hfi1_schedule_tid_send(struct rvt_qp *qp)
* the two state machines can step on each other with respect to the
* RVT_S_BUSY flag.
* Therefore, a modified test is used.
- * @return true if the second leg is scheduled;
- * false if the second leg is not scheduled.
+ *
+ * Return: %true if the second leg is scheduled;
+ * %false if the second leg is not scheduled.
*/
bool hfi1_schedule_tid_send(struct rvt_qp *qp)
{
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 8302469582c6..10290ebf76b2 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 31e027c5a0c0..bb3cc006bacd 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 1858eaf33b18..76c41bd79071 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 489395bfb5b3..58304b91380f 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
@@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(hfi1_trace_template,
TP_STRUCT__entry(__string(function, function)
__vstring(msg, vaf->fmt, vaf->va)
),
- TP_fast_assign(__assign_str(function, function);
+ TP_fast_assign(__assign_str(function);
__assign_vstr(msg, vaf->fmt, vaf->va);
),
TP_printk("(%s) %s",
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index b33f8f575f8a..b21356abc9ec 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index 742675fa7576..8dc46b6891df 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace_mmu.h b/drivers/infiniband/hw/hfi1/trace_mmu.h
index 82cc12aa3fb8..5a9dfd85e7f5 100644
--- a/drivers/infiniband/hw/hfi1/trace_mmu.h
+++ b/drivers/infiniband/hw/hfi1/trace_mmu.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace_rc.h b/drivers/infiniband/hw/hfi1/trace_rc.h
index 7c3a1c77536d..fa254f9b9c42 100644
--- a/drivers/infiniband/hw/hfi1/trace_rc.h
+++ b/drivers/infiniband/hw/hfi1/trace_rc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015, 2016, 2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 0da22f9bc75e..8d5e12fe88a5 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
@@ -90,7 +90,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TP_fast_assign(
__entry->ctxt = ctxt;
__entry->subctxt = subctxt;
- __assign_str(type, type);
+ __assign_str(type);
__entry->start = start;
__entry->end = end;
),
diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h
index d129b8195959..e358f5b885fa 100644
--- a/drivers/infiniband/hw/hfi1/trace_tid.h
+++ b/drivers/infiniband/hw/hfi1/trace_tid.h
@@ -358,7 +358,7 @@ DECLARE_EVENT_CLASS(/* msg */
),
TP_fast_assign(/* assign */
__entry->qpn = qp ? qp->ibqp.qp_num : 0;
- __assign_str(msg, msg);
+ __assign_str(msg);
__entry->more = more;
),
TP_printk(/* print */
@@ -651,7 +651,7 @@ DECLARE_EVENT_CLASS(/* tid_node */
TP_fast_assign(/* assign */
DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device));
__entry->qpn = qp->ibqp.qp_num;
- __assign_str(msg, msg);
+ __assign_str(msg);
__entry->index = index;
__entry->base = base;
__entry->map = map;
diff --git a/drivers/infiniband/hw/hfi1/trace_tx.h b/drivers/infiniband/hw/hfi1/trace_tx.h
index ed1b9e1e4b17..c0ba6b0a2c4e 100644
--- a/drivers/infiniband/hw/hfi1/trace_tx.h
+++ b/drivers/infiniband/hw/hfi1/trace_tx.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2017 Intel Corporation.
*/
@@ -740,8 +740,8 @@ TRACE_EVENT(hfi1_sdma_state,
__string(newstate, nstate)
),
TP_fast_assign(DD_DEV_ASSIGN(sde->dd);
- __assign_str(curstate, cstate);
- __assign_str(newstate, nstate);
+ __assign_str(curstate);
+ __assign_str(newstate);
),
TP_printk("[%s] current state %s new state %s",
__get_str(dev),
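These trace hunks follow a tracing-core API change: __assign_str() now takes
only the destination field and picks up the source from the matching
__string() declaration, so the duplicated second argument is dropped. A
minimal event using the new form (the event itself is hypothetical):

        TRACE_EVENT(demo_msg,
                TP_PROTO(const char *msg),
                TP_ARGS(msg),
                TP_STRUCT__entry(
                        __string(msg, msg)      /* source bound here ... */
                ),
                TP_fast_assign(
                        __assign_str(msg);      /* ... so no second argument */
                ),
                TP_printk("%s", __get_str(msg))
        );
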
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 4e9d6aa39305..33d2c2a218e2 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index b64b9d7e08f0..89d1bae8f824 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2019 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 96058baf36ed..cf2d29098406 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2020 Cornelis Networks, Inc.
* Copyright(c) 2015-2018 Intel Corporation.
@@ -491,8 +491,8 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
if (unlikely(tinfo->tidcnt > fd->tid_used))
return -EINVAL;
- tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist),
- sizeof(tidinfo[0]) * tinfo->tidcnt);
+ tidinfo = memdup_array_user(u64_to_user_ptr(tinfo->tidlist),
+ tinfo->tidcnt, sizeof(tidinfo[0]));
if (IS_ERR(tidinfo))
return PTR_ERR(tidinfo);
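memdup_array_user() is the array-aware form of memdup_user(): it multiplies
the element count by the element size with check_mul_overflow() and returns
ERR_PTR(-EOVERFLOW) on wrap before copying, so a userspace-supplied count can
no longer undersize the allocation. The call pattern, sketched with
hypothetical types:

        struct tid_like {
                u32 tid;        /* illustrative element */
        };

        struct req_like {
                u64 uptr;       /* userspace pointer, carried as u64 */
                u32 count;      /* userspace-controlled element count */
        };

        static int copy_in(struct req_like *r, struct tid_like **out)
        {
                struct tid_like *t;

                t = memdup_array_user(u64_to_user_ptr(r->uptr),
                                      r->count, sizeof(*t));
                if (IS_ERR(t))
                        return PTR_ERR(t);      /* -EOVERFLOW on wrap */

                *out = t;
                return 0;
        }
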
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index f8ee997d0050..055726f7c139 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2020 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2017 Intel Corporation.
@@ -36,7 +36,7 @@ struct tid_rb_node {
dma_addr_t dma_addr;
bool freed;
unsigned int npages;
- struct page *pages[];
+ struct page *pages[] __counted_by(npages);
};
static inline int num_user_pages(unsigned long addr,
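__counted_by(npages) ties the flexible pages[] array to its length field,
letting CONFIG_FORTIFY_SOURCE and UBSAN bounds checking know the runtime
array size. The one ordering rule is that the counter must be assigned
before the array is indexed; a hedged allocation sketch:

        struct node_like {
                unsigned int npages;
                struct page *pages[] __counted_by(npages);
        };

        static struct node_like *node_alloc(unsigned int n)
        {
                struct node_like *node;

                node = kzalloc(struct_size(node, pages, n), GFP_KERNEL);
                if (!node)
                        return NULL;

                node->npages = n;       /* set before touching pages[] */
                return node;
        }
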
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 36aaedc65145..c77913a7920f 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015-2017 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 02bd62b857b7..b72625283fcf 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,6 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
- * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2020 - 2023 Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
@@ -60,22 +60,6 @@ static int defer_packet_queue(
uint seq,
bool pkts_sent);
static void activate_packet_queue(struct iowait *wait, int reason);
-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len);
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
- void *arg2, bool *stop);
-static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
-
-static struct mmu_rb_ops sdma_rb_ops = {
- .filter = sdma_rb_filter,
- .evict = sdma_rb_evict,
- .remove = sdma_rb_remove,
-};
-
-static int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
- struct user_sdma_txreq *tx,
- struct user_sdma_iovec *iovec,
- u32 *pkt_remaining);
static int defer_packet_queue(
struct sdma_engine *sde,
@@ -185,12 +169,9 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
cq->nentries = hfi1_sdma_comp_ring_size;
- ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq,
- &pq->handler);
- if (ret) {
- dd_dev_err(dd, "Failed to register with MMU %d", ret);
+ ret = hfi1_init_system_pinning(pq);
+ if (ret)
goto pq_mmu_fail;
- }
rcu_assign_pointer(fd->pq, pq);
fd->cq = cq;
@@ -249,8 +230,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
pq->wait,
!atomic_read(&pq->n_reqs));
kfree(pq->reqs);
- if (pq->handler)
- hfi1_mmu_rb_unregister(pq->handler);
+ hfi1_free_system_pinning(pq);
bitmap_free(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
flush_pq_iowait(pq);
@@ -514,8 +494,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
* equal to the pkt count. However, there is no way to
* tell at this point.
*/
- tmp = memdup_user(iovec[idx].iov_base,
- ntids * sizeof(*req->tids));
+ tmp = memdup_array_user(iovec[idx].iov_base,
+ ntids, sizeof(*req->tids));
if (IS_ERR(tmp)) {
ret = PTR_ERR(tmp);
SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
@@ -821,8 +801,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
req->tidoffset += datalen;
req->sent += datalen;
while (datalen) {
- ret = add_system_pages_to_sdma_packet(req, tx, iovec,
- &datalen);
+ ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec,
+ &datalen);
if (ret)
goto free_txreq;
iovec = &req->iovs[req->iov_idx];
@@ -860,17 +840,6 @@ free_tx:
return ret;
}
-static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
-{
- struct evict_data evict_data;
- struct mmu_rb_handler *handler = pq->handler;
-
- evict_data.cleared = 0;
- evict_data.target = npages;
- hfi1_mmu_rb_evict(handler, &evict_data);
- return evict_data.cleared;
-}
-
static int check_header_template(struct user_sdma_request *req,
struct hfi1_pkt_header *hdr, u32 lrhlen,
u32 datalen)
@@ -1253,401 +1222,3 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
idx, state, ret);
}
-
-static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
- unsigned int start, unsigned int npages)
-{
- hfi1_release_user_pages(mm, pages + start, npages, false);
- kfree(pages);
-}
-
-static void free_system_node(struct sdma_mmu_node *node)
-{
- if (node->npages) {
- unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0,
- node->npages);
- atomic_sub(node->npages, &node->pq->n_locked);
- }
- kfree(node);
-}
-
-/*
- * kref_get()'s an additional kref on the returned rb_node to prevent rb_node
- * from being released until after rb_node is assigned to an SDMA descriptor
- * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the
- * virtual address range for rb_node is invalidated between now and then.
- */
-static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler,
- unsigned long start,
- unsigned long end)
-{
- struct mmu_rb_node *rb_node;
- unsigned long flags;
-
- spin_lock_irqsave(&handler->lock, flags);
- rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start));
- if (!rb_node) {
- spin_unlock_irqrestore(&handler->lock, flags);
- return NULL;
- }
-
- /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
- kref_get(&rb_node->refcount);
- spin_unlock_irqrestore(&handler->lock, flags);
-
- return container_of(rb_node, struct sdma_mmu_node, rb);
-}
-
-static int pin_system_pages(struct user_sdma_request *req,
- uintptr_t start_address, size_t length,
- struct sdma_mmu_node *node, int npages)
-{
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- int pinned, cleared;
- struct page **pages;
-
- pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
-retry:
- if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked),
- npages)) {
- SDMA_DBG(req, "Evicting: nlocked %u npages %u",
- atomic_read(&pq->n_locked), npages);
- cleared = sdma_cache_evict(pq, npages);
- if (cleared >= npages)
- goto retry;
- }
-
- SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u",
- start_address, node->npages, npages);
- pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0,
- pages);
-
- if (pinned < 0) {
- kfree(pages);
- SDMA_DBG(req, "pinned %d", pinned);
- return pinned;
- }
- if (pinned != npages) {
- unpin_vector_pages(current->mm, pages, node->npages, pinned);
- SDMA_DBG(req, "npages %u pinned %d", npages, pinned);
- return -EFAULT;
- }
- node->rb.addr = start_address;
- node->rb.len = length;
- node->pages = pages;
- node->npages = npages;
- atomic_add(pinned, &pq->n_locked);
- SDMA_DBG(req, "done. pinned %d", pinned);
- return 0;
-}
-
-/*
- * kref refcount on *node_p will be 2 on successful addition: one kref from
- * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being
- * released until after *node_p is assigned to an SDMA descriptor (struct
- * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual
- * address range for *node_p is invalidated between now and then.
- */
-static int add_system_pinning(struct user_sdma_request *req,
- struct sdma_mmu_node **node_p,
- unsigned long start, unsigned long len)
-
-{
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- struct sdma_mmu_node *node;
- int ret;
-
- node = kzalloc(sizeof(*node), GFP_KERNEL);
- if (!node)
- return -ENOMEM;
-
- /* First kref "moves" to mmu_rb_handler */
- kref_init(&node->rb.refcount);
-
- /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */
- kref_get(&node->rb.refcount);
-
- node->pq = pq;
- ret = pin_system_pages(req, start, len, node, PFN_DOWN(len));
- if (ret == 0) {
- ret = hfi1_mmu_rb_insert(pq->handler, &node->rb);
- if (ret)
- free_system_node(node);
- else
- *node_p = node;
-
- return ret;
- }
-
- kfree(node);
- return ret;
-}
-
-static int get_system_cache_entry(struct user_sdma_request *req,
- struct sdma_mmu_node **node_p,
- size_t req_start, size_t req_len)
-{
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- u64 start = ALIGN_DOWN(req_start, PAGE_SIZE);
- u64 end = PFN_ALIGN(req_start + req_len);
- struct mmu_rb_handler *handler = pq->handler;
- int ret;
-
- if ((end - start) == 0) {
- SDMA_DBG(req,
- "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx",
- req_start, req_len, start, end);
- return -EINVAL;
- }
-
- SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len);
-
- while (1) {
- struct sdma_mmu_node *node =
- find_system_node(handler, start, end);
- u64 prepend_len = 0;
-
- SDMA_DBG(req, "node %p start %llx end %llu", node, start, end);
- if (!node) {
- ret = add_system_pinning(req, node_p, start,
- end - start);
- if (ret == -EEXIST) {
- /*
- * Another execution context has inserted a
- * conficting entry first.
- */
- continue;
- }
- return ret;
- }
-
- if (node->rb.addr <= start) {
- /*
- * This entry covers at least part of the region. If it doesn't extend
- * to the end, then this will be called again for the next segment.
- */
- *node_p = node;
- return 0;
- }
-
- SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d",
- node->rb.addr, kref_read(&node->rb.refcount));
- prepend_len = node->rb.addr - start;
-
- /*
- * This node will not be returned, instead a new node
- * will be. So release the reference.
- */
- kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
-
- /* Prepend a node to cover the beginning of the allocation */
- ret = add_system_pinning(req, node_p, start, prepend_len);
- if (ret == -EEXIST) {
- /* Another execution context has inserted a conficting entry first. */
- continue;
- }
- return ret;
- }
-}
-
-static void sdma_mmu_rb_node_get(void *ctx)
-{
- struct mmu_rb_node *node = ctx;
-
- kref_get(&node->refcount);
-}
-
-static void sdma_mmu_rb_node_put(void *ctx)
-{
- struct sdma_mmu_node *node = ctx;
-
- kref_put(&node->rb.refcount, hfi1_mmu_rb_release);
-}
-
-static int add_mapping_to_sdma_packet(struct user_sdma_request *req,
- struct user_sdma_txreq *tx,
- struct sdma_mmu_node *cache_entry,
- size_t start,
- size_t from_this_cache_entry)
-{
- struct hfi1_user_sdma_pkt_q *pq = req->pq;
- unsigned int page_offset;
- unsigned int from_this_page;
- size_t page_index;
- void *ctx;
- int ret;
-
- /*
- * Because the cache may be more fragmented than the memory that is being accessed,
- * it's not strictly necessary to have a descriptor per cache entry.
- */
-
- while (from_this_cache_entry) {
- page_index = PFN_DOWN(start - cache_entry->rb.addr);
-
- if (page_index >= cache_entry->npages) {
- SDMA_DBG(req,
- "Request for page_index %zu >= cache_entry->npages %u",
- page_index, cache_entry->npages);
- return -EINVAL;
- }
-
- page_offset = start - ALIGN_DOWN(start, PAGE_SIZE);
- from_this_page = PAGE_SIZE - page_offset;
-
- if (from_this_page < from_this_cache_entry) {
- ctx = NULL;
- } else {
- /*
- * In the case they are equal the next line has no practical effect,
- * but it's better to do a register to register copy than a conditional
- * branch.
- */
- from_this_page = from_this_cache_entry;
- ctx = cache_entry;
- }
-
- ret = sdma_txadd_page(pq->dd, &tx->txreq,
- cache_entry->pages[page_index],
- page_offset, from_this_page,
- ctx,
- sdma_mmu_rb_node_get,
- sdma_mmu_rb_node_put);
- if (ret) {
- /*
- * When there's a failure, the entire request is freed by
- * user_sdma_send_pkts().
- */
- SDMA_DBG(req,
- "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u",
- ret, page_index, page_offset, from_this_page);
- return ret;
- }
- start += from_this_page;
- from_this_cache_entry -= from_this_page;
- }
- return 0;
-}
-
-static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req,
- struct user_sdma_txreq *tx,
- struct user_sdma_iovec *iovec,
- size_t from_this_iovec)
-{
- while (from_this_iovec > 0) {
- struct sdma_mmu_node *cache_entry;
- size_t from_this_cache_entry;
- size_t start;
- int ret;
-
- start = (uintptr_t)iovec->iov.iov_base + iovec->offset;
- ret = get_system_cache_entry(req, &cache_entry, start,
- from_this_iovec);
- if (ret) {
- SDMA_DBG(req, "pin system segment failed %d", ret);
- return ret;
- }
-
- from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr);
- if (from_this_cache_entry > from_this_iovec)
- from_this_cache_entry = from_this_iovec;
-
- ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start,
- from_this_cache_entry);
-
- /*
- * Done adding cache_entry to zero or more sdma_desc. Can
- * kref_put() the "safety" kref taken under
- * get_system_cache_entry().
- */
- kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release);
-
- if (ret) {
- SDMA_DBG(req, "add system segment failed %d", ret);
- return ret;
- }
-
- iovec->offset += from_this_cache_entry;
- from_this_iovec -= from_this_cache_entry;
- }
-
- return 0;
-}
-
-static int add_system_pages_to_sdma_packet(struct user_sdma_request *req,
- struct user_sdma_txreq *tx,
- struct user_sdma_iovec *iovec,
- u32 *pkt_data_remaining)
-{
- size_t remaining_to_add = *pkt_data_remaining;
- /*
- * Walk through iovec entries, ensure the associated pages
- * are pinned and mapped, add data to the packet until no more
- * data remains to be added.
- */
- while (remaining_to_add > 0) {
- struct user_sdma_iovec *cur_iovec;
- size_t from_this_iovec;
- int ret;
-
- cur_iovec = iovec;
- from_this_iovec = iovec->iov.iov_len - iovec->offset;
-
- if (from_this_iovec > remaining_to_add) {
- from_this_iovec = remaining_to_add;
- } else {
- /* The current iovec entry will be consumed by this pass. */
- req->iov_idx++;
- iovec++;
- }
-
- ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec,
- from_this_iovec);
- if (ret)
- return ret;
-
- remaining_to_add -= from_this_iovec;
- }
- *pkt_data_remaining = remaining_to_add;
-
- return 0;
-}
-
-static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
- unsigned long len)
-{
- return (bool)(node->addr == addr);
-}
-
-/*
- * Return 1 to remove the node from the rb tree and call the remove op.
- *
- * Called with the rb tree lock held.
- */
-static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
- void *evict_arg, bool *stop)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
- struct evict_data *evict_data = evict_arg;
-
- /* this node will be evicted, add its pages to our count */
- evict_data->cleared += node->npages;
-
- /* have enough pages been cleared? */
- if (evict_data->cleared >= evict_data->target)
- *stop = true;
-
- return 1; /* remove this node */
-}
-
-static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
-{
- struct sdma_mmu_node *node =
- container_of(mnode, struct sdma_mmu_node, rb);
-
- free_system_node(node);
-}
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index 548347d4c5bc..8735524e3a9a 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,6 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
- * Copyright(c) 2020 - Cornelis Networks, Inc.
+ * Copyright(c) 2023 - Cornelis Networks, Inc.
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
#ifndef _HFI1_USER_SDMA_H
@@ -13,6 +13,8 @@
#include "iowait.h"
#include "user_exp_rcv.h"
#include "mmu_rb.h"
+#include "pinning.h"
+#include "sdma.h"
/* The maximum number of Data io vectors per message/request */
#define MAX_VECTORS_PER_REQ 8
@@ -101,13 +103,6 @@ struct hfi1_user_sdma_comp_q {
struct hfi1_sdma_comp_entry *comps;
};
-struct sdma_mmu_node {
- struct mmu_rb_node rb;
- struct hfi1_user_sdma_pkt_q *pq;
- struct page **pages;
- unsigned int npages;
-};
-
struct user_sdma_iovec {
struct list_head list;
struct iovec iov;
@@ -203,10 +198,4 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count);
-
-static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node)
-{
- return node->rb.handler->mn.mm;
-}
-
#endif /* _HFI1_USER_SDMA_H */
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index fbdcfecb1768..33af2196ef31 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2015 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 7f30f32b34dc..070e4f0babe8 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2015 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index cfecc81a27c7..822f0d05bac8 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2016 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 2a7e0ae892e9..56353c7676d0 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2016 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/vnic.h b/drivers/infiniband/hw/hfi1/vnic.h
index 34f03e7770be..bbafeb5fc0ec 100644
--- a/drivers/infiniband/hw/hfi1/vnic.h
+++ b/drivers/infiniband/hw/hfi1/vnic.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* Copyright(c) 2017 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 3650fababf25..16a4c297a897 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2017 - 2020 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index cc6324d2d1dd..6caf01ba0bca 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
* Copyright(c) 2017 - 2018 Intel Corporation.
*/
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index a7d259238305..be1e1cdbcfa8 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -7,7 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \
+ hns_roce_debugfs.o
ifdef CONFIG_INFINIBAND_HNS_HIP08
hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs)
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index e77fcc74f15c..4fc5b9d5fea8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -33,7 +33,9 @@
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
+#include "hnae3.h"
#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr)
{
@@ -55,11 +57,17 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device);
+ struct hns_roce_ib_create_ah_resp resp = {};
struct hns_roce_ah *ah = to_hr_ah(ibah);
- int ret = 0;
-
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata)
- return -EOPNOTSUPP;
+ u8 tclass = get_tclass(grh);
+ u8 priority = 0;
+ u8 tc_mode = 0;
+ int ret;
+
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
ah->av.port = rdma_ah_get_port_num(ah_attr);
ah->av.gid_index = grh->sgid_index;
@@ -70,8 +78,25 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
ah->av.hop_limit = grh->hop_limit;
ah->av.flowlabel = grh->flow_label;
ah->av.udp_sport = get_ah_udp_sport(ah_attr);
- ah->av.sl = rdma_ah_get_sl(ah_attr);
- ah->av.tclass = get_tclass(grh);
+ ah->av.tclass = tclass;
+
+ ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority);
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+
+ if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ goto err_out;
+
+ if (tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ ah->av.sl = priority;
+ else
+ ah->av.sl = rdma_ah_get_sl(ah_attr);
+
+ if (!check_sl_valid(hr_dev, ah->av.sl)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE);
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
@@ -81,11 +106,23 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
ret = rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr,
&ah->av.vlan_id, NULL);
if (ret)
- return ret;
+ goto err_out;
ah->av.vlan_en = ah->av.vlan_id < VLAN_N_VID;
}
+ if (udata) {
+ resp.priority = ah->av.sl;
+ resp.tc_mode = tc_mode;
+ memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN);
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ }
+
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AH_CREATE_ERR_CNT]);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 11a78ceae568..950c133d4220 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -153,8 +153,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
return total;
}
-int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
- int buf_cnt, struct ib_umem *umem,
+int hns_roce_get_umem_bufs(dma_addr_t *bufs, int buf_cnt, struct ib_umem *umem,
unsigned int page_shift)
{
struct ib_block_iter biter;
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 864413607571..873e8a69a1b9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -41,7 +41,15 @@
static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev,
struct hns_roce_mbox_msg *mbox_msg)
{
- return hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+ int ret;
+
+ ret = hr_dev->hw->post_mbox(hr_dev, mbox_msg);
+ if (ret)
+ return ret;
+
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POSTED_CNT]);
+
+ return 0;
}
/* this should be called with "poll_sem" */
@@ -58,7 +66,13 @@ static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
return ret;
}
- return hr_dev->hw->poll_mbox_done(hr_dev);
+ ret = hr_dev->hw->poll_mbox_done(hr_dev);
+ if (ret)
+ return ret;
+
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_POLLED_CNT]);
+
+ return 0;
}
static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev,
@@ -89,6 +103,7 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status,
context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO);
context->out_param = out_param;
complete(&context->done);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MBX_EVENT_CNT]);
}
static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 052a3d60905a..11dbbabebdc9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -108,6 +108,9 @@ enum {
HNS_ROCE_CMD_QUERY_CEQC = 0x92,
HNS_ROCE_CMD_DESTROY_CEQC = 0x93,
+ /* SCC CTX commands */
+ HNS_ROCE_CMD_QUERY_SCCC = 0xa2,
+
/* SCC CTX BT commands */
HNS_ROCE_CMD_READ_SCCC_BT0 = 0xa4,
HNS_ROCE_CMD_WRITE_SCCC_BT0 = 0xa5,
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 736dc2f993b4..4ec66611a143 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -133,14 +133,12 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 mtts[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle;
int ret;
- ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts),
- &dma_handle);
- if (!ret) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts));
+ if (ret) {
ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret);
- return -EINVAL;
+ return ret;
}
/* Get CQC memory HEM(Hardware Entry Memory) table */
@@ -151,20 +149,21 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
return ret;
}
- ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
+ ret = xa_err(xa_store_irq(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL));
if (ret) {
ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret);
goto err_put;
}
- ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle);
+ ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts,
+ hns_roce_get_mtr_ba(&hr_cq->mtr));
if (ret)
goto err_xa;
return 0;
err_xa:
- xa_erase(&cq_table->array, hr_cq->cqn);
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
@@ -183,7 +182,7 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret,
hr_cq->cqn);
- xa_erase(&cq_table->array, hr_cq->cqn);
+ xa_erase_irq(&cq_table->array, hr_cq->cqn);
/* Waiting interrupt process procedure carried out */
synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq);
@@ -354,38 +353,41 @@ static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata,
}
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
+ struct ib_udata *udata = &attrs->driver_udata;
struct hns_roce_ib_create_cq_resp resp = {};
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_cq ucmd = {};
int ret;
- if (attr->flags)
- return -EOPNOTSUPP;
+ if (attr->flags) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
ret = verify_cq_create_attr(hr_dev, attr);
if (ret)
- return ret;
+ goto err_out;
if (udata) {
ret = get_cq_ucmd(hr_cq, udata, &ucmd);
if (ret)
- return ret;
+ goto err_out;
}
set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd);
ret = set_cqe_size(hr_cq, udata, &ucmd);
if (ret)
- return ret;
+ goto err_out;
ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr);
if (ret) {
ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret);
- return ret;
+ goto err_out;
}
ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp);
@@ -430,6 +432,9 @@ err_cq_db:
free_cq_db(hr_dev, hr_cq, udata);
err_cq_buf:
free_cq_buf(hr_dev, hr_cq);
+err_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_CREATE_ERR_CNT]);
+
return ret;
}
@@ -472,13 +477,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
struct ib_event event;
struct ib_cq *ibcq;
- hr_cq = xa_load(&hr_dev->cq_table.array,
- cqn & (hr_dev->caps.num_cqs - 1));
- if (!hr_cq) {
- dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
- return;
- }
-
if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR &&
event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) {
@@ -487,7 +485,16 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type)
return;
}
- refcount_inc(&hr_cq->refcount);
+ xa_lock(&hr_dev->cq_table.array);
+ hr_cq = xa_load(&hr_dev->cq_table.array,
+ cqn & (hr_dev->caps.num_cqs - 1));
+ if (hr_cq)
+ refcount_inc(&hr_cq->refcount);
+ xa_unlock(&hr_dev->cq_table.array);
+ if (!hr_cq) {
+ dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn);
+ return;
+ }
ibcq = &hr_cq->ib_cq;
if (ibcq->event_handler) {
@@ -530,4 +537,5 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
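The hns_roce_cq_event() rework closes a lookup/refcount race: previously the
CQ could be freed between xa_load() and refcount_inc(). Taking the reference
under xa_lock() pairs with the switch to xa_store_irq()/xa_erase_irq() above,
so an object still found in the array is guaranteed not yet torn down. The
idiom in isolation (hypothetical types):

        struct obj {
                refcount_t ref;
        };

        static struct obj *obj_get(struct xarray *xa, unsigned long id)
        {
                struct obj *o;

                xa_lock(xa);
                o = xa_load(xa, id);
                if (o)
                        refcount_inc(&o->ref);  /* still in the array, so alive */
                xa_unlock(xa);

                return o;       /* NULL, or returned with a reference held */
        }
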
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
new file mode 100644
index 000000000000..e8febb40f645
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2023 Hisilicon Limited.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/device.h>
+
+#include "hns_roce_device.h"
+
+static struct dentry *hns_roce_dbgfs_root;
+
+static int hns_debugfs_seqfile_open(struct inode *inode, struct file *f)
+{
+ struct hns_debugfs_seqfile *seqfile = inode->i_private;
+
+ return single_open(f, seqfile->read, seqfile->data);
+}
+
+static const struct file_operations hns_debugfs_seqfile_fops = {
+ .owner = THIS_MODULE,
+ .open = hns_debugfs_seqfile_open,
+ .release = single_release,
+ .read = seq_read,
+ .llseek = seq_lseek
+};
+
+static void init_debugfs_seqfile(struct hns_debugfs_seqfile *seq,
+ const char *name, struct dentry *parent,
+ int (*read_fn)(struct seq_file *, void *),
+ void *data)
+{
+ debugfs_create_file(name, 0400, parent, seq, &hns_debugfs_seqfile_fops);
+
+ seq->read = read_fn;
+ seq->data = data;
+}
+
+static const char * const sw_stat_info[] = {
+ [HNS_ROCE_DFX_AEQE_CNT] = "aeqe",
+ [HNS_ROCE_DFX_CEQE_CNT] = "ceqe",
+ [HNS_ROCE_DFX_CMDS_CNT] = "cmds",
+ [HNS_ROCE_DFX_CMDS_ERR_CNT] = "cmds_err",
+ [HNS_ROCE_DFX_MBX_POSTED_CNT] = "posted_mbx",
+ [HNS_ROCE_DFX_MBX_POLLED_CNT] = "polled_mbx",
+ [HNS_ROCE_DFX_MBX_EVENT_CNT] = "mbx_event",
+ [HNS_ROCE_DFX_QP_CREATE_ERR_CNT] = "qp_create_err",
+ [HNS_ROCE_DFX_QP_MODIFY_ERR_CNT] = "qp_modify_err",
+ [HNS_ROCE_DFX_CQ_CREATE_ERR_CNT] = "cq_create_err",
+ [HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT] = "cq_modify_err",
+ [HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT] = "srq_create_err",
+ [HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT] = "srq_modify_err",
+ [HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT] = "xrcd_alloc_err",
+ [HNS_ROCE_DFX_MR_REG_ERR_CNT] = "mr_reg_err",
+ [HNS_ROCE_DFX_MR_REREG_ERR_CNT] = "mr_rereg_err",
+ [HNS_ROCE_DFX_AH_CREATE_ERR_CNT] = "ah_create_err",
+ [HNS_ROCE_DFX_MMAP_ERR_CNT] = "mmap_err",
+ [HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT] = "uctx_alloc_err",
+};
+
+static int sw_stat_debugfs_show(struct seq_file *file, void *offset)
+{
+ struct hns_roce_dev *hr_dev = file->private;
+ int i;
+
+ for (i = 0; i < HNS_ROCE_DFX_CNT_TOTAL; i++)
+ seq_printf(file, "%-20s --- %lld\n", sw_stat_info[i],
+ atomic64_read(&hr_dev->dfx_cnt[i]));
+
+ return 0;
+}
+
+static void create_sw_stat_debugfs(struct hns_roce_dev *hr_dev,
+ struct dentry *parent)
+{
+ struct hns_sw_stat_debugfs *dbgfs = &hr_dev->dbgfs.sw_stat_root;
+
+ dbgfs->root = debugfs_create_dir("sw_stat", parent);
+
+ init_debugfs_seqfile(&dbgfs->sw_stat, "sw_stat", dbgfs->root,
+ sw_stat_debugfs_show, hr_dev);
+}
+
+/* debugfs for device */
+void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_dev_debugfs *dbgfs = &hr_dev->dbgfs;
+
+ dbgfs->root = debugfs_create_dir(dev_name(&hr_dev->ib_dev.dev),
+ hns_roce_dbgfs_root);
+
+ create_sw_stat_debugfs(hr_dev, dbgfs->root);
+}
+
+void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev)
+{
+ debugfs_remove_recursive(hr_dev->dbgfs.root);
+}
+
+/* debugfs for hns module */
+void hns_roce_init_debugfs(void)
+{
+ hns_roce_dbgfs_root = debugfs_create_dir("hns_roce", NULL);
+}
+
+void hns_roce_cleanup_debugfs(void)
+{
+ debugfs_remove_recursive(hns_roce_dbgfs_root);
+ hns_roce_dbgfs_root = NULL;
+}
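With this file in place the counters land under debugfs: assuming the usual
mount point, reading /sys/kernel/debug/hns_roce/<ibdev name>/sw_stat/sw_stat
prints one "name --- value" line per sw_stat_info[] entry, in the
seq_printf() format above.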
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.h b/drivers/infiniband/hw/hns/hns_roce_debugfs.h
new file mode 100644
index 000000000000..98e87bd3161e
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2023 Hisilicon Limited.
+ */
+
+#ifndef __HNS_ROCE_DEBUGFS_H
+#define __HNS_ROCE_DEBUGFS_H
+
+/* debugfs seqfile */
+struct hns_debugfs_seqfile {
+ int (*read)(struct seq_file *seq, void *data);
+ void *data;
+};
+
+struct hns_sw_stat_debugfs {
+ struct dentry *root;
+ struct hns_debugfs_seqfile sw_stat;
+};
+
+/* Debugfs for device */
+struct hns_roce_dev_debugfs {
+ struct dentry *root;
+ struct hns_sw_stat_debugfs sw_stat_root;
+};
+
+struct hns_roce_dev;
+
+void hns_roce_init_debugfs(void);
+void hns_roce_cleanup_debugfs(void);
+void hns_roce_register_debugfs(struct hns_roce_dev *hr_dev);
+void hns_roce_unregister_debugfs(struct hns_roce_dev *hr_dev);
+
+#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 84239b907de2..0b1e21cb6d2d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -35,6 +35,7 @@
#include <rdma/ib_verbs.h>
#include <rdma/hns-abi.h>
+#include "hns_roce_debugfs.h"
#define PCI_REVISION_ID_HIP08 0x21
#define PCI_REVISION_ID_HIP09 0x30
@@ -82,6 +83,7 @@
#define MR_TYPE_DMA 0x03
#define HNS_ROCE_FRMR_MAX_PA 512
+#define HNS_ROCE_FRMR_ALIGN_SIZE 128
#define PKEY_ID 0xffff
#define NODE_DESC_SIZE 64
@@ -90,6 +92,8 @@
/* Configure to HW for PAGE_SIZE larger than 4KB */
#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
+#define ATOMIC_WR_LEN 8
+
#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
#define SRQ_DB_REG 0x230
@@ -97,6 +101,10 @@
#define HNS_ROCE_CQ_BANK_NUM 4
#define CQ_BANKID_SHIFT 2
+#define CQ_BANKID_MASK GENMASK(1, 0)
+
+#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF
+#define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF
enum {
SERV_TYPE_RC,
@@ -145,6 +153,7 @@ enum {
HNS_ROCE_CAP_FLAG_SDI_MODE = BIT(14),
HNS_ROCE_CAP_FLAG_STASH = BIT(17),
HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19),
+ HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22),
};
#define HNS_ROCE_DB_TYPE_COUNT 2
@@ -176,10 +185,14 @@ enum {
#define HNS_ROCE_CMD_SUCCESS 1
+#define HNS_ROCE_MAX_HOP_NUM 3
/* The minimum page size is 4K for hardware */
#define HNS_HW_PAGE_SHIFT 12
#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT)
+#define HNS_HW_MAX_PAGE_SHIFT 27
+#define HNS_HW_MAX_PAGE_SIZE (1 << HNS_HW_MAX_PAGE_SHIFT)
+
struct hns_roce_uar {
u64 pfn;
unsigned long index;
@@ -266,6 +279,11 @@ struct hns_roce_hem_list {
dma_addr_t root_ba; /* pointer to the root ba table */
};
+enum mtr_type {
+ MTR_DEFAULT = 0,
+ MTR_PBL,
+};
+
struct hns_roce_buf_attr {
struct {
size_t size; /* region size */
@@ -274,7 +292,10 @@ struct hns_roce_buf_attr {
unsigned int region_count; /* valid region count */
unsigned int page_shift; /* buffer page shift */
unsigned int user_access; /* umem access flag */
+ u64 iova;
+ enum mtr_type type;
bool mtt_only; /* only alloc buffer-required MTT memory */
+ bool adaptive; /* adaptive for page_shift and hopnum */
};
struct hns_roce_hem_cfg {
@@ -452,6 +473,8 @@ struct hns_roce_srq {
spinlock_t lock;
struct mutex mutex;
void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+ struct hns_roce_db rdb;
+ u32 cap_flags;
};
struct hns_roce_uar_table {
@@ -580,6 +603,13 @@ struct hns_roce_work {
u32 queue_num;
};
+enum hns_roce_cong_type {
+ CONG_TYPE_DCQCN,
+ CONG_TYPE_LDCP,
+ CONG_TYPE_HC3,
+ CONG_TYPE_DIP,
+};
+
struct hns_roce_qp {
struct ib_qp ibqp;
struct hns_roce_wq rq;
@@ -623,6 +653,9 @@ struct hns_roce_qp {
struct list_head sq_node; /* all send qps are on a list */
struct hns_user_mmap_entry *dwqe_mmap_entry;
u32 config;
+ enum hns_roce_cong_type cong_type;
+ u8 tc_mode;
+ u8 priority;
};
struct hns_roce_ib_iboe {
@@ -688,19 +721,13 @@ struct hns_roce_eq {
int shift;
int event_type;
int sub_type;
+ struct work_struct work;
};
struct hns_roce_eq_table {
struct hns_roce_eq *eq;
};
-enum cong_type {
- CONG_TYPE_DCQCN,
- CONG_TYPE_LDCP,
- CONG_TYPE_HC3,
- CONG_TYPE_DIP,
-};
-
struct hns_roce_caps {
u64 fw_ver;
u8 num_ports;
@@ -714,7 +741,6 @@ struct hns_roce_caps {
u32 max_rq_sg;
u32 rsv0;
u32 num_qps;
- u32 num_pi_qps;
u32 reserved_qps;
u32 num_srqs;
u32 max_wqes;
@@ -831,7 +857,8 @@ struct hns_roce_caps {
u16 default_aeq_period;
u16 default_aeq_arm_st;
u16 default_ceq_arm_st;
- enum cong_type cong_type;
+ u8 cong_cap;
+ enum hns_roce_cong_type default_cong_type;
};
enum hns_roce_device_state {
@@ -840,6 +867,55 @@ enum hns_roce_device_state {
HNS_ROCE_DEVICE_STATE_UNINIT,
};
+enum hns_roce_hw_pkt_stat_index {
+ HNS_ROCE_HW_RX_RC_PKT_CNT,
+ HNS_ROCE_HW_RX_UC_PKT_CNT,
+ HNS_ROCE_HW_RX_UD_PKT_CNT,
+ HNS_ROCE_HW_RX_XRC_PKT_CNT,
+ HNS_ROCE_HW_RX_PKT_CNT,
+ HNS_ROCE_HW_RX_ERR_PKT_CNT,
+ HNS_ROCE_HW_RX_CNP_PKT_CNT,
+ HNS_ROCE_HW_TX_RC_PKT_CNT,
+ HNS_ROCE_HW_TX_UC_PKT_CNT,
+ HNS_ROCE_HW_TX_UD_PKT_CNT,
+ HNS_ROCE_HW_TX_XRC_PKT_CNT,
+ HNS_ROCE_HW_TX_PKT_CNT,
+ HNS_ROCE_HW_TX_ERR_PKT_CNT,
+ HNS_ROCE_HW_TX_CNP_PKT_CNT,
+ HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT,
+ HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT,
+ HNS_ROCE_HW_ECN_DB_CNT,
+ HNS_ROCE_HW_RX_BUF_CNT,
+ HNS_ROCE_HW_TRP_RX_SOF_CNT,
+ HNS_ROCE_HW_CQ_CQE_CNT,
+ HNS_ROCE_HW_CQ_POE_CNT,
+ HNS_ROCE_HW_CQ_NOTIFY_CNT,
+ HNS_ROCE_HW_CNT_TOTAL
+};
+
+enum hns_roce_sw_dfx_stat_index {
+ HNS_ROCE_DFX_AEQE_CNT,
+ HNS_ROCE_DFX_CEQE_CNT,
+ HNS_ROCE_DFX_CMDS_CNT,
+ HNS_ROCE_DFX_CMDS_ERR_CNT,
+ HNS_ROCE_DFX_MBX_POSTED_CNT,
+ HNS_ROCE_DFX_MBX_POLLED_CNT,
+ HNS_ROCE_DFX_MBX_EVENT_CNT,
+ HNS_ROCE_DFX_QP_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_QP_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_CQ_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT,
+ HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT,
+ HNS_ROCE_DFX_MR_REG_ERR_CNT,
+ HNS_ROCE_DFX_MR_REREG_ERR_CNT,
+ HNS_ROCE_DFX_AH_CREATE_ERR_CNT,
+ HNS_ROCE_DFX_MMAP_ERR_CNT,
+ HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT,
+ HNS_ROCE_DFX_CNT_TOTAL
+};
+
struct hns_roce_hw {
int (*cmq_init)(struct hns_roce_dev *hr_dev);
void (*cmq_exit)(struct hns_roce_dev *hr_dev);
@@ -859,8 +935,7 @@ struct hns_roce_hw {
int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev,
struct hns_roce_mr *mr, int flags,
void *mb_buf);
- int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf,
- struct hns_roce_mr *mr);
+ int (*frmr_write_mtpt)(void *mb_buf, struct hns_roce_mr *mr);
int (*mw_write_mtpt)(void *mb_buf, struct hns_roce_mw *mw);
void (*write_cqc)(struct hns_roce_dev *hr_dev,
struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts,
@@ -882,6 +957,12 @@ struct hns_roce_hw {
int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer);
int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer);
+ int (*query_srqc)(struct hns_roce_dev *hr_dev, u32 srqn, void *buffer);
+ int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer);
+ int (*query_hw_counter)(struct hns_roce_dev *hr_dev,
+ u64 *stats, u32 port, int *hw_counters);
+ int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority);
const struct ib_device_ops *hns_roce_dev_ops;
const struct ib_device_ops *hns_roce_dev_srq_ops;
};
@@ -947,6 +1028,8 @@ struct hns_roce_dev {
u32 is_vf;
u32 cong_algo_tmpl_id;
u64 dwqe_page;
+ struct hns_roce_dev_debugfs dbgfs;
+ atomic64_t *dfx_cnt;
};
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -1094,8 +1177,13 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev);
/* hns roce hw need current block and next block addr from mtt */
#define MTT_MIN_COUNT 2
+static inline dma_addr_t hns_roce_get_mtr_ba(struct hns_roce_mtr *mtr)
+{
+ return mtr->hem_cfg.root_ba;
+}
+
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr);
+ u32 offset, u64 *mtt_buf, int mtt_max);
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct hns_roce_buf_attr *buf_attr,
unsigned int page_shift, struct ib_udata *udata,
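
Annotation: with the base_addr out-parameter dropped from hns_roce_mtr_find(), callers retrieve the root BA through the new inline helper instead. The pattern used throughout the hw_v2 hunks later in this patch is:

    u64 mtts[MTT_MIN_COUNT] = {};
    dma_addr_t ba;
    int ret;

    ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, mtts, MTT_MIN_COUNT);
    if (ret)
    	return ret;			/* mtr_find now returns 0 or -errno */

    ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);	/* root BA via the helper */
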
@@ -1112,7 +1200,6 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev);
-void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
@@ -1154,16 +1241,13 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size,
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
int buf_cnt, struct hns_roce_buf *buf,
unsigned int page_shift);
-int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs,
+int hns_roce_get_umem_bufs(dma_addr_t *bufs,
int buf_cnt, struct ib_umem *umem,
unsigned int page_shift);
int hns_roce_create_srq(struct ib_srq *srq,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
-int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
- enum ib_srq_attr_mask srq_attr_mask,
- struct ib_udata *udata);
int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata);
int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata);
@@ -1190,7 +1274,7 @@ __be32 send_ieth(const struct ib_send_wr *wr);
int to_hr_qp_type(int qp_type);
int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata);
int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
@@ -1206,7 +1290,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
-u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index);
void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
@@ -1216,8 +1299,12 @@ int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp);
int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp);
int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr);
int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr);
+int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq);
+int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq);
struct hns_user_mmap_entry *
hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
size_t length,
enum hns_roce_mmap_type mmap_type);
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl);
+
#endif /* _HNS_ROCE_DEVICE_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 47c0efed1821..c7c167e2a045 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -78,7 +78,7 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
return false;
}
- return hop_num ? true : false;
+ return hop_num;
}
static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx,
@@ -249,85 +249,48 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
}
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
- int npages,
unsigned long hem_alloc_size,
gfp_t gfp_mask)
{
- struct hns_roce_hem_chunk *chunk = NULL;
struct hns_roce_hem *hem;
- struct scatterlist *mem;
int order;
void *buf;
WARN_ON(gfp_mask & __GFP_HIGHMEM);
+ order = get_order(hem_alloc_size);
+ if (PAGE_SIZE << order != hem_alloc_size) {
+ dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n",
+ hem_alloc_size);
+ return NULL;
+ }
+
hem = kmalloc(sizeof(*hem),
gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
if (!hem)
return NULL;
- INIT_LIST_HEAD(&hem->chunk_list);
-
- order = get_order(hem_alloc_size);
-
- while (npages > 0) {
- if (!chunk) {
- chunk = kmalloc(sizeof(*chunk),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
- if (!chunk)
- goto fail;
-
- sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN);
- chunk->npages = 0;
- chunk->nsg = 0;
- memset(chunk->buf, 0, sizeof(chunk->buf));
- list_add_tail(&chunk->list, &hem->chunk_list);
- }
-
- while (1 << order > npages)
- --order;
+ buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size,
+ &hem->dma, gfp_mask);
+ if (!buf)
+ goto fail;
- /*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
- mem = &chunk->mem[chunk->npages];
- buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE << order,
- &sg_dma_address(mem), gfp_mask);
- if (!buf)
- goto fail;
-
- chunk->buf[chunk->npages] = buf;
- sg_dma_len(mem) = PAGE_SIZE << order;
-
- ++chunk->npages;
- ++chunk->nsg;
- npages -= 1 << order;
- }
+ hem->buf = buf;
+ hem->size = hem_alloc_size;
return hem;
fail:
- hns_roce_free_hem(hr_dev, hem);
+ kfree(hem);
return NULL;
}
void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem)
{
- struct hns_roce_hem_chunk *chunk, *tmp;
- int i;
-
if (!hem)
return;
- list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i)
- dma_free_coherent(hr_dev->dev,
- sg_dma_len(&chunk->mem[i]),
- chunk->buf[i],
- sg_dma_address(&chunk->mem[i]));
- kfree(chunk);
- }
+ dma_free_coherent(hr_dev->dev, hem->size, hem->buf, hem->dma);
kfree(hem);
}
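
Annotation: the chunk/scatterlist machinery collapses into a single dma_alloc_coherent() call, and the new up-front check rejects any hem_alloc_size that is not an exact power-of-two number of pages, since get_order() rounds up. A runnable user-space stand-in showing which sizes pass, assuming 4 KiB pages:

    #include <stdio.h>

    #define PAGE_SHIFT 12			/* assume 4 KiB pages */
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* stand-in for the kernel's get_order(): smallest order such that
     * PAGE_SIZE << order covers size
     */
    static int get_order(unsigned long size)
    {
    	int order = 0;

    	while ((PAGE_SIZE << order) < size)
    		order++;
    	return order;
    }

    int main(void)
    {
    	unsigned long sizes[] = { 4096, 8192, 12288, 65536 };

    	for (int i = 0; i < 4; i++) {
    		int order = get_order(sizes[i]);

    		printf("%6lu -> order %d: %s\n", sizes[i], order,
    		       (PAGE_SIZE << order) == sizes[i] ?
    		       "accepted" : "rejected");	/* 12288 is rejected */
    	}
    	return 0;
    }
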
@@ -415,7 +378,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
{
u32 bt_size = mhop->bt_chunk_size;
struct device *dev = hr_dev->dev;
- struct hns_roce_hem_iter iter;
gfp_t flag;
u64 bt_ba;
u32 size;
@@ -456,16 +418,15 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
*/
size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
flag = GFP_KERNEL | __GFP_NOWARN;
- table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT,
- size, flag);
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag);
if (!table->hem[index->buf]) {
ret = -ENOMEM;
goto err_alloc_hem;
}
index->inited |= HEM_INDEX_BUF;
- hns_roce_hem_first(table->hem[index->buf], &iter);
- bt_ba = hns_roce_hem_addr(&iter);
+ bt_ba = table->hem[index->buf]->dma;
+
if (table->type < HEM_TYPE_MTT) {
if (mhop->hop_num == 2)
*(table->bt_l1[index->l1] + mhop->l2_idx) = bt_ba;
@@ -586,7 +547,6 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
}
table->hem[i] = hns_roce_alloc_hem(hr_dev,
- table->table_chunk_size >> PAGE_SHIFT,
table->table_chunk_size,
GFP_KERNEL | __GFP_NOWARN);
if (!table->hem[i]) {
@@ -725,7 +685,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table,
unsigned long obj, dma_addr_t *dma_handle)
{
- struct hns_roce_hem_chunk *chunk;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -734,7 +693,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
int offset, dma_offset;
void *addr = NULL;
u32 hem_idx = 0;
- int length;
int i, j;
mutex_lock(&table->mutex);
@@ -767,23 +725,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev,
if (!hem)
goto out;
- list_for_each_entry(chunk, &hem->chunk_list, list) {
- for (i = 0; i < chunk->npages; ++i) {
- length = sg_dma_len(&chunk->mem[i]);
- if (dma_handle && dma_offset >= 0) {
- if (length > (u32)dma_offset)
- *dma_handle = sg_dma_address(
- &chunk->mem[i]) + dma_offset;
- dma_offset -= length;
- }
-
- if (length > (u32)offset) {
- addr = chunk->buf[i] + offset;
- goto out;
- }
- offset -= length;
- }
- }
+ *dma_handle = hem->dma + dma_offset;
+ addr = hem->buf + offset;
out:
mutex_unlock(&table->mutex);
@@ -934,6 +877,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
if (hns_roce_check_whether_mhop(hr_dev, table->type)) {
hns_roce_cleanup_mhop_hem_table(hr_dev, table);
+ mutex_destroy(&table->mutex);
return;
}
@@ -948,6 +892,7 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
hns_roce_free_hem(hr_dev, table->hem[i]);
}
+ mutex_destroy(&table->mutex);
kfree(table->hem);
}
@@ -1043,15 +988,13 @@ static void hem_list_free_all(struct hns_roce_dev *hr_dev,
}
}
-static void hem_list_link_bt(struct hns_roce_dev *hr_dev, void *base_addr,
- u64 table_addr)
+static void hem_list_link_bt(void *base_addr, u64 table_addr)
{
*(u64 *)(base_addr) = table_addr;
}
/* assign L0 table address to hem from root bt */
-static void hem_list_assign_bt(struct hns_roce_dev *hr_dev,
- struct hns_roce_hem_item *hem, void *cpu_addr,
+static void hem_list_assign_bt(struct hns_roce_hem_item *hem, void *cpu_addr,
u64 phy_addr)
{
hem->addr = cpu_addr;
@@ -1098,9 +1041,9 @@ static bool hem_list_is_bottom_bt(int hopnum, int bt_level)
* @bt_level: base address table level
* @unit: ba entries per bt page
*/
-static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
+static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit)
{
- u32 step;
+ u64 step;
int max;
int i;
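
Annotation: why the widening to u64 matters. `unit` is the number of BAs per BT page — a 16 KiB chunk holds 16384 / BA_BYTE_LEN (8) = 2048 entries — and, assuming the range grows by a factor of `unit` per remaining hop (the loop body is not shown in this hunk), a 3-hop bottom-level range reaches 2048 * 2048 * 2048 = 8,589,934,592 entries, past U32_MAX (4,294,967,295), so the old u32 step could silently wrap.
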
@@ -1136,7 +1079,7 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions,
{
struct hns_roce_buf_region *r;
int total = 0;
- int step;
+ u64 step;
int i;
for (i = 0; i < region_cnt; i++) {
@@ -1167,7 +1110,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
int ret = 0;
int max_ofs;
int level;
- u32 step;
+ u64 step;
int end;
if (hopnum <= 1)
@@ -1191,10 +1134,12 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
/* config L1 bt to last bt and link them to corresponding parent */
for (level = 1; level < hopnum; level++) {
- cur = hem_list_search_item(&mid_bt[level], offset);
- if (cur) {
- hem_ptrs[level] = cur;
- continue;
+ if (!hem_list_is_bottom_bt(hopnum, level)) {
+ cur = hem_list_search_item(&mid_bt[level], offset);
+ if (cur) {
+ hem_ptrs[level] = cur;
+ continue;
+ }
}
step = hem_list_calc_ba_range(hopnum, level, unit);
@@ -1204,7 +1149,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
}
start_aligned = (distance / step) * step + r->offset;
- end = min_t(int, start_aligned + step - 1, max_ofs);
+ end = min_t(u64, start_aligned + step - 1, max_ofs);
cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit,
true);
if (!cur) {
@@ -1220,8 +1165,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev,
if (level > 1) {
pre = hem_ptrs[level - 1];
step = (cur->start - pre->start) / step * BA_BYTE_LEN;
- hem_list_link_bt(hr_dev, pre->addr + step,
- cur->dma_addr);
+ hem_list_link_bt(pre->addr + step, cur->dma_addr);
}
}
@@ -1279,7 +1223,7 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
if (!hem)
return -ENOMEM;
- hem_list_assign_bt(hr_dev, hem, cpu_base, phy_base);
+ hem_list_assign_bt(hem, cpu_base, phy_base);
list_add(&hem->list, branch_head);
list_add(&hem->sibling, leaf_head);
@@ -1293,7 +1237,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
struct hns_roce_hem_item *hem, *temp_hem;
int total = 0;
int offset;
- int step;
+ u64 step;
step = hem_list_calc_ba_range(r->hopnum, 1, unit);
if (step < 1)
@@ -1302,7 +1246,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base,
/* if exist mid bt, link L1 to L0 */
list_for_each_entry_safe(hem, temp_hem, branch_head, list) {
offset = (hem->start - r->offset) / step * BA_BYTE_LEN;
- hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr);
+ hem_list_link_bt(cpu_base + offset, hem->dma_addr);
total++;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index 7d23d3c51da4..9c415b2541af 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -56,41 +56,25 @@ enum {
HEM_TYPE_TRRL,
};
-#define HNS_ROCE_HEM_CHUNK_LEN \
- ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
- (sizeof(struct scatterlist) + sizeof(void *)))
-
#define check_whether_bt_num_3(type, hop_num) \
- (type < HEM_TYPE_MTT && hop_num == 2)
+ ((type) < HEM_TYPE_MTT && (hop_num) == 2)
#define check_whether_bt_num_2(type, hop_num) \
- ((type < HEM_TYPE_MTT && hop_num == 1) || \
- (type >= HEM_TYPE_MTT && hop_num == 2))
+ (((type) < HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 2))
#define check_whether_bt_num_1(type, hop_num) \
- ((type < HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0) || \
- (type >= HEM_TYPE_MTT && hop_num == 1) || \
- (type >= HEM_TYPE_MTT && hop_num == HNS_ROCE_HOP_NUM_0))
-
-struct hns_roce_hem_chunk {
- struct list_head list;
- int npages;
- int nsg;
- struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN];
- void *buf[HNS_ROCE_HEM_CHUNK_LEN];
-};
+ (((type) < HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == 1) || \
+ ((type) >= HEM_TYPE_MTT && (hop_num) == HNS_ROCE_HOP_NUM_0))
struct hns_roce_hem {
- struct list_head chunk_list;
+ void *buf;
+ dma_addr_t dma;
+ unsigned long size;
refcount_t refcount;
};
-struct hns_roce_hem_iter {
- struct hns_roce_hem *hem;
- struct hns_roce_hem_chunk *chunk;
- int page_idx;
-};
-
struct hns_roce_hem_mhop {
u32 hop_num;
u32 buf_chunk_size;
@@ -133,38 +117,4 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
int offset, int *mtt_cnt);
-static inline void hns_roce_hem_first(struct hns_roce_hem *hem,
- struct hns_roce_hem_iter *iter)
-{
- iter->hem = hem;
- iter->chunk = list_empty(&hem->chunk_list) ? NULL :
- list_entry(hem->chunk_list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
-}
-
-static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter)
-{
- return !iter->chunk;
-}
-
-static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter)
-{
- if (++iter->page_idx >= iter->chunk->nsg) {
- if (iter->chunk->list.next == &iter->hem->chunk_list) {
- iter->chunk = NULL;
- return;
- }
-
- iter->chunk = list_entry(iter->chunk->list.next,
- struct hns_roce_hem_chunk, list);
- iter->page_idx = 0;
- }
-}
-
-static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter)
-{
- return sg_dma_address(&iter->chunk->mem[iter->page_idx]);
-}
-
#endif /* _HNS_ROCE_HEM_H */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 8f7eb11066b4..24e906b9d3ae 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -36,6 +36,7 @@
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include <net/addrconf.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
@@ -270,7 +271,7 @@ static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len)
struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device);
int mtu = ib_mtu_enum_to_int(qp->path_mtu);
- if (len > qp->max_inline_data || len > mtu) {
+ if (mtu < 0 || len > qp->max_inline_data || len > mtu) {
ibdev_err(&hr_dev->ib_dev,
"invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n",
len, qp->max_inline_data, mtu);
@@ -443,10 +444,6 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
-
- if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
- return -EINVAL;
-
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
ud_sq_wqe->sgid_index = ah->av.gid_index;
@@ -595,11 +592,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
- wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ if (msg_len != ATOMIC_WR_LEN)
+ return -EINVAL;
set_atomic_seg(wr, rc_sq_wqe, valid_num_sge);
- else if (wr->opcode != IB_WR_REG_MR)
+ } else if (wr->opcode != IB_WR_REG_MR) {
ret = set_rwqe_data_seg(&qp->ibqp, wr, rc_sq_wqe,
&curr_idx, valid_num_sge);
+ if (ret)
+ return ret;
+ }
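
Annotation: IB atomic operations (compare-and-swap, fetch-and-add) act on a single 64-bit value, so a well-formed atomic WR carries exactly ATOMIC_WR_LEN (8) bytes; the new check rejects anything else with -EINVAL. Illustration only, from the user-space verbs side — `buf` and `mr` are assumed to exist:

    #include <stdint.h>
    #include <infiniband/verbs.h>

    /* the single SGE of an atomic WR must describe exactly 8 bytes */
    struct ibv_sge sge = {
    	.addr   = (uintptr_t)buf,
    	.length = 8,			/* == ATOMIC_WR_LEN */
    	.lkey   = mr->lkey,
    };
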
/*
* The pipeline can sequentially post all valid WQEs into WQ buffer,
@@ -750,7 +752,8 @@ out:
qp->sq.head += nreq;
qp->next_sge = sge_idx;
- if (nreq == 1 && (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
+ if (nreq == 1 && !ret &&
+ (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE))
write_dwqe(hr_dev, qp, wqe);
else
update_sq_db(hr_dev, qp);
@@ -940,20 +943,23 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
idx_que->head++;
}
-static void update_srq_db(struct hns_roce_v2_db *db, struct hns_roce_srq *srq)
+static void update_srq_db(struct hns_roce_srq *srq)
{
- hr_reg_write(db, DB_TAG, srq->srqn);
- hr_reg_write(db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
- hr_reg_write(db, DB_PI, srq->idx_que.head);
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct hns_roce_v2_db db;
+
+ hr_reg_write(&db, DB_TAG, srq->srqn);
+ hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
+ hr_reg_write(&db, DB_PI, srq->idx_que.head);
+
+ hns_roce_write64(hr_dev, (__le32 *)&db, srq->db_reg);
}
static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
struct hns_roce_srq *srq = to_hr_srq(ibsrq);
- struct hns_roce_v2_db srq_db;
unsigned long flags;
int ret = 0;
u32 max_sge;
@@ -984,9 +990,11 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
}
if (likely(nreq)) {
- update_srq_db(&srq_db, srq);
-
- hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg);
+ if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB)
+ *srq->rdb.db_record = srq->idx_que.head &
+ V2_DB_PRODUCER_IDX_M;
+ else
+ update_srq_db(srq);
}
spin_unlock_irqrestore(&srq->lock, flags);
@@ -1267,12 +1275,38 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
return -EIO;
}
+static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
+{
+ static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
+ {HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT},
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cmdq_tx_timeout); i++)
+ if (cmdq_tx_timeout[i].opcode == opcode)
+ return cmdq_tx_timeout[i].tx_timeout;
+
+ return tx_timeout;
+}
+
+static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
+ u32 timeout = 0;
+
+ do {
+ if (hns_roce_cmq_csq_done(hr_dev))
+ break;
+ udelay(1);
+ } while (++timeout < tx_timeout);
+}
+
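
Annotation: since hns_roce_wait_csq_done() polls hns_roce_cmq_csq_done() with udelay(1) per iteration, the returned value is effectively a budget in microseconds, with slow opcodes getting a per-opcode override. A runnable stand-in for the lookup-with-fallback (opcode and timeout values here are illustrative, not taken from the driver):

    #include <stdio.h>

    struct timeout_map {
    	unsigned short opcode;
    	unsigned int us;
    };

    static unsigned int tx_timeout_us(unsigned short opcode, unsigned int dflt)
    {
    	static const struct timeout_map map[] = {
    		{ 0x8021, 35000 },	/* e.g. a mailbox post gets more time */
    	};
    	unsigned int i;

    	for (i = 0; i < sizeof(map) / sizeof(map[0]); i++)
    		if (map[i].opcode == opcode)
    			return map[i].us;
    	return dflt;			/* ring-wide default otherwise */
    }

    int main(void)
    {
    	printf("%u\n", tx_timeout_us(0x8021, 200));	/* 35000 */
    	printf("%u\n", tx_timeout_us(0x1234, 200));	/* 200 */
    	return 0;
    }
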
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
- u32 timeout = 0;
u16 desc_ret;
u32 tail;
int ret;
@@ -1291,12 +1325,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
/* Write to hardware */
roce_write(hr_dev, ROCEE_TX_CMQ_PI_REG, csq->head);
- do {
- if (hns_roce_cmq_csq_done(hr_dev))
- break;
- udelay(1);
- } while (++timeout < priv->cmq.tx_timeout);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
+ hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
if (hns_roce_cmq_csq_done(hr_dev)) {
ret = 0;
for (i = 0; i < num; i++) {
@@ -1328,6 +1359,9 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
spin_unlock_bh(&csq->lock);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]);
+
return ret;
}
@@ -1612,6 +1646,56 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev)
return 0;
}
+static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev,
+ u64 *stats, u32 port, int *num_counters)
+{
+#define CNT_PER_DESC 3
+ struct hns_roce_cmq_desc *desc;
+ int bd_idx, cnt_idx;
+ __le64 *cnt_data;
+ int desc_num;
+ int ret;
+ int i;
+
+ if (port > hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC);
+ desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ for (i = 0; i < desc_num; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i],
+ HNS_ROCE_OPC_QUERY_COUNTER, true);
+ if (i != desc_num - 1)
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ }
+
+ ret = hns_roce_cmq_send(hr_dev, desc, desc_num);
+ if (ret) {
+ ibdev_err(&hr_dev->ib_dev,
+ "failed to get counter, ret = %d.\n", ret);
+ goto err_out;
+ }
+
+ for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) {
+ bd_idx = i / CNT_PER_DESC;
+ if (bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC &&
+ !(desc[bd_idx].flag & cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT)))
+ break;
+
+ cnt_data = (__le64 *)&desc[bd_idx].data[0];
+ cnt_idx = i % CNT_PER_DESC;
+ stats[i] = le64_to_cpu(cnt_data[cnt_idx]);
+ }
+ *num_counters = i;
+
+err_out:
+ kfree(desc);
+ return ret;
+}
+
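
Annotation: each command descriptor returns CNT_PER_DESC (3) 64-bit counters, so the 22 hardware counters enumerated earlier span DIV_ROUND_UP(22, 3) = 8 descriptors, and counter i lives in descriptor i / 3, slot i % 3. A standalone check of the arithmetic only:

    #include <stdio.h>

    #define CNT_PER_DESC		3
    #define HNS_ROCE_HW_CNT_TOTAL	22	/* entries in the enum above */
    #define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

    int main(void)
    {
    	printf("desc_num = %d\n",
    	       DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC));	/* 8 */
    	/* e.g. counter 10 (HNS_ROCE_HW_TX_XRC_PKT_CNT) */
    	printf("counter 10 -> desc %d, slot %d\n",
    	       10 / CNT_PER_DESC, 10 % CNT_PER_DESC);			/* 3, 1 */
    	return 0;
    }
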
static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc;
@@ -1680,29 +1764,6 @@ static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf)
return 0;
}
-static int load_ext_cfg_caps(struct hns_roce_dev *hr_dev, bool is_vf)
-{
- struct hns_roce_cmq_desc desc;
- struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
- struct hns_roce_caps *caps = &hr_dev->caps;
- u32 func_num, qp_num;
- int ret;
-
- hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, true);
- ret = hns_roce_cmq_send(hr_dev, &desc, 1);
- if (ret)
- return ret;
-
- func_num = is_vf ? 1 : max_t(u32, 1, hr_dev->func_num);
- qp_num = hr_reg_read(req, EXT_CFG_QP_PI_NUM) / func_num;
- caps->num_pi_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM);
-
- qp_num = hr_reg_read(req, EXT_CFG_QP_NUM) / func_num;
- caps->num_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM);
-
- return 0;
-}
-
static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc;
@@ -1723,50 +1784,37 @@ static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev)
return 0;
}
-static int query_func_resource_caps(struct hns_roce_dev *hr_dev, bool is_vf)
+static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
{
struct device *dev = hr_dev->dev;
int ret;
- ret = load_func_res_caps(hr_dev, is_vf);
+ ret = load_func_res_caps(hr_dev, false);
if (ret) {
- dev_err(dev, "failed to load res caps, ret = %d (%s).\n", ret,
- is_vf ? "vf" : "pf");
+ dev_err(dev, "failed to load pf res caps, ret = %d.\n", ret);
return ret;
}
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
- ret = load_ext_cfg_caps(hr_dev, is_vf);
- if (ret)
- dev_err(dev, "failed to load ext cfg, ret = %d (%s).\n",
- ret, is_vf ? "vf" : "pf");
- }
+ ret = load_pf_timer_res_caps(hr_dev);
+ if (ret)
+ dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
+ ret);
return ret;
}
-static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
+static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
{
struct device *dev = hr_dev->dev;
int ret;
- ret = query_func_resource_caps(hr_dev, false);
+ ret = load_func_res_caps(hr_dev, true);
if (ret)
- return ret;
-
- ret = load_pf_timer_res_caps(hr_dev);
- if (ret)
- dev_err(dev, "failed to load pf timer resource, ret = %d.\n",
- ret);
+ dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret);
return ret;
}
-static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev)
-{
- return query_func_resource_caps(hr_dev, true);
-}
-
static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
u32 vf_id)
{
@@ -1849,24 +1897,6 @@ static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id)
return hns_roce_cmq_send(hr_dev, desc, 2);
}
-static int config_vf_ext_resource(struct hns_roce_dev *hr_dev, u32 vf_id)
-{
- struct hns_roce_cmq_desc desc;
- struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data;
- struct hns_roce_caps *caps = &hr_dev->caps;
-
- hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, false);
-
- hr_reg_write(req, EXT_CFG_VF_ID, vf_id);
-
- hr_reg_write(req, EXT_CFG_QP_PI_NUM, caps->num_pi_qps);
- hr_reg_write(req, EXT_CFG_QP_PI_IDX, vf_id * caps->num_pi_qps);
- hr_reg_write(req, EXT_CFG_QP_NUM, caps->num_qps);
- hr_reg_write(req, EXT_CFG_QP_IDX, vf_id * caps->num_qps);
-
- return hns_roce_cmq_send(hr_dev, &desc, 1);
-}
-
static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
{
u32 func_num = max_t(u32, 1, hr_dev->func_num);
@@ -1881,16 +1911,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
vf_id, ret);
return ret;
}
-
- if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
- ret = config_vf_ext_resource(hr_dev, vf_id);
- if (ret) {
- dev_err(hr_dev->dev,
- "failed to config vf-%u ext res, ret = %d.\n",
- vf_id, ret);
- return ret;
- }
- }
}
return 0;
@@ -2063,6 +2083,7 @@ static void set_hem_page_size(struct hns_roce_dev *hr_dev)
/* Apply all loaded caps before setting to hardware */
static void apply_func_caps(struct hns_roce_dev *hr_dev)
{
+#define MAX_GID_TBL_LEN 256
struct hns_roce_caps *caps = &hr_dev->caps;
struct hns_roce_v2_priv *priv = hr_dev->priv;
@@ -2075,9 +2096,6 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM;
- caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM;
-
caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
@@ -2101,10 +2119,16 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
caps->gmv_entry_sz = HNS_ROCE_V3_GMV_ENTRY_SZ;
caps->gmv_hop_num = HNS_ROCE_HOP_NUM_0;
- caps->gid_table_len[0] = caps->gmv_bt_num *
- (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz);
- caps->gmv_entry_num = caps->gmv_bt_num * (PAGE_SIZE /
+ /* It's meaningless to support excessively large gid_table_len,
+ * as the type of sgid_index in kernel struct ib_global_route
+ * and userspace struct ibv_global_route are u8/uint8_t (0-255).
+ */
+ caps->gid_table_len[0] = min_t(u32, MAX_GID_TBL_LEN,
+ caps->gmv_bt_num *
+ (HNS_HW_PAGE_SIZE / caps->gmv_entry_sz));
+
+ caps->gmv_entry_num = caps->gmv_bt_num * (HNS_HW_PAGE_SIZE /
caps->gmv_entry_sz);
} else {
u32 func_num = max_t(u32, 1, hr_dev->func_num);
@@ -2200,6 +2224,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
+ caps->num_xrcds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_XRCDS);
caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
@@ -2207,11 +2232,12 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
- caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
+ caps->cong_cap = hr_reg_read(resp_d, PF_CAPS_D_CONG_CAP);
caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+ caps->default_cong_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
@@ -2220,6 +2246,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
+ caps->reserved_xrcds = hr_reg_read(resp_e, PF_CAPS_E_RSV_XRCDS);
caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
@@ -2457,14 +2484,16 @@ static int set_llm_cfg_to_hw(struct hns_roce_dev *hr_dev,
static struct hns_roce_link_table *
alloc_link_table_buf(struct hns_roce_dev *hr_dev)
{
+ u16 total_sl = hr_dev->caps.sl_num * hr_dev->func_num;
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hns_roce_link_table *link_tbl;
u32 pg_shift, size, min_size;
link_tbl = &priv->ext_llm;
pg_shift = hr_dev->caps.llm_buf_pg_sz + PAGE_SHIFT;
- size = hr_dev->caps.num_qps * HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
- min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(hr_dev->caps.sl_num) << pg_shift;
+ size = hr_dev->caps.num_qps * hr_dev->func_num *
+ HNS_ROCE_V2_EXT_LLM_ENTRY_SZ;
+ min_size = HNS_ROCE_EXT_LLM_MIN_PAGES(total_sl) << pg_shift;
/* Alloc data table */
size = max(size, min_size);
@@ -2667,6 +2696,8 @@ static void free_mr_exit(struct hns_roce_dev *hr_dev)
kfree(free_mr->rsv_pd);
free_mr->rsv_pd = NULL;
}
+
+ mutex_destroy(&free_mr->mutex);
}
static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
@@ -2707,6 +2738,10 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
return 0;
create_failed_qp:
+ for (i--; i >= 0; i--) {
+ hns_roce_v2_destroy_qp(&free_mr->rsv_qp[i]->ibqp, NULL);
+ kfree(free_mr->rsv_qp[i]);
+ }
hns_roce_destroy_cq(cq, NULL);
kfree(cq);
@@ -2813,8 +2848,10 @@ static int free_mr_init(struct hns_roce_dev *hr_dev)
mutex_init(&free_mr->mutex);
ret = free_mr_alloc_res(hr_dev);
- if (ret)
+ if (ret) {
+ mutex_destroy(&free_mr->mutex);
return ret;
+ }
ret = free_mr_modify_qp(hr_dev);
if (ret)
@@ -2935,6 +2972,9 @@ err_llm_init_failed:
static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
+
hns_roce_function_clear(hr_dev);
if (!hr_dev->is_vf)
@@ -3188,24 +3228,26 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
u64 pages[HNS_ROCE_V2_MAX_INNER_MTPT_NUM] = { 0 };
struct ib_device *ibdev = &hr_dev->ib_dev;
dma_addr_t pbl_ba;
- int i, count;
+ int ret;
+ int i;
- count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
- min_t(int, ARRAY_SIZE(pages), mr->npages),
- &pbl_ba);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
- count);
- return -ENOBUFS;
+ ret = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
+ min_t(int, ARRAY_SIZE(pages), mr->npages));
+ if (ret) {
+ ibdev_err(ibdev, "failed to find PBL mtr, ret = %d.\n", ret);
+ return ret;
}
/* Aligned to the hardware address access unit */
- for (i = 0; i < count; i++)
- pages[i] >>= 6;
+ for (i = 0; i < ARRAY_SIZE(pages); i++)
+ pages[i] >>= MPT_PBL_BUF_ADDR_S;
+
+ pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
- mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
- hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+ mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> MPT_PBL_BA_ADDR_S);
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
@@ -3298,21 +3340,14 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
return ret;
}
-static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
- void *mb_buf, struct hns_roce_mr *mr)
+static int hns_roce_v2_frmr_write_mtpt(void *mb_buf, struct hns_roce_mr *mr)
{
- struct ib_device *ibdev = &hr_dev->ib_dev;
+ dma_addr_t pbl_ba = hns_roce_get_mtr_ba(&mr->pbl_mtr);
struct hns_roce_v2_mpt_entry *mpt_entry;
- dma_addr_t pbl_ba = 0;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
- if (hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, NULL, 0, &pbl_ba) < 0) {
- ibdev_err(ibdev, "failed to find frmr mtr.\n");
- return -ENOBUFS;
- }
-
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
@@ -3332,8 +3367,10 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
- mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
- hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
+ mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >>
+ MPT_PBL_BA_ADDR_S));
+ hr_reg_write(mpt_entry, MPT_PBL_BA_H,
+ upper_32_bits(pbl_ba >> MPT_PBL_BA_ADDR_S));
return 0;
}
@@ -3579,14 +3616,14 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift));
hr_reg_write(cq_context, CQC_CQE_BUF_PG_SZ,
to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift));
- hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> 3);
- hr_reg_write(cq_context, CQC_CQE_BA_H, (dma_handle >> (32 + 3)));
+ hr_reg_write(cq_context, CQC_CQE_BA_L, dma_handle >> CQC_CQE_BA_L_S);
+ hr_reg_write(cq_context, CQC_CQE_BA_H, dma_handle >> CQC_CQE_BA_H_S);
hr_reg_write_bool(cq_context, CQC_DB_RECORD_EN,
hr_cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB);
hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_L,
((u32)hr_cq->db.dma) >> 1);
hr_reg_write(cq_context, CQC_CQE_DB_RECORD_ADDR_H,
- hr_cq->db.dma >> 32);
+ hr_cq->db.dma >> CQC_CQE_DB_RECORD_ADDR_H_S);
hr_reg_write(cq_context, CQC_CQ_MAX_CNT,
HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM);
hr_reg_write(cq_context, CQC_CQ_PERIOD,
@@ -3708,8 +3745,9 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
wc->status == IB_WC_WR_FLUSH_ERR))
return;
- ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status);
- print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe,
+ ibdev_err_ratelimited(&hr_dev->ib_dev, "error cqe status 0x%x:\n",
+ cqe_status);
+ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_NONE, 16, 4, cqe,
cq->cqe_size, false);
wc->vendor_err = hr_reg_read(cqe, CQE_SUB_STATUS);
@@ -4056,7 +4094,6 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_table *table, int obj,
u32 step_idx)
{
- struct hns_roce_hem_iter iter;
struct hns_roce_hem_mhop mhop;
struct hns_roce_hem *hem;
unsigned long mhop_obj = obj;
@@ -4093,12 +4130,8 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev,
if (check_whether_last_step(hop_num, step_idx)) {
hem = table->hem[hem_idx];
- for (hns_roce_hem_first(hem, &iter);
- !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) {
- bt_ba = hns_roce_hem_addr(&iter);
- ret = set_hem_to_hw(hr_dev, obj, bt_ba, table->type,
- step_idx);
- }
+
+ ret = set_hem_to_hw(hr_dev, obj, hem->dma, table->type, step_idx);
} else {
if (step_idx == 0)
bt_ba = table->bt_l0_dma_addr[i];
@@ -4219,8 +4252,7 @@ static void set_access_flags(struct hns_roce_qp *hr_qp,
}
static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp,
- struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *context)
{
hr_reg_write(context, QPC_SGE_SHIFT,
to_hr_hem_entries_shift(hr_qp->sge.sge_cnt,
@@ -4242,7 +4274,6 @@ static inline int get_pdn(struct ib_pd *ib_pd)
}
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4261,7 +4292,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
hr_reg_write(context, QPC_RQWS, ilog2(hr_qp->rq.max_gs));
- set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
+ set_qpc_wqe_cnt(hr_qp, context);
/* No VLAN need to set 0xFFF */
hr_reg_write(context, QPC_VLAN_ID, 0xfff);
@@ -4302,7 +4333,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
}
static void modify_qp_init_to_init(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4339,17 +4369,20 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
{
u64 mtts[MTT_MIN_COUNT] = { 0 };
u64 wqe_sge_ba;
- int count;
+ int ret;
/* Search qp buf's mtts */
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
- MTT_MIN_COUNT, &wqe_sge_ba);
- if (hr_qp->rq.wqe_cnt && count < 1) {
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, mtts,
+ MTT_MIN_COUNT);
+ if (hr_qp->rq.wqe_cnt && ret) {
ibdev_err(&hr_dev->ib_dev,
- "failed to find RQ WQE, QPN = 0x%lx.\n", hr_qp->qpn);
- return -EINVAL;
+ "failed to find QP(0x%lx) RQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
+ wqe_sge_ba = hns_roce_get_mtr_ba(&hr_qp->mtr);
+
context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3);
qpc_mask->wqe_sge_ba = 0;
@@ -4393,12 +4426,14 @@ static int config_qp_rq_buf(struct hns_roce_dev *hr_dev,
upper_32_bits(to_hr_hw_page_addr(mtts[0])));
hr_reg_clear(qpc_mask, QPC_RQ_CUR_BLK_ADDR_H);
- context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
- qpc_mask->rq_nxt_blk_addr = 0;
-
- hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
- upper_32_bits(to_hr_hw_page_addr(mtts[1])));
- hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ context->rq_nxt_blk_addr =
+ cpu_to_le32(to_hr_hw_page_addr(mtts[1]));
+ qpc_mask->rq_nxt_blk_addr = 0;
+ hr_reg_write(context, QPC_RQ_NXT_BLK_ADDR_H,
+ upper_32_bits(to_hr_hw_page_addr(mtts[1])));
+ hr_reg_clear(qpc_mask, QPC_RQ_NXT_BLK_ADDR_H);
+ }
return 0;
}
@@ -4411,23 +4446,23 @@ static int config_qp_sq_buf(struct hns_roce_dev *hr_dev,
struct ib_device *ibdev = &hr_dev->ib_dev;
u64 sge_cur_blk = 0;
u64 sq_cur_blk = 0;
- int count;
+ int ret;
/* search qp buf's mtts */
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf.\n",
- hr_qp->qpn);
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset,
+ &sq_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SQ WQE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
if (hr_qp->sge.sge_cnt > 0) {
- count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
- hr_qp->sge.offset,
- &sge_cur_blk, 1, NULL);
- if (count < 1) {
- ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf.\n",
- hr_qp->qpn);
- return -EINVAL;
+ ret = hns_roce_mtr_find(hr_dev, &hr_qp->mtr,
+ hr_qp->sge.offset, &sge_cur_blk, 1);
+ if (ret) {
+ ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf, ret = %d.\n",
+ hr_qp->qpn, ret);
+ return ret;
}
}
@@ -4520,16 +4555,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return -EINVAL;
}
- hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> 4);
+ hr_reg_write(context, QPC_TRRL_BA_L, trrl_ba >> QPC_TRRL_BA_L_S);
hr_reg_clear(qpc_mask, QPC_TRRL_BA_L);
- context->trrl_ba = cpu_to_le32(trrl_ba >> (16 + 4));
+ context->trrl_ba = cpu_to_le32(trrl_ba >> QPC_TRRL_BA_M_S);
qpc_mask->trrl_ba = 0;
- hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> (32 + 16 + 4));
+ hr_reg_write(context, QPC_TRRL_BA_H, trrl_ba >> QPC_TRRL_BA_H_S);
hr_reg_clear(qpc_mask, QPC_TRRL_BA_H);
- context->irrl_ba = cpu_to_le32(irrl_ba >> 6);
+ context->irrl_ba = cpu_to_le32(irrl_ba >> QPC_IRRL_BA_L_S);
qpc_mask->irrl_ba = 0;
- hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> (32 + 6));
+ hr_reg_write(context, QPC_IRRL_BA_H, irrl_ba >> QPC_IRRL_BA_H_S);
hr_reg_clear(qpc_mask, QPC_IRRL_BA_H);
hr_reg_enable(context, QPC_RMT_E2E);
@@ -4591,8 +4626,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_clear(qpc_mask, QPC_TRRL_HEAD_MAX);
hr_reg_clear(qpc_mask, QPC_TRRL_TAIL_MAX);
+#define MAX_LP_SGEN 3
/* rocee send 2^lp_sgen_ini segs every time */
- hr_reg_write(context, QPC_LP_SGEN_INI, 3);
+ hr_reg_write(context, QPC_LP_SGEN_INI, MAX_LP_SGEN);
hr_reg_clear(qpc_mask, QPC_LP_SGEN_INI);
if (udata && ibqp->qp_type == IB_QPT_RC &&
@@ -4618,8 +4654,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return 0;
}
-static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
- const struct ib_qp_attr *attr, int attr_mask,
+static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask,
struct hns_roce_v2_qp_context *context,
struct hns_roce_v2_qp_context *qpc_mask)
{
@@ -4684,7 +4719,7 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
*tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
list_for_each_entry(hr_dip, &hr_dev->dip_list, node) {
- if (!memcmp(grh->dgid.raw, hr_dip->dgid, 16)) {
+ if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) {
*dip_idx = hr_dip->dip_idx;
goto out;
}
@@ -4737,10 +4772,10 @@ enum {
static int check_cong_type(struct ib_qp *ibqp,
struct hns_roce_congestion_algorithm *cong_alg)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
/* different congestion types match different configurations */
- switch (hr_dev->caps.cong_type) {
+ switch (hr_qp->cong_type) {
case CONG_TYPE_DCQCN:
cong_alg->alg_sel = CONG_DCQCN;
cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
@@ -4766,10 +4801,12 @@ static int check_cong_type(struct ib_qp *ibqp,
cong_alg->wnd_mode_sel = WND_LIMIT;
break;
default:
- ibdev_err(&hr_dev->ib_dev,
- "error type(%u) for congestion selection.\n",
- hr_dev->caps.cong_type);
- return -EINVAL;
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ cong_alg->alg_sel = CONG_DCQCN;
+ cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
+ cong_alg->dip_vld = DIP_INVALID;
+ cong_alg->wnd_mode_sel = WND_LIMIT;
+ break;
}
return 0;
@@ -4783,6 +4820,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
struct hns_roce_congestion_algorithm cong_field;
struct ib_device *ibdev = ibqp->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
u32 dip_idx = 0;
int ret;
@@ -4795,7 +4833,7 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
return ret;
hr_reg_write(context, QPC_CONG_ALGO_TMPL_ID, hr_dev->cong_algo_tmpl_id +
- hr_dev->caps.cong_type * HNS_ROCE_CONG_SIZE);
+ hr_qp->cong_type * HNS_ROCE_CONG_SIZE);
hr_reg_clear(qpc_mask, QPC_CONG_ALGO_TMPL_ID);
hr_reg_write(&context->ext, QPCEX_CONG_ALG_SEL, cong_field.alg_sel);
hr_reg_clear(&qpc_mask->ext, QPCEX_CONG_ALG_SEL);
@@ -4824,6 +4862,69 @@ static int fill_cong_field(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
return 0;
}
+static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp,
+ u8 *tc_mode, u8 *priority)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hnae3_handle *handle = priv->handle;
+ const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+ if (!ops->get_dscp_prio)
+ return -EOPNOTSUPP;
+
+ return ops->get_dscp_prio(handle, dscp, tc_mode, priority);
+}
+
+bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl)
+{
+ u32 max_sl;
+
+ max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1);
+ if (unlikely(sl > max_sl)) {
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to set SL(%u). Shouldn't be larger than %u.\n",
+ sl, max_sl);
+ return false;
+ }
+
+ return true;
+}
+
+static int hns_roce_set_sl(struct ib_qp *ibqp,
+ const struct ib_qp_attr *attr,
+ struct hns_roce_v2_qp_context *context,
+ struct hns_roce_v2_qp_context *qpc_mask)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ int ret;
+
+ ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh),
+ &hr_qp->tc_mode, &hr_qp->priority);
+ if (ret && ret != -EOPNOTSUPP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+ ibdev_err_ratelimited(ibdev,
+ "failed to get dscp, ret = %d.\n", ret);
+ return ret;
+ }
+
+ if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP &&
+ grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ hr_qp->sl = hr_qp->priority;
+ else
+ hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+
+ if (!check_sl_valid(hr_dev, hr_qp->sl))
+ return -EINVAL;
+
+ hr_reg_write(context, QPC_SL, hr_qp->sl);
+ hr_reg_clear(qpc_mask, QPC_SL);
+
+ return 0;
+}
+
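
Annotation: hns_roce_set_sl() picks the SL first — the DSCP-derived priority when the port is in DSCP mapping mode on a RoCEv2 GID, the AH's SL otherwise — and only then validates it. check_sl_valid() caps the SL at min(MAX_SERVICE_LEVEL, caps.sl_num - 1): for example, a function provisioned with sl_num = 4 accepts SL 0-3 and rejects SL 4 and above with -EINVAL, even though the QPC field itself could encode more.
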
static int hns_roce_v2_set_path(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask,
@@ -4835,6 +4936,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
struct ib_device *ibdev = &hr_dev->ib_dev;
const struct ib_gid_attr *gid_attr = NULL;
+ u8 sl = rdma_ah_get_sl(&attr->ah_attr);
int is_roce_protocol;
u16 vlan_id = 0xffff;
bool is_udp = false;
@@ -4848,9 +4950,11 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
* In the loopback scenario, only sl needs to be set.
*/
if (hr_qp->free_mr_en) {
- hr_reg_write(context, QPC_SL, rdma_ah_get_sl(&attr->ah_attr));
+ if (!check_sl_valid(hr_dev, sl))
+ return -EINVAL;
+ hr_reg_write(context, QPC_SL, sl);
hr_reg_clear(qpc_mask, QPC_SL);
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+ hr_qp->sl = sl;
return 0;
}
@@ -4917,18 +5021,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
- hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
- if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) {
- ibdev_err(ibdev,
- "failed to fill QPC, sl (%u) shouldn't be larger than %d.\n",
- hr_qp->sl, MAX_SERVICE_LEVEL);
- return -EINVAL;
- }
-
- hr_reg_write(context, QPC_SL, hr_qp->sl);
- hr_reg_clear(qpc_mask, QPC_SL);
-
- return 0;
+ return hns_roce_set_sl(ibqp, attr, context, qpc_mask);
}
static bool check_qp_state(enum ib_qp_state cur_state,
@@ -4975,15 +5068,14 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
- modify_qp_reset_to_init(ibqp, attr, context, qpc_mask);
+ modify_qp_reset_to_init(ibqp, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
- modify_qp_init_to_init(ibqp, attr, context, qpc_mask);
+ modify_qp_init_to_init(ibqp, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context,
qpc_mask, udata);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
- ret = modify_qp_rtr_to_rts(ibqp, attr, attr_mask, context,
- qpc_mask);
+ ret = modify_qp_rtr_to_rts(ibqp, attr_mask, context, qpc_mask);
}
return ret;
@@ -5286,6 +5378,54 @@ out:
return ret;
}
+static int hns_roce_v2_query_srqc(struct hns_roce_dev *hr_dev, u32 srqn,
+ void *buffer)
+{
+ struct hns_roce_srq_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SRQC,
+ srqn);
+ if (ret)
+ goto out;
+
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int hns_roce_v2_query_sccc(struct hns_roce_dev *hr_dev, u32 qpn,
+ void *buffer)
+{
+ struct hns_roce_v2_scc_context *context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_SCCC,
+ qpn);
+ if (ret)
+ goto out;
+
+ context = mailbox->buf;
+ memcpy(buffer, context, sizeof(*context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
static u8 get_qp_timeout_attr(struct hns_roce_dev *hr_dev,
struct hns_roce_v2_qp_context *context)
{
@@ -5539,18 +5679,20 @@ static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
struct ib_device *ibdev = srq->ibsrq.device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
u64 mtts_idx[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle_idx = 0;
+ dma_addr_t dma_handle_idx;
int ret;
/* Get physical address of idx que buf */
ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
- ARRAY_SIZE(mtts_idx), &dma_handle_idx);
- if (ret < 1) {
+ ARRAY_SIZE(mtts_idx));
+ if (ret) {
ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
ret);
- return -ENOBUFS;
+ return ret;
}
+ dma_handle_idx = hns_roce_get_mtr_ba(&idx_que->mtr);
+
hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
@@ -5582,20 +5724,22 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
struct hns_roce_srq_context *ctx = mb_buf;
u64 mtts_wqe[MTT_MIN_COUNT] = {};
- dma_addr_t dma_handle_wqe = 0;
+ dma_addr_t dma_handle_wqe;
int ret;
memset(ctx, 0, sizeof(*ctx));
/* Get the physical address of srq buf */
ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
- ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
- if (ret < 1) {
+ ARRAY_SIZE(mtts_wqe));
+ if (ret) {
ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
ret);
- return -ENOBUFS;
+ return ret;
}
+ dma_handle_wqe = hns_roce_get_mtr_ba(&srq->buf_mtr);
+
hr_reg_write(ctx, SRQC_SRQ_ST, 1);
hr_reg_write_bool(ctx, SRQC_SRQ_TYPE,
srq->ibsrq.srq_type == IB_SRQT_XRC);
@@ -5620,6 +5764,14 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+ if (srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB) {
+ hr_reg_enable(ctx, SRQC_DB_RECORD_EN);
+ hr_reg_write(ctx, SRQC_DB_RECORD_ADDR_L,
+ lower_32_bits(srq->rdb.dma) >> 1);
+ hr_reg_write(ctx, SRQC_DB_RECORD_ADDR_H,
+ upper_32_bits(srq->rdb.dma));
+ }
+
return hns_roce_v2_write_srqc_index_queue(srq, ctx);
}
@@ -5633,19 +5785,25 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
struct hns_roce_srq_context *srq_context;
struct hns_roce_srq_context *srqc_mask;
struct hns_roce_cmd_mailbox *mailbox;
- int ret;
+ int ret = 0;
/* Resizing SRQs is not supported yet */
- if (srq_attr_mask & IB_SRQ_MAX_WR)
- return -EINVAL;
+ if (srq_attr_mask & IB_SRQ_MAX_WR) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
if (srq_attr_mask & IB_SRQ_LIMIT) {
- if (srq_attr->srq_limit > srq->wqe_cnt)
- return -EINVAL;
+ if (srq_attr->srq_limit > srq->wqe_cnt) {
+ ret = -EINVAL;
+ goto out;
+ }
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto out;
+ }
srq_context = mailbox->buf;
srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
@@ -5658,15 +5816,17 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0,
HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- if (ret) {
+ if (ret)
ibdev_err(&hr_dev->ib_dev,
"failed to handle cmd of modifying SRQ, ret = %d.\n",
ret);
- return ret;
- }
}
- return 0;
+out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_MODIFY_ERR_CNT]);
+
+ return ret;
}
static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@@ -5710,8 +5870,9 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
int ret;
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return PTR_ERR(mailbox);
+ ret = PTR_ERR_OR_ZERO(mailbox);
+ if (ret)
+ goto err_out;
cq_context = mailbox->buf;
cqc_mask = (struct hns_roce_v2_cq_context *)mailbox->buf + 1;
@@ -5726,7 +5887,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
dev_info(hr_dev->dev,
"cq_period(%u) reached the upper limit, adjusted to 65.\n",
cq_period);
- cq_period = HNS_ROCE_MAX_CQ_PERIOD;
+ cq_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
}
cq_period *= HNS_ROCE_CLOCK_ADJUST;
}
@@ -5741,6 +5902,10 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
"failed to process cmd when modifying CQ, ret = %d.\n",
ret);
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CQ_MODIFY_ERR_CNT]);
+
return ret;
}
@@ -5818,7 +5983,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
case HNS_ROCE_EVENT_TYPE_COMM_EST:
break;
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
- ibdev_warn(ibdev, "send queue drained.\n");
+ ibdev_dbg(ibdev, "send queue drained.\n");
break;
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n",
@@ -5833,10 +5998,10 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
irq_work->queue_num, irq_work->sub_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- ibdev_warn(ibdev, "SRQ limit reach.\n");
+ ibdev_dbg(ibdev, "SRQ limit reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
- ibdev_warn(ibdev, "SRQ last wqe reach.\n");
+ ibdev_dbg(ibdev, "SRQ last wqe reach.\n");
break;
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
ibdev_err(ibdev, "SRQ catas error.\n");
@@ -5980,6 +6145,8 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
++eq->cons_index;
aeqe_found = IRQ_HANDLED;
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
+
hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq);
@@ -6002,32 +6169,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
-static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
- struct hns_roce_eq *eq)
+static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq)
{
- struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
- irqreturn_t ceqe_found = IRQ_NONE;
- u32 cqn;
-
- while (ceqe) {
- /* Make sure we read CEQ entry after we have checked the
- * ownership bit
- */
- dma_rmb();
-
- cqn = hr_reg_read(ceqe, CEQE_CQN);
+ queue_work(system_bh_wq, &eq->work);
- hns_roce_cq_completion(hr_dev, cqn);
-
- ++eq->cons_index;
- ceqe_found = IRQ_HANDLED;
-
- ceqe = next_ceqe_sw_v2(eq);
- }
-
- update_eq_db(eq);
-
- return IRQ_RETVAL(ceqe_found);
+ return IRQ_HANDLED;
}
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
@@ -6038,7 +6184,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
if (eq->type_flag == HNS_ROCE_CEQ)
/* Completion event interrupt */
- int_work = hns_roce_v2_ceq_int(hr_dev, eq);
+ int_work = hns_roce_v2_ceq_int(eq);
else
/* Asynchronous event interrupt */
int_work = hns_roce_v2_aeq_int(hr_dev, eq);
@@ -6052,6 +6198,7 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
struct pci_dev *pdev = hr_dev->pci_dev;
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
const struct hnae3_ae_ops *ops = ae_dev->ops;
+ enum hnae3_reset_type reset_type;
irqreturn_t int_work = IRQ_NONE;
u32 int_en;
@@ -6063,10 +6210,12 @@ static irqreturn_t abnormal_interrupt_basic(struct hns_roce_dev *hr_dev,
roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG,
1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S);
+ reset_type = hr_dev->is_vf ?
+ HNAE3_VF_FUNC_RESET : HNAE3_FUNC_RESET;
+
/* Set reset level for reset_event() */
if (ops->set_default_reset_request)
- ops->set_default_reset_request(ae_dev,
- HNAE3_FUNC_RESET);
+ ops->set_default_reset_request(ae_dev, reset_type);
if (ops->reset_event)
ops->reset_event(pdev, NULL);
@@ -6136,7 +6285,7 @@ static u64 fmea_get_ram_res_addr(u32 res_type, __le64 *data)
res_type == ECC_RESOURCE_SCCC)
return le64_to_cpu(*data);
- return le64_to_cpu(*data) << PAGE_SHIFT;
+ return le64_to_cpu(*data) << HNS_HW_PAGE_SHIFT;
}
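The one-token change above is not cosmetic: PAGE_SHIFT tracks the kernel's configured page size (4K, 16K, or 64K on arm64), while the ROCEE addresses these RAM resources in fixed 4K units. A sketch of the distinction, assuming HNS_HW_PAGE_SHIFT is 12 as defined elsewhere in this driver:

    /* Device page math must use the hardware's fixed shift: on a
     * 64K-page kernel, PAGE_SHIFT is 16 and would scale the ECC
     * resource address sixteen times too far.
     */
    #define HNS_HW_PAGE_SHIFT 12

    static unsigned long long res_pfn_to_addr(unsigned long long pfn)
    {
            return pfn << HNS_HW_PAGE_SHIFT; /* not pfn << PAGE_SHIFT */
    }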
static int fmea_recover_others(struct hns_roce_dev *hr_dev, u32 res_type,
@@ -6250,9 +6399,16 @@ static void hns_roce_v2_int_mask_enable(struct hns_roce_dev *hr_dev,
roce_write(hr_dev, ROCEE_VF_ABN_INT_CFG_REG, enable_flag);
}
-static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn)
+static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
+{
+ hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+}
+
+static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_eq *eq)
{
struct device *dev = hr_dev->dev;
+ int eqn = eq->eqn;
int ret;
u8 cmd;
@@ -6263,12 +6419,9 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn)
ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M);
if (ret)
- dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn);
-}
+ dev_err(dev, "[mailbox cmd] destroy eqc(%d) failed.\n", eqn);
-static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
-{
- hns_roce_mtr_destroy(hr_dev, &eq->mtr);
+ free_eq_buf(hr_dev, eq);
}
static void init_eq_config(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq)
@@ -6287,7 +6440,7 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
u64 eqe_ba[MTT_MIN_COUNT] = { 0 };
struct hns_roce_eq_context *eqc;
u64 bt_ba = 0;
- int count;
+ int ret;
eqc = mb_buf;
memset(eqc, 0, sizeof(struct hns_roce_eq_context));
@@ -6295,13 +6448,15 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq,
init_eq_config(hr_dev, eq);
/* if not multi-hop, eqe buffer only use one trunk */
- count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT,
- &bt_ba);
- if (count < 1) {
- dev_err(hr_dev->dev, "failed to find EQE mtr\n");
- return -ENOBUFS;
+ ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba,
+ ARRAY_SIZE(eqe_ba));
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to find EQE mtr, ret = %d\n", ret);
+ return ret;
}
+ bt_ba = hns_roce_get_mtr_ba(&eq->mtr);
+
hr_reg_write(eqc, EQC_EQ_ST, HNS_ROCE_V2_EQ_STATE_VALID);
hr_reg_write(eqc, EQC_EQE_HOP_NUM, eq->hop_num);
hr_reg_write(eqc, EQC_OVER_IGNORE, eq->over_ignore);
@@ -6404,6 +6559,34 @@ free_cmd_mbox:
return ret;
}
+static void hns_roce_ceq_work(struct work_struct *work)
+{
+ struct hns_roce_eq *eq = from_work(eq, work, work);
+ struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq);
+ struct hns_roce_dev *hr_dev = eq->hr_dev;
+ int ceqe_num = 0;
+ u32 cqn;
+
+ while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) {
+ /* Make sure we read CEQ entry after we have checked the
+ * ownership bit
+ */
+ dma_rmb();
+
+ cqn = hr_reg_read(ceqe, CEQE_CQN);
+
+ hns_roce_cq_completion(hr_dev, cqn);
+
+ ++eq->cons_index;
+ ++ceqe_num;
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]);
+
+ ceqe = next_ceqe_sw_v2(eq);
+ }
+
+ update_eq_db(eq);
+}
+
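Together with the slimmed hns_roce_v2_ceq_int() above, completion events are now drained in BH workqueue context rather than in the hard-IRQ handler, and each run is capped at ceqe_depth entries so a busy CEQ cannot monopolize a CPU. A minimal sketch of the split, kernel context assumed and the types illustrative:

    struct my_eq {
            struct work_struct work;
            /* ... event queue state ... */
    };

    static void my_eq_work(struct work_struct *work)
    {
            struct my_eq *eq = container_of(work, struct my_eq, work);

            /* drain a bounded number of entries, then update the doorbell */
    }

    static irqreturn_t my_eq_irq(int irq, void *data)
    {
            struct my_eq *eq = data;

            queue_work(system_bh_wq, &eq->work); /* defer out of hard IRQ */
            return IRQ_HANDLED;
    }

Note that INIT_WORK() must run before the IRQ can fire, which is why the request-irq hunk below initializes the work item just ahead of request_irq() for the completion vectors.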
static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
int comp_num, int aeq_num, int other_num)
{
@@ -6423,32 +6606,36 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
/* irq contains: abnormal + AEQ + CEQ */
for (j = 0; j < other_num; j++)
snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
- "hns-abn-%d", j);
+ "hns-%s-abn-%d", pci_name(hr_dev->pci_dev), j);
for (j = other_num; j < (other_num + aeq_num); j++)
snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
- "hns-aeq-%d", j - other_num);
+ "hns-%s-aeq-%d", pci_name(hr_dev->pci_dev), j - other_num);
for (j = (other_num + aeq_num); j < irq_num; j++)
snprintf((char *)hr_dev->irq_names[j], HNS_ROCE_INT_NAME_LEN,
- "hns-ceq-%d", j - other_num - aeq_num);
+ "hns-%s-ceq-%d", pci_name(hr_dev->pci_dev),
+ j - other_num - aeq_num);
for (j = 0; j < irq_num; j++) {
- if (j < other_num)
+ if (j < other_num) {
ret = request_irq(hr_dev->irq[j],
hns_roce_v2_msix_interrupt_abn,
0, hr_dev->irq_names[j], hr_dev);
-
- else if (j < (other_num + comp_num))
+ } else if (j < (other_num + comp_num)) {
+ INIT_WORK(&eq_table->eq[j - other_num].work,
+ hns_roce_ceq_work);
ret = request_irq(eq_table->eq[j - other_num].irq,
hns_roce_v2_msix_interrupt_eq,
0, hr_dev->irq_names[j + aeq_num],
&eq_table->eq[j - other_num]);
- else
+ } else {
ret = request_irq(eq_table->eq[j - other_num].irq,
hns_roce_v2_msix_interrupt_eq,
0, hr_dev->irq_names[j - comp_num],
&eq_table->eq[j - other_num]);
+ }
+
if (ret) {
dev_err(hr_dev->dev, "request irq error!\n");
goto err_request_failed;
@@ -6458,12 +6645,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num,
return 0;
err_request_failed:
- for (j -= 1; j >= 0; j--)
- if (j < other_num)
+ for (j -= 1; j >= 0; j--) {
+ if (j < other_num) {
free_irq(hr_dev->irq[j], hr_dev);
- else
- free_irq(eq_table->eq[j - other_num].irq,
- &eq_table->eq[j - other_num]);
+ continue;
+ }
+ free_irq(eq_table->eq[j - other_num].irq,
+ &eq_table->eq[j - other_num]);
+ if (j < other_num + comp_num)
+ cancel_work_sync(&eq_table->eq[j - other_num].work);
+ }
err_kzalloc_failed:
for (i -= 1; i >= 0; i--)
@@ -6484,8 +6675,11 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev)
for (i = 0; i < hr_dev->caps.num_other_vectors; i++)
free_irq(hr_dev->irq[i], hr_dev);
- for (i = 0; i < eq_num; i++)
+ for (i = 0; i < eq_num; i++) {
free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]);
+ if (i < hr_dev->caps.num_comp_vectors)
+ cancel_work_sync(&hr_dev->eq_table.eq[i].work);
+ }
for (i = 0; i < irq_num; i++)
kfree(hr_dev->irq_names[i]);
@@ -6574,7 +6768,7 @@ err_request_irq_fail:
err_create_eq_fail:
for (i -= 1; i >= 0; i--)
- free_eq_buf(hr_dev, &eq_table->eq[i]);
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
return ret;
@@ -6594,11 +6788,8 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
__hns_roce_free_irq(hr_dev);
destroy_workqueue(hr_dev->irq_workq);
- for (i = 0; i < eq_num; i++) {
- hns_roce_v2_destroy_eqc(hr_dev, i);
-
- free_eq_buf(hr_dev, &eq_table->eq[i]);
- }
+ for (i = 0; i < eq_num; i++)
+ hns_roce_v2_destroy_eqc(hr_dev, &eq_table->eq[i]);
kfree(eq_table->eq);
}
@@ -6646,6 +6837,10 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.query_cqc = hns_roce_v2_query_cqc,
.query_qpc = hns_roce_v2_query_qpc,
.query_mpt = hns_roce_v2_query_mpt,
+ .query_srqc = hns_roce_v2_query_srqc,
+ .query_sccc = hns_roce_v2_query_sccc,
+ .query_hw_counter = hns_roce_hw_v2_query_counter,
+ .get_dscp = hns_roce_hw_v2_get_dscp,
.hns_roce_dev_ops = &hns_roce_v2_dev_ops,
.hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
@@ -6722,14 +6917,14 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
ret = hns_roce_init(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "RoCE Engine init failed!\n");
- goto error_failed_cfg;
+ goto error_failed_roce_init;
}
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
ret = free_mr_init(hr_dev);
if (ret) {
dev_err(hr_dev->dev, "failed to init free mr!\n");
- goto error_failed_roce_init;
+ goto error_failed_free_mr_init;
}
}
@@ -6737,10 +6932,10 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
return 0;
-error_failed_roce_init:
+error_failed_free_mr_init:
hns_roce_exit(hr_dev);
-error_failed_cfg:
+error_failed_roce_init:
kfree(hr_dev->priv);
error_failed_kzalloc:
@@ -6762,9 +6957,6 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT;
hns_roce_handle_device_err(hr_dev);
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
- free_mr_exit(hr_dev);
-
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
@@ -6927,12 +7119,14 @@ static struct hnae3_client hns_roce_hw_v2_client = {
static int __init hns_roce_hw_v2_init(void)
{
+ hns_roce_init_debugfs();
return hnae3_register_client(&hns_roce_hw_v2_client);
}
static void __exit hns_roce_hw_v2_exit(void)
{
hnae3_unregister_client(&hns_roce_hw_v2_client);
+ hns_roce_cleanup_debugfs();
}
module_init(hns_roce_hw_v2_init);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 7033eae2407c..c65f68a14a26 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -42,7 +42,6 @@
#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000
-#define HNS_ROCE_V2_RSV_XRCD_NUM 0
#define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10
@@ -199,6 +198,7 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_QUERY_HW_VER = 0x8000,
HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001,
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
+ HNS_ROCE_OPC_QUERY_COUNTER = 0x8206,
HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
@@ -220,11 +220,16 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_QUERY_VF_RES = 0x850e,
HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f,
HNS_ROCE_OPC_CFG_GMV_BT = 0x8510,
- HNS_ROCE_OPC_EXT_CFG = 0x8512,
HNS_ROCE_QUERY_RAM_ECC = 0x8513,
HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
+#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
+struct hns_roce_cmdq_tx_timeout_map {
+ u16 opcode;
+ u32 tx_timeout;
+};
+
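The new map type pairs a command opcode with a transmit timeout that overrides the CMQ default, giving slow commands such as mailbox posts a longer budget. A plausible lookup over such a table, kernel context assumed; HNS_ROCE_OPC_POST_MB and the actual table contents are defined elsewhere in the driver:

    static const struct hns_roce_cmdq_tx_timeout_map tx_timeout_map[] = {
            { HNS_ROCE_OPC_POST_MB, HNS_ROCE_OPC_POST_MB_TIMEOUT },
    };

    static u32 cmdq_tx_timeout(u16 opcode, u32 def_timeout)
    {
            int i;

            for (i = 0; i < ARRAY_SIZE(tx_timeout_map); i++)
                    if (tx_timeout_map[i].opcode == opcode)
                            return tx_timeout_map[i].tx_timeout;

            return def_timeout;
    }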
enum {
TYPE_CRQ,
TYPE_CSQ,
@@ -277,6 +282,10 @@ struct hns_roce_v2_cq_context {
__le32 byte_64_se_cqe_idx;
};
+#define CQC_CQE_BA_L_S 3
+#define CQC_CQE_BA_H_S (32 + CQC_CQE_BA_L_S)
+#define CQC_CQE_DB_RECORD_ADDR_H_S 32
+
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
@@ -448,6 +457,12 @@ struct hns_roce_v2_qp_context {
struct hns_roce_v2_qp_context_ex ext;
};
+#define QPC_TRRL_BA_L_S 4
+#define QPC_TRRL_BA_M_S (16 + QPC_TRRL_BA_L_S)
+#define QPC_TRRL_BA_H_S (32 + QPC_TRRL_BA_M_S)
+#define QPC_IRRL_BA_L_S 6
+#define QPC_IRRL_BA_H_S (32 + QPC_IRRL_BA_L_S)
+
#define QPC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_qp_context, h, l)
#define QPC_TST QPC_FIELD_LOC(2, 0)
@@ -647,6 +662,12 @@ struct hns_roce_v2_qp_context {
#define QPCEX_SQ_RQ_NOT_FORBID_EN QPCEX_FIELD_LOC(23, 23)
#define QPCEX_STASH QPCEX_FIELD_LOC(82, 82)
+#define SCC_CONTEXT_SIZE 16
+
+struct hns_roce_v2_scc_context {
+ __le32 data[SCC_CONTEXT_SIZE];
+};
+
#define V2_QP_RWE_S 1 /* rdma write enable */
#define V2_QP_RRE_S 2 /* rdma read enable */
#define V2_QP_ATE_S 3 /* rdma atomic enable */
@@ -711,6 +732,9 @@ struct hns_roce_v2_mpt_entry {
__le32 byte_64_buf_pa1;
};
+#define MPT_PBL_BUF_ADDR_S 6
+#define MPT_PBL_BA_ADDR_S 3
+
#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l)
#define MPT_ST MPT_FIELD_LOC(1, 0)
@@ -957,15 +981,6 @@ struct hns_roce_func_clear {
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40
#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20
-/* Fields of HNS_ROCE_OPC_EXT_CFG */
-#define EXT_CFG_VF_ID CMQ_REQ_FIELD_LOC(31, 0)
-#define EXT_CFG_QP_PI_IDX CMQ_REQ_FIELD_LOC(45, 32)
-#define EXT_CFG_QP_PI_NUM CMQ_REQ_FIELD_LOC(63, 48)
-#define EXT_CFG_QP_NUM CMQ_REQ_FIELD_LOC(87, 64)
-#define EXT_CFG_QP_IDX CMQ_REQ_FIELD_LOC(119, 96)
-#define EXT_CFG_LLM_IDX CMQ_REQ_FIELD_LOC(139, 128)
-#define EXT_CFG_LLM_NUM CMQ_REQ_FIELD_LOC(156, 144)
-
#define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0)
#define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32)
#define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64)
@@ -1202,6 +1217,7 @@ struct hns_roce_query_pf_caps_c {
#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
+#define PF_CAPS_C_NUM_XRCDS PF_CAPS_C_FIELD_LOC(91, 87)
#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
@@ -1223,12 +1239,13 @@ struct hns_roce_query_pf_caps_d {
#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
-#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CONG_CAP PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
@@ -1260,6 +1277,7 @@ struct hns_roce_query_pf_caps_e {
#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
+#define PF_CAPS_E_RSV_XRCDS PF_CAPS_E_FIELD_LOC(63, 52)
#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
@@ -1335,7 +1353,7 @@ struct fmea_ram_ecc {
/* only for RNR timeout issue of HIP08 */
#define HNS_ROCE_CLOCK_ADJUST 1000
-#define HNS_ROCE_MAX_CQ_PERIOD 65
+#define HNS_ROCE_MAX_CQ_PERIOD_HIP08 65
#define HNS_ROCE_MAX_EQ_PERIOD 65
#define HNS_ROCE_RNR_TIMER_10NS 1
#define HNS_ROCE_1US_CFG 999
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 485e110ca433..4cb0af733587 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -37,9 +37,11 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
+#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
+#include "hns_roce_hw_v2.h"
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port,
const u8 *addr)
@@ -192,6 +194,12 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ props->max_ah = INT_MAX;
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD;
+ props->cq_caps.max_cq_moderation_count = HNS_ROCE_MAX_CQ_COUNT;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ props->cq_caps.max_cq_moderation_period = HNS_ROCE_MAX_CQ_PERIOD_HIP08;
+
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
props->max_srq = hr_dev->caps.num_srqs;
props->max_srq_wr = hr_dev->caps.max_srq_wrs;
@@ -219,6 +227,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
unsigned long flags;
enum ib_mtu mtu;
u32 port;
+ int ret;
port = port_num - 1;
@@ -231,8 +240,10 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num,
IB_PORT_BOOT_MGMT_SUP;
props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN;
props->pkey_tbl_len = 1;
- props->active_width = IB_WIDTH_4X;
- props->active_speed = 1;
+ ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed,
+ &props->active_width);
+ if (ret)
+ ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
@@ -358,10 +369,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_ib_alloc_ucontext ucmd = {};
- int ret;
+ int ret = -EAGAIN;
if (!hr_dev->active)
- return -EAGAIN;
+ goto error_out;
resp.qp_tab_size = hr_dev->caps.num_qps;
resp.srq_tab_size = hr_dev->caps.num_srqs;
@@ -369,7 +380,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
ret = ib_copy_from_udata(&ucmd, udata,
min(udata->inlen, sizeof(ucmd)));
if (ret)
- return ret;
+ goto error_out;
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
@@ -391,9 +402,12 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
}
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ resp.congest_type = hr_dev->caps.cong_cap;
+
ret = hns_roce_uar_alloc(hr_dev, &context->uar);
if (ret)
- goto error_fail_uar_alloc;
+ goto error_out;
ret = hns_roce_alloc_uar_entry(uctx);
if (ret)
@@ -415,12 +429,17 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
return 0;
error_fail_copy_to_udata:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
hns_roce_dealloc_uar_entry(context);
error_fail_uar_entry:
ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
-error_fail_uar_alloc:
+error_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_UCTX_ALLOC_ERR_CNT]);
+
return ret;
}
@@ -429,6 +448,10 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
struct hns_roce_dev *hr_dev = to_hr_dev(ibcontext->device);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&context->page_mutex);
+
hns_roce_dealloc_uar_entry(context);
ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
@@ -436,6 +459,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
struct rdma_user_mmap_entry *rdma_entry;
struct hns_user_mmap_entry *entry;
phys_addr_t pfn;
@@ -443,8 +467,10 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
int ret;
rdma_entry = rdma_user_mmap_entry_get_pgoff(uctx, vma->vm_pgoff);
- if (!rdma_entry)
+ if (!rdma_entry) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
return -EINVAL;
+ }
entry = to_hns_mmap(rdma_entry);
pfn = entry->address >> PAGE_SHIFT;
@@ -464,6 +490,9 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
out:
rdma_user_mmap_entry_put(rdma_entry);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MMAP_ERR_CNT]);
+
return ret;
}
@@ -512,6 +541,74 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str)
sub_minor);
}
+#define HNS_ROCE_HW_CNT(ename, cname) \
+ [HNS_ROCE_HW_##ename##_CNT].name = cname
+
+static const struct rdma_stat_desc hns_roce_port_stats_descs[] = {
+ HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"),
+ HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"),
+ HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"),
+ HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"),
+ HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"),
+ HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"),
+ HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"),
+ HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"),
+ HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"),
+ HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"),
+ HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"),
+ HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"),
+ HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"),
+ HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"),
+ HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"),
+ HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"),
+ HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"),
+ HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"),
+ HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"),
+ HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"),
+ HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"),
+ HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"),
+};
+
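HNS_ROCE_HW_CNT expands to a C designated array initializer, pinning each descriptor to its enum slot so the list cannot drift out of order with the counter enum. A tiny self-contained analogue:

    enum { CNT_RX, CNT_TX, CNT_TOTAL };

    /* initializer order is irrelevant; the indices bind the names */
    static const char *const cnt_names[CNT_TOTAL] = {
            [CNT_TX] = "tx_pkt",
            [CNT_RX] = "rx_pkt",
    };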
+static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats(
+ struct ib_device *device, u32 port_num)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+
+ if (port_num > hr_dev->caps.num_ports) {
+ ibdev_err(device, "invalid port num.\n");
+ return NULL;
+ }
+
+ return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs,
+ ARRAY_SIZE(hns_roce_port_stats_descs),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static int hns_roce_get_hw_stats(struct ib_device *device,
+ struct rdma_hw_stats *stats,
+ u32 port, int index)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ int num_counters = HNS_ROCE_HW_CNT_TOTAL;
+ int ret;
+
+ if (port == 0)
+ return 0;
+
+ if (port > hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port,
+ &num_counters);
+ if (ret) {
+ ibdev_err(device, "failed to query hw counter, ret = %d\n",
+ ret);
+ return ret;
+ }
+
+ return num_counters;
+}
+
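These two callbacks implement the RDMA core's hw-stats contract: .alloc_hw_port_stats returns a descriptor set for the port, and .get_hw_stats fills stats->value[] and reports how much it wrote. A comment-form restatement of the convention assumed here:

    /*
     * .get_hw_stats return value, as consumed by the RDMA core:
     *   < 0  -> error, counters not updated
     *   == 0 -> nothing to report (hns has no device-wide counters,
     *           hence the early return for port 0)
     *   > 0  -> number of entries written into stats->value[]
     */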
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
@@ -562,6 +659,11 @@ static const struct ib_device_ops hns_roce_dev_ops = {
INIT_RDMA_OBJ_SIZE(ib_ucontext, hns_roce_ucontext, ibucontext),
};
+static const struct ib_device_ops hns_roce_dev_hw_stats_ops = {
+ .alloc_hw_port_stats = hns_roce_alloc_hw_port_stats,
+ .get_hw_stats = hns_roce_get_hw_stats,
+};
+
static const struct ib_device_ops hns_roce_dev_mr_ops = {
.rereg_user_mr = hns_roce_rereg_user_mr,
};
@@ -599,6 +701,8 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = {
.fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw,
.fill_res_mr_entry = hns_roce_fill_res_mr_entry,
.fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw,
+ .fill_res_srq_entry = hns_roce_fill_res_srq_entry,
+ .fill_res_srq_entry_raw = hns_roce_fill_res_srq_entry_raw,
};
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
@@ -638,6 +742,10 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
ib_set_device_ops(ib_dev, &hns_roce_dev_xrcd_ops);
+ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09 &&
+ !hr_dev->is_vf)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_hw_stats_ops);
+
ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops);
@@ -832,6 +940,15 @@ err_unmap_dmpt:
return ret;
}
+static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_cleanup_bitmap(hr_dev);
+
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&hr_dev->pgdir_mutex);
+}
+
/**
* hns_roce_setup_hca - setup host channel adapter
* @hr_dev: pointer to hns roce device
@@ -880,6 +997,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
err_uar_table_free:
ida_destroy(&hr_dev->uar_ida.ida);
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
+ hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
+ mutex_destroy(&hr_dev->pgdir_mutex);
+
return ret;
}
@@ -927,6 +1048,21 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev)
spin_unlock_irqrestore(&hr_dev->qp_list_lock, flags);
}
+static int hns_roce_alloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+ hr_dev->dfx_cnt = kvcalloc(HNS_ROCE_DFX_CNT_TOTAL, sizeof(atomic64_t),
+ GFP_KERNEL);
+ if (!hr_dev->dfx_cnt)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev)
+{
+ kvfree(hr_dev->dfx_cnt);
+}
+
int hns_roce_init(struct hns_roce_dev *hr_dev)
{
struct device *dev = hr_dev->dev;
@@ -934,11 +1070,15 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
hr_dev->is_reset = false;
+ ret = hns_roce_alloc_dfx_cnt(hr_dev);
+ if (ret)
+ return ret;
+
if (hr_dev->hw->cmq_init) {
ret = hr_dev->hw->cmq_init(hr_dev);
if (ret) {
dev_err(dev, "init RoCE Command Queue failed!\n");
- return ret;
+ goto error_failed_alloc_dfx_cnt;
}
}
@@ -997,6 +1137,8 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
if (ret)
goto error_failed_register_device;
+ hns_roce_register_debugfs(hr_dev);
+
return 0;
error_failed_register_device:
@@ -1004,7 +1146,7 @@ error_failed_register_device:
hr_dev->hw->hw_exit(hr_dev);
error_failed_engine_init:
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
error_failed_setup_hca:
hns_roce_cleanup_hem(hr_dev);
@@ -1021,16 +1163,20 @@ error_failed_cmd_init:
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
+error_failed_alloc_dfx_cnt:
+ hns_roce_dealloc_dfx_cnt(hr_dev);
+
return ret;
}
void hns_roce_exit(struct hns_roce_dev *hr_dev)
{
+ hns_roce_unregister_debugfs(hr_dev);
hns_roce_unregister_device(hr_dev);
if (hr_dev->hw->hw_exit)
hr_dev->hw->hw_exit(hr_dev);
- hns_roce_cleanup_bitmap(hr_dev);
+ hns_roce_teardown_hca(hr_dev);
hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod)
@@ -1040,6 +1186,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev)
hns_roce_cmd_cleanup(hr_dev);
if (hr_dev->hw->cmq_exit)
hr_dev->hw->cmq_exit(hr_dev);
+ hns_roce_dealloc_dfx_cnt(hr_dev);
}
MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 14376490ac22..846da8c78b8b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -32,6 +32,7 @@
*/
#include <linux/vmalloc.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem.h>
#include <linux/math.h>
#include "hns_roce_device.h"
@@ -103,14 +104,21 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
buf_attr.user_access = mr->access;
/* fast MR's buffer is alloced before mapping, not at creation */
buf_attr.mtt_only = is_fast;
+ buf_attr.iova = mr->iova;
+ /* page size and hop num are fixed for fast MR */
+ buf_attr.adaptive = !is_fast;
+ buf_attr.type = MTR_PBL;
err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
udata, start);
- if (err)
+ if (err) {
ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
- else
- mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ return err;
+ }
+
+ mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;
+ mr->pbl_hop_num = buf_attr.region[0].hopnum;
return err;
}
@@ -154,7 +162,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
if (mr->type != MR_TYPE_FRMR)
ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
else
- ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
+ ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr);
if (ret) {
dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
goto err_page;
@@ -228,8 +236,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
int ret;
mr = kzalloc(sizeof(*mr), GFP_KERNEL);
- if (!mr)
- return ERR_PTR(-ENOMEM);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto err_out;
+ }
mr->iova = virt_addr;
mr->size = length;
@@ -259,6 +269,9 @@ err_alloc_key:
free_mr_key(hr_dev, mr);
err_alloc_mr:
kfree(mr);
+err_out:
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REG_ERR_CNT]);
+
return ERR_PTR(ret);
}
@@ -274,12 +287,15 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
unsigned long mtpt_idx;
int ret;
- if (!mr->enabled)
- return ERR_PTR(-EINVAL);
+ if (!mr->enabled) {
+ ret = -EINVAL;
+ goto err_out;
+ }
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
- if (IS_ERR(mailbox))
- return ERR_CAST(mailbox);
+ ret = PTR_ERR_OR_ZERO(mailbox);
+ if (ret)
+ goto err_out;
mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);
@@ -331,8 +347,12 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
free_cmd_mbox:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
- if (ret)
+err_out:
+ if (ret) {
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_MR_REREG_ERR_CNT]);
return ERR_PTR(ret);
+ }
+
return NULL;
}
@@ -421,18 +441,23 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_mr *mr = to_hr_mr(ibmr);
struct hns_roce_mtr *mtr = &mr->pbl_mtr;
- int ret = 0;
+ int ret, sg_num = 0;
+
+ if (!IS_ALIGNED(*sg_offset, HNS_ROCE_FRMR_ALIGN_SIZE) ||
+ ibmr->page_size < HNS_HW_PAGE_SIZE ||
+ ibmr->page_size > HNS_HW_MAX_PAGE_SIZE)
+ return sg_num;
mr->npages = 0;
mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
sizeof(dma_addr_t), GFP_KERNEL);
if (!mr->page_list)
- return ret;
+ return sg_num;
- ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
- if (ret < 1) {
+ sg_num = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
+ if (sg_num < 1) {
ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
- mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
+ mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, sg_num);
goto err_page_list;
}
@@ -443,17 +468,16 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
if (ret) {
ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
- ret = 0;
+ sg_num = 0;
} else {
mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
- ret = mr->npages;
}
err_page_list:
kvfree(mr->page_list);
mr->page_list = NULL;
- return ret;
+ return sg_num;
}
static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
@@ -674,7 +698,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtr->kmem = NULL;
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access);
- if (IS_ERR_OR_NULL(mtr->umem)) {
+ if (IS_ERR(mtr->umem)) {
ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
PTR_ERR(mtr->umem));
return -ENOMEM;
@@ -683,7 +707,7 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtr->umem = NULL;
mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
buf_attr->page_shift,
- mtr->hem_cfg.is_direct ?
+ !mtr_has_mtt(buf_attr) ?
HNS_ROCE_BUF_DIRECT : 0);
if (IS_ERR(mtr->kmem)) {
ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
@@ -695,21 +719,48 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return 0;
}
-static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- int page_count, unsigned int page_shift)
+static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
+{
+ struct hns_roce_buf_region *region;
+ int page_cnt = 0;
+ int i;
+
+ for (i = 0; i < mtr->hem_cfg.region_count; i++) {
+ region = &mtr->hem_cfg.region[i];
+ page_cnt += region->count;
+ }
+
+ return page_cnt;
+}
+
+static bool need_split_huge_page(struct hns_roce_mtr *mtr)
+{
+ /* When HEM buffer uses 0-level addressing, the page size is
+ * equal to the whole buffer size. If the current MTR has multiple
+ * regions, we split the buffer into small pages (4K, as required by
+ * the hns ROCEE). These pages will be used in multiple regions.
+ */
+ return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
+}
+
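The helper names the one case where a direct (0-level) allocation must still be presented to the hardware in 4K units. A worked example of the resulting buf_pg_shift math in mtr_init_buf_cfg() below, with illustrative numbers:

    /* A direct 64K buffer backing two regions:
     *   DIV_ROUND_UP(64K, 4K)                = 16 hardware pages
     *   buf_pg_shift = 12 + order_base_2(16) = 16 (one 64K "page")
     * Region offsets stay in 4K units, so region 1 can begin at
     * hardware page 8, which a single 64K page could not express.
     */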
+static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
+ int page_count = cal_mtr_pg_cnt(mtr);
+ unsigned int page_shift;
dma_addr_t *pages;
int npage;
int ret;
+ page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
+ mtr->hem_cfg.buf_pg_shift;
/* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
if (!pages)
return -ENOMEM;
if (mtr->umem)
- npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
+ npage = hns_roce_get_umem_bufs(pages, page_count,
mtr->umem, page_shift);
else
npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
@@ -722,7 +773,7 @@ static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
goto err_alloc_list;
}
- if (mtr->hem_cfg.is_direct && npage > 1) {
+ if (need_split_huge_page(mtr) && npage > 1) {
ret = mtr_check_direct_pages(pages, npage, page_shift);
if (ret) {
ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
@@ -797,47 +848,53 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
return ret;
}
-int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
- u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
+static int hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
+ u32 start_index, u64 *mtt_buf,
+ int mtt_cnt)
{
- struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
- int mtt_count, left;
- u32 start_index;
+ int mtt_count;
int total = 0;
- __le64 *mtts;
u32 npage;
u64 addr;
- if (!mtt_buf || mtt_max < 1)
- goto done;
-
- /* no mtt memory in direct mode, so just return the buffer address */
- if (cfg->is_direct) {
- start_index = offset >> HNS_HW_PAGE_SHIFT;
- for (mtt_count = 0; mtt_count < cfg->region_count &&
- total < mtt_max; mtt_count++) {
- npage = cfg->region[mtt_count].offset;
- if (npage < start_index)
- continue;
+ if (mtt_cnt > cfg->region_count)
+ return -EINVAL;
- addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
- mtt_buf[total] = addr;
+ for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
+ mtt_count++) {
+ npage = cfg->region[mtt_count].offset;
+ if (npage < start_index)
+ continue;
- total++;
- }
+ addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
+ mtt_buf[total] = addr;
- goto done;
+ total++;
}
- start_index = offset >> cfg->buf_pg_shift;
- left = mtt_max;
+ if (!total)
+ return -ENOENT;
+
+ return 0;
+}
+
+static int hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr, u32 start_index,
+ u64 *mtt_buf, int mtt_cnt)
+{
+ int left = mtt_cnt;
+ int total = 0;
+ int mtt_count;
+ __le64 *mtts;
+ u32 npage;
+
while (left > 0) {
mtt_count = 0;
mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
start_index + total,
&mtt_count);
if (!mtts || !mtt_count)
- goto done;
+ break;
npage = min(mtt_count, left);
left -= npage;
@@ -845,69 +902,165 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
}
-done:
- if (base_addr)
- *base_addr = cfg->root_ba;
+ if (!total)
+ return -ENOENT;
- return total;
+ return 0;
+}
+
+int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
+ u32 offset, u64 *mtt_buf, int mtt_max)
+{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
+ u32 start_index;
+ int ret;
+
+ if (!mtt_buf || mtt_max < 1)
+ return -EINVAL;
+
+ /* no mtt memory in direct mode, so just return the buffer address */
+ if (cfg->is_direct) {
+ start_index = offset >> HNS_HW_PAGE_SHIFT;
+ ret = hns_roce_get_direct_addr_mtt(cfg, start_index,
+ mtt_buf, mtt_max);
+ } else {
+ start_index = offset >> cfg->buf_pg_shift;
+ ret = hns_roce_get_mhop_mtt(hr_dev, mtr, start_index,
+ mtt_buf, mtt_max);
+ }
+ return ret;
+}
+
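hns_roce_mtr_find() now returns 0 or a negative errno instead of a found-count, and the root base address is fetched separately via hns_roce_get_mtr_ba(). The config_eqc() hunk earlier shows the new shape; in sketch form:

    u64 ba[MTT_MIN_COUNT] = {};
    int ret;

    ret = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba, ARRAY_SIZE(ba));
    if (ret)
            return ret;        /* no more inferring failure from a count */

    bt_ba = hns_roce_get_mtr_ba(&eq->mtr);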
+static int get_best_page_shift(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr)
+{
+ unsigned int page_sz;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL || !mtr->umem)
+ return 0;
+
+ page_sz = ib_umem_find_best_pgsz(mtr->umem,
+ hr_dev->caps.page_size_cap,
+ buf_attr->iova);
+ if (!page_sz)
+ return -EINVAL;
+
+ buf_attr->page_shift = order_base_2(page_sz);
+ return 0;
+}
+
+static int get_best_hop_num(struct hns_roce_dev *hr_dev,
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *buf_attr,
+ unsigned int ba_pg_shift)
+{
+#define INVALID_HOPNUM -1
+#define MIN_BA_CNT 1
+ size_t buf_pg_sz = 1 << buf_attr->page_shift;
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+ size_t ba_pg_sz = 1 << ba_pg_shift;
+ int hop_num = INVALID_HOPNUM;
+ size_t unit = MIN_BA_CNT;
+ size_t ba_cnt;
+ int j;
+
+ if (!buf_attr->adaptive || buf_attr->type != MTR_PBL)
+ return 0;
+
+ /* Calculate the number of buf pages; each buf page needs a BA */
+ if (mtr->umem)
+ ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);
+
+ for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) {
+ if (ba_cnt <= unit) {
+ hop_num = j;
+ break;
+ }
+ /* Number of BAs can be represented at per hop */
+ unit *= ba_pg_sz / BA_BYTE_LEN;
+ }
+
+ if (hop_num < 0) {
+ ibdev_err(ibdev, "failed to calculate a valid hopnum.\n");
+ return -EINVAL;
+ }
+
+ buf_attr->region[0].hopnum = hop_num;
+
+ return 0;
+}
+
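Each hop multiplies the number of representable base addresses by ba_pg_sz / BA_BYTE_LEN, the number of BA entries per BA page. A worked example, assuming 4K BA pages and the driver's 8-byte BA entries (512 entries per page):

    /* ba_cnt == 1           -> hop_num 0 (the root BA covers the buffer)
     * ba_cnt <= 512         -> hop_num 1
     * ba_cnt <= 512 * 512   -> hop_num 2 (up to 262144 buffer pages)
     * Larger counts keep climbing until HNS_ROCE_MAX_HOP_NUM, after
     * which the function fails with -EINVAL.
     */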
+static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
+ struct hns_roce_buf_attr *attr)
+{
+ struct ib_device *ibdev = &hr_dev->ib_dev;
+
+ if (attr->region_count > ARRAY_SIZE(attr->region) ||
+ attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
+ ibdev_err(ibdev,
+ "invalid buf attr, region count %d, page shift %u.\n",
+ attr->region_count, attr->page_shift);
+ return false;
+ }
+
+ return true;
}
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
- struct hns_roce_buf_attr *attr,
- struct hns_roce_hem_cfg *cfg,
- unsigned int *buf_page_shift, u64 unalinged_size)
+ struct hns_roce_mtr *mtr,
+ struct hns_roce_buf_attr *attr)
{
+ struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
struct hns_roce_buf_region *r;
- u64 first_region_padding;
- int page_cnt, region_cnt;
- unsigned int page_shift;
+ size_t buf_pg_sz;
size_t buf_size;
+ int page_cnt, i;
+ u64 pgoff = 0;
+
+ if (!is_buf_attr_valid(hr_dev, attr))
+ return -EINVAL;
/* If mtt is disabled, all pages must be within a continuous range */
cfg->is_direct = !mtr_has_mtt(attr);
+ cfg->region_count = attr->region_count;
buf_size = mtr_bufs_size(attr);
- if (cfg->is_direct) {
- /* When HEM buffer uses 0-level addressing, the page size is
- * equal to the whole buffer size, and we split the buffer into
- * small pages which is used to check whether the adjacent
- * units are in the continuous space and its size is fixed to
- * 4K based on hns ROCEE's requirement.
- */
- page_shift = HNS_HW_PAGE_SHIFT;
-
- /* The ROCEE requires the page size to be 4K * 2 ^ N. */
+ if (need_split_huge_page(mtr)) {
+ buf_pg_sz = HNS_HW_PAGE_SIZE;
cfg->buf_pg_count = 1;
+ /* The ROCEE requires the page size to be 4K * 2 ^ N. */
cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
- first_region_padding = 0;
} else {
- page_shift = attr->page_shift;
- cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size,
- 1 << page_shift);
- cfg->buf_pg_shift = page_shift;
- first_region_padding = unalinged_size;
+ buf_pg_sz = 1 << attr->page_shift;
+ cfg->buf_pg_count = mtr->umem ?
+ ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
+ DIV_ROUND_UP(buf_size, buf_pg_sz);
+ cfg->buf_pg_shift = attr->page_shift;
+ pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
}
/* Convert buffer size to page index and page count for each region and
* the buffer's offset needs to be appended to the first region.
*/
- for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
- region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
- r = &cfg->region[region_cnt];
+ for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
+ r = &cfg->region[i];
r->offset = page_cnt;
- buf_size = hr_hw_page_align(attr->region[region_cnt].size +
- first_region_padding);
- r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
- first_region_padding = 0;
+ buf_size = hr_hw_page_align(attr->region[i].size + pgoff);
+ if (attr->type == MTR_PBL && mtr->umem)
+ r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz);
+ else
+ r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);
+
+ pgoff = 0;
page_cnt += r->count;
- r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
- r->count);
+ r->hopnum = to_hr_hem_hopnum(attr->region[i].hopnum, r->count);
}
- cfg->region_count = region_cnt;
- *buf_page_shift = page_shift;
-
- return page_cnt;
+ return 0;
}
static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
@@ -995,50 +1148,58 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
unsigned long user_addr)
{
struct ib_device *ibdev = &hr_dev->ib_dev;
- unsigned int buf_page_shift = 0;
- int buf_page_cnt;
int ret;
- buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
- &buf_page_shift,
- udata ? user_addr & ~PAGE_MASK : 0);
- if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
- ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %u.\n",
- buf_page_cnt, buf_page_shift);
- return -EINVAL;
- }
-
- ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
- if (ret) {
- ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
- return ret;
- }
-
/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
* to finish the MTT configuration.
*/
if (buf_attr->mtt_only) {
mtr->umem = NULL;
mtr->kmem = NULL;
- return 0;
+ } else {
+ ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ if (ret) {
+ ibdev_err(ibdev,
+ "failed to alloc mtr bufs, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = get_best_page_shift(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift);
+ if (ret)
+ goto err_init_buf;
}
- ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
+ ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr);
+ if (ret)
+ goto err_init_buf;
+
+ ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
if (ret) {
- ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
- goto err_alloc_mtt;
+ ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
+ goto err_init_buf;
}
+ if (buf_attr->mtt_only)
+ return 0;
+
/* Write buffer's dma address to MTT */
- ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
- if (ret)
+ ret = mtr_map_bufs(hr_dev, mtr);
+ if (ret) {
ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
- else
- return 0;
+ goto err_alloc_mtt;
+ }
+
+ return 0;
- mtr_free_bufs(hr_dev, mtr);
err_alloc_mtt:
mtr_free_mtt(hr_dev, mtr);
+err_init_buf:
+ mtr_free_bufs(hr_dev, mtr);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 783e71852c50..d35cf59d0f43 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -149,14 +149,18 @@ int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
struct hns_roce_xrcd *xrcd = to_hr_xrcd(ib_xrcd);
int ret;
- if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC))
- return -EINVAL;
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)) {
+ ret = -EOPNOTSUPP;
+ goto err_out;
+ }
ret = hns_roce_xrcd_alloc(hr_dev, &xrcd->xrcdn);
+
+err_out:
if (ret)
- return ret;
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_XRCD_ALLOC_ERR_CNT]);
- return 0;
+ return ret;
}
int hns_roce_dealloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index d855a917f4cf..6b03ba671ff8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -170,14 +170,29 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
}
}
-static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank)
+static u8 get_affinity_cq_bank(u8 qp_bank)
{
- u32 least_load = bank[0].inuse;
+ return (qp_bank >> 1) & CQ_BANKID_MASK;
+}
+
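The affinity helper folds QP banks onto CQ banks two-to-one, and the selector below then restricts the least-loaded scan to QP banks whose completions land in the send CQ's bank. A worked example, assuming the driver's eight QP banks, four CQ banks, and a low-two-bits CQ_BANKID_MASK:

    /* get_affinity_cq_bank(): QP banks {0,1} -> CQ bank 0, {2,3} -> 1,
     * {4,5} -> 2, {6,7} -> 3.
     * With send cqn = 5: cqn & 3 = 1, so only QP banks 2 and 3 compete
     * in the scan; without a send CQ, all eight banks do.
     */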
+static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr,
+ struct hns_roce_bank *bank)
+{
+#define INVALID_LOAD_QPNUM 0xFFFFFFFF
+ struct ib_cq *scq = init_attr->send_cq;
+ u32 least_load = INVALID_LOAD_QPNUM;
+ unsigned long cqn = 0;
u8 bankid = 0;
u32 bankcnt;
u8 i;
- for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ if (scq)
+ cqn = to_hr_cq(scq)->cqn;
+
+ for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) {
+ if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK)))
+ continue;
+
bankcnt = bank[i].inuse;
if (bankcnt < least_load) {
least_load = bankcnt;
@@ -209,7 +224,8 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
return 0;
}
-static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+ struct ib_qp_init_attr *init_attr)
{
struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
unsigned long num = 0;
@@ -220,7 +236,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
num = 1;
} else {
mutex_lock(&qp_table->bank_mutex);
- bankid = get_least_load_bankid_for_qp(qp_table->bank);
+ bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank);
ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid,
&num);
@@ -394,7 +410,8 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
bankid = get_qp_bankid(hr_qp->qpn);
- ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3);
+ ida_free(&hr_dev->qp_table.bank[bankid].ida,
+ hr_qp->qpn / HNS_ROCE_QP_BANK_NUM);
mutex_lock(&hr_dev->qp_table.bank_mutex);
hr_dev->qp_table.bank[bankid].inuse--;
@@ -515,13 +532,15 @@ static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
{
unsigned int inline_sge;
- inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+ if (!max_inline_data)
+ return 0;
/*
 * If max_inline_data is less than HNS_ROCE_SGE_IN_WQE *
 * HNS_ROCE_SGE_SIZE, there is no need to extend the SGE
 * except for UD and GSI QPs.
*/
+ inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
inline_sge = 0;
@@ -988,6 +1007,60 @@ static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
kfree(hr_qp->sq.wrid);
}
+static void default_congest_type(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp)
+{
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
+ hr_qp->ibqp.qp_type == IB_QPT_GSI)
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ else
+ hr_qp->cong_type = hr_dev->caps.default_cong_type;
+}
+
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+ switch (ucmd->cong_type_flags) {
+ case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
+ hr_qp->cong_type = CONG_TYPE_DCQCN;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
+ hr_qp->cong_type = CONG_TYPE_LDCP;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_HC3:
+ hr_qp->cong_type = CONG_TYPE_HC3;
+ break;
+ case HNS_ROCE_CREATE_QP_FLAGS_DIP:
+ hr_qp->cong_type = CONG_TYPE_DIP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
+ return -EOPNOTSUPP;
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+ hr_qp->cong_type != CONG_TYPE_DCQCN)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int set_congest_param(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_ib_create_qp *ucmd)
+{
+ if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+ return set_congest_type(hr_qp, ucmd);
+
+ default_congest_type(hr_dev, hr_qp);
+
+ return 0;
+}
+
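set_congest_type() treats caps.cong_cap as a bitmap indexed by the CONG_TYPE_* values, which is what the test_bit() cast implies, so a device advertises exactly the algorithms it supports. Illustrative gating, assuming DCQCN, LDCP, HC3 and DIP occupy bits 0 through 3:

    /* cong_cap = 0x3: DCQCN and LDCP requests succeed; HC3 or DIP
     * requests fail with -EOPNOTSUPP. UD and GSI QPs are additionally
     * pinned to DCQCN regardless of the capability bits.
     */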
static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata,
@@ -1027,6 +1100,10 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+
+ ret = set_congest_param(hr_dev, hr_qp, ucmd);
+ if (ret)
+ return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
@@ -1035,20 +1112,21 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
+
+ default_congest_type(hr_dev, hr_qp);
}
return ret;
}
static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
- struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata,
struct hns_roce_qp *hr_qp)
{
struct hns_roce_ib_create_qp_resp resp = {};
struct ib_device *ibdev = &hr_dev->ib_dev;
- struct hns_roce_ib_create_qp ucmd;
+ struct hns_roce_ib_create_qp ucmd = {};
int ret;
mutex_init(&hr_qp->mutex);
@@ -1064,7 +1142,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd);
if (ret) {
ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret);
- return ret;
+ goto err_out;
}
if (!udata) {
@@ -1072,7 +1150,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
if (ret) {
ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n",
ret);
- return ret;
+ goto err_out;
}
}
@@ -1082,7 +1160,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
goto err_buf;
}
- ret = alloc_qpn(hr_dev, hr_qp);
+ ret = alloc_qpn(hr_dev, hr_qp, init_attr);
if (ret) {
ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret);
goto err_qpn;
@@ -1143,6 +1221,8 @@ err_qpn:
free_qp_buf(hr_dev, hr_qp);
err_buf:
free_kernel_wrid(hr_qp);
+err_out:
+ mutex_destroy(&hr_qp->mutex);
return ret;
}
@@ -1158,6 +1238,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
free_qp_buf(hr_dev, hr_qp);
free_kernel_wrid(hr_qp);
free_qp_db(hr_dev, hr_qp, udata);
+ mutex_destroy(&hr_qp->mutex);
}
static int check_qp_type(struct hns_roce_dev *hr_dev, enum ib_qp_type type,
@@ -1195,12 +1276,11 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
struct ib_device *ibdev = qp->device;
struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
struct hns_roce_qp *hr_qp = to_hr_qp(qp);
- struct ib_pd *pd = qp->pd;
int ret;
ret = check_qp_type(hr_dev, init_attr->qp_type, !!udata);
if (ret)
- return ret;
+ goto err_out;
if (init_attr->qp_type == IB_QPT_XRC_TGT)
hr_qp->xrcdn = to_hr_xrcd(init_attr->xrcd)->xrcdn;
@@ -1210,11 +1290,15 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port];
}
- ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp);
+ ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
if (ret)
ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
init_attr->qp_type, ret);
+err_out:
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_CREATE_ERR_CNT]);
+
return ret;
}
@@ -1306,6 +1390,7 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_ib_modify_qp_resp resp = {};
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
enum ib_qp_state cur_state, new_state;
int ret = -EINVAL;
@@ -1347,9 +1432,23 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state,
new_state, udata);
+ if (ret)
+ goto out;
+
+ if (udata && udata->outlen) {
+ resp.tc_mode = hr_qp->tc_mode;
+ resp.priority = hr_qp->sl;
+ ret = ib_copy_to_udata(udata, &resp,
+ min(udata->outlen, sizeof(resp)));
+ if (ret)
+ ibdev_err_ratelimited(&hr_dev->ib_dev,
+ "failed to copy modify qp resp.\n");
+ }
out:
mutex_unlock(&hr_qp->mutex);
+ if (ret)
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_QP_MODIFY_ERR_CNT]);
return ret;
}
@@ -1361,19 +1460,19 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq)
__acquire(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (unlikely(send_cq != NULL && recv_cq == NULL)) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (unlikely(send_cq == NULL && recv_cq != NULL)) {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
__acquire(&send_cq->lock);
} else if (send_cq == recv_cq) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
__acquire(&recv_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
- spin_lock_irq(&send_cq->lock);
+ spin_lock(&send_cq->lock);
spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
} else {
- spin_lock_irq(&recv_cq->lock);
+ spin_lock(&recv_cq->lock);
spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
}
}
@@ -1393,13 +1492,13 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,
spin_unlock(&recv_cq->lock);
} else if (send_cq == recv_cq) {
__release(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else if (send_cq->cqn < recv_cq->cqn) {
spin_unlock(&recv_cq->lock);
- spin_unlock_irq(&send_cq->lock);
+ spin_unlock(&send_cq->lock);
} else {
spin_unlock(&send_cq->lock);
- spin_unlock_irq(&recv_cq->lock);
+ spin_unlock(&recv_cq->lock);
}
}
@@ -1479,5 +1578,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
+ mutex_destroy(&hr_dev->qp_table.bank_mutex);
+ mutex_destroy(&hr_dev->qp_table.scc_mutex);
kfree(hr_dev->qp_table.idx_table.spare_idx);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 989a2af2e938..356d98816949 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -9,8 +9,6 @@
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
-#define MAX_ENTRY_NUM 256
-
int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq)
{
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
@@ -47,8 +45,6 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
struct hns_roce_v2_cq_context context;
- u32 data[MAX_ENTRY_NUM] = {};
- int offset = 0;
int ret;
if (!hr_dev->hw->query_cqc)
@@ -58,23 +54,7 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq)
if (ret)
return -EINVAL;
- data[offset++] = hr_reg_read(&context, CQC_CQ_ST);
- data[offset++] = hr_reg_read(&context, CQC_SHIFT);
- data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE);
- data[offset++] = hr_reg_read(&context, CQC_CQE_CNT);
- data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX);
- data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX);
- data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN);
- data[offset++] = hr_reg_read(&context, CQC_ARM_ST);
- data[offset++] = hr_reg_read(&context, CQC_CMD_SN);
- data[offset++] = hr_reg_read(&context, CQC_CEQN);
- data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT);
- data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD);
- data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM);
- data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ);
- data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ);
-
- ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
return ret;
}
@@ -117,54 +97,34 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp);
- struct hns_roce_v2_qp_context context;
- u32 data[MAX_ENTRY_NUM] = {};
- int offset = 0;
+ struct hns_roce_full_qp_ctx {
+ struct hns_roce_v2_qp_context qpc;
+ struct hns_roce_v2_scc_context sccc;
+ } context = {};
int ret;
if (!hr_dev->hw->query_qpc)
return -EINVAL;
- ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context);
+ ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context.qpc);
if (ret)
- return -EINVAL;
+ return ret;
+
+ /* If SCC is disabled or the query fails, the queried SCCC will
+ * be all 0.
+ */
+ if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL) ||
+ !hr_dev->hw->query_sccc)
+ goto out;
+
+ ret = hr_dev->hw->query_sccc(hr_dev, hr_qp->qpn, &context.sccc);
+ if (ret)
+ ibdev_warn_ratelimited(&hr_dev->ib_dev,
+ "failed to query SCCC, ret = %d.\n",
+ ret);
- data[offset++] = hr_reg_read(&context, QPC_QP_ST);
- data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE);
- data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG);
- data[offset++] = hr_reg_read(&context, QPC_SRQ_EN);
- data[offset++] = hr_reg_read(&context, QPC_SRQN);
- data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD);
- data[offset++] = hr_reg_read(&context, QPC_TX_CQN);
- data[offset++] = hr_reg_read(&context, QPC_RX_CQN);
- data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX);
- data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX);
- data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN);
- data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX);
- data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX);
- data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT);
- data[offset++] = hr_reg_read(&context, QPC_RQWS);
- data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT);
- data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT);
- data[offset++] = hr_reg_read(&context, QPC_SQ_HOP_NUM);
- data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM);
- data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM);
- data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ);
- data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ);
- data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT);
- data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT);
- data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN);
- data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN);
- data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX);
- data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX);
- data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR);
- data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR);
- data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR);
- data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR);
- data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX);
- data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR);
-
- ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+out:
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
return ret;
}
@@ -204,8 +164,6 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device);
struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr);
struct hns_roce_v2_mpt_entry context;
- u32 data[MAX_ENTRY_NUM] = {};
- int offset = 0;
int ret;
if (!hr_dev->hw->query_mpt)
@@ -215,17 +173,56 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr)
if (ret)
return -EINVAL;
- data[offset++] = hr_reg_read(&context, MPT_ST);
- data[offset++] = hr_reg_read(&context, MPT_PD);
- data[offset++] = hr_reg_read(&context, MPT_LKEY);
- data[offset++] = hr_reg_read(&context, MPT_LEN_L);
- data[offset++] = hr_reg_read(&context, MPT_LEN_H);
- data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE);
- data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM);
- data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ);
- data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ);
-
- ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data);
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
+
+ return ret;
+}
+
+int hns_roce_fill_res_srq_entry(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct hns_roce_srq *hr_srq = to_hr_srq(ib_srq);
+ struct nlattr *table_attr;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ return -EMSGSIZE;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "srqn", hr_srq->srqn))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "wqe_cnt", hr_srq->wqe_cnt))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "max_gs", hr_srq->max_gs))
+ goto err;
+
+ if (rdma_nl_put_driver_u32_hex(msg, "xrcdn", hr_srq->xrcdn))
+ goto err;
+
+ nla_nest_end(msg, table_attr);
+
+ return 0;
+
+err:
+ nla_nest_cancel(msg, table_attr);
+ return -EMSGSIZE;
+}
+
+int hns_roce_fill_res_srq_entry_raw(struct sk_buff *msg, struct ib_srq *ib_srq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+ struct hns_roce_srq *hr_srq = to_hr_srq(ib_srq);
+ struct hns_roce_srq_context context;
+ int ret;
+
+ if (!hr_dev->hw->query_srqc)
+ return -EINVAL;
+
+ ret = hr_dev->hw->query_srqc(hr_dev, hr_srq->srqn, &context);
+ if (ret)
+ return ret;
+
+ ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context);
return ret;
}
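The restrack conversion above swaps the per-field hr_reg_read() dumps for a single nla_put() of the raw hardware context, so newly added context fields are exported without further driver churn. A minimal sketch of the resulting pattern (hypothetical helper name, assuming the kernel netlink headers):

	static int hns_fill_res_raw(struct sk_buff *msg, const void *ctx, int len)
	{
		/* Emit the opaque hardware context as one attribute;
		 * userspace tooling decodes the layout offline.
		 */
		return nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, len, ctx);
	}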
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 8dae98f827eb..c9b8233f4b05 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_ioctl.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
@@ -122,7 +123,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
return ret;
}
- ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ ret = xa_err(xa_store_irq(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
if (ret) {
ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret);
goto err_put;
@@ -135,7 +136,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
return 0;
err_xa:
- xa_erase(&srq_table->xa, srq->srqn);
+ xa_erase_irq(&srq_table->xa, srq->srqn);
err_put:
hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
@@ -153,7 +154,7 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n",
ret, srq->srqn);
- xa_erase(&srq_table->xa, srq->srqn);
+ xa_erase_irq(&srq_table->xa, srq->srqn);
if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
@@ -249,7 +250,7 @@ static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
}
-static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+static int alloc_srq_wrid(struct hns_roce_srq *srq)
{
srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
if (!srq->wrid)
@@ -296,7 +297,7 @@ static int set_srq_basic_param(struct hns_roce_srq *srq,
max_sge = proc_srq_sge(hr_dev, srq, !!udata);
if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
- attr->max_sge > max_sge) {
+ attr->max_sge > max_sge || !attr->max_sge) {
ibdev_err(&hr_dev->ib_dev,
"invalid SRQ attr, depth = %u, sge = %u.\n",
attr->max_wr, attr->max_sge);
@@ -365,7 +366,7 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
goto err_idx;
if (!udata) {
- ret = alloc_srq_wrid(hr_dev, srq);
+ ret = alloc_srq_wrid(srq);
if (ret)
goto err_wqe_buf;
}
@@ -387,6 +388,79 @@ static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
free_srq_idx(hr_dev, srq);
}
+static int get_srq_ucmd(struct hns_roce_srq *srq, struct ib_udata *udata,
+ struct hns_roce_ib_create_srq *ucmd)
+{
+ struct ib_device *ibdev = srq->ibsrq.device;
+ int ret;
+
+ ret = ib_copy_from_udata(ucmd, udata, min(udata->inlen, sizeof(*ucmd)));
+ if (ret) {
+ ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void free_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata)
+{
+ struct hns_roce_ucontext *uctx;
+
+ if (!(srq->cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB))
+ return;
+
+ srq->cap_flags &= ~HNS_ROCE_SRQ_CAP_RECORD_DB;
+ if (udata) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext,
+ ibucontext);
+ hns_roce_db_unmap_user(uctx, &srq->rdb);
+ } else {
+ hns_roce_free_db(hr_dev, &srq->rdb);
+ }
+}
+
+static int alloc_srq_db(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+ struct ib_udata *udata,
+ struct hns_roce_ib_create_srq_resp *resp)
+{
+ struct hns_roce_ib_create_srq ucmd = {};
+ struct hns_roce_ucontext *uctx;
+ int ret;
+
+ if (udata) {
+ ret = get_srq_ucmd(srq, udata, &ucmd);
+ if (ret)
+ return ret;
+
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB) &&
+ (ucmd.req_cap_flags & HNS_ROCE_SRQ_CAP_RECORD_DB)) {
+ uctx = rdma_udata_to_drv_context(udata,
+ struct hns_roce_ucontext, ibucontext);
+ ret = hns_roce_db_map_user(uctx, ucmd.db_addr,
+ &srq->rdb);
+ if (ret)
+ return ret;
+
+ srq->cap_flags |= HNS_ROCE_RSP_SRQ_CAP_RECORD_DB;
+ }
+ } else {
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB) {
+ ret = hns_roce_alloc_db(hr_dev, &srq->rdb, 1);
+ if (ret)
+ return ret;
+
+ *srq->rdb.db_record = 0;
+ srq->cap_flags |= HNS_ROCE_RSP_SRQ_CAP_RECORD_DB;
+ }
+ srq->db_reg = hr_dev->reg_base + SRQ_DB_REG;
+ }
+
+ return 0;
+}
+
int hns_roce_create_srq(struct ib_srq *ib_srq,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
@@ -401,21 +475,26 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
ret = set_srq_param(srq, init_attr, udata);
if (ret)
- return ret;
+ goto err_out;
ret = alloc_srq_buf(hr_dev, srq, udata);
if (ret)
- return ret;
+ goto err_out;
- ret = alloc_srqn(hr_dev, srq);
+ ret = alloc_srq_db(hr_dev, srq, udata, &resp);
if (ret)
goto err_srq_buf;
+ ret = alloc_srqn(hr_dev, srq);
+ if (ret)
+ goto err_srq_db;
+
ret = alloc_srqc(hr_dev, srq);
if (ret)
goto err_srqn;
if (udata) {
+ resp.cap_flags = srq->cap_flags;
resp.srqn = srq->srqn;
if (ib_copy_to_udata(udata, &resp,
min(udata->outlen, sizeof(resp)))) {
@@ -424,7 +503,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
}
}
- srq->db_reg = hr_dev->reg_base + SRQ_DB_REG;
srq->event = hns_roce_ib_srq_event;
refcount_set(&srq->refcount, 1);
init_completion(&srq->free);
@@ -435,8 +513,13 @@ err_srqc:
free_srqc(hr_dev, srq);
err_srqn:
free_srqn(hr_dev, srq);
+err_srq_db:
+ free_srq_db(hr_dev, srq, udata);
err_srq_buf:
free_srq_buf(hr_dev, srq);
+err_out:
+ mutex_destroy(&srq->mutex);
+ atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_SRQ_CREATE_ERR_CNT]);
return ret;
}
@@ -448,7 +531,9 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
free_srqc(hr_dev, srq);
free_srqn(hr_dev, srq);
+ free_srq_db(hr_dev, srq, udata);
free_srq_buf(hr_dev, srq);
+ mutex_destroy(&srq->mutex);
return 0;
}
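alloc_srq_db() above takes either the user path (map the doorbell page the application passed in ucmd.db_addr) or the kernel path (allocate a record doorbell, keeping the MMIO register at SRQ_DB_REG). A hedged sketch of the record-doorbell idea, assuming hardware samples a DMA-visible u32; the 16-bit producer-index field below is a hypothetical layout:

	static void srq_ring_record_db(struct hns_roce_srq *srq, u32 pi)
	{
		/* Write the producer index to memory the device polls,
		 * avoiding an MMIO doorbell write on every post.
		 */
		*srq->rdb.db_record = pi & 0xffff;
	}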
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index 70009b970e08..36bb7e5ce638 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
#include "trace.h"
@@ -1555,22 +1555,56 @@ static int irdma_del_multiple_qhash(struct irdma_device *iwdev,
return ret;
}
+static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4)
+{
+ struct net_device *ndev = NULL;
+
+ rcu_read_lock();
+ if (ipv4) {
+ ndev = ip_dev_find(&init_net, htonl(loc_addr[0]));
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ struct net_device *ip_dev;
+ struct in6_addr laddr6;
+
+ irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, loc_addr);
+
+ for_each_netdev_rcu(&init_net, ip_dev) {
+ if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) {
+ ndev = ip_dev;
+ break;
+ }
+ }
+ }
+
+ if (!ndev)
+ goto done;
+ if (is_vlan_dev(ndev))
+ prio = (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK)
+ >> VLAN_PRIO_SHIFT;
+ if (ipv4)
+ dev_put(ndev);
+
+done:
+ rcu_read_unlock();
+
+ return prio;
+}
+
/**
- * irdma_netdev_vlan_ipv6 - Gets the netdev and mac
+ * irdma_get_vlan_mac_ipv6 - Gets the vlan and mac
* @addr: local IPv6 address
* @vlan_id: vlan id for the given IPv6 address
* @mac: mac address for the given IPv6 address
*
- * Returns the net_device of the IPv6 address and also sets the
- * vlan id and mac for that address.
+ * Sets the vlan id and mac for the given IPv6 address.
*/
-struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
+void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
{
struct net_device *ip_dev = NULL;
struct in6_addr laddr6;
if (!IS_ENABLED(CONFIG_IPV6))
- return NULL;
+ return;
irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
if (vlan_id)
@@ -1589,8 +1623,6 @@ struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
}
}
rcu_read_unlock();
-
- return ip_dev;
}
/**
@@ -1667,6 +1699,12 @@ static int irdma_add_mqh_6(struct irdma_device *iwdev,
ifp->addr.in6_u.u6_addr32);
memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
sizeof(cm_info->loc_addr));
+ if (!iwdev->vsi.dscp_mode)
+ cm_info->user_pri =
+ irdma_iw_get_vlan_prio(child_listen_node->loc_addr,
+ cm_info->user_pri,
+ false);
+
ret = irdma_manage_qhash(iwdev, cm_info,
IRDMA_QHASH_TYPE_TCP_SYN,
IRDMA_QHASH_MANAGE_TYPE_ADD,
@@ -1751,6 +1789,11 @@ static int irdma_add_mqh_4(struct irdma_device *iwdev,
ntohl(ifa->ifa_address);
memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
sizeof(cm_info->loc_addr));
+ if (!iwdev->vsi.dscp_mode)
+ cm_info->user_pri =
+ irdma_iw_get_vlan_prio(child_listen_node->loc_addr,
+ cm_info->user_pri,
+ true);
ret = irdma_manage_qhash(iwdev, cm_info,
IRDMA_QHASH_TYPE_TCP_SYN,
IRDMA_QHASH_MANAGE_TYPE_ADD,
@@ -1942,7 +1985,8 @@ static int irdma_addr_resolve_neigh(struct irdma_device *iwdev, u32 src_ip,
__be32 dst_ipaddr = htonl(dst_ip);
__be32 src_ipaddr = htonl(src_ip);
- rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0);
+ rt = ip_route_output(&init_net, dst_ipaddr, src_ipaddr, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt)) {
ibdev_dbg(&iwdev->ibdev, "CM: ip_route_output fail\n");
return -EINVAL;
@@ -2219,6 +2263,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
} else {
cm_node->tos = max(listener->tos, cm_info->tos);
cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ cm_node->user_pri =
+ irdma_iw_get_vlan_prio(cm_info->loc_addr,
+ cm_node->user_pri,
+ cm_info->ipv4);
}
ibdev_dbg(&iwdev->ibdev,
"DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos,
@@ -3577,7 +3625,6 @@ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp)
iwqp->ietf_mem.size, iwqp->ietf_mem.va,
iwqp->ietf_mem.pa);
iwqp->ietf_mem.va = NULL;
- iwqp->ietf_mem.va = NULL;
}
}
@@ -3617,8 +3664,8 @@ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr);
} else {
cm_node->ipv4 = false;
- irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id,
- NULL);
+ irdma_get_vlan_mac_ipv6(cm_node->loc_addr, &cm_node->vlan_id,
+ NULL);
}
ibdev_dbg(&iwdev->ibdev, "CM: Accept vlan_id=%d\n",
cm_node->vlan_id);
@@ -3826,17 +3873,21 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
raddr6->sin6_addr.in6_u.u6_addr32);
cm_info.loc_port = ntohs(laddr6->sin6_port);
cm_info.rem_port = ntohs(raddr6->sin6_port);
- irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id,
- NULL);
+ irdma_get_vlan_mac_ipv6(cm_info.loc_addr, &cm_info.vlan_id,
+ NULL);
}
cm_info.cm_id = cm_id;
cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
cm_info.tos = cm_id->tos;
- if (iwdev->vsi.dscp_mode)
+ if (iwdev->vsi.dscp_mode) {
cm_info.user_pri =
iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)];
- else
+ } else {
cm_info.user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = irdma_iw_get_vlan_prio(cm_info.loc_addr,
+ cm_info.user_pri,
+ cm_info.ipv4);
+ }
if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri))
return -ENOMEM;
@@ -3952,8 +4003,8 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
laddr6->sin6_addr.in6_u.u6_addr32);
cm_info.loc_port = ntohs(laddr6->sin6_port);
if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) {
- irdma_netdev_vlan_ipv6(cm_info.loc_addr,
- &cm_info.vlan_id, NULL);
+ irdma_get_vlan_mac_ipv6(cm_info.loc_addr,
+ &cm_info.vlan_id, NULL);
} else {
cm_info.vlan_id = 0xFFFF;
wildcard = true;
@@ -3980,7 +4031,7 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_listen_node->tos = cm_id->tos;
if (iwdev->vsi.dscp_mode)
cm_listen_node->user_pri =
- iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)];
+ iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)];
else
cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
cm_info.user_pri = cm_listen_node->user_pri;
@@ -3990,6 +4041,12 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
if (err)
goto error;
} else {
+ if (!iwdev->vsi.dscp_mode)
+ cm_listen_node->user_pri =
+ irdma_iw_get_vlan_prio(cm_info.loc_addr,
+ cm_info.user_pri,
+ cm_info.ipv4);
+ cm_info.user_pri = cm_listen_node->user_pri;
err = irdma_manage_qhash(iwdev, &cm_info,
IRDMA_QHASH_TYPE_TCP_SYN,
IRDMA_QHASH_MANAGE_TYPE_ADD,
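irdma_iw_get_vlan_prio() above resolves the netdev owning the local address and, when it is a VLAN device, remaps the socket priority through the egress QoS table. The remap step in isolation (a sketch, assuming <linux/if_vlan.h>):

	static u8 vlan_prio_from_egress_map(struct net_device *ndev, u8 prio)
	{
		u16 tci;

		if (!is_vlan_dev(ndev))
			return prio;
		/* egress TCI for this skb priority; PCP lives in bits 15:13 */
		tci = vlan_dev_get_egress_qos_mask(ndev, prio);
		return (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
	}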
diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h
index 7feadb3e1eda..48ee285cf745 100644
--- a/drivers/infiniband/hw/irdma/cm.h
+++ b/drivers/infiniband/hw/irdma/cm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_CM_H
#define IRDMA_CM_H
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index 45e3344daa04..6aed6169c07d 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include <linux/etherdevice.h>
@@ -1061,6 +1061,9 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
u64 hdr;
enum irdma_page_size page_size;
+ if (!info->total_len && !info->all_memory)
+ return -EINVAL;
+
if (info->page_size == 0x40000000)
page_size = IRDMA_PAGE_SIZE_1G;
else if (info->page_size == 0x200000)
@@ -1126,6 +1129,9 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev,
u8 addr_type;
enum irdma_page_size page_size;
+ if (!info->total_len && !info->all_memory)
+ return -EINVAL;
+
if (info->page_size == 0x40000000)
page_size = IRDMA_PAGE_SIZE_1G;
else if (info->page_size == 0x200000)
@@ -1301,7 +1307,6 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
sq_info.wr_id = info->wr_id;
sq_info.signaled = info->signaled;
- sq_info.push_wqe = info->push_wqe;
wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx,
IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info);
@@ -1335,7 +1340,6 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
FIELD_PREP(IRDMAQPSQ_HPAGESIZE, page_size) |
FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, info->access_rights) |
FIELD_PREP(IRDMAQPSQ_VABASEDTO, info->addr_type) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, (sq_info.push_wqe ? 1 : 0)) |
FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -1346,13 +1350,9 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
print_hex_dump_debug("WQE: FAST_REG WQE", DUMP_PREFIX_OFFSET, 16, 8,
wqe, IRDMA_QP_WQE_MIN_SIZE, false);
- if (sq_info.push_wqe) {
- irdma_qp_push_wqe(&qp->qp_uk, wqe, IRDMA_QP_WQE_MIN_QUANTA,
- wqe_idx, post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(&qp->qp_uk);
- }
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(&qp->qp_uk);
return 0;
}
@@ -4007,7 +4007,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
{
u64 temp, compl_ctx;
__le64 *aeqe;
- u16 wqe_idx;
u8 ae_src;
u8 polarity;
@@ -4027,7 +4026,7 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
aeqe, 16, false);
ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp);
- wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp);
+ info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp);
info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) |
((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18);
info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp);
@@ -4110,7 +4109,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
case IRDMA_AE_SOURCE_RQ_0011:
info->qp = true;
info->rq = true;
- info->wqe_idx = wqe_idx;
info->compl_ctx = compl_ctx;
break;
case IRDMA_AE_SOURCE_CQ:
@@ -4124,7 +4122,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
case IRDMA_AE_SOURCE_SQ_0111:
info->qp = true;
info->sq = true;
- info->wqe_idx = wqe_idx;
info->compl_ctx = compl_ctx;
break;
case IRDMA_AE_SOURCE_IN_RR_WR:
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
index d06e45d2c23f..2cb4b96db721 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_DEFS_H
#define IRDMA_DEFS_H
@@ -346,6 +346,7 @@ enum irdma_cqp_op_type {
#define IRDMA_AE_LLP_TOO_MANY_KEEPALIVE_RETRIES 0x050b
#define IRDMA_AE_LLP_DOUBT_REACHABILITY 0x050c
#define IRDMA_AE_LLP_CONNECTION_ESTABLISHED 0x050e
+#define IRDMA_AE_LLP_TOO_MANY_RNRS 0x050f
#define IRDMA_AE_RESOURCE_EXHAUSTION 0x0520
#define IRDMA_AE_RESET_SENT 0x0601
#define IRDMA_AE_TERMINATE_SENT 0x0602
diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c
index 49307ce8c4da..ac58088a8e41 100644
--- a/drivers/infiniband/hw/irdma/hmc.c
+++ b/drivers/infiniband/hw/irdma/hmc.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
#include "hmc.h"
diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h
index f5c5dacc7021..415f9e23bbf6 100644
--- a/drivers/infiniband/hw/irdma/hmc.h
+++ b/drivers/infiniband/hw/irdma/hmc.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2020 Intel Corporation */
#ifndef IRDMA_HMC_H
#define IRDMA_HMC_H
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 457368e324e1..ad50b77282f8 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
@@ -219,7 +219,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
struct irdma_aeqe_info *info = &aeinfo;
int ret;
struct irdma_qp *iwqp = NULL;
- struct irdma_sc_cq *cq = NULL;
struct irdma_cq *iwcq = NULL;
struct irdma_sc_qp *qp = NULL;
struct irdma_qp_host_ctx_info *ctx_info = NULL;
@@ -322,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
break;
case IRDMA_AE_QP_SUSPEND_COMPLETE:
if (iwqp->iwdev->vsi.tc_change_pending) {
- atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs);
+ if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs))
+ wake_up(&iwqp->iwdev->suspend_wq);
+ }
+ if (iwqp->suspend_pending) {
+ iwqp->suspend_pending = false;
wake_up(&iwqp->iwdev->suspend_wq);
}
break;
@@ -336,10 +339,18 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
ibdev_err(&iwdev->ibdev,
"Processing an iWARP related AE for CQ misc = 0x%04X\n",
info->ae_id);
- cq = (struct irdma_sc_cq *)(unsigned long)
- info->compl_ctx;
- iwcq = cq->back_cq;
+ spin_lock_irqsave(&rf->cqtable_lock, flags);
+ iwcq = rf->cq_table[info->qp_cq_id];
+ if (!iwcq) {
+ spin_unlock_irqrestore(&rf->cqtable_lock,
+ flags);
+ ibdev_dbg(to_ibdev(dev),
+ "cq_id %d is already freed\n", info->qp_cq_id);
+ continue;
+ }
+ irdma_cq_add_ref(&iwcq->ibcq);
+ spin_unlock_irqrestore(&rf->cqtable_lock, flags);
if (iwcq->ibcq.event_handler) {
struct ib_event ibevent;
@@ -350,6 +361,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
iwcq->ibcq.event_handler(&ibevent,
iwcq->ibcq.cq_context);
}
+ irdma_cq_rem_ref(&iwcq->ibcq);
break;
case IRDMA_AE_RESET_NOT_SENT:
case IRDMA_AE_LLP_DOUBT_REACHABILITY:
@@ -375,6 +387,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf)
case IRDMA_AE_LLP_TOO_MANY_RETRIES:
case IRDMA_AE_LCE_QP_CATASTROPHIC:
case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC:
+ case IRDMA_AE_LLP_TOO_MANY_RNRS:
case IRDMA_AE_LCE_CQ_CATASTROPHIC:
case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG:
default:
@@ -558,26 +571,29 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf,
dev->irq_ops->irdma_dis_irq(dev, msix_vec->idx);
irq_update_affinity_hint(msix_vec->irq, NULL);
free_irq(msix_vec->irq, dev_id);
+ if (rf == dev_id) {
+ tasklet_kill(&rf->dpc_tasklet);
+ } else {
+ struct irdma_ceq *iwceq = (struct irdma_ceq *)dev_id;
+
+ tasklet_kill(&iwceq->dpc_tasklet);
+ }
}
/**
* irdma_destroy_cqp - destroy control qp
* @rf: RDMA PCI function
- * @free_hwcqp: 1 if hw cqp should be freed
*
* Issue destroy cqp request and
* free the resources associated with the cqp
*/
-static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp)
+static void irdma_destroy_cqp(struct irdma_pci_f *rf)
{
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_cqp *cqp = &rf->cqp;
int status = 0;
- if (rf->cqp_cmpl_wq)
- destroy_workqueue(rf->cqp_cmpl_wq);
- if (free_hwcqp)
- status = irdma_sc_cqp_destroy(dev->cqp);
+ status = irdma_sc_cqp_destroy(dev->cqp);
if (status)
ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status);
@@ -741,6 +757,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf)
struct irdma_ccq *ccq = &rf->ccq;
int status = 0;
+ if (rf->cqp_cmpl_wq)
+ destroy_workqueue(rf->cqp_cmpl_wq);
+
if (!rf->reset)
status = irdma_sc_ccq_destroy(dev->ccq, 0, true);
if (status)
@@ -921,8 +940,8 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL);
if (!cqp->scratch_array) {
- kfree(cqp->cqp_requests);
- return -ENOMEM;
+ status = -ENOMEM;
+ goto err_scratch;
}
dev->cqp = &cqp->sc_cqp;
@@ -932,15 +951,14 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
cqp->sq.va = dma_alloc_coherent(dev->hw->device, cqp->sq.size,
&cqp->sq.pa, GFP_KERNEL);
if (!cqp->sq.va) {
- kfree(cqp->scratch_array);
- kfree(cqp->cqp_requests);
- return -ENOMEM;
+ status = -ENOMEM;
+ goto err_sq;
}
status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx),
IRDMA_HOST_CTX_ALIGNMENT_M);
if (status)
- goto exit;
+ goto err_ctx;
dev->cqp->host_ctx_pa = mem.pa;
dev->cqp->host_ctx = mem.va;
@@ -966,7 +984,7 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info);
if (status) {
ibdev_dbg(to_ibdev(dev), "ERR: cqp init status %d\n", status);
- goto exit;
+ goto err_ctx;
}
spin_lock_init(&cqp->req_lock);
@@ -977,7 +995,7 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
ibdev_dbg(to_ibdev(dev),
"ERR: cqp create failed - status %d maj_err %d min_err %d\n",
status, maj_err, min_err);
- goto exit;
+ goto err_ctx;
}
INIT_LIST_HEAD(&cqp->cqp_avail_reqs);
@@ -991,8 +1009,16 @@ static int irdma_create_cqp(struct irdma_pci_f *rf)
init_waitqueue_head(&cqp->remove_wq);
return 0;
-exit:
- irdma_destroy_cqp(rf, false);
+err_ctx:
+ dma_free_coherent(dev->hw->device, cqp->sq.size,
+ cqp->sq.va, cqp->sq.pa);
+ cqp->sq.va = NULL;
+err_sq:
+ kfree(cqp->scratch_array);
+ cqp->scratch_array = NULL;
+err_scratch:
+ kfree(cqp->cqp_requests);
+ cqp->cqp_requests = NULL;
return status;
}
@@ -1166,7 +1192,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
int status;
struct irdma_ceq_init_info info = {};
struct irdma_sc_dev *dev = &rf->sc_dev;
- u64 scratch;
u32 ceq_size;
info.ceq_id = ceq_id;
@@ -1187,14 +1212,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq,
iwceq->sc_ceq.ceq_id = ceq_id;
info.dev = dev;
info.vsi = vsi;
- scratch = (uintptr_t)&rf->cqp.sc_cqp;
status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info);
if (!status) {
if (dev->ceq_valid)
status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq,
IRDMA_OP_CEQ_CREATE);
else
- status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch);
+ status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0);
}
if (status) {
@@ -1549,7 +1573,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf)
kfree(dev->hmc_info->sd_table.sd_entry);
dev->hmc_info->sd_table.sd_entry = NULL;
- kfree(rf->mem_rsrc);
+ vfree(rf->mem_rsrc);
rf->mem_rsrc = NULL;
dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va,
rf->obj_mem.pa);
@@ -1747,7 +1771,7 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf)
rf->reset, rf->rdma_ver);
fallthrough;
case CQP_CREATED:
- irdma_destroy_cqp(rf, true);
+ irdma_destroy_cqp(rf);
fallthrough;
case INITIAL_STATE:
irdma_del_init_mem(rf);
@@ -1945,10 +1969,12 @@ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf)
rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)];
rf->qp_table = (struct irdma_qp **)
(&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]);
+ rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]);
spin_lock_init(&rf->rsrc_lock);
spin_lock_init(&rf->arp_lock);
spin_lock_init(&rf->qptable_lock);
+ spin_lock_init(&rf->cqtable_lock);
spin_lock_init(&rf->qh_list_lock);
}
@@ -1969,6 +1995,7 @@ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf)
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah);
rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg);
rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp;
+ rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq;
return rsrc_size;
}
@@ -2002,10 +2029,10 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
rf->max_mcg = rf->max_qp;
rsrc_size = irdma_calc_mem_rsrc_size(rf);
- rf->mem_rsrc = kzalloc(rsrc_size, GFP_KERNEL);
+ rf->mem_rsrc = vzalloc(rsrc_size);
if (!rf->mem_rsrc) {
ret = -ENOMEM;
- goto mem_rsrc_kzalloc_fail;
+ goto mem_rsrc_vzalloc_fail;
}
rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc;
@@ -2033,7 +2060,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf)
return 0;
-mem_rsrc_kzalloc_fail:
+mem_rsrc_vzalloc_fail:
bitmap_free(rf->allocated_ws_nodes);
rf->allocated_ws_nodes = NULL;
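The kzalloc() to vzalloc() switch above matters because rsrc_size now also covers a cq_table scaling with max_cq; on large device configs the single allocation can exceed what the page allocator will reliably hand out as physically contiguous memory. The trade-off in miniature (assumes <linux/vmalloc.h>; tbl is illustrative, not from the patch):

	void *tbl = vzalloc(rsrc_size); /* zeroed, virtually contiguous, may sleep */

	if (!tbl)
		return -ENOMEM;
	/* Suitable for CPU-side lookup tables only: vmalloc memory is not
	 * DMA-safe without extra mapping work, which this table never needs.
	 */
	vfree(tbl);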
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c
index 37a40fb4d0d7..ce61a27cb1f6 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.c
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
#include "type.h"
@@ -254,5 +254,6 @@ void i40iw_init_hw(struct irdma_sc_dev *dev)
dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_1;
dev->hw_attrs.max_hw_outbound_msg_size = I40IW_MAX_OUTBOUND_MSG_SIZE;
dev->hw_attrs.max_hw_inbound_msg_size = I40IW_MAX_INBOUND_MSG_SIZE;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = I40IW_MIN_WQ_SIZE;
dev->hw_attrs.max_qp_wr = I40IW_MAX_QP_WRS;
}
diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h
index 1c438b3593ea..e1db84d8a62c 100644
--- a/drivers/infiniband/hw/irdma/i40iw_hw.h
+++ b/drivers/infiniband/hw/irdma/i40iw_hw.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef I40IW_HW_H
#define I40IW_HW_H
@@ -140,11 +140,11 @@ enum i40iw_device_caps_const {
I40IW_MAX_CQ_SIZE = 1048575,
I40IW_MAX_OUTBOUND_MSG_SIZE = 2147483647,
I40IW_MAX_INBOUND_MSG_SIZE = 2147483647,
+ I40IW_MIN_WQ_SIZE = 4 /* WQEs */,
};
#define I40IW_QP_WQE_MIN_SIZE 32
#define I40IW_QP_WQE_MAX_SIZE 128
-#define I40IW_QP_SW_MIN_WQSIZE 4
#define I40IW_MAX_RQ_WQE_SHIFT 2
#define I40IW_MAX_QUANTA_PER_WR 2
diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c
index 4053ead32416..cc50a7070371 100644
--- a/drivers/infiniband/hw/irdma/i40iw_if.c
+++ b/drivers/infiniband/hw/irdma/i40iw_if.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
#include "i40iw_hw.h"
@@ -186,7 +186,7 @@ static int i40iw_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
aux_dev);
struct i40e_info *cdev_info = i40e_adev->ldev;
- strncpy(i40iw_client.name, "irdma", I40E_CLIENT_STR_LENGTH);
+ strscpy_pad(i40iw_client.name, "irdma", I40E_CLIENT_STR_LENGTH);
i40e_client_device_register(cdev_info, &i40iw_client);
return 0;
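The strncpy() to strscpy_pad() change above closes two classic gaps at once: guaranteed NUL termination and a zero-filled tail. A sketch (assumes <linux/string.h>; the warning is illustrative):

	char name[I40E_CLIENT_STR_LENGTH];

	/* strscpy_pad() NUL-terminates, zero-fills the remainder and
	 * returns -E2BIG if the source had to be truncated.
	 */
	if (strscpy_pad(name, "irdma", sizeof(name)) < 0)
		pr_warn("client name truncated\n");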
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c
index 298d14905993..941d3edffadb 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.c
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2017 - 2021 Intel Corporation */
#include "osdep.h"
#include "type.h"
@@ -195,6 +195,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev)
dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_2;
+ dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE;
dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR;
dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE |
IRDMA_FEATURE_CQ_RESIZE;
diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h
index b65c463abf0b..697b9572b5c6 100644
--- a/drivers/infiniband/hw/irdma/icrdma_hw.h
+++ b/drivers/infiniband/hw/irdma/icrdma_hw.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2017 - 2021 Intel Corporation */
#ifndef ICRDMA_HW_H
#define ICRDMA_HW_H
@@ -64,6 +64,7 @@ enum icrdma_device_caps_const {
ICRDMA_MAX_IRD_SIZE = 127,
ICRDMA_MAX_ORD_SIZE = 255,
+ ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */,
};
diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h
index 173e2dc2fc35..20d2e7393e3d 100644
--- a/drivers/infiniband/hw/irdma/irdma.h
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2017 - 2021 Intel Corporation */
#ifndef IRDMA_H
#define IRDMA_H
@@ -119,6 +119,7 @@ struct irdma_uk_attrs {
u32 min_hw_cq_size;
u32 max_hw_cq_size;
u16 max_hw_sq_chunk;
+ u16 min_hw_wq_size;
u8 hw_rev;
};
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 514453777e07..3f13200ff71b 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
#include "../../../net/ethernet/intel/ice/ice.h"
@@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev)
/* Wait for all qp's to suspend */
wait_event_timeout(iwdev->suspend_wq,
!atomic_read(&iwdev->vsi.qp_suspend_reqs),
- IRDMA_EVENT_TIMEOUT);
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
irdma_ws_reset(&iwdev->vsi);
}
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index 2323962cdeac..9f0ed6e84471 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_MAIN_H
#define IRDMA_MAIN_H
@@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define MAX_DPC_ITERATIONS 128
-#define IRDMA_EVENT_TIMEOUT 50000
+#define IRDMA_EVENT_TIMEOUT_MS 5000
#define IRDMA_VCHNL_EVENT_TIMEOUT 100000
#define IRDMA_RST_TIMEOUT_HZ 4
@@ -239,7 +239,7 @@ struct irdma_qv_info {
struct irdma_qvlist_info {
u32 num_vectors;
- struct irdma_qv_info qv_info[1];
+ struct irdma_qv_info qv_info[] __counted_by(num_vectors);
};
struct irdma_gen_ops {
@@ -309,7 +309,9 @@ struct irdma_pci_f {
spinlock_t arp_lock; /*protect ARP table access*/
spinlock_t rsrc_lock; /* protect HW resource array access */
spinlock_t qptable_lock; /*protect QP table access*/
+ spinlock_t cqtable_lock; /*protect CQ table access*/
struct irdma_qp **qp_table;
+ struct irdma_cq **cq_table;
spinlock_t qh_list_lock; /* protect mc_qht_list */
struct mc_table_list mc_qht_list;
struct irdma_msix_vector *iw_msixtbl;
@@ -500,6 +502,8 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
struct ib_udata *udata);
int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
+void irdma_cq_add_ref(struct ib_cq *ibcq);
+void irdma_cq_rem_ref(struct ib_cq *ibcq);
void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq);
void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf);
@@ -529,7 +533,7 @@ void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp,
void irdma_copy_ip_ntohl(u32 *dst, __be32 *src);
void irdma_copy_ip_htonl(__be32 *dst, u32 *src);
u16 irdma_get_vlan_ipv4(u32 *addr);
-struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac);
+void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac);
struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size,
int acc, u64 *iova_start);
int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw);
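The qv_info[1] to qv_info[] __counted_by(num_vectors) conversion above retires the old one-element-array idiom. How such a structure is typically sized and allocated, as a sketch (assumes <linux/overflow.h>; the allocation site is illustrative, not part of this patch):

	struct irdma_qvlist_info *info;

	/* struct_size() computes header plus N trailing elements with
	 * overflow checking; __counted_by lets FORTIFY/UBSAN bounds-check
	 * info->qv_info[i] against info->num_vectors at runtime.
	 */
	info = kzalloc(struct_size(info, qv_info, num_vectors), GFP_KERNEL);
	if (!info)
		return -ENOMEM;
	info->num_vectors = num_vectors;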
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
index fc1ba2a3e6fb..e1e3d3ae72b7 100644
--- a/drivers/infiniband/hw/irdma/osdep.h
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_OSDEP_H
#define IRDMA_OSDEP_H
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
index c0bef11436b9..e7ce6840755f 100644
--- a/drivers/infiniband/hw/irdma/pble.c
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
#include "hmc.h"
diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h
index b31b7c5d66fe..160ad728e9fb 100644
--- a/drivers/infiniband/hw/irdma/pble.h
+++ b/drivers/infiniband/hw/irdma/pble.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2019 Intel Corporation */
#ifndef IRDMA_PBLE_H
#define IRDMA_PBLE_H
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
index 113096b60323..d7c8ea948bcd 100644
--- a/drivers/infiniband/hw/irdma/protos.h
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2016 - 2021 Intel Corporation */
#ifndef IRDMA_PROTOS_H
#define IRDMA_PROTOS_H
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
index 562531712ea4..7e3f9bca2c23 100644
--- a/drivers/infiniband/hw/irdma/puda.c
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
#include "hmc.h"
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
index 5f5124db6ddf..bc6d9514c9c1 100644
--- a/drivers/infiniband/hw/irdma/puda.h
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2020 Intel Corporation */
#ifndef IRDMA_PUDA_H
#define IRDMA_PUDA_H
diff --git a/drivers/infiniband/hw/irdma/trace.c b/drivers/infiniband/hw/irdma/trace.c
index b5133f4137e0..fc2f56697741 100644
--- a/drivers/infiniband/hw/irdma/trace.c
+++ b/drivers/infiniband/hw/irdma/trace.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Intel Corporation */
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/drivers/infiniband/hw/irdma/trace.h b/drivers/infiniband/hw/irdma/trace.h
index 702e4efb018d..b8085a66b9f8 100644
--- a/drivers/infiniband/hw/irdma/trace.h
+++ b/drivers/infiniband/hw/irdma/trace.h
@@ -1,3 +1,3 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Intel Corporation */
#include "trace_cm.h"
diff --git a/drivers/infiniband/hw/irdma/trace_cm.h b/drivers/infiniband/hw/irdma/trace_cm.h
index f633fb343328..0d1699b55241 100644
--- a/drivers/infiniband/hw/irdma/trace_cm.h
+++ b/drivers/infiniband/hw/irdma/trace_cm.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 - 2021 Intel Corporation */
#if !defined(__TRACE_CM_H) || defined(TRACE_HEADER_MULTI_READ)
#define __TRACE_CM_H
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
index a20709577ab0..59b34afa867b 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_TYPE_H
#define IRDMA_TYPE_H
@@ -971,6 +971,7 @@ struct irdma_allocate_stag_info {
bool remote_access:1;
bool use_hmc_fcn_index:1;
bool use_pf_rid:1;
+ bool all_memory:1;
u8 hmc_fcn_index;
};
@@ -998,6 +999,7 @@ struct irdma_reg_ns_stag_info {
bool use_hmc_fcn_index:1;
u8 hmc_fcn_index;
bool use_pf_rid:1;
+ bool all_memory:1;
};
struct irdma_fast_reg_stag_info {
@@ -1017,7 +1019,6 @@ struct irdma_fast_reg_stag_info {
bool local_fence:1;
bool read_fence:1;
bool signaled:1;
- bool push_wqe:1;
bool use_hmc_fcn_index:1;
u8 hmc_fcn_index;
bool use_pf_rid:1;
diff --git a/drivers/infiniband/hw/irdma/uda.c b/drivers/infiniband/hw/irdma/uda.c
index 284cec2a74de..84051266d948 100644
--- a/drivers/infiniband/hw/irdma/uda.c
+++ b/drivers/infiniband/hw/irdma/uda.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2016 - 2021 Intel Corporation */
#include <linux/etherdevice.h>
diff --git a/drivers/infiniband/hw/irdma/uda.h b/drivers/infiniband/hw/irdma/uda.h
index fe4820ff0cca..27b8701cf21b 100644
--- a/drivers/infiniband/hw/irdma/uda.h
+++ b/drivers/infiniband/hw/irdma/uda.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2016 - 2021 Intel Corporation */
#ifndef IRDMA_UDA_H
#define IRDMA_UDA_H
diff --git a/drivers/infiniband/hw/irdma/uda_d.h b/drivers/infiniband/hw/irdma/uda_d.h
index bfc81cac2c51..5a9e6eabf032 100644
--- a/drivers/infiniband/hw/irdma/uda_d.h
+++ b/drivers/infiniband/hw/irdma/uda_d.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2016 - 2021 Intel Corporation */
#ifndef IRDMA_UDA_D_H
#define IRDMA_UDA_D_H
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index 280d633d4ec4..38c54e59cc2e 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "osdep.h"
#include "defs.h"
@@ -127,10 +127,7 @@ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp)
hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp);
sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
if (sw_sq_head != qp->initial_ring.head) {
- if (qp->push_dropped) {
- writel(qp->qp_id, qp->wqe_alloc_db);
- qp->push_dropped = false;
- } else if (sw_sq_head != hw_sq_tail) {
+ if (sw_sq_head != hw_sq_tail) {
if (sw_sq_head > qp->initial_ring.head) {
if (hw_sq_tail >= qp->initial_ring.head &&
hw_sq_tail < sw_sq_head)
@@ -147,38 +144,6 @@ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp)
}
/**
- * irdma_qp_ring_push_db - ring qp doorbell
- * @qp: hw qp ptr
- * @wqe_idx: wqe index
- */
-static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx)
-{
- set_32bit_val(qp->push_db, 0,
- FIELD_PREP(IRDMA_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | qp->qp_id);
- qp->initial_ring.head = qp->sq_ring.head;
- qp->push_mode = true;
- qp->push_dropped = false;
-}
-
-void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta,
- u32 wqe_idx, bool post_sq)
-{
- __le64 *push;
-
- if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) !=
- IRDMA_RING_CURRENT_TAIL(qp->sq_ring) &&
- !qp->push_mode) {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- } else {
- push = (__le64 *)((uintptr_t)qp->push_wqe +
- (wqe_idx & 0x7) * 0x20);
- memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE);
- irdma_qp_ring_push_db(qp, wqe_idx);
- }
-}
-
-/**
* irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go
* @qp: hw qp ptr
* @wqe_idx: return wqe index
@@ -192,7 +157,6 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
{
__le64 *wqe;
__le64 *wqe_0 = NULL;
- u32 nop_wqe_idx;
u16 avail_quanta;
u16 i;
@@ -209,14 +173,10 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx,
IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring))
return NULL;
- nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
for (i = 0; i < avail_quanta; i++) {
irdma_nop_1(qp);
IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring);
}
- if (qp->push_db && info->push_wqe)
- irdma_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem,
- avail_quanta, nop_wqe_idx, true);
}
*wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
@@ -282,8 +242,6 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
bool read_fence = false;
u16 quanta;
- info->push_wqe = qp->push_db ? true : false;
-
op_info = &info->op.rdma_write;
if (op_info->num_lo_sges > qp->max_sq_frag_cnt)
return -EINVAL;
@@ -344,7 +302,6 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) |
FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) |
FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) |
FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -353,12 +310,9 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
dma_wmb(); /* make sure WQE is populated before valid bit is set */
set_64bit_val(wqe, 24, hdr);
- if (info->push_wqe) {
- irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- }
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
return 0;
}
@@ -383,8 +337,6 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
u16 quanta;
u64 hdr;
- info->push_wqe = qp->push_db ? true : false;
-
op_info = &info->op.rdma_read;
if (qp->max_sq_frag_cnt < op_info->num_lo_sges)
return -EINVAL;
@@ -431,7 +383,6 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
FIELD_PREP(IRDMAQPSQ_OPCODE,
(inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) |
FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -440,12 +391,9 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
dma_wmb(); /* make sure WQE is populated before valid bit is set */
set_64bit_val(wqe, 24, hdr);
- if (info->push_wqe) {
- irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- }
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
return 0;
}
@@ -468,8 +416,6 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
bool read_fence = false;
u16 quanta;
- info->push_wqe = qp->push_db ? true : false;
-
op_info = &info->op.send;
if (qp->max_sq_frag_cnt < op_info->num_sges)
return -EINVAL;
@@ -530,7 +476,6 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) |
FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -541,12 +486,9 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info,
dma_wmb(); /* make sure WQE is populated before valid bit is set */
set_64bit_val(wqe, 24, hdr);
- if (info->push_wqe) {
- irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- }
+
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
return 0;
}
@@ -720,7 +662,6 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
u32 i, total_size = 0;
u16 quanta;
- info->push_wqe = qp->push_db ? true : false;
op_info = &info->op.rdma_write;
if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges))
@@ -750,7 +691,6 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) |
FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe ? 1 : 0) |
FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -767,12 +707,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
set_64bit_val(wqe, 24, hdr);
- if (info->push_wqe) {
- irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- }
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
return 0;
}
@@ -794,7 +730,6 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
u32 i, total_size = 0;
u16 quanta;
- info->push_wqe = qp->push_db ? true : false;
op_info = &info->op.send;
if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges))
@@ -827,7 +762,6 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
(info->imm_data_valid ? 1 : 0)) |
FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) |
FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -845,12 +779,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
set_64bit_val(wqe, 24, hdr);
- if (info->push_wqe) {
- irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- }
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
return 0;
}
@@ -872,7 +802,6 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
bool local_fence = false;
struct ib_sge sge = {};
- info->push_wqe = qp->push_db ? true : false;
op_info = &info->op.inv_local_stag;
local_fence = info->local_fence;
@@ -889,7 +818,6 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
set_64bit_val(wqe, 16, 0);
hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) |
- FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) |
FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) |
FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
@@ -899,13 +827,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
set_64bit_val(wqe, 24, hdr);
- if (info->push_wqe) {
- irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx,
- post_sq);
- } else {
- if (post_sq)
- irdma_uk_qp_post_wr(qp);
- }
+ if (post_sq)
+ irdma_uk_qp_post_wr(qp);
return 0;
}
@@ -1124,7 +1047,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
- info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3);
info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
if (info->error) {
info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3);
@@ -1213,11 +1135,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
return irdma_uk_cq_poll_cmpl(cq, info);
}
}
- /*cease posting push mode on push drop*/
- if (info->push_dropped) {
- qp->push_mode = false;
- qp->push_dropped = true;
- }
if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) {
info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
if (!info->comp_status)
@@ -1349,10 +1266,12 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge,
int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
u32 *sqdepth)
{
+ u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+
*sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD);
- if (*sqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift))
- *sqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift;
+ if (*sqdepth < min_size)
+ *sqdepth = min_size;
else if (*sqdepth > uk_attrs->max_hw_wq_quanta)
return -EINVAL;
@@ -1369,10 +1288,12 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
u32 *rqdepth)
{
+ u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift;
+
*rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD);
- if (*rqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift))
- *rqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift;
+ if (*rqdepth < min_size)
+ *rqdepth = min_size;
else if (*rqdepth > uk_attrs->max_hw_rq_quanta)
return -EINVAL;
@@ -1415,6 +1336,78 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp,
}
/**
+ * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ
+ * @ukinfo: qp initialization info
+ * @sq_shift: Returns shift of SQ
+ * @rq_shift: Returns shift of RQ
+ */
+void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift,
+ u8 *rq_shift)
+{
+ bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs,
+ imm_support ? ukinfo->max_sq_frag_cnt + 1 :
+ ukinfo->max_sq_frag_cnt,
+ ukinfo->max_inline_data, sq_shift);
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0,
+ rq_shift);
+
+ if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) {
+ if (ukinfo->abi_ver > 4)
+ *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
+ }
+}
+
+/**
+ * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size.
+ * @ukinfo: qp initialization info
+ * @sq_depth: Returns depth of SQ
+ * @sq_shift: Returns shift of SQ
+ */
+int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *sq_depth, u8 *sq_shift)
+{
+ bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2;
+ int status;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs,
+ imm_support ? ukinfo->max_sq_frag_cnt + 1 :
+ ukinfo->max_sq_frag_cnt,
+ ukinfo->max_inline_data, sq_shift);
+ status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size,
+ *sq_shift, sq_depth);
+
+ return status;
+}
+
+/**
+ * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size.
+ * @ukinfo: qp initialization info
+ * @rq_depth: Returns depth of RQ
+ * @rq_shift: Returns shift of RQ
+ */
+int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *rq_depth, u8 *rq_shift)
+{
+ int status;
+
+ irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0,
+ rq_shift);
+
+ if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) {
+ if (ukinfo->abi_ver > 4)
+ *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
+ }
+
+ status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size,
+ *rq_shift, rq_depth);
+
+ return status;
+}
+
+/**
* irdma_uk_qp_init - initialize shared qp
* @qp: hw qp (user and kernel)
* @info: qp initialization info
@@ -1428,23 +1421,12 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
{
int ret_code = 0;
u32 sq_ring_size;
- u8 sqshift, rqshift;
qp->uk_attrs = info->uk_attrs;
if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags ||
info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags)
return -EINVAL;
- irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift);
- if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) {
- irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt,
- info->max_inline_data, &sqshift);
- if (info->abi_ver > 4)
- rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
- } else {
- irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt + 1,
- info->max_inline_data, &sqshift);
- }
qp->qp_caps = info->qp_caps;
qp->sq_base = info->sq;
qp->rq_base = info->rq;
@@ -1456,9 +1438,8 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
qp->wqe_alloc_db = info->wqe_alloc_db;
qp->qp_id = info->qp_id;
qp->sq_size = info->sq_size;
- qp->push_mode = false;
qp->max_sq_frag_cnt = info->max_sq_frag_cnt;
- sq_ring_size = qp->sq_size << sqshift;
+ sq_ring_size = qp->sq_size << info->sq_shift;
IRDMA_RING_INIT(qp->sq_ring, sq_ring_size);
IRDMA_RING_INIT(qp->initial_ring, sq_ring_size);
if (info->first_sq_wq) {
@@ -1473,9 +1454,9 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info)
qp->rq_size = info->rq_size;
qp->max_rq_frag_cnt = info->max_rq_frag_cnt;
qp->max_inline_data = info->max_inline_data;
- qp->rq_wqe_size = rqshift;
+ qp->rq_wqe_size = info->rq_shift;
IRDMA_RING_INIT(qp->rq_ring, qp->rq_size);
- qp->rq_wqe_size_multiplier = 1 << rqshift;
+ qp->rq_wqe_size_multiplier = 1 << info->rq_shift;
if (qp->uk_attrs->hw_rev == IRDMA_GEN_1)
qp->wqe_ops = iw_wqe_uk_ops_gen_1;
else
@@ -1554,7 +1535,6 @@ int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq)
u32 wqe_idx;
struct irdma_post_sq_info info = {};
- info.push_wqe = false;
info.wr_id = wr_id;
wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA,
0, &info);
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
index d0cdf609f5e0..380e4a47aede 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2020 Intel Corporation */
#ifndef IRDMA_USER_H
#define IRDMA_USER_H
@@ -85,6 +85,7 @@ enum irdma_device_caps_const {
IRDMA_Q2_BUF_SIZE = 256,
IRDMA_QP_CTX_SIZE = 256,
IRDMA_MAX_PDS = 262144,
+ IRDMA_MIN_WQ_SIZE_GEN2 = 8,
};
enum irdma_addressing_type {
@@ -215,7 +216,6 @@ struct irdma_post_sq_info {
bool local_fence:1;
bool inline_data:1;
bool imm_data_valid:1;
- bool push_wqe:1;
bool report_rtt:1;
bool udp_hdr:1;
bool defer_flag:1;
@@ -247,7 +247,6 @@ struct irdma_cq_poll_info {
u8 op_type;
u8 q_type;
bool stag_invalid_set:1; /* or L_R_Key set */
- bool push_dropped:1;
bool error:1;
bool solicited_event:1;
bool ipv4:1;
@@ -295,6 +294,12 @@ void irdma_uk_cq_init(struct irdma_cq_uk *cq,
struct irdma_cq_uk_init_info *info);
int irdma_uk_qp_init(struct irdma_qp_uk *qp,
struct irdma_qp_uk_init_info *info);
+void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift,
+ u8 *rq_shift);
+int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *sq_depth, u8 *sq_shift);
+int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo,
+ u32 *rq_depth, u8 *rq_shift);
struct irdma_sq_uk_wr_trk_info {
u64 wrid;
u32 wr_len;
@@ -314,8 +319,6 @@ struct irdma_qp_uk {
struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array;
u64 *rq_wrid_array;
__le64 *shadow_area;
- __le32 *push_db;
- __le64 *push_wqe;
struct irdma_ring sq_ring;
struct irdma_ring rq_ring;
struct irdma_ring initial_ring;
@@ -335,8 +338,6 @@ struct irdma_qp_uk {
u8 rq_wqe_size;
u8 rq_wqe_size_multiplier;
bool deferred_flag:1;
- bool push_mode:1; /* whether the last post wqe was pushed */
- bool push_dropped:1;
bool first_sq_wq:1;
bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */
bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */
@@ -374,8 +375,12 @@ struct irdma_qp_uk_init_info {
u32 max_sq_frag_cnt;
u32 max_rq_frag_cnt;
u32 max_inline_data;
+ u32 sq_depth;
+ u32 rq_depth;
u8 first_sq_wq;
u8 type;
+ u8 sq_shift;
+ u8 rq_shift;
int abi_ver;
bool legacy_mode;
};
@@ -404,7 +409,5 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift,
u32 *wqdepth);
int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift,
u32 *wqdepth);
-void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta,
- u32 wqe_idx, bool post_sq);
void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx);
#endif /* IRDMA_USER_H */
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index eb083f70b09f..0422787592d8 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
@@ -760,6 +760,31 @@ void irdma_qp_rem_ref(struct ib_qp *ibqp)
complete(&iwqp->free_qp);
}
+void irdma_cq_add_ref(struct ib_cq *ibcq)
+{
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+
+ refcount_inc(&iwcq->refcnt);
+}
+
+void irdma_cq_rem_ref(struct ib_cq *ibcq)
+{
+ struct ib_device *ibdev = ibcq->device;
+ struct irdma_device *iwdev = to_iwdev(ibdev);
+ struct irdma_cq *iwcq = to_iwcq(ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&iwdev->rf->cqtable_lock, flags);
+ if (!refcount_dec_and_test(&iwcq->refcnt)) {
+ spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
+ return;
+ }
+
+ iwdev->rf->cq_table[iwcq->cq_num] = NULL;
+ spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags);
+ complete(&iwcq->free_cq);
+}
+
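The new reference pair exists so that event-side users of cq_table can pin a CQ across its use; a minimal sketch of such a lookup follows, an assumption about the consumer since the table readers land elsewhere in this series:

static void example_deliver_cq_event(struct irdma_pci_f *rf, u32 cq_num)
{
	struct irdma_cq *iwcq;
	unsigned long flags;

	spin_lock_irqsave(&rf->cqtable_lock, flags);
	iwcq = rf->cq_table[cq_num];
	if (!iwcq) {
		spin_unlock_irqrestore(&rf->cqtable_lock, flags);
		return;
	}
	irdma_cq_add_ref(&iwcq->ibcq);
	spin_unlock_irqrestore(&rf->cqtable_lock, flags);

	/* ... run the completion/async handler ... */

	irdma_cq_rem_ref(&iwcq->ibcq);	/* final ref completes free_cq */
}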
struct ib_device *to_ibdev(struct irdma_sc_dev *dev)
{
return &(container_of(dev, struct irdma_pci_f, sc_dev))->iwdev->ibdev;
@@ -1368,17 +1393,12 @@ int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
u32 val)
{
u32 crc = 0;
- int ret;
- int ret_code = 0;
- crypto_shash_init(desc);
- ret = crypto_shash_update(desc, addr, len);
- if (!ret)
- crypto_shash_final(desc, (u8 *)&crc);
+ crypto_shash_digest(desc, addr, len, (u8 *)&crc);
if (crc != val)
- ret_code = -EINVAL;
+ return -EINVAL;
- return ret_code;
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 9c4fe4fa9001..eeb932e58730 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
@@ -277,7 +277,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
struct irdma_alloc_ucontext_req req = {};
struct irdma_alloc_ucontext_resp uresp = {};
struct irdma_ucontext *ucontext = to_ucontext(uctx);
- struct irdma_uk_attrs *uk_attrs;
+ struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN ||
udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN)
@@ -292,7 +292,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
ucontext->iwdev = iwdev;
ucontext->abi_ver = req.userspace_ver;
- uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
+ if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR)
+ ucontext->use_raw_attrs = true;
+
/* GEN_1 legacy support with libi40iw */
if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) {
if (uk_attrs->hw_rev != IRDMA_GEN_1)
@@ -327,6 +329,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx,
uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size;
uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size;
uresp.hw_rev = uk_attrs->hw_rev;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR;
+ uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size;
+ uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE;
if (ib_copy_to_udata(udata, &uresp,
min(sizeof(uresp), udata->outlen))) {
rdma_user_mmap_entry_remove(ucontext->db_mmap_entry);
@@ -567,6 +572,87 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev,
}
/**
+ * irdma_setup_umode_qp - setup sq and rq size in user mode qp
+ * @udata: user data
+ * @iwdev: iwarp device
+ * @iwqp: qp ptr
+ * @info: initialize info to return
+ * @init_attr: Initial QP create attributes
+ */
+static int irdma_setup_umode_qp(struct ib_udata *udata,
+ struct irdma_device *iwdev,
+ struct irdma_qp *iwqp,
+ struct irdma_qp_init_info *info,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata,
+ struct irdma_ucontext, ibucontext);
+ struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
+ struct irdma_create_qp_req req;
+ unsigned long flags;
+ int ret;
+
+ ret = ib_copy_from_udata(&req, udata,
+ min(sizeof(req), udata->inlen));
+ if (ret) {
+ ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n");
+ return ret;
+ }
+
+ iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
+ iwqp->user_mode = 1;
+ if (req.user_wqe_bufs) {
+ info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode;
+ spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
+ iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwqp->iwpbl) {
+ ret = -ENODATA;
+ ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n");
+ return ret;
+ }
+ }
+
+ if (!ucontext->use_raw_attrs) {
+ /*
+ * Maintain backward compat with older ABI which passes sq and
+ * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr.
+ * There is no way to compute the correct value of
+ * iwqp->max_send_wr/max_recv_wr in the kernel.
+ */
+ iwqp->max_send_wr = init_attr->cap.max_send_wr;
+ iwqp->max_recv_wr = init_attr->cap.max_recv_wr;
+ ukinfo->sq_size = init_attr->cap.max_send_wr;
+ ukinfo->rq_size = init_attr->cap.max_recv_wr;
+ irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift,
+ &ukinfo->rq_shift);
+ } else {
+ ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
+ &ukinfo->sq_shift);
+ if (ret)
+ return ret;
+
+ ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
+ &ukinfo->rq_shift);
+ if (ret)
+ return ret;
+
+ iwqp->max_send_wr =
+ (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
+ iwqp->max_recv_wr =
+ (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
+ ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
+ ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+ }
+
+ irdma_setup_virt_qp(iwdev, iwqp, info);
+
+ return 0;
+}
+
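To make the raw-attribute arithmetic above concrete, here is a worked example with illustrative numbers; it assumes IRDMA_SQ_RSVD == 1 (the real constant lives in user.h):

/*
 * Illustrative numbers only, assuming IRDMA_SQ_RSVD == 1:
 *
 *   sq_depth = 128 quanta, sq_shift = 2  ->  one WQE spans 4 quanta
 *   ukinfo->sq_size   = 128 >> 2         =  32 WQE slots
 *   iwqp->max_send_wr = (128 - 1) >> 2   =  31 postable WRs
 *
 * The legacy (!use_raw_attrs) branch cannot do this math because old
 * ABIs already passed the depths in quanta via cap.max_send_wr.
 */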
+/**
* irdma_setup_kmode_qp - setup initialization for kernel mode qp
* @iwdev: iwarp device
* @iwqp: qp ptr (user or kernel)
@@ -579,40 +665,28 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
struct ib_qp_init_attr *init_attr)
{
struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem;
- u32 sqdepth, rqdepth;
- u8 sqshift, rqshift;
u32 size;
int status;
struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
- struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs;
- irdma_get_wqe_shift(uk_attrs,
- uk_attrs->hw_rev >= IRDMA_GEN_2 ? ukinfo->max_sq_frag_cnt + 1 :
- ukinfo->max_sq_frag_cnt,
- ukinfo->max_inline_data, &sqshift);
- status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift,
- &sqdepth);
+ status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth,
+ &ukinfo->sq_shift);
if (status)
return status;
- if (uk_attrs->hw_rev == IRDMA_GEN_1)
- rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
- else
- irdma_get_wqe_shift(uk_attrs, ukinfo->max_rq_frag_cnt, 0,
- &rqshift);
-
- status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift,
- &rqdepth);
+ status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth,
+ &ukinfo->rq_shift);
if (status)
return status;
iwqp->kqp.sq_wrid_mem =
- kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL);
+ kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL);
if (!iwqp->kqp.sq_wrid_mem)
return -ENOMEM;
iwqp->kqp.rq_wrid_mem =
- kcalloc(rqdepth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL);
+ kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL);
+
if (!iwqp->kqp.rq_wrid_mem) {
kfree(iwqp->kqp.sq_wrid_mem);
iwqp->kqp.sq_wrid_mem = NULL;
@@ -622,7 +696,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem;
ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem;
- size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE;
+ size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE;
size += (IRDMA_SHADOW_AREA_SIZE << 3);
mem->size = ALIGN(size, 256);
@@ -638,16 +712,18 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev,
ukinfo->sq = mem->va;
info->sq_pa = mem->pa;
- ukinfo->rq = &ukinfo->sq[sqdepth];
- info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE);
- ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
- info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE);
- ukinfo->sq_size = sqdepth >> sqshift;
- ukinfo->rq_size = rqdepth >> rqshift;
- ukinfo->qp_id = iwqp->ibqp.qp_num;
-
- init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift;
- init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift;
+ ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth];
+ info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE);
+ ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem;
+ info->shadow_area_pa =
+ info->rq_pa + (ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE);
+ ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift;
+ ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift;
+
+ iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift;
+ iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift;
+ init_attr->cap.max_send_wr = iwqp->max_send_wr;
+ init_attr->cap.max_recv_wr = iwqp->max_recv_wr;
return 0;
}
@@ -762,7 +838,9 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
if (init_attr->cap.max_inline_data > uk_attrs->max_hw_inline ||
init_attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
- init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags)
+ init_attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
+ init_attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
+ init_attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta)
return -EINVAL;
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
@@ -803,18 +881,14 @@ static int irdma_create_qp(struct ib_qp *ibqp,
struct irdma_device *iwdev = to_iwdev(ibpd->device);
struct irdma_pci_f *rf = iwdev->rf;
struct irdma_qp *iwqp = to_iwqp(ibqp);
- struct irdma_create_qp_req req = {};
struct irdma_create_qp_resp uresp = {};
u32 qp_num = 0;
int err_code;
- int sq_size;
- int rq_size;
struct irdma_sc_qp *qp;
struct irdma_sc_dev *dev = &rf->sc_dev;
struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs;
struct irdma_qp_init_info init_info = {};
struct irdma_qp_host_ctx_info *ctx_info;
- unsigned long flags;
err_code = irdma_validate_qp_attrs(init_attr, iwdev);
if (err_code)
@@ -824,13 +898,10 @@ static int irdma_create_qp(struct ib_qp *ibqp,
udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN))
return -EINVAL;
- sq_size = init_attr->cap.max_send_wr;
- rq_size = init_attr->cap.max_recv_wr;
-
init_info.vsi = &iwdev->vsi;
init_info.qp_uk_init_info.uk_attrs = uk_attrs;
- init_info.qp_uk_init_info.sq_size = sq_size;
- init_info.qp_uk_init_info.rq_size = rq_size;
+ init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr;
+ init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr;
init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
@@ -872,7 +943,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
iwqp->host_ctx.size = IRDMA_QP_CTX_SIZE;
init_info.pd = &iwpd->sc_pd;
- init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
+ init_info.qp_uk_init_info.qp_id = qp_num;
if (!rdma_protocol_roce(&iwdev->ibdev, 1))
init_info.qp_uk_init_info.first_sq_wq = 1;
iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
@@ -880,36 +951,9 @@ static int irdma_create_qp(struct ib_qp *ibqp,
init_waitqueue_head(&iwqp->mod_qp_waitq);
if (udata) {
- err_code = ib_copy_from_udata(&req, udata,
- min(sizeof(req), udata->inlen));
- if (err_code) {
- ibdev_dbg(&iwdev->ibdev,
- "VERBS: ib_copy_from_data fail\n");
- goto error;
- }
-
- iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
- iwqp->user_mode = 1;
- if (req.user_wqe_bufs) {
- struct irdma_ucontext *ucontext =
- rdma_udata_to_drv_context(udata,
- struct irdma_ucontext,
- ibucontext);
-
- init_info.qp_uk_init_info.legacy_mode = ucontext->legacy_mode;
- spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
- iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs,
- &ucontext->qp_reg_mem_list);
- spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
-
- if (!iwqp->iwpbl) {
- err_code = -ENODATA;
- ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n");
- goto error;
- }
- }
init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
- irdma_setup_virt_qp(iwdev, iwqp, &init_info);
+ err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info,
+ init_attr);
} else {
INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
@@ -962,10 +1006,8 @@ static int irdma_create_qp(struct ib_qp *ibqp,
refcount_set(&iwqp->refcnt, 1);
spin_lock_init(&iwqp->lock);
spin_lock_init(&iwqp->sc_qp.pfpdu.lock);
- iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+ iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
rf->qp_table[qp_num] = iwqp;
- iwqp->max_send_wr = sq_size;
- iwqp->max_recv_wr = rq_size;
if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
if (dev->ws_add(&iwdev->vsi, 0)) {
@@ -986,8 +1028,8 @@ static int irdma_create_qp(struct ib_qp *ibqp,
if (rdma_protocol_iwarp(&iwdev->ibdev, 1))
uresp.lsmm = 1;
}
- uresp.actual_sq_size = sq_size;
- uresp.actual_rq_size = rq_size;
+ uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size;
+ uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size;
uresp.qp_id = qp_num;
uresp.qp_caps = qp->qp_uk.qp_caps;
@@ -1098,6 +1140,39 @@ static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
return 0;
}
+static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio)
+{
+ struct net_device *ndev;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev)
+ goto exit;
+ if (is_vlan_dev(ndev)) {
+ u16 vlan_qos = vlan_dev_get_egress_qos_mask(ndev, prio);
+
+ prio = (vlan_qos & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ }
+exit:
+ rcu_read_unlock();
+ return prio;
+}
+
+static int irdma_wait_for_suspend(struct irdma_qp *iwqp)
+{
+ if (!wait_event_timeout(iwqp->iwdev->suspend_wq,
+ !iwqp->suspend_pending,
+ msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) {
+ iwqp->suspend_pending = false;
+ ibdev_warn(&iwqp->iwdev->ibdev,
+ "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n",
+ iwqp->ibqp.qp_num, iwqp->last_aeq);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
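irdma_wait_for_suspend() only makes sense together with a producer that clears the flag and wakes the queue; the sketch below shows that completion side, an assumption here since the wake-up lands in the async-event handler outside this hunk:

static void example_qp_suspend_complete(struct irdma_qp *iwqp)
{
	/* pairs with the wait_event_timeout() in irdma_wait_for_suspend() */
	iwqp->suspend_pending = false;
	wake_up(&iwqp->iwdev->suspend_wq);
}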
/**
* irdma_modify_qp_roce - modify qp request
* @ibqp: qp's pointer for modify
@@ -1174,7 +1249,8 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_AV) {
struct irdma_av *av = &iwqp->roce_ah.av;
- const struct ib_gid_attr *sgid_attr;
+ const struct ib_gid_attr *sgid_attr =
+ attr->ah_attr.grh.sgid_attr;
u16 vlan_id = VLAN_N_VID;
u32 local_ip[4];
@@ -1189,17 +1265,22 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
roce_info->dest_qp);
irdma_qp_rem_qos(&iwqp->sc_qp);
dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri);
- ctx_info->user_pri = rt_tos2priority(udp_info->tos);
- iwqp->sc_qp.user_pri = ctx_info->user_pri;
- if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri))
- return -ENOMEM;
- irdma_qp_add_qos(&iwqp->sc_qp);
+ if (iwqp->sc_qp.vsi->dscp_mode)
+ ctx_info->user_pri =
+ iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)];
+ else
+ ctx_info->user_pri = rt_tos2priority(udp_info->tos);
}
- sgid_attr = attr->ah_attr.grh.sgid_attr;
ret = rdma_read_gid_l2_fields(sgid_attr, &vlan_id,
ctx_info->roce_info->mac_addr);
if (ret)
return ret;
+ ctx_info->user_pri = irdma_roce_get_vlan_prio(sgid_attr,
+ ctx_info->user_pri);
+ if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri))
+ return -ENOMEM;
+ iwqp->sc_qp.user_pri = ctx_info->user_pri;
+ irdma_qp_add_qos(&iwqp->sc_qp);
if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
vlan_id = 0;
@@ -1266,7 +1347,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
if (attr->max_dest_rd_atomic > dev->hw_attrs.max_hw_ird) {
ibdev_err(&iwdev->ibdev,
"rd_atomic = %d, above max_hw_ird=%d\n",
- attr->max_rd_atomic,
+ attr->max_dest_rd_atomic,
dev->hw_attrs.max_hw_ird);
return -EINVAL;
}
@@ -1355,17 +1436,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
info.next_iwarp_state = IRDMA_QP_STATE_SQD;
issue_modify_qp = 1;
+ iwqp->suspend_pending = true;
break;
case IB_QPS_SQE:
case IB_QPS_ERR:
case IB_QPS_RESET:
- if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) {
- spin_unlock_irqrestore(&iwqp->lock, flags);
- info.next_iwarp_state = IRDMA_QP_STATE_SQD;
- irdma_hw_modify_qp(iwdev, iwqp, &info, true);
- spin_lock_irqsave(&iwqp->lock, flags);
- }
-
if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) {
spin_unlock_irqrestore(&iwqp->lock, flags);
if (udata && udata->inlen) {
@@ -1402,6 +1477,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ctx_info->rem_endpoint_idx = udp_info->arp_idx;
if (irdma_hw_modify_qp(iwdev, iwqp, &info, true))
return -EINVAL;
+ if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) {
+ ret = irdma_wait_for_suspend(iwqp);
+ if (ret)
+ return ret;
+ }
spin_lock_irqsave(&iwqp->lock, flags);
if (iwqp->iwarp_state == info.curr_iwarp_state) {
iwqp->iwarp_state = info.next_iwarp_state;
@@ -1781,6 +1861,9 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
irdma_process_resize_list(iwcq, iwdev, NULL);
spin_unlock_irqrestore(&iwcq->lock, flags);
+ irdma_cq_rem_ref(ib_cq);
+ wait_for_completion(&iwcq->free_cq);
+
irdma_cq_wq_destroy(iwdev->rf, cq);
spin_lock_irqsave(&iwceq->ce_lock, flags);
@@ -1952,14 +2035,15 @@ static inline int cq_validate_flags(u32 flags, u8 hw_rev)
* irdma_create_cq - create cq
* @ibcq: CQ allocated
* @attr: attributes for cq
- * @udata: user data
+ * @attrs: uverbs attribute bundle
*/
static int irdma_create_cq(struct ib_cq *ibcq,
const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
#define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf)
#define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size)
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
struct irdma_device *iwdev = to_iwdev(ibdev);
struct irdma_pci_f *rf = iwdev->rf;
@@ -1990,6 +2074,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
cq = &iwcq->sc_cq;
cq->back_cq = iwcq;
+ refcount_set(&iwcq->refcnt, 1);
spin_lock_init(&iwcq->lock);
INIT_LIST_HEAD(&iwcq->resize_list);
INIT_LIST_HEAD(&iwcq->cmpl_generated);
@@ -2101,9 +2186,8 @@ static int irdma_create_cq(struct ib_cq *ibcq,
info.cq_base_pa = iwcq->kmem.pa;
}
- if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
- info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
- (u32)IRDMA_MAX_CQ_READ_THRESH);
+ info.shadow_read_threshold = min(info.cq_uk_init_info.cq_size / 2,
+ (u32)IRDMA_MAX_CQ_READ_THRESH);
if (irdma_sc_cq_init(cq, &info)) {
ibdev_dbg(&iwdev->ibdev, "VERBS: init cq fail\n");
@@ -2141,6 +2225,9 @@ static int irdma_create_cq(struct ib_cq *ibcq,
goto cq_destroy;
}
}
+ rf->cq_table[cq_num] = iwcq;
+ init_completion(&iwcq->free_cq);
+
return 0;
cq_destroy:
irdma_cq_wq_destroy(rf, cq);
@@ -2552,7 +2639,8 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
struct irdma_mr *iwmr)
{
struct irdma_allocate_stag_info *info;
- struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct irdma_pd *iwpd = to_iwpd(pd);
int status;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
@@ -2568,6 +2656,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S;
info->pd_id = iwpd->sc_pd.pd_id;
info->total_len = iwmr->len;
+ info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
info->remote_access = true;
cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG;
cqp_info->post_sq = 1;
@@ -2575,8 +2664,11 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev,
cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
- return status;
+ iwmr->is_hwreg = 1;
+ return 0;
}
/**
@@ -2615,6 +2707,8 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
iwmr->type = IRDMA_MEMREG_TYPE_MEM;
palloc = &iwpbl->pble_alloc;
iwmr->page_cnt = max_num_sg;
+ /* Use system PAGE_SIZE as the sg page sizes are unknown at this point */
+ iwmr->len = max_num_sg * PAGE_SIZE;
err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt,
false);
if (err_code)
@@ -2694,7 +2788,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
{
struct irdma_pbl *iwpbl = &iwmr->iwpbl;
struct irdma_reg_ns_stag_info *stag_info;
- struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd);
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct irdma_pd *iwpd = to_iwpd(pd);
struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
@@ -2713,6 +2808,7 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
stag_info->total_len = iwmr->len;
stag_info->access_rights = irdma_get_mr_access(access);
stag_info->pd_id = iwpd->sc_pd.pd_id;
+ stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED)
stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED;
else
@@ -2738,14 +2834,18 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr,
ret = irdma_handle_cqp_op(iwdev->rf, cqp_request);
irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (!ret)
+ iwmr->is_hwreg = 1;
+
return ret;
}
-static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access)
+static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access,
+ bool create_stag)
{
struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
struct irdma_pbl *iwpbl = &iwmr->iwpbl;
- u32 stag;
+ u32 stag = 0;
u8 lvl;
int err;
@@ -2764,15 +2864,18 @@ static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access)
}
}
- stag = irdma_create_stag(iwdev);
- if (!stag) {
- err = -ENOMEM;
- goto free_pble;
+ if (create_stag) {
+ stag = irdma_create_stag(iwdev);
+ if (!stag) {
+ err = -ENOMEM;
+ goto free_pble;
+ }
+
+ iwmr->stag = stag;
+ iwmr->ibmr.rkey = stag;
+ iwmr->ibmr.lkey = stag;
}
- iwmr->stag = stag;
- iwmr->ibmr.rkey = stag;
- iwmr->ibmr.lkey = stag;
err = irdma_hwreg_mr(iwdev, iwmr, access);
if (err)
goto err_hwreg;
@@ -2780,7 +2883,8 @@ static int irdma_reg_user_mr_type_mem(struct irdma_mr *iwmr, int access)
return 0;
err_hwreg:
- irdma_free_stag(iwdev, stag);
+ if (stag)
+ irdma_free_stag(iwdev, stag);
free_pble:
if (iwpbl->pble_alloc.level != PBLE_LEVEL_0 && iwpbl->pbl_allocated)
@@ -2794,8 +2898,8 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
enum irdma_memreg_type reg_type)
{
struct irdma_device *iwdev = to_iwdev(pd->device);
- struct irdma_pbl *iwpbl = NULL;
- struct irdma_mr *iwmr = NULL;
+ struct irdma_pbl *iwpbl;
+ struct irdma_mr *iwmr;
unsigned long pgsz_bitmap;
iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
@@ -2811,7 +2915,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region,
iwmr->type = reg_type;
pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ?
- iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE;
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K;
iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt);
if (unlikely(!iwmr->page_size)) {
@@ -2843,6 +2947,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req,
int err;
u8 lvl;
+ /* iWarp: Catch page not starting on OS page boundary */
+ if (!rdma_protocol_roce(&iwdev->ibdev, 1) &&
+ ib_umem_offset(iwmr->region))
+ return -EINVAL;
+
total = req.sq_pages + req.rq_pages + 1;
if (total > iwmr->page_cnt)
return -EINVAL;
@@ -2955,7 +3064,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
goto error;
break;
case IRDMA_MEMREG_TYPE_MEM:
- err = irdma_reg_user_mr_type_mem(iwmr, access);
+ err = irdma_reg_user_mr_type_mem(iwmr, access, true);
if (err)
goto error;
@@ -2976,7 +3085,7 @@ error:
static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
u64 len, u64 virt,
int fd, int access,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
struct irdma_device *iwdev = to_iwdev(pd->device);
struct ib_umem_dmabuf *umem_dmabuf;
@@ -2999,7 +3108,7 @@ static struct ib_mr *irdma_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
goto err_release;
}
- err = irdma_reg_user_mr_type_mem(iwmr, access);
+ err = irdma_reg_user_mr_type_mem(iwmr, access, true);
if (err)
goto err_iwmr;
@@ -3014,6 +3123,161 @@ err_release:
return ERR_PTR(err);
}
+static int irdma_hwdereg_mr(struct ib_mr *ib_mr)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_pd *iwpd = to_iwpd(ib_mr->pd);
+ struct irdma_dealloc_stag_info *info;
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct irdma_cqp_request *cqp_request;
+ struct cqp_cmds_info *cqp_info;
+ int status;
+
+ /* Skip the HW MR de-register when it was already de-registered
+ * as part of an MR re-register and the re-registration failed.
+ */
+ if (!iwmr->is_hwreg)
+ return 0;
+
+ cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
+ if (!cqp_request)
+ return -ENOMEM;
+
+ cqp_info = &cqp_request->info;
+ info = &cqp_info->in.u.dealloc_stag.info;
+ memset(info, 0, sizeof(*info));
+ info->pd_id = iwpd->sc_pd.pd_id;
+ info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
+ info->mr = true;
+ if (iwpbl->pbl_allocated)
+ info->dealloc_pbl = true;
+
+ cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
+ cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
+ status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
+ irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
+ if (status)
+ return status;
+
+ iwmr->is_hwreg = 0;
+ return 0;
+}
+
+/*
+ * irdma_rereg_mr_trans - Re-register a user MR with a new translation.
+ * @iwmr: ptr of iwmr
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ *
+ * Re-register a user memory region when a translation change is requested,
+ * pinning the new region while reusing the stag from the original registration.
+ */
+static int irdma_rereg_mr_trans(struct irdma_mr *iwmr, u64 start, u64 len,
+ u64 virt)
+{
+ struct irdma_device *iwdev = to_iwdev(iwmr->ibmr.device);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ struct ib_pd *pd = iwmr->ibmr.pd;
+ struct ib_umem *region;
+ int err;
+
+ region = ib_umem_get(pd->device, start, len, iwmr->access);
+ if (IS_ERR(region))
+ return PTR_ERR(region);
+
+ iwmr->region = region;
+ iwmr->ibmr.iova = virt;
+ iwmr->ibmr.pd = pd;
+ iwmr->page_size = ib_umem_find_best_pgsz(region,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
+ virt);
+ if (unlikely(!iwmr->page_size)) {
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+
+ iwmr->len = region->length;
+ iwpbl->user_base = virt;
+ iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
+
+ err = irdma_reg_user_mr_type_mem(iwmr, iwmr->access, false);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ ib_umem_release(region);
+ return err;
+}
+
+/*
+ * irdma_rereg_user_mr - Re-register a user memory region (MR)
+ * @ibmr: ib_mr whose iwarp MR is being re-registered
+ * @flags: bit mask indicating which MR attributes are being modified
+ * @start: virtual start address
+ * @len: length of mr
+ * @virt: virtual address
+ * @new_access: bit mask of access flags
+ * @new_pd: ptr of pd
+ * @udata: user data
+ *
+ * Return:
+ * NULL - Success, existing MR updated
+ * ERR_PTR - error occurred
+ */
+static struct ib_mr *irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags,
+ u64 start, u64 len, u64 virt,
+ int new_access, struct ib_pd *new_pd,
+ struct ib_udata *udata)
+{
+ struct irdma_device *iwdev = to_iwdev(ib_mr->device);
+ struct irdma_mr *iwmr = to_iwmr(ib_mr);
+ struct irdma_pbl *iwpbl = &iwmr->iwpbl;
+ int ret;
+
+ if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size)
+ return ERR_PTR(-EINVAL);
+
+ if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ret = irdma_hwdereg_mr(ib_mr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (flags & IB_MR_REREG_ACCESS)
+ iwmr->access = new_access;
+
+ if (flags & IB_MR_REREG_PD) {
+ iwmr->ibmr.pd = new_pd;
+ iwmr->ibmr.device = new_pd->device;
+ }
+
+ if (flags & IB_MR_REREG_TRANS) {
+ if (iwpbl->pbl_allocated) {
+ irdma_free_pble(iwdev->rf->pble_rsrc,
+ &iwpbl->pble_alloc);
+ iwpbl->pbl_allocated = false;
+ }
+ if (iwmr->region) {
+ ib_umem_release(iwmr->region);
+ iwmr->region = NULL;
+ }
+
+ ret = irdma_rereg_mr_trans(iwmr, start, len, virt);
+ } else
+ ret = irdma_hwreg_mr(iwdev, iwmr, iwmr->access);
+ if (ret)
+ return ERR_PTR(ret);
+
+ return NULL;
+}
+
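For context, this handler is reached from userspace through the standard rereg verb; a hedged libibverbs-side sketch follows (the API is standard, the buffer names are made up for illustration):

#include <infiniband/verbs.h>

/* Move an existing MR onto a new buffer, keeping its PD and keys. */
static int example_move_mr(struct ibv_mr *mr, void *new_buf, size_t len)
{
	/* pd and access arguments are ignored unless their flags are set */
	return ibv_rereg_mr(mr, IBV_REREG_MR_CHANGE_TRANSLATION,
			    NULL, new_buf, len, 0);
}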
/**
* irdma_reg_phys_mr - register kernel physical memory
* @pd: ibpd pointer
@@ -3121,16 +3385,10 @@ static void irdma_del_memlist(struct irdma_mr *iwmr,
*/
static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
{
- struct ib_pd *ibpd = ib_mr->pd;
- struct irdma_pd *iwpd = to_iwpd(ibpd);
struct irdma_mr *iwmr = to_iwmr(ib_mr);
struct irdma_device *iwdev = to_iwdev(ib_mr->device);
- struct irdma_dealloc_stag_info *info;
struct irdma_pbl *iwpbl = &iwmr->iwpbl;
- struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
- struct irdma_cqp_request *cqp_request;
- struct cqp_cmds_info *cqp_info;
- int status;
+ int ret;
if (iwmr->type != IRDMA_MEMREG_TYPE_MEM) {
if (iwmr->region) {
@@ -3144,33 +3402,18 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
goto done;
}
- cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- info = &cqp_info->in.u.dealloc_stag.info;
- memset(info, 0, sizeof(*info));
- info->pd_id = iwpd->sc_pd.pd_id;
- info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S;
- info->mr = true;
- if (iwpbl->pbl_allocated)
- info->dealloc_pbl = true;
-
- cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG;
- cqp_info->post_sq = 1;
- cqp_info->in.u.dealloc_stag.dev = &iwdev->rf->sc_dev;
- cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
- status = irdma_handle_cqp_op(iwdev->rf, cqp_request);
- irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request);
- if (status)
- return status;
+ ret = irdma_hwdereg_mr(ib_mr);
+ if (ret)
+ return ret;
irdma_free_stag(iwdev, iwmr->stag);
done:
if (iwpbl->pbl_allocated)
- irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
- ib_umem_release(iwmr->region);
+ irdma_free_pble(iwdev->rf->pble_rsrc, &iwpbl->pble_alloc);
+
+ if (iwmr->region)
+ ib_umem_release(iwmr->region);
+
kfree(iwmr);
return 0;
@@ -3476,8 +3719,7 @@ static void irdma_process_cqe(struct ib_wc *entry,
set_ib_wc_op_sq(cq_poll_info, entry);
} else {
set_ib_wc_op_rq(cq_poll_info, entry,
- qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
- true : false);
+ qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM);
if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
cq_poll_info->stag_invalid_set) {
entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
@@ -3963,7 +4205,7 @@ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid)
if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) {
irdma_copy_ip_ntohl(ip_addr,
sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32);
- irdma_netdev_vlan_ipv6(ip_addr, &vlan_id, NULL);
+ irdma_get_vlan_mac_ipv6(ip_addr, &vlan_id, NULL);
ipv4 = false;
ibdev_dbg(&iwdev->ibdev,
"VERBS: qp_id=%d, IP6address=%pI6\n", ibqp->qp_num,
@@ -4261,9 +4503,12 @@ static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr)
ah_info->vlan_tag = 0;
if (ah_info->vlan_tag < VLAN_N_VID) {
+ u8 prio = rt_tos2priority(ah_info->tc_tos);
+
+ prio = irdma_roce_get_vlan_prio(sgid_attr, prio);
+
+ ah_info->vlan_tag |= (u16)prio << VLAN_PRIO_SHIFT;
ah_info->insert_vlan_tag = true;
- ah_info->vlan_tag |=
- rt_tos2priority(ah_info->tc_tos) << VLAN_PRIO_SHIFT;
}
return 0;
@@ -4424,7 +4669,6 @@ static int irdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos;
ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl;
ah_attr->grh.sgid_index = ah->sgid_index;
- ah_attr->grh.sgid_index = ah->sgid_index;
memcpy(&ah_attr->grh.dgid, &ah->dgid,
sizeof(ah_attr->grh.dgid));
}
@@ -4499,6 +4743,7 @@ static const struct ib_device_ops irdma_dev_ops = {
.query_qp = irdma_query_qp,
.reg_user_mr = irdma_reg_user_mr,
.reg_user_mr_dmabuf = irdma_reg_user_mr_dmabuf,
+ .rereg_user_mr = irdma_rereg_user_mr,
.req_notify_cq = irdma_req_notify_cq,
.resize_cq = irdma_resize_cq,
INIT_RDMA_OBJ_SIZE(ib_pd, irdma_pd, ibpd),
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index a536e9fa85eb..cfa140b36395 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2021 Intel Corporation */
#ifndef IRDMA_VERBS_H
#define IRDMA_VERBS_H
@@ -18,7 +18,8 @@ struct irdma_ucontext {
struct list_head qp_reg_mem_list;
spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */
int abi_ver;
- bool legacy_mode;
+ u8 legacy_mode : 1;
+ u8 use_raw_attrs : 1;
};
struct irdma_pd {
@@ -99,6 +100,8 @@ struct irdma_mr {
struct ib_mw ibmw;
};
struct ib_umem *region;
+ int access;
+ u8 is_hwreg;
u16 type;
u32 page_cnt;
u64 page_size;
@@ -122,6 +125,8 @@ struct irdma_cq {
u32 cq_mem_size;
struct irdma_dma_mem kmem;
struct irdma_dma_mem kmem_shadow;
+ struct completion free_cq;
+ refcount_t refcnt;
spinlock_t lock; /* for poll cq */
struct irdma_pbl *iwpbl;
struct irdma_pbl *iwpbl_shadow;
@@ -193,6 +198,7 @@ struct irdma_qp {
u8 flush_issued : 1;
u8 sig_all : 1;
u8 pau_mode : 1;
+ u8 suspend_pending : 1;
u8 rsvd : 1;
u8 iwarp_state;
u16 term_sq_flush_code;
diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c
index 20bc8d0d7f1f..542bc0b1bb03 100644
--- a/drivers/infiniband/hw/irdma/ws.c
+++ b/drivers/infiniband/hw/irdma/ws.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2017 - 2021 Intel Corporation */
#include "osdep.h"
#include "hmc.h"
diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h
index d431e3327d26..45490031a389 100644
--- a/drivers/infiniband/hw/irdma/ws.h
+++ b/drivers/infiniband/hw/irdma/ws.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2015 - 2020 Intel Corporation */
#ifndef IRDMA_WS_H
#define IRDMA_WS_H
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index d141cab8a1e6..f04a679d2871 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -6,17 +6,25 @@
#include "mana_ib.h"
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_create_cq_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
+ bool is_rnic_cq;
+ u32 doorbell;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- if (udata->inlen < sizeof(ucmd))
+ cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
+ cq->cq_handle = INVALID_MANA_HANDLE;
+
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
return -EINVAL;
err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
@@ -26,41 +34,54 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
return err;
}
- if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) {
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+
+ if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
return -EINVAL;
}
cq->cqe = attr->cqe;
- cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(cq->umem)) {
- err = PTR_ERR(cq->umem);
- ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
- err);
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
return err;
}
- err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region);
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to create dma region for create cq, %d\n",
- err);
- goto err_release_umem;
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+
+ if (is_rnic_cq) {
+ err = mana_ib_gd_create_cq(mdev, cq, doorbell);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create RNIC cq, %d\n", err);
+ goto err_destroy_queue;
+ }
+
+ err = mana_ib_install_cq_cb(mdev, cq);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to install cq callback, %d\n", err);
+ goto err_destroy_rnic_cq;
+ }
}
- ibdev_dbg(ibdev,
- "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
- err, cq->gdma_region);
-
- /*
- * The CQ ID is not known at this time. The ID is generated at create_qp
- */
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
return 0;
-err_release_umem:
- ib_umem_release(cq->umem);
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, cq);
+err_destroy_rnic_cq:
+ mana_ib_gd_destroy_cq(mdev, cq);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
return err;
}
@@ -72,8 +93,54 @@ int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
- ib_umem_release(cq->umem);
+ mana_ib_remove_cq_cb(mdev, cq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_cq(mdev, cq);
+
+ mana_ib_destroy_queue(mdev, &cq->queue);
+
+ return 0;
+}
+
+static void mana_ib_cq_handler(void *ctx, struct gdma_queue *gdma_cq)
+{
+ struct mana_ib_cq *cq = ctx;
+
+ if (cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue *gdma_cq;
+
+ if (cq->queue.id >= gc->max_num_cqs)
+ return -EINVAL;
+ /* Create CQ table entry */
+ WARN_ON(gc->cq_table[cq->queue.id]);
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (!gdma_cq)
+ return -ENOMEM;
+
+ gdma_cq->cq.context = cq;
+ gdma_cq->type = GDMA_CQ;
+ gdma_cq->cq.callback = mana_ib_cq_handler;
+ gdma_cq->id = cq->queue.id;
+ gc->cq_table[cq->queue.id] = gdma_cq;
return 0;
}
+
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+
+ if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
+ return;
+
+ kfree(gc->cq_table[cq->queue.id]);
+ gc->cq_table[cq->queue.id] = NULL;
+}
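The table entry installed above is what lets the core GDMA EQ path route completions back to the IB CQ; a hedged sketch of that dispatch, with the core-side routine paraphrased rather than quoted:

static void example_eq_dispatch(struct gdma_context *gc, u32 cqid)
{
	struct gdma_queue *gdma_cq = gc->cq_table[cqid];

	/* invokes mana_ib_cq_handler(cq, gdma_cq) as wired by install */
	if (gdma_cq && gdma_cq->cq.callback)
		gdma_cq->cq.callback(gdma_cq->cq.context, gdma_cq);
}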
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index d4541b8707e4..7ac01918ef7c 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -5,6 +5,7 @@
#include "mana_ib.h"
#include <net/mana/mana_auxiliary.h>
+#include <net/addrconf.h>
MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
MODULE_LICENSE("GPL");
@@ -15,6 +16,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.driver_id = RDMA_DRIVER_MANA,
.uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+ .add_gid = mana_ib_gd_add_gid,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
.create_cq = mana_ib_create_cq,
@@ -23,18 +25,21 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.create_wq = mana_ib_create_wq,
.dealloc_pd = mana_ib_dealloc_pd,
.dealloc_ucontext = mana_ib_dealloc_ucontext,
+ .del_gid = mana_ib_gd_del_gid,
.dereg_mr = mana_ib_dereg_mr,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_link_layer = mana_ib_get_link_layer,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
+ .query_pkey = mana_ib_query_pkey,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
@@ -51,8 +56,10 @@ static int mana_ib_probe(struct auxiliary_device *adev,
{
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
struct gdma_dev *mdev = madev->mdev;
+ struct net_device *ndev;
struct mana_context *mc;
struct mana_ib_dev *dev;
+ u8 mac_addr[ETH_ALEN];
int ret;
mc = mdev->driver_data;
@@ -68,26 +75,84 @@ static int mana_ib_probe(struct auxiliary_device *adev,
ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
- dev->gdma_dev = mdev;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
/*
* num_comp_vectors needs to set to the max MSIX index
* when interrupts and event queues are implemented
*/
- dev->ib_dev.num_comp_vectors = 1;
+ dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
- ret = ib_register_device(&dev->ib_dev, "mana_%d",
- mdev->gdma_context->dev);
+ rcu_read_lock(); /* required to get primary netdev */
+ ndev = mana_get_primary_netdev_rcu(mc, 0);
+ if (!ndev) {
+ rcu_read_unlock();
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
+ goto free_ib_device;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+ rcu_read_unlock();
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
if (ret) {
- ib_dealloc_device(&dev->ib_dev);
- return ret;
+ ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
+ ret);
+ goto free_ib_device;
}
+ dev->gdma_dev = &mdev->gdma_context->mana_ib;
+
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
+ ret);
+ goto deregister_device;
+ }
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto deregister_device;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
+ ret);
+ goto destroy_rnic;
+ }
+
+ ret = ib_register_device(&dev->ib_dev, "mana_%d",
+ mdev->gdma_context->dev);
+ if (ret)
+ goto destroy_rnic;
dev_set_drvdata(&adev->dev, dev);
return 0;
+
+destroy_rnic:
+ xa_destroy(&dev->qp_table_wq);
+ mana_ib_gd_destroy_rnic_adapter(dev);
+destroy_eqs:
+ mana_ib_destroy_eqs(dev);
+deregister_device:
+ mana_gd_deregister_device(dev->gdma_dev);
+free_ib_device:
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
}
static void mana_ib_remove(struct auxiliary_device *adev)
@@ -95,6 +160,10 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
+ xa_destroy(&dev->qp_table_wq);
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 7be4c3adb4e2..67c2d43135a8 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -8,13 +8,10 @@
void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port)
{
- struct gdma_dev *gd = dev->gdma_dev;
struct mana_port_context *mpc;
struct net_device *ndev;
- struct mana_context *mc;
- mc = gd->driver_data;
- ndev = mc->ports[port];
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
mpc = netdev_priv(ndev);
mutex_lock(&pd->vport_mutex);
@@ -31,14 +28,11 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
u32 doorbell_id)
{
- struct gdma_dev *mdev = dev->gdma_dev;
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
int err;
- mc = mdev->driver_data;
- ndev = mc->ports[port];
+ ndev = mana_ib_get_netdev(&dev->ib_dev, port);
mpc = netdev_priv(ndev);
mutex_lock(&pd->vport_mutex);
@@ -79,17 +73,17 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct gdma_create_pd_req req = {};
enum gdma_pd_flags flags = 0;
struct mana_ib_dev *dev;
- struct gdma_dev *mdev;
+ struct gdma_context *gc;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- mdev = dev->gdma_dev;
+ gc = mdev_to_gc(dev);
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
req.flags = flags;
- err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+ err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
if (err || resp.hdr.status) {
@@ -119,17 +113,17 @@ int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
struct gdma_destory_pd_resp resp = {};
struct gdma_destroy_pd_req req = {};
struct mana_ib_dev *dev;
- struct gdma_dev *mdev;
+ struct gdma_context *gc;
int err;
dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- mdev = dev->gdma_dev;
+ gc = mdev_to_gc(dev);
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
sizeof(resp));
req.pd_handle = pd->pd_handle;
- err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+ err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
if (err || resp.hdr.status) {
@@ -206,13 +200,11 @@ int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
struct ib_device *ibdev = ibcontext->device;
struct mana_ib_dev *mdev;
struct gdma_context *gc;
- struct gdma_dev *dev;
int doorbell_page;
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- dev = mdev->gdma_dev;
- gc = dev->gdma_context;
+ gc = mdev_to_gc(mdev);
/* Allocate a doorbell page index */
ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
@@ -238,13 +230,54 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev->gdma_dev->gdma_context;
+ gc = mdev_to_gc(mdev);
ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
if (ret)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue)
+{
+ struct ib_umem *umem;
+ int err;
+
+ queue->umem = NULL;
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+
+ umem = ib_umem_get(&mdev->ib_dev, addr, size, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(umem)) {
+ err = PTR_ERR(umem);
+ ibdev_dbg(&mdev->ib_dev, "Failed to get umem, %d\n", err);
+ return err;
+ }
+
+ err = mana_ib_create_zero_offset_dma_region(mdev, umem, &queue->gdma_region);
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to create dma region, %d\n", err);
+ goto free_umem;
+ }
+ queue->umem = umem;
+
+ ibdev_dbg(&mdev->ib_dev, "created dma region 0x%llx\n", queue->gdma_region);
+
+ return 0;
+free_umem:
+ ib_umem_release(umem);
+ return err;
+}
+
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue)
+{
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
+ ib_umem_release(queue->umem);
+}
+
static int
mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
struct gdma_context *gc,
@@ -309,8 +342,8 @@ mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
return 0;
}
-int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
- mana_handle_t *gdma_region)
+static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, unsigned long page_sz)
{
struct gdma_dma_region_add_pages_req *add_req = NULL;
size_t num_pages_processed = 0, num_pages_to_handle;
@@ -322,23 +355,14 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
size_t max_pgs_create_cmd;
struct gdma_context *gc;
size_t num_pages_total;
- struct gdma_dev *mdev;
- unsigned long page_sz;
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
int err;
- mdev = dev->gdma_dev;
- gc = mdev->gdma_context;
+ gc = mdev_to_gc(dev);
hwc = gc->hwc.driver_data;
- /* Hardware requires dma region to align to chosen page size */
- page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
- if (!page_sz) {
- ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
- return -ENOMEM;
- }
num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
max_pgs_create_cmd =
@@ -358,8 +382,8 @@ int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
sizeof(struct gdma_create_dma_region_resp));
create_req->length = umem->length;
- create_req->offset_in_page = umem->address & (page_sz - 1);
- create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+ create_req->offset_in_page = ib_umem_dma_offset(umem, page_sz);
+ create_req->gdma_page_type = order_base_2(page_sz) - MANA_PAGE_SHIFT;
create_req->page_count = num_pages_total;
ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
@@ -424,12 +448,39 @@ out:
return err;
}
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt)
+{
+ unsigned long page_sz;
+
+ page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, virt);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region)
+{
+ unsigned long page_sz;
+
+ /* Hardware requires dma region to align to chosen page size */
+ page_sz = ib_umem_find_best_pgoff(umem, PAGE_SZ_BM, 0);
+ if (!page_sz) {
+ ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
+ return -EINVAL;
+ }
+
+ return mana_ib_gd_create_dma_region(dev, umem, gdma_region, page_sz);
+}
+
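The only difference between the two wrappers is the offset constraint handed to the umem search; a short contrast of the two core calls, as I read the ib_umem API:

/*
 * ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, virt)
 *	largest HW page size consistent with the MR's virtual base;
 *	a nonzero offset into the first page is acceptable.
 *
 * ib_umem_find_best_pgoff(umem, PAGE_SZ_BM, 0)
 *	same search, but the zero mask additionally forces the region
 *	to begin at offset 0 of the chosen page size, which is what the
 *	queue path needs since HW maps those regions from offset zero.
 */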
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
{
- struct gdma_dev *mdev = dev->gdma_dev;
- struct gdma_context *gc;
+ struct gdma_context *gc = mdev_to_gc(dev);
- gc = mdev->gdma_context;
ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
return mana_gd_destroy_dma_region(gc, gdma_region);
@@ -447,7 +498,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int ret;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
- gc = mdev->gdma_dev->gdma_context;
+ gc = mdev_to_gc(mdev);
if (vma->vm_pgoff != 0) {
ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
@@ -460,13 +511,13 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
PAGE_SHIFT;
prot = pgprot_writecombine(vma->vm_page_prot);
- ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot,
+ ret = rdma_user_mmap_io(ibcontext, vma, pfn, PAGE_SIZE, prot,
NULL);
if (ret)
ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
else
- ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
- pfn, gc->db_page_size, ret);
+ ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %lu, ret %d\n",
+ pfn, PAGE_SIZE, ret);
return ret;
}
@@ -474,11 +525,18 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
- /*
- * This version only support RAW_PACKET
- * other values need to be filled for other types
- */
+ struct ib_port_attr attr;
+ int err;
+
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+ if (port_num == 1)
+ immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
return 0;
}
@@ -486,20 +544,30 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
- props->max_qp = MANA_MAX_NUM_QUEUES;
- props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
-
- /*
- * max_cqe could be potentially much bigger.
- * As this version of driver only support RAW QP, set it to the same
- * value as max_qp_wr
- */
- props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
+ struct mana_ib_dev *dev = container_of(ibdev,
+ struct mana_ib_dev, ib_dev);
+ memset(props, 0, sizeof(*props));
props->max_mr_size = MANA_IB_MAX_MR_SIZE;
- props->max_mr = MANA_IB_MAX_MR;
- props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
- props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
+ props->page_size_cap = PAGE_SZ_BM;
+ props->max_qp = dev->adapter_caps.max_qp_count;
+ props->max_qp_wr = dev->adapter_caps.max_qp_wr;
+ props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_send_sge = dev->adapter_caps.max_send_sge_count;
+ props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_sge_rd = dev->adapter_caps.max_recv_sge_count;
+ props->max_cq = dev->adapter_caps.max_cq_count;
+ props->max_cqe = dev->adapter_caps.max_qp_wr;
+ props->max_mr = dev->adapter_caps.max_mr_count;
+ props->max_pd = dev->adapter_caps.max_pd_count;
+ props->max_qp_rd_atom = dev->adapter_caps.max_inbound_read_limit;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_qp_init_rd_atom = dev->adapter_caps.max_outbound_read_limit;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
+ props->max_ah = INT_MAX;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
return 0;
}
@@ -507,7 +575,42 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
- /* This version doesn't return port properties */
+ struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
+
+ if (!ndev)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+ props->max_mtu = IB_MTU_4096;
+ props->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
+
+ if (netif_carrier_ok(ndev) && netif_running(ndev)) {
+ props->state = IB_PORT_ACTIVE;
+ props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
+ } else {
+ props->state = IB_PORT_DOWN;
+ props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
+ }
+
+ props->active_width = IB_WIDTH_4X;
+ props->active_speed = IB_SPEED_EDR;
+ props->pkey_tbl_len = 1;
+ if (port == 1)
+ props->gid_tbl_len = 16;
+
+ return 0;
+}
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index != 0)
+ return -EINVAL;
+ *pkey = IB_DEFAULT_PKEY_FULL;
return 0;
}
@@ -521,3 +624,366 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
}
+
+int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct mana_ib_query_adapter_caps_resp resp = {};
+ struct mana_ib_query_adapter_caps_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
+ sizeof(resp));
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.dev_id = dev->gdma_dev->dev_id;
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
+ &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_sq_id = resp.max_sq_id;
+ caps->max_rq_id = resp.max_rq_id;
+ caps->max_cq_id = resp.max_cq_id;
+ caps->max_qp_count = resp.max_qp_count;
+ caps->max_cq_count = resp.max_cq_count;
+ caps->max_mr_count = resp.max_mr_count;
+ caps->max_pd_count = resp.max_pd_count;
+ caps->max_inbound_read_limit = resp.max_inbound_read_limit;
+ caps->max_outbound_read_limit = resp.max_outbound_read_limit;
+ caps->mw_count = resp.mw_count;
+ caps->max_srq_count = resp.max_srq_count;
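+ /* max_qp_wr is bounded by how many maximum-size SQEs and RQEs fit
+ * in the largest requester SQ and RQ the adapter reports.
+ */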
+ caps->max_qp_wr = min_t(u32,
+ resp.max_requester_sq_size / GDMA_MAX_SQE_SIZE,
+ resp.max_requester_rq_size / GDMA_MAX_RQE_SIZE);
+ caps->max_inline_data_size = resp.max_inline_data_size;
+ caps->max_send_sge_count = resp.max_send_sge_count;
+ caps->max_recv_sge_count = resp.max_recv_sge_count;
+
+ return 0;
+}
+
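+/*
+ * Fatal-error EQ callback: on GDMA_EQE_RNIC_QP_FATAL, look up the QP by
+ * the qpn carried in the EQE and forward IB_EVENT_QP_FATAL to the
+ * consumer's event handler, if one is registered.
+ */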
+static void
+mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
+{
+ struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx;
+ struct mana_ib_qp *qp;
+ struct ib_event ev;
+ u32 qpn;
+
+ switch (event->type) {
+ case GDMA_EQE_RNIC_QP_FATAL:
+ qpn = event->details[0];
+ qp = mana_get_qp_ref(mdev, qpn);
+ if (!qp)
+ break;
+ if (qp->ibqp.event_handler) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_FATAL;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ mana_put_qp_ref(qp);
+ break;
+ default:
+ break;
+ }
+}
+
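+/*
+ * The first EQ, on MSI-X vector 0, carries fatal-error events; the
+ * per-comp-vector completion EQs are spread over the usable MSI-X
+ * vectors starting at 1, wrapping modulo num_msix_usable.
+ */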
+int mana_ib_create_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err, i;
+
+ spec.type = GDMA_EQ;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = EQ_SIZE;
+ spec.eq.callback = mana_ib_event_handler;
+ spec.eq.context = mdev;
+ spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
+ spec.eq.msix_index = 0;
+
+ err = mana_gd_create_mana_eq(&gc->mana_ib, &spec, &mdev->fatal_err_eq);
+ if (err)
+ return err;
+
+ mdev->eqs = kcalloc(mdev->ib_dev.num_comp_vectors, sizeof(struct gdma_queue *),
+ GFP_KERNEL);
+ if (!mdev->eqs) {
+ err = -ENOMEM;
+ goto destroy_fatal_eq;
+ }
+ spec.eq.callback = NULL;
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) {
+ spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]);
+ if (err)
+ goto destroy_eqs;
+ }
+
+ return 0;
+
+destroy_eqs:
+ while (i-- > 0)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+ kfree(mdev->eqs);
+destroy_fatal_eq:
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+ return err;
+}
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int i;
+
+ mana_gd_destroy_queue(gc, mdev->fatal_err_eq);
+
+ for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++)
+ mana_gd_destroy_queue(gc, mdev->eqs[i]);
+
+ kfree(mdev->eqs);
+}
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_create_adapter_resp resp = {};
+ struct mana_rnic_create_adapter_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.req.msg_version = GDMA_MESSAGE_V2;
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.notify_eq_id = mdev->fatal_err_eq->id;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
+ return err;
+ }
+ mdev->adapter_handle = resp.adapter;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
+{
+ struct mana_rnic_destroy_adapter_resp resp = {};
+ struct mana_rnic_destroy_adapter_req req = {};
+ struct gdma_context *gc;
+ int err;
+
+ gc = mdev_to_gc(mdev);
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy RNIC adapter err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_ADD;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct mana_ib_dev *mdev = container_of(attr->device, struct mana_ib_dev, ib_dev);
+ enum rdma_network_type ntype = rdma_gid_attr_network_type(attr);
+ struct mana_rnic_config_addr_resp resp = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_config_addr_req req = {};
+ int err;
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) {
+ ibdev_dbg(&mdev->ib_dev, "Unsupported rdma network type %d", ntype);
+ return -EINVAL;
+ }
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = ADDR_OP_REMOVE;
+ req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
+ copy_in_reverse(req.ip_addr, attr->gid.raw, sizeof(union ib_gid));
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config IP addr err %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac)
+{
+ struct mana_rnic_config_mac_addr_resp resp = {};
+ struct mana_rnic_config_mac_addr_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.op = op;
+ copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to config MAC addr err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_cq_resp resp = {};
+ struct mana_rnic_create_cq_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.gdma_region = cq->queue.gdma_region;
+ req.eq_id = mdev->eqs[cq->comp_vector]->id;
+ req.doorbell_page = doorbell;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create cq err %d", err);
+ return err;
+ }
+
+ cq->queue.id = resp.cq_id;
+ cq->cq_handle = resp.cq_handle;
+ /* The GDMA region is now owned by the CQ handle */
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ return 0;
+}
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_destroy_cq_resp resp = {};
+ struct mana_rnic_destroy_cq_req req = {};
+ int err;
+
+ if (cq->cq_handle == INVALID_MANA_HANDLE)
+ return 0;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.cq_handle = cq->cq_handle;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy cq err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_qp_resp resp = {};
+ struct mana_rnic_create_qp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->rc_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.flags = flags;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.rc_qp_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++) {
+ qp->rc_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_rc_qp_resp resp = {0};
+ struct mana_rnic_destroy_rc_qp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.rc_qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy rc qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 502cc8672eef..b53a5b4de908 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -27,18 +27,50 @@
*/
#define MANA_IB_MAX_MR 0xFFFFFFu
+/*
+ * The CA timeout is approx. 268ms (4.096us * 2^(DELAY))
+ */
+#define MANA_CA_ACK_DELAY 16
+
+struct mana_ib_adapter_caps {
+ u32 max_sq_id;
+ u32 max_rq_id;
+ u32 max_cq_id;
+ u32 max_qp_count;
+ u32 max_cq_count;
+ u32 max_mr_count;
+ u32 max_pd_count;
+ u32 max_inbound_read_limit;
+ u32 max_outbound_read_limit;
+ u32 mw_count;
+ u32 max_srq_count;
+ u32 max_qp_wr;
+ u32 max_send_sge_count;
+ u32 max_recv_sge_count;
+ u32 max_inline_data_size;
+};
+
+struct mana_ib_queue {
+ struct ib_umem *umem;
+ u64 gdma_region;
+ u64 id;
+};
+
struct mana_ib_dev {
struct ib_device ib_dev;
struct gdma_dev *gdma_dev;
+ mana_handle_t adapter_handle;
+ struct gdma_queue *fatal_err_eq;
+ struct gdma_queue **eqs;
+ struct xarray qp_table_wq;
+ struct mana_ib_adapter_caps adapter_caps;
};
struct mana_ib_wq {
struct ib_wq ibwq;
- struct ib_umem *umem;
+ struct mana_ib_queue queue;
int wqe;
u32 wq_buf_size;
- u64 gdma_region;
- u64 id;
mana_handle_t rx_object;
};
@@ -63,24 +95,39 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
- struct ib_umem *umem;
+ struct mana_ib_queue queue;
int cqe;
- u64 gdma_region;
- u64 id;
+ u32 comp_vector;
+ mana_handle_t cq_handle;
+};
+
+enum mana_rc_queue_type {
+ MANA_RC_SEND_QUEUE_REQUESTER = 0,
+ MANA_RC_SEND_QUEUE_RESPONDER,
+ MANA_RC_SEND_QUEUE_FMR,
+ MANA_RC_RECV_QUEUE_REQUESTER,
+ MANA_RC_RECV_QUEUE_RESPONDER,
+ MANA_RC_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_rc_qp {
+ struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
};
struct mana_ib_qp {
struct ib_qp ibqp;
- /* Work queue info */
- struct ib_umem *sq_umem;
- int sqe;
- u64 sq_gdma_region;
- u64 sq_id;
- mana_handle_t tx_object;
+ mana_handle_t qp_handle;
+ union {
+ struct mana_ib_queue raw_sq;
+ struct mana_ib_rc_qp rc_qp;
+ };
/* The port on the IB device, starting with 1 */
u32 port;
+
+ refcount_t refcount;
+ struct completion free;
};
struct mana_ib_ucontext {
@@ -92,12 +139,259 @@ struct mana_ib_rwq_ind_table {
struct ib_rwq_ind_table ib_ind_table;
};
-int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
- mana_handle_t *gdma_region);
+enum mana_ib_command_code {
+ MANA_IB_GET_ADAPTER_CAP = 0x30001,
+ MANA_IB_CREATE_ADAPTER = 0x30002,
+ MANA_IB_DESTROY_ADAPTER = 0x30003,
+ MANA_IB_CONFIG_IP_ADDR = 0x30004,
+ MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_CQ = 0x30008,
+ MANA_IB_DESTROY_CQ = 0x30009,
+ MANA_IB_CREATE_RC_QP = 0x3000a,
+ MANA_IB_DESTROY_RC_QP = 0x3000b,
+ MANA_IB_SET_QP_STATE = 0x3000d,
+};
+
+struct mana_ib_query_adapter_caps_req {
+ struct gdma_req_hdr hdr;
+}; /* HW Data */
+
+struct mana_ib_query_adapter_caps_resp {
+ struct gdma_resp_hdr hdr;
+ u32 max_sq_id;
+ u32 max_rq_id;
+ u32 max_cq_id;
+ u32 max_qp_count;
+ u32 max_cq_count;
+ u32 max_mr_count;
+ u32 max_pd_count;
+ u32 max_inbound_read_limit;
+ u32 max_outbound_read_limit;
+ u32 mw_count;
+ u32 max_srq_count;
+ u32 max_requester_sq_size;
+ u32 max_responder_sq_size;
+ u32 max_requester_rq_size;
+ u32 max_responder_rq_size;
+ u32 max_send_sge_count;
+ u32 max_recv_sge_count;
+ u32 max_inline_data_size;
+}; /* HW Data */
+
+struct mana_rnic_create_adapter_req {
+ struct gdma_req_hdr hdr;
+ u32 notify_eq_id;
+ u32 reserved;
+ u64 feature_flags;
+}; /* HW Data */
+
+struct mana_rnic_create_adapter_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_destroy_adapter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_destroy_adapter_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_ib_addr_op {
+ ADDR_OP_ADD = 1,
+ ADDR_OP_REMOVE = 2,
+};
+
+enum sgid_entry_type {
+ SGID_TYPE_IPV4 = 1,
+ SGID_TYPE_IPV6 = 2,
+};
+
+struct mana_rnic_config_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ enum sgid_entry_type sgid_type;
+ u8 ip_addr[16];
+}; /* HW Data */
+
+struct mana_rnic_config_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ enum mana_ib_addr_op op;
+ u8 mac_addr[ETH_ALEN];
+ u8 reserved[6];
+}; /* HW Data */
+
+struct mana_rnic_config_mac_addr_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ u64 gdma_region;
+ u32 eq_id;
+ u32 doorbell_page;
+}; /* HW Data */
+
+struct mana_rnic_create_cq_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t cq_handle;
+ u32 cq_id;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t cq_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_cq_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+enum mana_rnic_create_rc_flags {
+ MANA_RC_FLAG_NO_FMR = 2,
+};
+
+struct mana_rnic_create_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_RC_QUEUE_TYPE_MAX];
+ u64 deprecated[2];
+ u64 flags;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_create_qp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t rc_qp_handle;
+ u32 queue_ids[MANA_RC_QUEUE_TYPE_MAX];
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t rc_qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_ib_ah_attr {
+ u8 src_addr[16];
+ u8 dest_addr[16];
+ u8 src_mac[ETH_ALEN];
+ u8 dest_mac[ETH_ALEN];
+ u8 src_addr_type;
+ u8 dest_addr_type;
+ u8 hop_limit;
+ u8 traffic_class;
+ u16 src_port;
+ u16 dest_port;
+ u32 reserved;
+};
+
+struct mana_rnic_set_qp_state_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+ u64 attr_mask;
+ u32 qp_state;
+ u32 path_mtu;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qpn;
+ u32 max_dest_rd_atomic;
+ u32 retry_cnt;
+ u32 rnr_retry;
+ u32 min_rnr_timer;
+ u32 reserved;
+ struct mana_ib_ah_attr ah_attr;
+}; /* HW Data */
+
+struct mana_rnic_set_qp_state_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->gdma_context;
+}
+
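+/*
+ * Look up a QP by number under the xarray lock and take a reference;
+ * mana_put_qp_ref() drops it and completes qp->free on the last put, so
+ * teardown can wait out event handlers that still hold the QP.
+ */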
+static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
+ uint32_t qid)
+{
+ struct mana_ib_qp *qp;
+ unsigned long flag;
+
+ xa_lock_irqsave(&mdev->qp_table_wq, flag);
+ qp = xa_load(&mdev->qp_table_wq, qid);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&mdev->qp_table_wq, flag);
+ return qp;
+}
+
+static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
+{
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
+static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_context *mc = gc->mana.driver_data;
+
+ if (port < 1 || port > mc->num_ports)
+ return NULL;
+ return mc->ports[port - 1];
+}
+
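+/* Copy size bytes from src to dst in reversed byte order; used when
+ * filling the GID, IP and MAC fields of HW requests.
+ */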
+static inline void copy_in_reverse(u8 *dst, const u8 *src, u32 size)
+{
+ u32 i;
+
+ for (i = 0; i < size; i++)
+ dst[size - 1 - i] = src[i];
+}
+
+int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region);
+
+int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+ mana_handle_t *gdma_region, u64 virt);
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
+int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
+ struct mana_ib_queue *queue);
+void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
+
struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
struct ib_wq_init_attr *init_attr,
struct ib_udata *udata);
@@ -135,7 +429,7 @@ void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port);
int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
@@ -159,4 +453,31 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+
+int mana_ib_create_eqs(struct mana_ib_dev *mdev);
+
+void mana_ib_destroy_eqs(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev);
+
+int mana_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+
+enum rdma_link_layer mana_ib_get_link_layer(struct ib_device *device, u32 port_num);
+
+int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context);
+
+int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8 *mac);
+
+int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell);
+
+int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 351207c60eb6..887b09dd86e7 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -30,12 +30,9 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
{
struct gdma_create_mr_response resp = {};
struct gdma_create_mr_request req = {};
- struct gdma_dev *mdev = dev->gdma_dev;
- struct gdma_context *gc;
+ struct gdma_context *gc = mdev_to_gc(dev);
int err;
- gc = mdev->gdma_context;
-
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
sizeof(resp));
req.pd_handle = mr_params->pd_handle;
@@ -77,12 +74,9 @@ static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
{
struct gdma_destroy_mr_response resp = {};
struct gdma_destroy_mr_request req = {};
- struct gdma_dev *mdev = dev->gdma_dev;
- struct gdma_context *gc;
+ struct gdma_context *gc = mdev_to_gc(dev);
int err;
- gc = mdev->gdma_context;
-
mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
sizeof(resp));
@@ -118,6 +112,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
"start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
start, iova, length, access_flags);
+ access_flags &= ~IB_ACCESS_OPTIONAL;
if (access_flags & ~VALID_MR_FLAGS)
return ERR_PTR(-EINVAL);
@@ -133,7 +128,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
goto err_free;
}
- err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
if (err) {
ibdev_dbg(ibdev, "Failed to create dma region for user-mr, %d\n",
err);
@@ -141,7 +136,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
}
ibdev_dbg(ibdev,
- "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err,
+ "created dma region for user-mr 0x%llx\n",
dma_region_handle);
mr_params.pd_handle = pd->pd_handle;
@@ -164,8 +159,7 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
return &mr->ibmr;
err_dma_region:
- mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
- dma_region_handle);
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
err_umem:
ib_umem_release(mr->umem);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 4b3b5b274e84..73d67c853b6f 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -15,17 +15,13 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
struct mana_port_context *mpc = netdev_priv(ndev);
struct mana_cfg_rx_steer_req_v2 *req;
struct mana_cfg_rx_steer_resp resp = {};
- mana_handle_t *req_indir_tab;
struct gdma_context *gc;
- struct gdma_dev *mdev;
u32 req_buf_size;
int i, err;
- mdev = dev->gdma_dev;
- gc = mdev->gdma_context;
+ gc = mdev_to_gc(dev);
- req_buf_size =
- sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+ req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -39,27 +35,27 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
req->rx_enable = 1;
req->update_default_rxobj = 1;
req->default_rxobj = default_rxobj;
- req->hdr.dev_id = mdev->dev_id;
+ req->hdr.dev_id = gc->mana.dev_id;
/* If there are more than 1 entries in indirection table, enable RSS */
if (log_ind_tbl_size)
req->rss_enable = true;
- req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
- req->indir_tab_offset = sizeof(*req);
+ req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
+ req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
+ indir_tab);
req->update_indir_tab = true;
req->cqe_coalescing_enable = 1;
- req_indir_tab = (mana_handle_t *)(req + 1);
/* The ind table passed to the hardware must have
- * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+ * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb
* ind_table to MANA_INDIRECT_TABLE_DEF_SIZE if required
*/
ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
- req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+ for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
+ req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
- req_indir_tab[i]);
+ req->indir_tab[i]);
}
req->update_hashkey = true;
@@ -102,22 +98,19 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
struct mana_ib_create_qp_rss_resp resp = {};
struct mana_ib_create_qp_rss ucmd = {};
- struct gdma_dev *gd = mdev->gdma_dev;
mana_handle_t *mana_ind_table;
struct mana_port_context *mpc;
- struct mana_context *mc;
+ unsigned int ind_tbl_size;
struct net_device *ndev;
struct mana_ib_cq *cq;
struct mana_ib_wq *wq;
- unsigned int ind_tbl_size;
+ struct mana_eq *eq;
struct ib_cq *ibcq;
struct ib_wq *ibwq;
int i = 0;
u32 port;
int ret;
- mc = gd->driver_data;
-
if (!udata || udata->inlen < sizeof(ucmd))
return -EINVAL;
@@ -129,7 +122,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
return ret;
}
- if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+ if (attr->cap.max_recv_wr > mdev->adapter_caps.max_qp_wr) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_recv_wr %d exceeding limit\n",
attr->cap.max_recv_wr);
@@ -144,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
}
ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
- if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
ibdev_dbg(&mdev->ib_dev,
"Indirect table size %d exceeding limit\n",
ind_tbl_size);
@@ -160,12 +153,12 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
/* IB ports start with 1, MANA start with 0 */
port = ucmd.port;
- if (port < 1 || port > mc->num_ports) {
+ ndev = mana_ib_get_netdev(pd->device, port);
+ if (!ndev) {
ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
port);
return -EINVAL;
}
- ndev = mc->ports[port - 1];
mpc = netdev_priv(ndev);
ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
@@ -190,34 +183,43 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
ibcq = ibwq->cq;
cq = container_of(ibcq, struct mana_ib_cq, ibcq);
- wq_spec.gdma_region = wq->gdma_region;
+ wq_spec.gdma_region = wq->queue.gdma_region;
wq_spec.queue_size = wq->wq_buf_size;
- cq_spec.gdma_region = cq->gdma_region;
+ cq_spec.gdma_region = cq->queue.gdma_region;
cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
- cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+ eq = &mpc->ac->eqs[cq->comp_vector];
+ cq_spec.attached_eq = eq->eq->id;
ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
&wq_spec, &cq_spec, &wq->rx_object);
- if (ret)
+ if (ret) {
+ /* Do cleanup starting with index i-1 */
+ i--;
goto fail;
+ }
/* The GDMA regions are now owned by the WQ object */
- wq->gdma_region = GDMA_INVALID_DMA_REGION;
- cq->gdma_region = GDMA_INVALID_DMA_REGION;
+ wq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+ cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
- wq->id = wq_spec.queue_index;
- cq->id = cq_spec.queue_index;
+ wq->queue.id = wq_spec.queue_index;
+ cq->queue.id = cq_spec.queue_index;
ibdev_dbg(&mdev->ib_dev,
- "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
- ret, wq->rx_object, wq->id, cq->id);
+ "rx_object 0x%llx wq id %llu cq id %llu\n",
+ wq->rx_object, wq->queue.id, cq->queue.id);
- resp.entries[i].cqid = cq->id;
- resp.entries[i].wqid = wq->id;
+ resp.entries[i].cqid = cq->queue.id;
+ resp.entries[i].wqid = wq->queue.id;
mana_ind_table[i] = wq->rx_object;
+
+ /* Create CQ table entry */
+ ret = mana_ib_install_cq_cb(mdev, cq);
+ if (ret)
+ goto fail;
}
resp.num_entries = i;
@@ -244,7 +246,11 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
fail:
while (i-- > 0) {
ibwq = ind_tbl->ind_tbl[i];
+ ibcq = ibwq->cq;
wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+ cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+ mana_ib_remove_cq_cb(mdev, cq);
mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
}
@@ -267,18 +273,15 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
ibucontext);
struct mana_ib_create_qp_resp resp = {};
- struct gdma_dev *gd = mdev->gdma_dev;
struct mana_ib_create_qp ucmd = {};
struct mana_obj_spec wq_spec = {};
struct mana_obj_spec cq_spec = {};
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
- struct ib_umem *umem;
- int err;
+ struct mana_eq *eq;
+ int eq_vec;
u32 port;
-
- mc = gd->driver_data;
+ int err;
if (!mana_ucontext || udata->inlen < sizeof(ucmd))
return -EINVAL;
@@ -290,12 +293,7 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
return err;
}
- /* IB ports start with 1, MANA Ethernet ports start with 0 */
- port = ucmd.port;
- if (port < 1 || port > mc->num_ports)
- return -EINVAL;
-
- if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+ if (attr->cap.max_send_wr > mdev->adapter_caps.max_qp_wr) {
ibdev_dbg(&mdev->ib_dev,
"Requested max_send_wr %d exceeding limit\n",
attr->cap.max_send_wr);
@@ -309,11 +307,17 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
return -EINVAL;
}
- ndev = mc->ports[port - 1];
+ port = ucmd.port;
+ ndev = mana_ib_get_netdev(ibpd->device, port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+ port);
+ return -EINVAL;
+ }
mpc = netdev_priv(ndev);
ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
- err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
+ err = mana_ib_cfg_vport(mdev, port, pd, mana_ucontext->doorbell);
if (err)
return -ENODEV;
@@ -322,61 +326,51 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
ucmd.sq_buf_addr, ucmd.port);
- umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(umem)) {
- err = PTR_ERR(umem);
- ibdev_dbg(&mdev->ib_dev,
- "Failed to get umem for create qp-raw, err %d\n",
- err);
- goto err_free_vport;
- }
- qp->sq_umem = umem;
-
- err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
- &qp->sq_gdma_region);
+ err = mana_ib_create_queue(mdev, ucmd.sq_buf_addr, ucmd.sq_buf_size, &qp->raw_sq);
if (err) {
ibdev_dbg(&mdev->ib_dev,
- "Failed to create dma region for create qp-raw, %d\n",
- err);
- goto err_release_umem;
+ "Failed to create queue for create qp-raw, err %d\n", err);
+ goto err_free_vport;
}
- ibdev_dbg(&mdev->ib_dev,
- "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
- err, qp->sq_gdma_region);
-
/* Create a WQ on the same port handle used by the Ethernet */
- wq_spec.gdma_region = qp->sq_gdma_region;
+ wq_spec.gdma_region = qp->raw_sq.gdma_region;
wq_spec.queue_size = ucmd.sq_buf_size;
- cq_spec.gdma_region = send_cq->gdma_region;
+ cq_spec.gdma_region = send_cq->queue.gdma_region;
cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
cq_spec.modr_ctx_id = 0;
- cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+ eq_vec = send_cq->comp_vector;
+ eq = &mpc->ac->eqs[eq_vec];
+ cq_spec.attached_eq = eq->eq->id;
err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
- &cq_spec, &qp->tx_object);
+ &cq_spec, &qp->qp_handle);
if (err) {
ibdev_dbg(&mdev->ib_dev,
"Failed to create wq for create raw-qp, err %d\n",
err);
- goto err_destroy_dma_region;
+ goto err_destroy_queue;
}
/* The GDMA regions are now owned by the WQ object */
- qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
- send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
+ qp->raw_sq.gdma_region = GDMA_INVALID_DMA_REGION;
+ send_cq->queue.gdma_region = GDMA_INVALID_DMA_REGION;
+
+ qp->raw_sq.id = wq_spec.queue_index;
+ send_cq->queue.id = cq_spec.queue_index;
- qp->sq_id = wq_spec.queue_index;
- send_cq->id = cq_spec.queue_index;
+ /* Create CQ table entry */
+ err = mana_ib_install_cq_cb(mdev, send_cq);
+ if (err)
+ goto err_destroy_wq_obj;
ibdev_dbg(&mdev->ib_dev,
- "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
- qp->tx_object, qp->sq_id, send_cq->id);
+ "qp->qp_handle 0x%llx sq id %llu cq id %llu\n",
+ qp->qp_handle, qp->raw_sq.id, send_cq->queue.id);
- resp.sqid = qp->sq_id;
- resp.cqid = send_cq->id;
+ resp.sqid = qp->raw_sq.id;
+ resp.cqid = send_cq->queue.id;
resp.tx_vp_offset = pd->tx_vp_offset;
err = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -384,26 +378,118 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
ibdev_dbg(&mdev->ib_dev,
"Failed to copy udata for create qp-raw, %d\n",
err);
- goto err_destroy_wq_obj;
+ goto err_remove_cq_cb;
}
return 0;
-err_destroy_wq_obj:
- mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+err_remove_cq_cb:
+ mana_ib_remove_cq_cb(mdev, send_cq);
-err_destroy_dma_region:
- mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+err_destroy_wq_obj:
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
-err_release_umem:
- ib_umem_release(umem);
+err_destroy_queue:
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
err_free_vport:
- mana_ib_uncfg_vport(mdev, pd, port - 1);
+ mana_ib_uncfg_vport(mdev, pd, port);
return err;
}
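+/*
+ * qp_table_wq maps a qp_num to its QP so the fatal-error EQ handler can
+ * find it; removal waits on qp->free so no handler still holds a
+ * reference once the QP is torn down.
+ */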
+static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ refcount_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_qp(struct mana_ib_dev *mdev,
+ struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+ mana_put_qp_ref(qp);
+ wait_for_completion(&qp->free);
+}
+
+static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_create_rc_qp_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct mana_ib_create_rc_qp ucmd = {};
+ int i, err, j;
+ u64 flags = 0;
+ u32 doorbell;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ flags = MANA_RC_FLAG_NO_FMR;
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy from udata, %d\n", err);
+ return err;
+ }
+
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ /* skip FMR for user-level RC QPs */
+ if (i == MANA_RC_SEND_QUEUE_FMR) {
+ qp->rc_qp.queues[i].id = INVALID_QUEUE_ID;
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ continue;
+ }
+ err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
+ &qp->rc_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
+ goto destroy_queues;
+ }
+ j++;
+ }
+
+ err = mana_ib_gd_create_rc_qp(mdev, qp, attr, doorbell, flags);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp %d\n", err);
+ goto destroy_queues;
+ }
+ qp->ibqp.qp_num = qp->rc_qp.queues[MANA_RC_RECV_QUEUE_RESPONDER].id;
+ qp->port = attr->port_num;
+
+ if (udata) {
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ if (i == MANA_RC_SEND_QUEUE_FMR)
+ continue;
+ resp.queue_id[j] = qp->rc_qp.queues[i].id;
+ j++;
+ }
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto destroy_qp;
+ }
+ }
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+ return err;
+}
+
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
@@ -415,8 +501,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
udata);
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_RC:
+ return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
default:
- /* Creating QP other than IB_QPT_RAW_PACKET is not supported */
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
}
@@ -424,11 +511,79 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return -EINVAL;
}
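+/*
+ * Translate the verbs modify_qp attributes into a MANA_IB_SET_QP_STATE
+ * request; when IB_QP_AV is set, the GRH fields are also copied,
+ * byte-reversed, into the HW address-handle layout.
+ */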
+static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibqp->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_rnic_set_qp_state_resp resp = {};
+ struct mana_rnic_set_qp_state_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ req.qp_state = attr->qp_state;
+ req.attr_mask = attr_mask;
+ req.path_mtu = attr->path_mtu;
+ req.rq_psn = attr->rq_psn;
+ req.sq_psn = attr->sq_psn;
+ req.dest_qpn = attr->dest_qp_num;
+ req.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ req.retry_cnt = attr->retry_cnt;
+ req.rnr_retry = attr->rnr_retry;
+ req.min_rnr_timer = attr->min_rnr_timer;
+ if (attr_mask & IB_QP_AV) {
+ ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ ibqp->port, ibqp->qp_num);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ copy_in_reverse(req.ah_attr.src_mac, mpc->mac_addr, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.dest_mac, attr->ah_attr.roce.dmac, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.src_addr, attr->ah_attr.grh.sgid_attr->gid.raw,
+ sizeof(union ib_gid));
+ copy_in_reverse(req.ah_attr.dest_addr, attr->ah_attr.grh.dgid.raw,
+ sizeof(union ib_gid));
+ if (rdma_gid_attr_network_type(attr->ah_attr.grh.sgid_attr) == RDMA_NETWORK_IPV4) {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV4;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV4;
+ } else {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV6;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV6;
+ }
+ req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
+ req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ ibqp->qp_num, attr->dest_qp_num);
+ req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class;
+ req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to modify qp err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- /* modify_qp is not supported by this version of the driver */
- return -EOPNOTSUPP;
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
}
static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
@@ -437,16 +592,13 @@ static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
{
struct mana_ib_dev *mdev =
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
- struct gdma_dev *gd = mdev->gdma_dev;
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_wq *wq;
struct ib_wq *ibwq;
int i;
- mc = gd->driver_data;
- ndev = mc->ports[qp->port - 1];
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
mpc = netdev_priv(ndev);
for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
@@ -464,26 +616,38 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
{
struct mana_ib_dev *mdev =
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
- struct gdma_dev *gd = mdev->gdma_dev;
struct ib_pd *ibpd = qp->ibqp.pd;
struct mana_port_context *mpc;
- struct mana_context *mc;
struct net_device *ndev;
struct mana_ib_pd *pd;
- mc = gd->driver_data;
- ndev = mc->ports[qp->port - 1];
+ ndev = mana_ib_get_netdev(qp->ibqp.device, qp->port);
mpc = netdev_priv(ndev);
pd = container_of(ibpd, struct mana_ib_pd, ibpd);
- mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+ mana_destroy_wq_obj(mpc, GDMA_SQ, qp->qp_handle);
- if (qp->sq_umem) {
- mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
- ib_umem_release(qp->sq_umem);
- }
+ mana_ib_destroy_queue(mdev, &qp->raw_sq);
- mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
+ mana_ib_uncfg_vport(mdev, pd, qp->port);
+
+ return 0;
+}
+
+static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_table_remove_qp(mdev, qp);
+
+ /* Ignore the return code; there is little we can do on failure,
+ * and the callee already logs the error.
+ */
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
return 0;
}
@@ -499,7 +663,8 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
udata);
return mana_ib_destroy_qp_raw(qp, udata);
-
+ case IB_QPT_RC:
+ return mana_ib_destroy_rc_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
index 372d361510e0..f959f4b9244f 100644
--- a/drivers/infiniband/hw/mana/wq.c
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -13,7 +13,6 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
container_of(pd->device, struct mana_ib_dev, ib_dev);
struct mana_ib_create_wq ucmd = {};
struct mana_ib_wq *wq;
- struct ib_umem *umem;
int err;
if (udata->inlen < sizeof(ucmd))
@@ -32,39 +31,18 @@ struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
- umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size,
- IB_ACCESS_LOCAL_WRITE);
- if (IS_ERR(umem)) {
- err = PTR_ERR(umem);
+ err = mana_ib_create_queue(mdev, ucmd.wq_buf_addr, ucmd.wq_buf_size, &wq->queue);
+ if (err) {
ibdev_dbg(&mdev->ib_dev,
- "Failed to get umem for create wq, err %d\n", err);
+ "Failed to create queue for create wq, %d\n", err);
goto err_free_wq;
}
- wq->umem = umem;
wq->wqe = init_attr->max_wr;
wq->wq_buf_size = ucmd.wq_buf_size;
wq->rx_object = INVALID_MANA_HANDLE;
-
- err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region);
- if (err) {
- ibdev_dbg(&mdev->ib_dev,
- "Failed to create dma region for create wq, %d\n",
- err);
- goto err_release_umem;
- }
-
- ibdev_dbg(&mdev->ib_dev,
- "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
- err, wq->gdma_region);
-
- /* WQ ID is returned at wq_create time, doesn't know the value yet */
-
return &wq->ibwq;
-err_release_umem:
- ib_umem_release(umem);
-
err_free_wq:
kfree(wq);
@@ -86,8 +64,7 @@ int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
- mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
- ib_umem_release(wq->umem);
+ mana_ib_destroy_queue(mdev, &wq->queue);
kfree(wq);
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 111fa88a3be4..d7327735b8d0 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -829,7 +829,6 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
{
- char alias_wq_name[15];
int ret = 0;
int i, j;
union ib_gid gid;
@@ -875,9 +874,8 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
dev->sriov.alias_guid.ports_guid[i].port = i;
- snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
dev->sriov.alias_guid.ports_guid[i].wq =
- alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM);
+ alloc_ordered_workqueue("alias_guid%d", WQ_MEM_RECLAIM, i);
if (!dev->sriov.alias_guid.ports_guid[i].wq) {
ret = -ENOMEM;
goto err_thread;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 4cd738aae53c..aa9ea6ba26e5 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -172,8 +172,9 @@ err_buf:
#define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index a37cfac5e23f..e6e132f10625 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -2158,7 +2158,6 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
struct mlx4_ib_demux_ctx *ctx,
int port)
{
- char name[12];
int ret = 0;
int i;
@@ -2194,24 +2193,21 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
goto err_mcg;
}
- snprintf(name, sizeof(name), "mlx4_ibt%d", port);
- ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->wq = alloc_ordered_workqueue("mlx4_ibt%d", WQ_MEM_RECLAIM, port);
if (!ctx->wq) {
pr_err("Failed to create tunnelling WQ for port %d\n", port);
ret = -ENOMEM;
goto err_wq;
}
- snprintf(name, sizeof(name), "mlx4_ibwi%d", port);
- ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->wi_wq = alloc_ordered_workqueue("mlx4_ibwi%d", WQ_MEM_RECLAIM, port);
if (!ctx->wi_wq) {
pr_err("Failed to create wire WQ for port %d\n", port);
ret = -ENOMEM;
goto err_wiwq;
}
- snprintf(name, sizeof(name), "mlx4_ibud%d", port);
- ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
+ ctx->ud_wq = alloc_ordered_workqueue("mlx4_ibud%d", WQ_MEM_RECLAIM, port);
if (!ctx->ud_wq) {
pr_err("Failed to create up/down WQ for port %d\n", port);
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b18e9f2adc82..529db874d67c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -82,6 +82,8 @@ static const char mlx4_ib_version[] =
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
u32 port_num);
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *param);
static struct workqueue_struct *wq;
@@ -125,14 +127,16 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
u32 port_num)
{
struct mlx4_ib_dev *ibdev = to_mdev(device);
- struct net_device *dev;
+ struct net_device *dev, *ret = NULL;
rcu_read_lock();
- dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
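+ /* Match netdevs by parent device and 0-based dev_port against the
+ * 1-based IB port number, instead of querying the mlx4 core driver.
+ */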
+ for_each_netdev_rcu(&init_net, dev) {
+ if (dev->dev.parent != ibdev->ib_dev.dev.parent ||
+ dev->dev_port + 1 != port_num)
+ continue;
- if (dev) {
if (mlx4_is_bonded(ibdev->dev)) {
- struct net_device *upper = NULL;
+ struct net_device *upper;
upper = netdev_master_upper_dev_get_rcu(dev);
if (upper) {
@@ -143,11 +147,14 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
dev = active;
}
}
+
+ dev_hold(dev);
+ ret = dev;
+ break;
}
- dev_hold(dev);
rcu_read_unlock();
- return dev;
+ return ret;
}
static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
@@ -254,7 +261,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
int ret = 0;
int hw_update = 0;
int i;
- struct gid_entry *gids = NULL;
+ struct gid_entry *gids;
u16 vlan_id = 0xffff;
u8 mac[ETH_ALEN];
@@ -293,8 +300,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
ret = -ENOMEM;
} else {
*context = port_gid_table->gids[free].ctx;
- memcpy(&port_gid_table->gids[free].gid,
- &attr->gid, sizeof(attr->gid));
+ port_gid_table->gids[free].gid = attr->gid;
port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].vlan_id = vlan_id;
port_gid_table->gids[free].ctx->real_index = free;
@@ -345,7 +351,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
struct mlx4_port_gid_table *port_gid_table;
int ret = 0;
int hw_update = 0;
- struct gid_entry *gids = NULL;
+ struct gid_entry *gids;
if (!rdma_cap_roce_gid_table(attr->device, attr->port_num))
return -EINVAL;
@@ -431,8 +437,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_udata *uhw)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err;
int have_ib_ports;
struct mlx4_uverbs_ex_query_device cmd;
@@ -649,8 +655,8 @@ mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num)
static int ib_link_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int ext_active_speed;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -827,8 +833,8 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port,
int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int clear = 0;
@@ -892,8 +898,8 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port,
u64 *sl2vl_tbl)
{
union sl2vl_tbl_to_u64 sl2vl64;
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
int jj;
@@ -952,8 +958,8 @@ static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey, int netw_view)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -1968,8 +1974,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
static int init_node_data(struct mlx4_ib_dev *dev)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
int err = -ENOMEM;
@@ -2319,61 +2325,53 @@ unlock:
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
-static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
- struct net_device *dev,
- unsigned long event)
+static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
+ struct net_device *dev,
+ unsigned long event)
{
- struct mlx4_ib_iboe *iboe;
- int update_qps_port = -1;
- int port;
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
ASSERT_RTNL();
- iboe = &ibdev->iboe;
+ if (dev->dev.parent != ibdev->ib_dev.dev.parent)
+ return;
spin_lock_bh(&iboe->lock);
- mlx4_foreach_ib_transport_port(port, ibdev->dev) {
-
- iboe->netdevs[port - 1] =
- mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
- if (dev == iboe->netdevs[port - 1] &&
- (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
- event == NETDEV_UP || event == NETDEV_CHANGE))
- update_qps_port = port;
+ iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;
- if (dev == iboe->netdevs[port - 1] &&
- (event == NETDEV_UP || event == NETDEV_DOWN)) {
- enum ib_port_state port_state;
- struct ib_event ibev = { };
-
- if (ib_get_cached_port_state(&ibdev->ib_dev, port,
- &port_state))
- continue;
+ if (event == NETDEV_UP || event == NETDEV_DOWN) {
+ enum ib_port_state port_state;
+ struct ib_event ibev = { };
- if (event == NETDEV_UP &&
- (port_state != IB_PORT_ACTIVE ||
- iboe->last_port_state[port - 1] != IB_PORT_DOWN))
- continue;
- if (event == NETDEV_DOWN &&
- (port_state != IB_PORT_DOWN ||
- iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
- continue;
- iboe->last_port_state[port - 1] = port_state;
+ if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1,
+ &port_state))
+ goto iboe_out;
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = port;
- ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
- IB_EVENT_PORT_ERR;
- ib_dispatch_event(&ibev);
- }
+ if (event == NETDEV_UP &&
+ (port_state != IB_PORT_ACTIVE ||
+ iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN))
+ goto iboe_out;
+ if (event == NETDEV_DOWN &&
+ (port_state != IB_PORT_DOWN ||
+ iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE))
+ goto iboe_out;
+ iboe->last_port_state[dev->dev_port] = port_state;
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = dev->dev_port + 1;
+ ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
+ IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&ibev);
}
+
+iboe_out:
spin_unlock_bh(&iboe->lock);
- if (update_qps_port > 0)
- mlx4_ib_update_qps(ibdev, dev, update_qps_port);
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
+ event == NETDEV_UP || event == NETDEV_CHANGE)
+ mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
}
static int mlx4_ib_netdev_event(struct notifier_block *this,
@@ -2386,7 +2384,7 @@ static int mlx4_ib_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
- mlx4_ib_scan_netdevs(ibdev, dev, event);
+ mlx4_ib_scan_netdev(ibdev, dev, event);
return NOTIFY_DONE;
}
@@ -2610,8 +2608,11 @@ static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
.destroy_flow = mlx4_ib_destroy_flow,
};
-static void *mlx4_ib_add(struct mlx4_dev *dev)
+static int mlx4_ib_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
{
+ struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+ struct mlx4_dev *dev = madev->mdev;
struct mlx4_ib_dev *ibdev;
int num_ports = 0;
int i, j;
@@ -2621,7 +2622,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
int num_req_counters;
int allocated;
u32 counter_index;
- struct counter_index *new_counter_index = NULL;
+ struct counter_index *new_counter_index;
pr_info_once("%s", mlx4_ib_version);
@@ -2631,27 +2632,31 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
/* No point in registering a device with no ports... */
if (num_ports == 0)
- return NULL;
+ return -ENODEV;
ibdev = ib_alloc_device(mlx4_ib_dev, ib_dev);
if (!ibdev) {
dev_err(&dev->persist->pdev->dev,
"Device struct alloc failed\n");
- return NULL;
+ return -ENOMEM;
}
iboe = &ibdev->iboe;
- if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
+ err = mlx4_pd_alloc(dev, &ibdev->priv_pdn);
+ if (err)
goto err_dealloc;
- if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
+ err = mlx4_uar_alloc(dev, &ibdev->priv_uar);
+ if (err)
goto err_pd;
ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
PAGE_SIZE);
- if (!ibdev->uar_map)
+ if (!ibdev->uar_map) {
+ err = -ENOMEM;
goto err_uar;
+ }
MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
ibdev->dev = dev;
@@ -2695,7 +2700,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
spin_lock_init(&iboe->lock);
- if (init_node_data(ibdev))
+ err = init_node_data(ibdev);
+ if (err)
goto err_map;
mlx4_init_sl2vl_tbl(ibdev);
@@ -2727,6 +2733,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
new_counter_index = kmalloc(sizeof(*new_counter_index),
GFP_KERNEL);
if (!new_counter_index) {
+ err = -ENOMEM;
if (allocated)
mlx4_counter_free(ibdev->dev, counter_index);
goto err_counter;
@@ -2744,8 +2751,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
new_counter_index =
kmalloc(sizeof(struct counter_index),
GFP_KERNEL);
- if (!new_counter_index)
+ if (!new_counter_index) {
+ err = -ENOMEM;
goto err_counter;
+ }
new_counter_index->index = counter_index;
new_counter_index->allocated = 0;
list_add_tail(&new_counter_index->list,
@@ -2774,8 +2783,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_uc_qpns_bitmap = bitmap_alloc(ibdev->steer_qpn_count,
GFP_KERNEL);
- if (!ibdev->ib_uc_qpns_bitmap)
+ if (!ibdev->ib_uc_qpns_bitmap) {
+ err = -ENOMEM;
goto err_steer_qp_release;
+ }
if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
bitmap_zero(ibdev->ib_uc_qpns_bitmap,
@@ -2795,17 +2806,21 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
- if (mlx4_ib_alloc_diag_counters(ibdev))
+ err = mlx4_ib_alloc_diag_counters(ibdev);
+ if (err)
goto err_steer_free_bitmap;
- if (ib_register_device(&ibdev->ib_dev, "mlx4_%d",
- &dev->persist->pdev->dev))
+ err = ib_register_device(&ibdev->ib_dev, "mlx4_%d",
+ &dev->persist->pdev->dev);
+ if (err)
goto err_diag_counters;
- if (mlx4_ib_mad_init(ibdev))
+ err = mlx4_ib_mad_init(ibdev);
+ if (err)
goto err_reg;
- if (mlx4_ib_init_sriov(ibdev))
+ err = mlx4_ib_init_sriov(ibdev);
+ if (err)
goto err_mad;
if (!iboe->nb.notifier_call) {
@@ -2839,7 +2854,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
do_slave_init(ibdev, j, 1);
}
}
- return ibdev;
+
+ /* register mlx4 core notifier */
+ ibdev->mlx_nb.notifier_call = mlx4_ib_event;
+ err = mlx4_register_event_notifier(dev, &ibdev->mlx_nb);
+ WARN(err, "failed to register mlx4 event notifier (%d)", err);
+
+ auxiliary_set_drvdata(adev, ibdev);
+ return 0;
err_notif:
if (ibdev->iboe.nb.notifier_call) {
@@ -2883,7 +2905,7 @@ err_pd:
err_dealloc:
ib_dealloc_device(&ibdev->ib_dev);
- return NULL;
+ return err;
}
int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
@@ -2923,7 +2945,7 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
{
int err;
size_t flow_size;
- struct ib_flow_attr *flow = NULL;
+ struct ib_flow_attr *flow;
struct ib_flow_spec_ib *ib_spec;
if (is_attach) {
@@ -2943,19 +2965,23 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC,
MLX4_FS_REGULAR, &mqp->reg_id);
- } else {
- err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+ kfree(flow);
+ return err;
}
- kfree(flow);
- return err;
+
+ return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
}
-static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
+static void mlx4_ib_remove(struct auxiliary_device *adev)
{
- struct mlx4_ib_dev *ibdev = ibdev_ptr;
+ struct mlx4_adev *madev = container_of(adev, struct mlx4_adev, adev);
+ struct mlx4_dev *dev = madev->mdev;
+ struct mlx4_ib_dev *ibdev = auxiliary_get_drvdata(adev);
int p;
int i;
+ mlx4_unregister_event_notifier(dev, &ibdev->mlx_nb);
+
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
devlink_port_type_clear(mlx4_get_devlink_port(dev, i));
ibdev->ib_active = false;
@@ -2992,7 +3018,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
{
- struct mlx4_ib_demux_work **dm = NULL;
+ struct mlx4_ib_demux_work **dm;
struct mlx4_dev *dev = ibdev->dev;
int i;
unsigned long flags;
@@ -3176,11 +3202,13 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
}
}
-static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
- enum mlx4_dev_event event, unsigned long param)
+static int mlx4_ib_event(struct notifier_block *this, unsigned long event,
+ void *param)
{
+ struct mlx4_ib_dev *ibdev =
+ container_of(this, struct mlx4_ib_dev, mlx_nb);
+ struct mlx4_dev *dev = ibdev->dev;
struct ib_event ibev;
- struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
struct mlx4_eqe *eqe = NULL;
struct ib_event_work *ew;
int p = 0;
@@ -3190,22 +3218,28 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
(event == MLX4_DEV_EVENT_PORT_DOWN))) {
ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
if (!ew)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_bonded_port_state_event);
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
- return;
+ return NOTIFY_DONE;
}
- if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
+ switch (event) {
+ case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
+ break;
+ case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
eqe = (struct mlx4_eqe *)param;
- else
- p = (int) param;
+ break;
+ default:
+ p = *(int *)param;
+ break;
+ }
switch (event) {
case MLX4_DEV_EVENT_PORT_UP:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
if (!mlx4_is_slave(dev) &&
rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
IB_LINK_LAYER_INFINIBAND) {
@@ -3220,7 +3254,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_DOWN:
if (p > ibdev->num_ports)
- return;
+ return NOTIFY_DONE;
ibev.event = IB_EVENT_PORT_ERR;
break;
@@ -3233,7 +3267,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
if (!ew)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
@@ -3243,7 +3277,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
queue_work(wq, &ew->work);
else
handle_port_mgmt_change_event(&ew->work);
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
@@ -3259,7 +3293,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1);
}
}
- return;
+ return NOTIFY_DONE;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
if (mlx4_is_master(dev)) {
@@ -3275,22 +3309,33 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
- return;
+ return NOTIFY_DONE;
default:
- return;
+ return NOTIFY_DONE;
}
- ibev.device = ibdev_ptr;
+ ibev.device = &ibdev->ib_dev;
ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
ib_dispatch_event(&ibev);
+ return NOTIFY_DONE;
}
-static struct mlx4_interface mlx4_ib_interface = {
- .add = mlx4_ib_add,
- .remove = mlx4_ib_remove,
- .event = mlx4_ib_event,
+static const struct auxiliary_device_id mlx4_ib_id_table[] = {
+ { .name = MLX4_ADEV_NAME ".ib" },
+ {},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mlx4_ib_id_table);
+
+static struct mlx4_adrv mlx4_ib_adrv = {
+ .adrv = {
+ .name = "ib",
+ .probe = mlx4_ib_probe,
+ .remove = mlx4_ib_remove,
+ .id_table = mlx4_ib_id_table,
+ },
.protocol = MLX4_PROT_IB_IPV6,
.flags = MLX4_INTFF_BONDING
};
@@ -3315,7 +3360,7 @@ static int __init mlx4_ib_init(void)
if (err)
goto clean_cm;
- err = mlx4_register_interface(&mlx4_ib_interface);
+ err = mlx4_register_auxiliary_driver(&mlx4_ib_adrv);
if (err)
goto clean_mcg;
@@ -3337,7 +3382,7 @@ clean_qp_event:
static void __exit mlx4_ib_cleanup(void)
{
- mlx4_unregister_interface(&mlx4_ib_interface);
+ mlx4_unregister_auxiliary_driver(&mlx4_ib_adrv);
mlx4_ib_mcg_destroy();
mlx4_ib_cm_destroy();
mlx4_ib_qp_event_cleanup();
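
For readers unfamiliar with the auxiliary-bus conversion above, a minimal driver skeleton follows; the "vendor.example" id string and the example_* names are illustrative, not mlx4 code, and the real mlx4 version additionally carries the mlx4_adrv wrapper with protocol/flags.

/*
 * Minimal auxiliary-bus driver skeleton (sketch only, not part of the
 * patch): the same probe/remove/id_table shape mlx4_ib now uses.
 */
#include <linux/auxiliary_bus.h>
#include <linux/module.h>

static int example_probe(struct auxiliary_device *adev,
			 const struct auxiliary_device_id *id)
{
	/* Allocate per-device state here and stash it for remove(). */
	auxiliary_set_drvdata(adev, NULL);
	return 0;
}

static void example_remove(struct auxiliary_device *adev)
{
	void *priv = auxiliary_get_drvdata(adev);

	/* Tear down whatever probe() set up. */
	(void)priv;
}

static const struct auxiliary_device_id example_id_table[] = {
	{ .name = "vendor.example" },	/* "<parent module>.<device name>" */
	{},
};
MODULE_DEVICE_TABLE(auxiliary, example_id_table);

static struct auxiliary_driver example_adrv = {
	.name = "example",
	.probe = example_probe,
	.remove = example_remove,
	.id_table = example_id_table,
};
module_auxiliary_driver(example_adrv);
MODULE_LICENSE("GPL");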
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 17fee1e73a45..b52bceff7d97 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/idr.h>
+#include <linux/notifier.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -644,6 +645,7 @@ struct mlx4_ib_dev {
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
struct mlx4_ib_diag_counters diag_counters[MLX4_DIAG_COUNTERS_TYPES];
+ struct notifier_block mlx_nb;
};
struct ib_event_work {
@@ -765,7 +767,7 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 24ee79aa2122..88f534cf690e 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -223,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
{
int i;
- char buff[11];
+ char buff[12];
struct mlx4_ib_iov_port *port = NULL;
int ret = 0 ;
struct ib_port_attr attr;
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 612ee8190a2d..b38961f5058e 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -6,6 +6,7 @@ mlx5_ib-y := ah.o \
cong.o \
counters.o \
cq.o \
+ data_direct.o \
dm.o \
doorbell.o \
gsi.o \
@@ -28,3 +29,4 @@ mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
fs.o \
qos.o \
std_types.o
+mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 1d0c8d5e745b..7c08e3008927 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -177,7 +177,7 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
return mlx5_cmd_exec_in(dev, dealloc_xrcd, in);
}
-int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port)
{
int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out);
@@ -195,12 +195,18 @@ int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC);
MLX5_SET(mad_ifc_in, in, op_mod, opmod);
- MLX5_SET(mad_ifc_in, in, port, port);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ MLX5_SET(mad_ifc_in, in, plane_index, port);
+ MLX5_SET(mad_ifc_in, in, port,
+ smi_to_native_portnum(dev, port));
+ } else {
+ MLX5_SET(mad_ifc_in, in, port, port);
+ }
data = MLX5_ADDR_OF(mad_ifc_in, in, mad);
memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad));
- err = mlx5_cmd_exec_inout(dev, mad_ifc, in, out);
+ err = mlx5_cmd_exec_inout(dev->mdev, mad_ifc, in, out);
if (err)
goto out;
@@ -239,3 +245,24 @@ int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid)
MLX5_SET(dealloc_uar_in, in, uid, uid);
return mlx5_cmd_exec_in(dev, dealloc_uar, in);
}
+
+int mlx5_cmd_query_vuid(struct mlx5_core_dev *dev, bool data_direct,
+ char *out_vuid)
+{
+ u8 out[MLX5_ST_SZ_BYTES(query_vuid_out) +
+ MLX5_ST_SZ_BYTES(array1024_auto)] = {};
+ u8 in[MLX5_ST_SZ_BYTES(query_vuid_in)] = {};
+ char *vuid;
+ int err;
+
+ MLX5_SET(query_vuid_in, in, opcode, MLX5_CMD_OPCODE_QUERY_VUID);
+ MLX5_SET(query_vuid_in, in, vhca_id, MLX5_CAP_GEN(dev, vhca_id));
+ MLX5_SET(query_vuid_in, in, data_direct, data_direct);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ vuid = MLX5_ADDR_OF(query_vuid_out, out, vuid);
+ memcpy(out_vuid, vuid, MLX5_ST_SZ_BYTES(array1024_auto));
+ return 0;
+}
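
A hypothetical caller sketch for the new mlx5_cmd_query_vuid() helper; the buffer sizing mirrors the memcpy above, the example_* name is not part of the patch, and the usual kernel headers are assumed from the surrounding file.

/* Sketch: read the VUID into a caller-supplied, NUL-terminated buffer. */
static int example_read_vuid(struct mlx5_core_dev *mdev, bool data_direct,
			     char *buf, size_t len)
{
	char *vuid;
	int err;

	/* +1 leaves room for the terminating NUL the command does not write */
	vuid = kzalloc(MLX5_ST_SZ_BYTES(array1024_auto) + 1, GFP_KERNEL);
	if (!vuid)
		return -ENOMEM;

	err = mlx5_cmd_query_vuid(mdev, data_direct, vuid);
	if (!err)
		strscpy(buf, vuid, len);

	kfree(vuid);
	return err;
}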
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 93a971a40d11..e6c88b6ebd0d 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -54,8 +54,10 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid);
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
-int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
+int mlx5_cmd_mad_ifc(struct mlx5_ib_dev *dev, const void *inb, void *outb,
u16 opmod, u8 port);
int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
+int mlx5_cmd_query_vuid(struct mlx5_core_dev *dev, bool data_direct,
+ char *out_vuid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index f87531318feb..a78a067e3ce7 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -458,6 +458,12 @@ void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u32 port_num)
dbg_cc_params->root = debugfs_create_dir("cc_params", mlx5_debugfs_get_dev_root(mdev));
for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ if (i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP_VALID ||
+ i == MLX5_IB_DBG_CC_GENERAL_RTT_RESP_DSCP)
+ if (!MLX5_CAP_GEN(mdev, roce) ||
+ !MLX5_CAP_ROCE(mdev, roce_cc_general))
+ continue;
+
dbg_cc_params->params[i].offset = i;
dbg_cc_params->params[i].dev = dev;
dbg_cc_params->params[i].port_num = port_num;
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 93257fa5aae8..4f6c1968a2ee 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -27,6 +27,7 @@ static const struct mlx5_ib_counter basic_q_cnts[] = {
INIT_Q_COUNTER(rx_write_requests),
INIT_Q_COUNTER(rx_read_requests),
INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(rx_dct_connect),
INIT_Q_COUNTER(out_of_buffer),
};
@@ -46,6 +47,7 @@ static const struct mlx5_ib_counter vport_basic_q_cnts[] = {
INIT_VPORT_Q_COUNTER(rx_write_requests),
INIT_VPORT_Q_COUNTER(rx_read_requests),
INIT_VPORT_Q_COUNTER(rx_atomic_requests),
+ INIT_VPORT_Q_COUNTER(rx_dct_connect),
INIT_VPORT_Q_COUNTER(out_of_buffer),
};
@@ -81,6 +83,8 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(resp_remote_access_errors),
INIT_Q_COUNTER(resp_cqe_flush_error),
INIT_Q_COUNTER(req_cqe_flush_error),
+ INIT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter roce_accl_cnts[] = {
@@ -100,6 +104,8 @@ static const struct mlx5_ib_counter vport_extended_err_cnts[] = {
INIT_VPORT_Q_COUNTER(resp_remote_access_errors),
INIT_VPORT_Q_COUNTER(resp_cqe_flush_error),
INIT_VPORT_Q_COUNTER(req_cqe_flush_error),
+ INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded),
+ INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded),
};
static const struct mlx5_ib_counter vport_roce_accl_cnts[] = {
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index efc9e4a6df04..4c54dc578069 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -38,6 +38,9 @@
#include "srq.h"
#include "qp.h"
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
{
struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;
@@ -714,7 +717,8 @@ static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct mlx5_ib_cq *cq, int entries, u32 **cqb,
- int *cqe_size, int *index, int *inlen)
+ int *cqe_size, int *index, int *inlen,
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_create_cq ucmd = {};
unsigned long page_size;
@@ -788,7 +792,11 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
order_base_2(page_size) - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(cqc, cqc, page_offset, page_offset_quantized);
- if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX)) {
+ err = uverbs_copy_from(index, attrs, MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX);
+ if (err)
+ goto err_cqb;
+ } else if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
*index = ucmd.uar_page_index;
} else if (context->bfregi.lib_uar_dyn) {
err = -EINVAL;
@@ -942,8 +950,9 @@ static void notify_soft_wc_handler(struct work_struct *work)
}
int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
int vector = attr->comp_vector;
@@ -980,7 +989,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
if (udata) {
err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
- &index, &inlen);
+ &index, &inlen, attrs);
if (err)
return err;
} else {
@@ -993,7 +1002,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
}
- err = mlx5_vector2eqn(dev->mdev, vector, &eqn);
+ err = mlx5_comp_eqn_get(dev->mdev, vector, &eqn);
if (err)
goto err_cqb;
@@ -1442,3 +1451,17 @@ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
return 0;
}
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_cq_create,
+ UVERBS_OBJECT_CQ,
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
+
+const struct uapi_definition mlx5_ib_create_cq_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_CQ, &mlx5_ib_cq_create),
+ {},
+};
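
For context, a hedged sketch of how a per-object uapi definition array such as mlx5_ib_create_cq_defs is typically chained into a driver-wide definition list; the real hookup lives in the driver's main file, not in this hunk, and the enclosing array name here is an assumption.

/* Illustrative only: UAPI_DEF_CHAIN is the standard way to pull in a
 * sub-tree of uapi definitions.
 */
static const struct uapi_definition example_driver_defs[] = {
	UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs),
	{},
};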
diff --git a/drivers/infiniband/hw/mlx5/data_direct.c b/drivers/infiniband/hw/mlx5/data_direct.c
new file mode 100644
index 000000000000..b9ba84afaae2
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/data_direct.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include "mlx5_ib.h"
+#include "data_direct.h"
+
+static LIST_HEAD(mlx5_data_direct_dev_list);
+static LIST_HEAD(mlx5_data_direct_reg_list);
+
+/*
+ * This mutex should be held when accessing either of the above lists
+ */
+static DEFINE_MUTEX(mlx5_data_direct_mutex);
+
+struct mlx5_data_direct_registration {
+ struct mlx5_ib_dev *ibdev;
+ char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1];
+ struct list_head list;
+};
+
+static const struct pci_device_id mlx5_data_direct_pci_table[] = {
+ { PCI_VDEVICE(MELLANOX, 0x2100) }, /* ConnectX-8 Data Direct */
+ { 0, }
+};
+
+static int mlx5_data_direct_vpd_get_vuid(struct mlx5_data_direct_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+ unsigned int vpd_size, kw_len;
+ u8 *vpd_data;
+ int start;
+ int ret;
+
+ vpd_data = pci_vpd_alloc(pdev, &vpd_size);
+ if (IS_ERR(vpd_data)) {
+ pci_err(pdev, "Unable to read VPD, err=%ld\n", PTR_ERR(vpd_data));
+ return PTR_ERR(vpd_data);
+ }
+
+ start = pci_vpd_find_ro_info_keyword(vpd_data, vpd_size, "VU", &kw_len);
+ if (start < 0) {
+ ret = start;
+ pci_err(pdev, "VU keyword not found, err=%d\n", ret);
+ goto end;
+ }
+
+ dev->vuid = kmemdup_nul(vpd_data + start, kw_len, GFP_KERNEL);
+ ret = dev->vuid ? 0 : -ENOMEM;
+
+end:
+ kfree(vpd_data);
+ return ret;
+}
+
+static void mlx5_data_direct_shutdown(struct pci_dev *pdev)
+{
+ pci_disable_device(pdev);
+}
+
+static int mlx5_data_direct_set_dma_caps(struct pci_dev *pdev)
+{
+ int err;
+
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_warn(&pdev->dev,
+ "Warning: couldn't set 64-bit PCI DMA mask, err=%d\n", err);
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err) {
+ dev_err(&pdev->dev, "Can't set PCI DMA mask, err=%d\n", err);
+ return err;
+ }
+ }
+
+ dma_set_max_seg_size(&pdev->dev, SZ_2G);
+ return 0;
+}
+
+int mlx5_data_direct_ib_reg(struct mlx5_ib_dev *ibdev, char *vuid)
+{
+ struct mlx5_data_direct_registration *reg;
+ struct mlx5_data_direct_dev *dev;
+
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg)
+ return -ENOMEM;
+
+ reg->ibdev = ibdev;
+ strcpy(reg->vuid, vuid);
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(dev, &mlx5_data_direct_dev_list, list) {
+ if (strcmp(dev->vuid, vuid) == 0) {
+ mlx5_ib_data_direct_bind(ibdev, dev);
+ break;
+ }
+ }
+
+ /* Add the registration to its global list, to be used upon bind/unbind
+ * of its affiliated data direct device
+ */
+ list_add_tail(&reg->list, &mlx5_data_direct_reg_list);
+ mutex_unlock(&mlx5_data_direct_mutex);
+ return 0;
+}
+
+void mlx5_data_direct_ib_unreg(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (reg->ibdev == ibdev) {
+ list_del(&reg->list);
+ kfree(reg);
+ goto end;
+ }
+ }
+
+ WARN_ON(true);
+end:
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static void mlx5_data_direct_dev_reg(struct mlx5_data_direct_dev *dev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (strcmp(dev->vuid, reg->vuid) == 0)
+ mlx5_ib_data_direct_bind(reg->ibdev, dev);
+ }
+
+ /* Add the data direct device to the global list; further IB devices may
+ * use it later as well.
+ */
+ list_add_tail(&dev->list, &mlx5_data_direct_dev_list);
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static void mlx5_data_direct_dev_unreg(struct mlx5_data_direct_dev *dev)
+{
+ struct mlx5_data_direct_registration *reg;
+
+ mutex_lock(&mlx5_data_direct_mutex);
+ /* Prevent any further affiliations */
+ list_del(&dev->list);
+ list_for_each_entry(reg, &mlx5_data_direct_reg_list, list) {
+ if (strcmp(dev->vuid, reg->vuid) == 0)
+ mlx5_ib_data_direct_unbind(reg->ibdev);
+ }
+ mutex_unlock(&mlx5_data_direct_mutex);
+}
+
+static int mlx5_data_direct_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct mlx5_data_direct_dev *dev;
+ int err;
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->device = &pdev->dev;
+ dev->pdev = pdev;
+
+ pci_set_drvdata(dev->pdev, dev);
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(dev->device, "Cannot enable PCI device, err=%d\n", err);
+ goto err;
+ }
+
+ pci_set_master(pdev);
+ err = mlx5_data_direct_set_dma_caps(pdev);
+ if (err)
+ goto err_disable;
+
+ if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ dev_dbg(dev->device, "Enabling pci atomics failed\n");
+
+ err = mlx5_data_direct_vpd_get_vuid(dev);
+ if (err)
+ goto err_disable;
+
+ mlx5_data_direct_dev_reg(dev);
+ return 0;
+
+err_disable:
+ pci_disable_device(pdev);
+err:
+ kfree(dev);
+ return err;
+}
+
+static void mlx5_data_direct_remove(struct pci_dev *pdev)
+{
+ struct mlx5_data_direct_dev *dev = pci_get_drvdata(pdev);
+
+ mlx5_data_direct_dev_unreg(dev);
+ pci_disable_device(pdev);
+ kfree(dev->vuid);
+ kfree(dev);
+}
+
+static struct pci_driver mlx5_data_direct_driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = mlx5_data_direct_pci_table,
+ .probe = mlx5_data_direct_probe,
+ .remove = mlx5_data_direct_remove,
+ .shutdown = mlx5_data_direct_shutdown,
+};
+
+int mlx5_data_direct_driver_register(void)
+{
+ return pci_register_driver(&mlx5_data_direct_driver);
+}
+
+void mlx5_data_direct_driver_unregister(void)
+{
+ pci_unregister_driver(&mlx5_data_direct_driver);
+}
diff --git a/drivers/infiniband/hw/mlx5/data_direct.h b/drivers/infiniband/hw/mlx5/data_direct.h
new file mode 100644
index 000000000000..2fd2bdbe8f69
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/data_direct.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#ifndef _MLX5_IB_DATA_DIRECT_H
+#define _MLX5_IB_DATA_DIRECT_H
+
+struct mlx5_ib_dev;
+
+struct mlx5_data_direct_dev {
+ struct device *device;
+ struct pci_dev *pdev;
+ char *vuid;
+ struct list_head list;
+};
+
+int mlx5_data_direct_ib_reg(struct mlx5_ib_dev *ibdev, char *vuid);
+void mlx5_data_direct_ib_unreg(struct mlx5_ib_dev *ibdev);
+int mlx5_data_direct_driver_register(void);
+void mlx5_data_direct_driver_unregister(void);
+
+#endif
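
A short, assumed usage sketch of the registration API declared above: an IB device registers the VUID it queried from firmware so that it binds to a matching data-direct PCI function whether that function probes before or after it. The call sites and example_* names are illustrative, not taken from the patch.

/* Hypothetical call sites; placement and error handling are assumptions. */
static int example_data_direct_init(struct mlx5_ib_dev *ibdev, char *vuid)
{
	/* Safe to call before the data-direct device exists: binding also
	 * happens later from mlx5_data_direct_dev_reg() when it probes.
	 */
	return mlx5_data_direct_ib_reg(ibdev, vuid);
}

static void example_data_direct_cleanup(struct mlx5_ib_dev *ibdev)
{
	mlx5_data_direct_ib_unreg(ibdev);
}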
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index db5fb196c728..69999d8d24f3 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1002,7 +1002,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
- err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn);
+ err = mlx5_comp_eqn_get(dev->mdev, user_vector, &dev_eqn);
if (err < 0)
return err;
@@ -2498,7 +2498,7 @@ static void dispatch_event_fd(struct list_head *fd_list,
list_for_each_entry_rcu(item, fd_list, xa_list) {
if (item->eventfd)
- eventfd_signal(item->eventfd, 1);
+ eventfd_signal(item->eventfd);
else
deliver_event(item, data);
}
@@ -2673,7 +2673,6 @@ static const struct file_operations devx_async_cmd_event_fops = {
.read = devx_async_cmd_event_read,
.poll = devx_async_cmd_event_poll,
.release = uverbs_uobject_fd_release,
- .llseek = no_llseek,
};
static ssize_t devx_async_event_read(struct file *filp, char __user *buf,
@@ -2788,7 +2787,6 @@ static const struct file_operations devx_async_event_fops = {
.read = devx_async_event_read,
.poll = devx_async_event_poll,
.release = uverbs_uobject_fd_release,
- .llseek = no_llseek,
};
static void devx_async_cmd_event_destroy_uobj(struct ib_uobject *uobj,
@@ -2949,7 +2947,7 @@ DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
UVERBS_IDR_ANY_OBJECT,
- UVERBS_ACCESS_WRITE,
+ UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index 3669c90b2dad..b4c97fb62abf 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -341,6 +341,8 @@ static enum mlx5_sw_icm_type get_icm_type(int uapi_type)
return MLX5_SW_ICM_TYPE_HEADER_MODIFY;
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
return MLX5_SW_ICM_TYPE_HEADER_MODIFY_PATTERN;
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
+ return MLX5_SW_ICM_TYPE_SW_ENCAP;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
default:
return MLX5_SW_ICM_TYPE_STEERING;
@@ -364,6 +366,7 @@ static struct ib_dm *handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
switch (type) {
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner) ||
MLX5_CAP_FLOWTABLE_NIC_TX(dev, sw_owner) ||
MLX5_CAP_FLOWTABLE_NIC_RX(dev, sw_owner_v2) ||
@@ -438,6 +441,7 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
return handle_alloc_dm_sw_icm(context, attr, attrs, type);
default:
return ERR_PTR(-EOPNOTSUPP);
@@ -491,6 +495,7 @@ static int mlx5_ib_dealloc_dm(struct ib_dm *ibdm,
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
return mlx5_dm_icm_dealloc(ctx, to_icm(ibdm));
default:
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 1e419e080b53..520034acf73a 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -2470,8 +2470,8 @@ destroy_res:
mlx5_steering_anchor_destroy_res(ft_prio);
put_flow_table:
put_flow_table(dev, ft_prio, true);
- mutex_unlock(&dev->flow_db->lock);
free_obj:
+ mutex_unlock(&dev->flow_db->lock);
kfree(obj);
return err;
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index c7a4ee896121..49af1cfbe6d1 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -13,6 +13,7 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
int vport_index)
{
struct mlx5_ib_dev *ibdev;
+ struct net_device *ndev;
ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
if (!ibdev)
@@ -20,12 +21,9 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
ibdev->port[vport_index].rep = rep;
rep->rep_data[REP_IB].priv = ibdev;
- write_lock(&ibdev->port[vport_index].roce.netdev_lock);
- ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
- write_unlock(&ibdev->port[vport_index].roce.netdev_lock);
+ ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);
- return 0;
+ return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
}
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);
@@ -104,10 +102,15 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
ibdev->is_rep = true;
vport_index = rep->vport_index;
ibdev->port[vport_index].rep = rep;
- ibdev->port[vport_index].roce.netdev =
- mlx5_ib_get_rep_netdev(lag_master->priv.eswitch, rep->vport);
ibdev->mdev = lag_master;
ibdev->num_ports = num_ports;
+ ibdev->ib_dev.phys_port_cnt = num_ports;
+ ret = ib_device_set_netdev(&ibdev->ib_dev,
+ mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
+ rep->vport),
+ vport_index + 1);
+ if (ret)
+ goto fail_add;
ret = __mlx5_ib_add(ibdev, profile);
if (ret)
@@ -160,9 +163,8 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
}
port = &dev->port[vport_index];
- write_lock(&port->roce.netdev_lock);
- port->roce.netdev = NULL;
- write_unlock(&port->roce.netdev_lock);
+
+ ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
rep->rep_data[REP_IB].priv = NULL;
port->rep = NULL;
diff --git a/drivers/infiniband/hw/mlx5/macsec.c b/drivers/infiniband/hw/mlx5/macsec.c
new file mode 100644
index 000000000000..3c56eb5eddf3
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/macsec.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "macsec.h"
+#include <linux/mlx5/macsec.h>
+
+struct mlx5_reserved_gids {
+ int macsec_index;
+ const struct ib_gid_attr *physical_gid;
+};
+
+struct mlx5_roce_gids {
+ struct list_head roce_gid_list_entry;
+ u16 gid_idx;
+ union {
+ struct sockaddr_in sockaddr_in;
+ struct sockaddr_in6 sockaddr_in6;
+ } addr;
+};
+
+struct mlx5_macsec_device {
+ struct list_head macsec_devices_list_entry;
+ void *macdev;
+ struct list_head macsec_roce_gids;
+ struct list_head tx_rules_list;
+ struct list_head rx_rules_list;
+};
+
+static void cleanup_macsec_device(struct mlx5_macsec_device *macsec_device)
+{
+ if (!list_empty(&macsec_device->tx_rules_list) ||
+ !list_empty(&macsec_device->rx_rules_list) ||
+ !list_empty(&macsec_device->macsec_roce_gids))
+ return;
+
+ list_del(&macsec_device->macsec_devices_list_entry);
+ kfree(macsec_device);
+}
+
+static struct mlx5_macsec_device *get_macsec_device(void *macdev,
+ struct list_head *macsec_devices_list)
+{
+ struct mlx5_macsec_device *iter, *macsec_device = NULL;
+
+ list_for_each_entry(iter, macsec_devices_list, macsec_devices_list_entry) {
+ if (iter->macdev == macdev) {
+ macsec_device = iter;
+ break;
+ }
+ }
+
+ if (macsec_device)
+ return macsec_device;
+
+ macsec_device = kzalloc(sizeof(*macsec_device), GFP_KERNEL);
+ if (!macsec_device)
+ return NULL;
+
+ macsec_device->macdev = macdev;
+ INIT_LIST_HEAD(&macsec_device->tx_rules_list);
+ INIT_LIST_HEAD(&macsec_device->rx_rules_list);
+ INIT_LIST_HEAD(&macsec_device->macsec_roce_gids);
+ list_add(&macsec_device->macsec_devices_list_entry, macsec_devices_list);
+
+ return macsec_device;
+}
+
+static void mlx5_macsec_del_roce_gid(struct mlx5_macsec_device *macsec_device, u16 gid_idx)
+{
+ struct mlx5_roce_gids *current_gid, *next_gid;
+
+ list_for_each_entry_safe(current_gid, next_gid, &macsec_device->macsec_roce_gids,
+ roce_gid_list_entry)
+ if (current_gid->gid_idx == gid_idx) {
+ list_del(&current_gid->roce_gid_list_entry);
+ kfree(current_gid);
+ }
+}
+
+static void mlx5_macsec_save_roce_gid(struct mlx5_macsec_device *macsec_device,
+ const struct sockaddr *addr, u16 gid_idx)
+{
+ struct mlx5_roce_gids *roce_gids;
+
+ roce_gids = kzalloc(sizeof(*roce_gids), GFP_KERNEL);
+ if (!roce_gids)
+ return;
+
+ roce_gids->gid_idx = gid_idx;
+ if (addr->sa_family == AF_INET)
+ memcpy(&roce_gids->addr.sockaddr_in, addr, sizeof(roce_gids->addr.sockaddr_in));
+ else
+ memcpy(&roce_gids->addr.sockaddr_in6, addr, sizeof(roce_gids->addr.sockaddr_in6));
+
+ list_add_tail(&roce_gids->roce_gid_list_entry, &macsec_device->macsec_roce_gids);
+}
+
+static void handle_macsec_gids(struct list_head *macsec_devices_list,
+ struct mlx5_macsec_event_data *data)
+{
+ struct mlx5_macsec_device *macsec_device;
+ struct mlx5_roce_gids *gid;
+
+ macsec_device = get_macsec_device(data->macdev, macsec_devices_list);
+ if (!macsec_device)
+ return;
+
+ list_for_each_entry(gid, &macsec_device->macsec_roce_gids, roce_gid_list_entry) {
+ mlx5_macsec_add_roce_sa_rules(data->fs_id, (struct sockaddr *)&gid->addr,
+ gid->gid_idx, &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, data->macsec_fs,
+ data->is_tx);
+ }
+}
+
+static void del_sa_roce_rule(struct list_head *macsec_devices_list,
+ struct mlx5_macsec_event_data *data)
+{
+ struct mlx5_macsec_device *macsec_device;
+
+ macsec_device = get_macsec_device(data->macdev, macsec_devices_list);
+ if (WARN_ON(!macsec_device))
+ return;
+
+ mlx5_macsec_del_roce_sa_rules(data->fs_id, data->macsec_fs,
+ &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, data->is_tx);
+}
+
+static int macsec_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_macsec *macsec = container_of(nb, struct mlx5_macsec, blocking_events_nb);
+
+ mutex_lock(&macsec->lock);
+ switch (event) {
+ case MLX5_DRIVER_EVENT_MACSEC_SA_ADDED:
+ handle_macsec_gids(&macsec->macsec_devices_list, data);
+ break;
+ case MLX5_DRIVER_EVENT_MACSEC_SA_DELETED:
+ del_sa_roce_rule(&macsec->macsec_devices_list, data);
+ break;
+ default:
+ mutex_unlock(&macsec->lock);
+ return NOTIFY_DONE;
+ }
+ mutex_unlock(&macsec->lock);
+ return NOTIFY_OK;
+}
+
+void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ dev->macsec.blocking_events_nb.notifier_call = macsec_event;
+ blocking_notifier_chain_register(&dev->mdev->macsec_nh,
+ &dev->macsec.blocking_events_nb);
+}
+
+void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ blocking_notifier_chain_unregister(&dev->mdev->macsec_nh,
+ &dev->macsec.blocking_events_nb);
+}
+
+int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev)
+{
+ int i, j, max_gids;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return 0;
+ }
+
+ max_gids = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size);
+ for (i = 0; i < dev->num_ports; i++) {
+ dev->port[i].reserved_gids = kcalloc(max_gids,
+ sizeof(*dev->port[i].reserved_gids),
+ GFP_KERNEL);
+ if (!dev->port[i].reserved_gids)
+ goto err;
+
+ for (j = 0; j < max_gids; j++)
+ dev->port[i].reserved_gids[j].macsec_index = -1;
+ }
+
+ INIT_LIST_HEAD(&dev->macsec.macsec_devices_list);
+ mutex_init(&dev->macsec.lock);
+
+ return 0;
+err:
+ while (i >= 0) {
+ kfree(dev->port[i].reserved_gids);
+ i--;
+ }
+ return -ENOMEM;
+}
+
+void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev)
+{
+ int i;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev))
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+
+ for (i = 0; i < dev->num_ports; i++)
+ kfree(dev->port[i].reserved_gids);
+
+ mutex_destroy(&dev->macsec.lock);
+}
+
+int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(attr->device);
+ struct mlx5_macsec_device *macsec_device;
+ const struct ib_gid_attr *physical_gid;
+ struct mlx5_reserved_gids *mgids;
+ struct net_device *ndev;
+ int ret = 0;
+ union {
+ struct sockaddr_in sockaddr_in;
+ struct sockaddr_in6 sockaddr_in6;
+ } addr;
+
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return 0;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return 0;
+ }
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+
+ if (!netif_is_macsec(ndev) || !macsec_netdev_is_offloaded(ndev)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ mutex_lock(&dev->macsec.lock);
+ macsec_device = get_macsec_device(ndev, &dev->macsec.macsec_devices_list);
+ if (!macsec_device) {
+ ret = -ENOMEM;
+ goto dev_err;
+ }
+
+ physical_gid = rdma_find_gid(attr->device, &attr->gid,
+ attr->gid_type, NULL);
+ if (!IS_ERR(physical_gid)) {
+ ret = set_roce_addr(to_mdev(physical_gid->device),
+ physical_gid->port_num,
+ physical_gid->index, NULL,
+ physical_gid);
+ if (ret)
+ goto gid_err;
+
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[physical_gid->index];
+ mgids->macsec_index = attr->index;
+ mgids->physical_gid = physical_gid;
+ }
+
+ /* Proceed with adding steering rules, regardless of whether there was gid ambiguity. */
+ rdma_gid2ip((struct sockaddr *)&addr, &attr->gid);
+ ret = mlx5_macsec_add_roce_rule(ndev, (struct sockaddr *)&addr, attr->index,
+ &macsec_device->tx_rules_list,
+ &macsec_device->rx_rules_list, dev->mdev->macsec_fs);
+ if (ret && !IS_ERR(physical_gid))
+ goto rule_err;
+
+ mlx5_macsec_save_roce_gid(macsec_device, (struct sockaddr *)&addr, attr->index);
+
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+ return ret;
+
+rule_err:
+ set_roce_addr(to_mdev(physical_gid->device), physical_gid->port_num,
+ physical_gid->index, &physical_gid->gid, physical_gid);
+ mgids->macsec_index = -1;
+gid_err:
+ rdma_put_gid_attr(physical_gid);
+ cleanup_macsec_device(macsec_device);
+dev_err:
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+ return ret;
+}
+
+void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(attr->device);
+ struct mlx5_macsec_device *macsec_device;
+ struct mlx5_reserved_gids *mgids;
+ struct net_device *ndev;
+ int i, max_gids;
+
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ return;
+
+ if (!mlx5_is_macsec_roce_supported(dev->mdev)) {
+ mlx5_ib_dbg(dev, "RoCE MACsec not supported due to capabilities\n");
+ return;
+ }
+
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[attr->index];
+ if (mgids->macsec_index != -1) { /* Checking if physical gid has ambiguous IP */
+ rdma_put_gid_attr(mgids->physical_gid);
+ mgids->macsec_index = -1;
+ return;
+ }
+
+ rcu_read_lock();
+ ndev = rcu_dereference(attr->ndev);
+ if (!ndev) {
+ rcu_read_unlock();
+ return;
+ }
+
+ if (!netif_is_macsec(ndev) || !macsec_netdev_is_offloaded(ndev)) {
+ rcu_read_unlock();
+ return;
+ }
+ dev_hold(ndev);
+ rcu_read_unlock();
+
+ mutex_lock(&dev->macsec.lock);
+ max_gids = MLX5_CAP_ROCE(dev->mdev, roce_address_table_size);
+ for (i = 0; i < max_gids; i++) { /* Checking if macsec gid has ambiguous IP */
+ mgids = &dev->port[attr->port_num - 1].reserved_gids[i];
+ if (mgids->macsec_index == attr->index) {
+ const struct ib_gid_attr *physical_gid = mgids->physical_gid;
+
+ set_roce_addr(to_mdev(physical_gid->device),
+ physical_gid->port_num,
+ physical_gid->index,
+ &physical_gid->gid, physical_gid);
+
+ rdma_put_gid_attr(physical_gid);
+ mgids->macsec_index = -1;
+ break;
+ }
+ }
+ macsec_device = get_macsec_device(ndev, &dev->macsec.macsec_devices_list);
+ mlx5_macsec_del_roce_rule(attr->index, dev->mdev->macsec_fs,
+ &macsec_device->tx_rules_list, &macsec_device->rx_rules_list);
+ mlx5_macsec_del_roce_gid(macsec_device, attr->index);
+ cleanup_macsec_device(macsec_device);
+
+ dev_put(ndev);
+ mutex_unlock(&dev->macsec.lock);
+}
diff --git a/drivers/infiniband/hw/mlx5/macsec.h b/drivers/infiniband/hw/mlx5/macsec.h
new file mode 100644
index 000000000000..9b77ba90f0f4
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/macsec.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
+
+#ifndef __MLX5_MACSEC_H__
+#define __MLX5_MACSEC_H__
+
+#include <net/macsec.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
+#include "mlx5_ib.h"
+
+#ifdef CONFIG_MLX5_MACSEC
+struct mlx5_reserved_gids;
+
+int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr);
+void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr);
+int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev);
+void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev);
+#else
+static inline int mlx5r_add_gid_macsec_operations(const struct ib_gid_attr *attr) { return 0; }
+static inline void mlx5r_del_gid_macsec_operations(const struct ib_gid_attr *attr) {}
+static inline int mlx5r_macsec_init_gids_and_devlist(struct mlx5_ib_dev *dev) { return 0; }
+static inline void mlx5r_macsec_dealloc_gids(struct mlx5_ib_dev *dev) {}
+static inline void mlx5r_macsec_event_register(struct mlx5_ib_dev *dev) {}
+static inline void mlx5r_macsec_event_unregister(struct mlx5_ib_dev *dev) {}
+#endif
+#endif /* __MLX5_MACSEC_H__ */
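
A brief illustration of the stub pattern above: because the header supplies inline no-ops when CONFIG_MLX5_MACSEC is disabled, callers need no #ifdef guards. The example_* wrapper below is hypothetical.

/* Hypothetical caller: compiles the same with or without CONFIG_MLX5_MACSEC. */
static int example_macsec_setup(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5r_macsec_init_gids_and_devlist(dev);
	if (err)
		return err;

	mlx5r_macsec_event_register(dev);
	return 0;
}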
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 9c8a7b206dcf..1b6c5e37d169 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -69,7 +69,7 @@ static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey,
if (ignore_bkey || !in_wc)
op_modifier |= 0x2;
- return mlx5_cmd_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier,
+ return mlx5_cmd_mad_ifc(dev, in_mad, response_mad, op_modifier,
port);
}
@@ -147,8 +147,39 @@ static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
vl_15_dropped);
}
-static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out,
- size_t sz)
+static void pma_cnt_ext_assign_ppcnt(struct ib_pma_portcounters_ext *cnt_ext,
+ void *out)
+{
+ void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+ counter_set);
+
+#define MLX5_GET_EXT_CNTR(counter_name) \
+ MLX5_GET64(ib_ext_port_cntrs_grp_data_layout, \
+ out_pma, counter_name##_high)
+
+ cnt_ext->port_xmit_data =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_data) >> 2);
+ cnt_ext->port_rcv_data =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_data) >> 2);
+
+ cnt_ext->port_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_xmit_pkts));
+ cnt_ext->port_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_rcv_pkts));
+
+ cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_xmit_pkts));
+ cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_unicast_rcv_pkts));
+
+ cnt_ext->port_multicast_xmit_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_xmit_pkts));
+ cnt_ext->port_multicast_rcv_packets =
+ cpu_to_be64(MLX5_GET_EXT_CNTR(port_multicast_rcv_pkts));
+}
+
+static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, u8 plane_num,
+ void *out, size_t sz, bool ext)
{
u32 *in;
int err;
@@ -160,8 +191,14 @@ static int query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out,
}
MLX5_SET(ppcnt_reg, in, local_port, port_num);
-
- MLX5_SET(ppcnt_reg, in, grp, MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
+ MLX5_SET(ppcnt_reg, in, plane_ind, plane_num);
+
+ if (ext)
+ MLX5_SET(ppcnt_reg, in, grp,
+ MLX5_INFINIBAND_EXTENDED_PORT_COUNTERS_GROUP);
+ else
+ MLX5_SET(ppcnt_reg, in, grp,
+ MLX5_INFINIBAND_PORT_COUNTERS_GROUP);
err = mlx5_core_access_reg(dev, in, sz, out,
sz, MLX5_REG_PPCNT, 0, 0);
@@ -188,7 +225,9 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
mdev = dev->mdev;
mdev_port_num = 1;
}
- if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1) {
+ if (MLX5_CAP_GEN(dev->mdev, num_ports) == 1 &&
+ !mlx5_core_mp_enabled(mdev) &&
+ dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI) {
/* set local port to one for Function-Per-Port HCA. */
mdev = dev->mdev;
mdev_port_num = 1;
@@ -207,7 +246,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
struct ib_pma_portcounters_ext *pma_cnt_ext =
(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
- int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+ int sz = max(MLX5_ST_SZ_BYTES(query_vport_counter_out),
+ MLX5_ST_SZ_BYTES(ppcnt_reg));
out_cnt = kvzalloc(sz, GFP_KERNEL);
if (!out_cnt) {
@@ -215,10 +255,18 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
goto done;
}
- err = mlx5_core_query_vport_counter(mdev, 0, 0, mdev_port_num,
- out_cnt);
- if (!err)
- pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ err = query_ib_ppcnt(mdev, mdev_port_num,
+ port_num, out_cnt, sz, 1);
+ if (!err)
+ pma_cnt_ext_assign_ppcnt(pma_cnt_ext, out_cnt);
+ } else {
+ err = mlx5_core_query_vport_counter(mdev, 0, 0,
+ mdev_port_num,
+ out_cnt);
+ if (!err)
+ pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+ }
} else {
struct ib_pma_portcounters *pma_cnt =
(struct ib_pma_portcounters *)(out_mad->data + 40);
@@ -230,7 +278,7 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u32 port_num,
goto done;
}
- err = query_ib_ppcnt(mdev, mdev_port_num, out_cnt, sz);
+ err = query_ib_ppcnt(mdev, mdev_port_num, 0, out_cnt, sz, 0);
if (!err)
pma_cnt_assign(pma_cnt, out_cnt);
}
@@ -308,8 +356,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num,
int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
u16 packet_error;
@@ -338,8 +386,8 @@ out:
static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
struct ib_smp *out_mad)
{
- struct ib_smp *in_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *in_mad;
+ int err;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
if (!in_mad)
@@ -358,8 +406,8 @@ static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev,
int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev,
__be64 *sys_image_guid)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -380,8 +428,8 @@ out:
int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev,
u16 *max_pkeys)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -402,8 +450,8 @@ out:
int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev,
u32 *vendor_id)
{
- struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ struct ib_smp *out_mad;
+ int err;
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!out_mad)
@@ -423,8 +471,8 @@ out:
int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -448,8 +496,8 @@ out:
int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -474,8 +522,8 @@ out:
int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index,
u16 *pkey)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -503,8 +551,8 @@ out:
int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index,
union ib_gid *gid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
@@ -545,8 +593,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int ext_active_speed;
int err = -ENOMEM;
@@ -619,6 +667,19 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port,
}
}
+ /* Check if extended speeds 2 (XDR/...) are supported */
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_EXTENDED_SPEEDS2_SUP) {
+ ext_active_speed = (out_mad->data[56] >> 4) & 0x6;
+
+ switch (ext_active_speed) {
+ case 2:
+ if (props->port_cap_flags2 & IB_PORT_LINK_SPEED_XDR_SUP)
+ props->active_speed = IB_SPEED_XDR;
+ break;
+ }
+ }
+
/* If reported active speed is QDR, check if is FDR-10 */
if (props->active_speed == 4) {
if (dev->port_caps[port - 1].ext_port_cap &
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f0b394ed7452..4999239c8f41 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -24,6 +24,7 @@
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/eswitch.h>
+#include <linux/mlx5/driver.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem_odp.h>
@@ -46,6 +47,8 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include "macsec.h"
+#include "data_direct.h"
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
@@ -144,16 +147,52 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
continue;
-
- read_lock(&port->roce.netdev_lock);
- rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw,
- port->rep->vport);
- if (rep_ndev == ndev) {
- read_unlock(&port->roce.netdev_lock);
+ rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
+ if (rep_ndev && rep_ndev == ndev) {
+ dev_put(rep_ndev);
*port_num = i + 1;
return &port->roce;
}
- read_unlock(&port->roce.netdev_lock);
+
+ dev_put(rep_ndev);
+ }
+
+ return NULL;
+}
+
+static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
+ struct net_device *ndev,
+ struct net_device *upper,
+ struct net_device *ib_ndev)
+{
+ if (!dev->ib_active)
+ return false;
+
+ /* Event is about our upper device */
+ if (upper == ndev)
+ return true;
+
+ /* RDMA device is not in lag and not in switchdev */
+ if (!dev->is_rep && !upper && ndev == ib_ndev)
+ return true;
+
+ /* RDMA device is in switchdev */
+ if (dev->is_rep && ndev == ib_ndev)
+ return true;
+
+ return false;
+}
+
+static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
+{
+ struct mlx5_ib_port *port;
+ int i;
+
+ for (i = 0; i < ibdev->num_ports; i++) {
+ port = &ibdev->port[i];
+ if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK)
+ return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
}
return NULL;
@@ -165,6 +204,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
u32 port_num = roce->native_port_num;
+ struct net_device *ib_ndev = NULL;
struct mlx5_core_dev *mdev;
struct mlx5_ib_dev *ibdev;
@@ -178,47 +218,63 @@ static int mlx5_netdev_event(struct notifier_block *this,
/* Should already be registered during the load */
if (ibdev->is_rep)
break;
- write_lock(&roce->netdev_lock);
+
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+ /* Exit if already registered */
+ if (ib_ndev)
+ goto put_ndev;
+
if (ndev->dev.parent == mdev->device)
- roce->netdev = ndev;
- write_unlock(&roce->netdev_lock);
+ ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
break;
case NETDEV_UNREGISTER:
/* In case of reps, ib device goes away before the netdevs */
- write_lock(&roce->netdev_lock);
- if (roce->netdev == ndev)
- roce->netdev = NULL;
- write_unlock(&roce->netdev_lock);
- break;
+ if (ibdev->is_rep)
+ break;
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+ if (ib_ndev == ndev)
+ ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
+ goto put_ndev;
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
- struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
struct net_device *upper = NULL;
- if (lag_ndev) {
- upper = netdev_master_upper_dev_get(lag_ndev);
- dev_put(lag_ndev);
+ if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
+ struct net_device *lag_ndev;
+
+ if (mlx5_lag_is_roce(mdev))
+ lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
+ else /* sriov lag */
+ lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);
+
+ if (lag_ndev) {
+ upper = netdev_master_upper_dev_get(lag_ndev);
+ dev_put(lag_ndev);
+ } else {
+ goto done;
+ }
}
if (ibdev->is_rep)
roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
if (!roce)
return NOTIFY_DONE;
- if ((upper == ndev ||
- ((!upper || ibdev->is_rep) && ndev == roce->netdev)) &&
- ibdev->ib_active) {
+
+ ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
+
+ if (mlx5_netdev_send_event(ibdev, ndev, upper, ib_ndev)) {
struct ib_event ibev = { };
enum ib_port_state port_state;
if (get_port_state(&ibdev->ib_dev, port_num,
&port_state))
- goto done;
+ goto put_ndev;
if (roce->last_port_state == port_state)
- goto done;
+ goto put_ndev;
roce->last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
@@ -227,7 +283,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
else if (port_state == IB_PORT_ACTIVE)
ibev.event = IB_EVENT_PORT_ACTIVE;
else
- goto done;
+ goto put_ndev;
ibev.element.port_num = port_num;
ib_dispatch_event(&ibev);
@@ -238,39 +294,13 @@ static int mlx5_netdev_event(struct notifier_block *this,
default:
break;
}
+put_ndev:
+ dev_put(ib_ndev);
done:
mlx5_ib_put_native_port_mdev(ibdev, port_num);
return NOTIFY_DONE;
}
-static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
- u32 port_num)
-{
- struct mlx5_ib_dev *ibdev = to_mdev(device);
- struct net_device *ndev;
- struct mlx5_core_dev *mdev;
-
- mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
- if (!mdev)
- return NULL;
-
- ndev = mlx5_lag_get_roce_netdev(mdev);
- if (ndev)
- goto out;
-
- /* Ensure ndev does not disappear before we invoke dev_hold()
- */
- read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
- ndev = ibdev->port[port_num - 1].roce.netdev;
- if (ndev)
- dev_hold(ndev);
- read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
-
-out:
- mlx5_ib_put_native_port_mdev(ibdev, port_num);
- return ndev;
-}
-
struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
u32 ib_port_num,
u32 *native_port_num)
@@ -281,6 +311,14 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_port *port;
+ if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
+ if (native_port_num)
+ *native_port_num = smi_to_native_portnum(ibdev,
+ ib_port_num);
+ return ibdev->mdev;
+ }
+
if (!mlx5_core_mp_enabled(ibdev->mdev) ||
ll != IB_LINK_LAYER_ETHERNET) {
if (native_port_num)
@@ -442,7 +480,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_NDR;
break;
- case MLX5E_PROT_MASK(MLX5E_400GAUI_8):
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
*active_width = IB_WIDTH_8X;
*active_speed = IB_SPEED_HDR;
break;
@@ -450,6 +488,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_NDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
+ *active_width = IB_WIDTH_8X;
+ *active_speed = IB_SPEED_NDR;
+ break;
default:
return -EINVAL;
}
@@ -498,10 +540,10 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
*/
if (dev->is_rep)
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
- 1);
+ 1, 0);
else
err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
- mdev_port_num);
+ mdev_port_num, 0);
if (err)
goto out;
ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability);
@@ -533,11 +575,11 @@ static int mlx5_query_port_roce(struct ib_device *device, u32 port_num,
if (!put_mdev)
goto out;
- ndev = mlx5_ib_get_netdev(device, port_num);
+ ndev = ib_device_get_netdev(device, port_num);
if (!ndev)
goto out;
- if (dev->lag_active) {
+ if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
rcu_read_lock();
upper = netdev_master_upper_dev_get_rcu(ndev);
if (upper) {
@@ -564,9 +606,9 @@ out:
return err;
}
-static int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
- unsigned int index, const union ib_gid *gid,
- const struct ib_gid_attr *attr)
+int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
{
enum ib_gid_type gid_type;
u16 vlan_id = 0xffff;
@@ -607,6 +649,12 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
+ int ret;
+
+ ret = mlx5r_add_gid_macsec_operations(attr);
+ if (ret)
+ return ret;
+
return set_roce_addr(to_mdev(attr->device), attr->port_num,
attr->index, &attr->gid, attr);
}
@@ -614,8 +662,15 @@ static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
- return set_roce_addr(to_mdev(attr->device), attr->port_num,
- attr->index, NULL, attr);
+ int ret;
+
+ ret = set_roce_addr(to_mdev(attr->device), attr->port_num,
+ attr->index, NULL, attr);
+ if (ret)
+ return ret;
+
+ mlx5r_del_gid_macsec_operations(attr);
+ return 0;
}
__be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev,
@@ -799,6 +854,17 @@ static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
MLX5_REG_NODE_DESC, 0, 0);
}
+static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev,
+ struct mlx5_ib_query_device_resp *resp)
+{
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+ u16 vport = mlx5_eswitch_manager_vport(mdev);
+
+ resp->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(esw,
+ vport);
+ resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask();
+}
+
static int mlx5_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props,
struct ib_udata *uhw)
@@ -1190,6 +1256,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
MLX5_CAP_GEN(mdev, log_max_dci_errored_streams);
}
+ if (offsetofend(typeof(resp), reserved) <= uhw_outlen)
+ resp.response_length += sizeof(resp.reserved);
+
+ if (offsetofend(typeof(resp), reg_c0) <= uhw_outlen) {
+ struct mlx5_eswitch *esw = mdev->priv.eswitch;
+
+ resp.response_length += sizeof(resp.reg_c0);
+
+ if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS &&
+ mlx5_eswitch_vport_match_metadata_enabled(esw))
+ fill_esw_mgr_reg_c0(mdev, &resp);
+ }
+
if (uhw_outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -1291,11 +1370,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *rep;
+ u8 vl_hw_cap, plane_index = 0;
u16 max_mtu;
u16 oper_mtu;
int err;
u16 ib_link_width_oper;
- u8 vl_hw_cap;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
if (!rep) {
@@ -1305,6 +1384,11 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
/* props being zeroed by the caller, avoid zeroing it here */
+ if (ibdev->type == RDMA_DEVICE_TYPE_SMI) {
+ plane_index = port;
+ port = smi_to_native_portnum(dev, port);
+ }
+
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
goto out;
@@ -1315,7 +1399,14 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
props->sm_sl = rep->sm_sl;
props->state = rep->vport_state;
props->phys_state = rep->port_physical_state;
- props->port_cap_flags = rep->cap_mask1;
+
+ props->port_cap_flags = rep->cap_mask1;
+ if (dev->num_plane) {
+ props->port_cap_flags |= IB_PORT_SM_DISABLED;
+ props->port_cap_flags &= ~IB_PORT_SM;
+ } else if (ibdev->type == RDMA_DEVICE_TYPE_SMI) {
+ props->port_cap_flags &= ~IB_PORT_CM_SUP;
+ }
+
props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
@@ -1328,7 +1419,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port,
props->port_cap_flags2 = rep->cap_mask2;
err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper,
- &props->active_speed, port);
+ &props->active_speed, port, plane_index);
if (err)
goto out;
@@ -1768,7 +1859,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
}
resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
- if (dev->wc_support)
+ if (mlx5_wc_support_get(dev->mdev))
resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
log_bf_reg_size);
resp->cache_line_size = cache_line_size();
@@ -2070,7 +2161,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
case MLX5_IB_MMAP_DEVICE_MEM:
return "Device Memory";
default:
- return NULL;
+ return "Unknown";
}
}
@@ -2295,7 +2386,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_ALLOC_WC:
- if (!dev->wc_support)
+ if (!mlx5_wc_support_get(dev->mdev))
return -EPERM;
fallthrough;
case MLX5_IB_MMAP_NC_PAGE:
@@ -2734,6 +2825,23 @@ static int mlx5_ib_event_slave_port(struct notifier_block *nb,
return NOTIFY_OK;
}
+static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane)
+{
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+
+ *num_plane = 0;
+ if (!MLX5_CAP_GEN(mdev, ib_virt))
+ return 0;
+
+ err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx);
+ if (err)
+ return err;
+
+ *num_plane = vport_ctx.num_plane;
+ return 0;
+}
+
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
{
struct mlx5_hca_vport_context vport_ctx;
@@ -2744,10 +2852,15 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev)
return 0;
for (port = 1; port <= dev->num_ports; port++) {
- if (!MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+ if (dev->num_plane) {
+ dev->port_caps[port - 1].has_smi = false;
+ continue;
+ } else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) ||
+ dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) {
dev->port_caps[port - 1].has_smi = true;
continue;
}
+
err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0,
&vport_ctx);
if (err) {
@@ -2781,37 +2894,72 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
}
}
-static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
- struct ib_srq_init_attr attr;
- struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
- int port;
+ struct ib_device *ibdev;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
int ret = 0;
- ibdev = &dev->ib_dev;
- if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return -EOPNOTSUPP;
+ /*
+ * devr->c0 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->c0)
+ return 0;
- devr->p0 = ib_alloc_pd(ibdev, 0);
- if (IS_ERR(devr->p0))
- return PTR_ERR(devr->p0);
+ mutex_lock(&devr->cq_lock);
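+ /* Recheck under the lock; a racing caller may have finished the init */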
+ if (devr->c0)
+ goto unlock;
- devr->c0 = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
- goto error1;
+ ibdev = &dev->ib_dev;
+ pd = ib_alloc_pd(ibdev, 0);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret);
+ goto unlock;
}
- ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
- if (ret)
- goto error2;
+ cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret);
+ ib_dealloc_pd(pd);
+ goto unlock;
+ }
- ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ devr->p0 = pd;
+ devr->c0 = cq;
+
+unlock:
+ mutex_unlock(&devr->cq_lock);
+ return ret;
+}
+
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ struct ib_srq_init_attr attr;
+ struct ib_srq *s0, *s1;
+ int ret = 0;
+
+ /*
+ * devr->s1 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->s1)
+ return 0;
+
+ mutex_lock(&devr->srq_lock);
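+ /* Recheck under the lock, as in the CQ path above */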
+ if (devr->s1)
+ goto unlock;
+
+ ret = mlx5_ib_dev_res_cq_init(dev);
if (ret)
- goto error3;
+ goto unlock;
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
@@ -2819,10 +2967,11 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
attr.srq_type = IB_SRQT_XRC;
attr.ext.cq = devr->c0;
- devr->s0 = ib_create_srq(devr->p0, &attr);
- if (IS_ERR(devr->s0)) {
- ret = PTR_ERR(devr->s0);
- goto err_create;
+ s0 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s0)) {
+ ret = PTR_ERR(s0);
+ mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret);
+ goto unlock;
}
memset(&attr, 0, sizeof(attr));
@@ -2830,29 +2979,48 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
- devr->s1 = ib_create_srq(devr->p0, &attr);
- if (IS_ERR(devr->s1)) {
- ret = PTR_ERR(devr->s1);
- goto error6;
+ s1 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s1)) {
+ ret = PTR_ERR(s1);
+ mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret);
+ ib_destroy_srq(s0);
+ goto unlock;
+ }
+
+ devr->s0 = s0;
+ devr->s1 = s1;
+
+unlock:
+ mutex_unlock(&devr->srq_lock);
+ return ret;
+}
+
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
+ return ret;
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret) {
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+ return ret;
}
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
- return 0;
+ mutex_init(&devr->cq_lock);
+ mutex_init(&devr->srq_lock);
-error6:
- ib_destroy_srq(devr->s0);
-err_create:
- mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
-error3:
- mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
-error2:
- ib_destroy_cq(devr->c0);
-error1:
- ib_dealloc_pd(devr->p0);
- return ret;
+ return 0;
}
static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
@@ -2869,12 +3037,73 @@ static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
cancel_work_sync(&devr->ports[port].pkey_change_work);
- ib_destroy_srq(devr->s1);
- ib_destroy_srq(devr->s0);
+ /* After s0/s1 init, they are not unset during the device lifetime. */
+ if (devr->s1) {
+ ib_destroy_srq(devr->s1);
+ ib_destroy_srq(devr->s0);
+ }
mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
- ib_destroy_cq(devr->c0);
- ib_dealloc_pd(devr->p0);
+ /* After p0/c0 init, they are not unset during the device lifetime. */
+ if (devr->c0) {
+ ib_destroy_cq(devr->c0);
+ ib_dealloc_pd(devr->p0);
+ }
+ mutex_destroy(&devr->cq_lock);
+ mutex_destroy(&devr->srq_lock);
+}
+
+static int
+mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev)
+{
+ int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ void *mkc;
+ u32 mkey;
+ u32 pdn;
+ u32 *in;
+ int err;
+
+ err = mlx5_core_alloc_pd(mdev, &pdn);
+ if (err)
+ return err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err;
+ }
+
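+ /*
+ * A physical-address (PA) mkey that spans the entire address space:
+ * length64 is set instead of an explicit length.
+ */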
+ MLX5_SET(create_mkey_in, in, data_direct, 1);
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, pd, pdn);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+ err = mlx5_core_create_mkey(mdev, &mkey, in, inlen);
+ kvfree(in);
+ if (err)
+ goto err;
+
+ dev->ddr.mkey = mkey;
+ dev->ddr.pdn = pdn;
+ return 0;
+
+err:
+ mlx5_core_dealloc_pd(mdev, pdn);
+ return err;
+}
+
+static void
+mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev)
+{
+ mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey);
+ mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn);
}
static u32 get_core_cap_flags(struct ib_device *ibdev,
@@ -2890,6 +3119,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev,
if (rep->grh_required)
ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
+ if (dev->num_plane)
+ return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD |
+ RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA |
+ RDMA_CORE_CAP_AF_IB;
+ else if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI;
+
if (ll == IB_LINK_LAYER_INFINIBAND)
return ret | RDMA_CORE_PORT_IBA_IB;
@@ -2925,6 +3161,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num,
return err;
if (ll == IB_LINK_LAYER_INFINIBAND) {
+ if (ibdev->type == RDMA_DEVICE_TYPE_SMI)
+ port_num = smi_to_native_portnum(dev, port_num);
+
err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
&rep);
if (err)
@@ -2967,6 +3206,60 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str)
fw_rev_sub(dev->mdev));
}
+static int lag_event(struct notifier_block *nb, unsigned long event, void *data)
+{
+ struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev,
+ lag_events);
+ struct mlx5_core_dev *mdev = dev->mdev;
+ struct mlx5_ib_port *port;
+ struct net_device *ndev;
+ int i, err;
+ int portnum;
+
+ portnum = 0;
+ switch (event) {
+ case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE:
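+ /*
+ * The active-backup LAG switched its active lower device; rebind
+ * the IB port netdev so the GID table can be rescanned below.
+ */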
+ ndev = data;
+ if (ndev) {
+ if (!mlx5_lag_is_roce(mdev)) {
+ /* sriov lag */
+ for (i = 0; i < dev->num_ports; i++) {
+ port = &dev->port[i];
+ if (port->rep && port->rep->vport ==
+ MLX5_VPORT_UPLINK) {
+ portnum = i;
+ break;
+ }
+ }
+ }
+ err = ib_device_set_netdev(&dev->ib_dev, ndev,
+ portnum + 1);
+ dev_put(ndev);
+ if (err)
+ return err;
+ /* Rescan gids after new netdev assignment */
+ rdma_roce_rescan_device(&dev->ib_dev);
+ }
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+ return NOTIFY_OK;
+}
+
+static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev)
+{
+ dev->lag_events.notifier_call = lag_event;
+ blocking_notifier_chain_register(&dev->mdev->priv.lag_nh,
+ &dev->lag_events);
+}
+
+static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev)
+{
+ blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh,
+ &dev->lag_events);
+}
+
static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -2988,6 +3281,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
goto err_destroy_vport_lag;
}
+ mlx5e_lag_event_register(dev);
dev->flow_db->lag_demux_ft = ft;
dev->lag_ports = mlx5_lag_get_num_ports(mdev);
dev->lag_active = true;
@@ -3005,6 +3299,7 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
if (dev->lag_active) {
dev->lag_active = false;
+ mlx5e_lag_event_unregister(dev);
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
dev->flow_db->lag_demux_ft = NULL;
@@ -3161,6 +3456,13 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
+ mlx5_core_mp_event_replay(ibdev->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ NULL);
+ mlx5_core_mp_event_replay(mpi->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
+ NULL);
+
mlx5_ib_cleanup_cong_debugfs(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
@@ -3212,6 +3514,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
{
u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1;
+ u64 key;
int err;
lockdep_assert_held(&mlx5_ib_multiport_mutex);
@@ -3240,6 +3543,14 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
mlx5_ib_init_cong_debugfs(ibdev, port_num);
+ key = mpi->mdev->priv.adev_idx;
+ mlx5_core_mp_event_replay(mpi->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ &key);
+ mlx5_core_mp_event_replay(ibdev->mdev,
+ MLX5_DRIVER_EVENT_AFFILIATION_DONE,
+ &key);
+
return true;
unbind:
@@ -3247,6 +3558,41 @@ unbind:
return false;
}
+static int mlx5_ib_data_direct_init(struct mlx5_ib_dev *dev)
+{
+ char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1] = {};
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev->mdev, data_direct) ||
+ !MLX5_CAP_GEN_2(dev->mdev, query_vuid))
+ return 0;
+
+ ret = mlx5_cmd_query_vuid(dev->mdev, true, vuid);
+ if (ret)
+ return ret;
+
+ ret = mlx5_ib_create_data_direct_resources(dev);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&dev->data_direct_mr_list);
+ ret = mlx5_data_direct_ib_reg(dev, vuid);
+ if (ret)
+ mlx5_ib_free_data_direct_resources(dev);
+
+ return ret;
+}
+
+static void mlx5_ib_data_direct_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, data_direct) ||
+ !MLX5_CAP_GEN_2(dev->mdev, query_vuid))
+ return;
+
+ mlx5_data_direct_ib_unreg(dev);
+ mlx5_ib_free_data_direct_resources(dev);
+}
+
static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev)
{
u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
@@ -3554,7 +3900,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
return -EOPNOTSUPP;
- if (!to_mdev(c->ibucontext.device)->wc_support &&
+ if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
return -EOPNOTSUPP;
@@ -3623,14 +3969,24 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
dump_fill_mkey),
UA_MANDATORY));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_reg_dmabuf_mr,
+ UVERBS_OBJECT_MR,
+ UVERBS_METHOD_REG_DMABUF_MR,
+ UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ enum mlx5_ib_uapi_reg_dmabuf_flags,
+ UA_OPTIONAL));
+
static const struct uapi_definition mlx5_ib_defs[] = {
UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
UAPI_DEF_CHAIN(mlx5_ib_dm_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs),
UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, &mlx5_ib_reg_dmabuf_mr),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
@@ -3639,28 +3995,27 @@ static const struct uapi_definition mlx5_ib_defs[] = {
static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
+ mlx5_ib_data_direct_cleanup(dev);
mlx5_ib_cleanup_multiport_master(dev);
WARN_ON(!xa_empty(&dev->odp_mkeys));
mutex_destroy(&dev->cap_mask_mutex);
WARN_ON(!xa_empty(&dev->sig_mrs));
WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
+ mlx5r_macsec_dealloc_gids(dev);
}
static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- int err;
- int i;
+ int err, i;
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
- dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.dev.parent = mdev->device;
dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;
for (i = 0; i < dev->num_ports; i++) {
spin_lock_init(&dev->port[i].mp.mpi_lock);
- rwlock_init(&dev->port[i].roce.netdev_lock);
dev->port[i].roce.dev = dev;
dev->port[i].roce.native_port_num = i + 1;
dev->port[i].roce.last_port_state = IB_PORT_DOWN;
@@ -3670,10 +4025,14 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (err)
return err;
- err = mlx5_ib_init_multiport_master(dev);
+ err = mlx5r_macsec_init_gids_and_devlist(dev);
if (err)
return err;
+ err = mlx5_ib_init_multiport_master(dev);
+ if (err)
+ goto err;
+
err = set_has_smi_cap(dev);
if (err)
goto err_mp;
@@ -3685,9 +4044,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_max(mdev);
mutex_init(&dev->cap_mask_mutex);
+ mutex_init(&dev->data_direct_lock);
INIT_LIST_HEAD(&dev->qp_list);
spin_lock_init(&dev->reset_flow_resource_lock);
xa_init(&dev->odp_mkeys);
@@ -3696,24 +4056,22 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
spin_lock_init(&dev->dm.lock);
dev->dm.dev = mdev;
- return 0;
+ err = mlx5_ib_data_direct_init(dev);
+ if (err)
+ goto err_mp;
+ return 0;
err_mp:
mlx5_ib_cleanup_multiport_master(dev);
+err:
+ mlx5r_macsec_dealloc_gids(dev);
return err;
}
-static int mlx5_ib_enable_driver(struct ib_device *dev)
-{
- struct mlx5_ib_dev *mdev = to_mdev(dev);
- int ret;
-
- ret = mlx5_ib_test_wc(mdev);
- mlx5_ib_dbg(mdev, "Write-Combining %s",
- mdev->wc_support ? "supported" : "not supported");
-
- return ret;
-}
+static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name);
+static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev);
static const struct ib_device_ops mlx5_ib_dev_ops = {
.owner = THIS_MODULE,
@@ -3721,6 +4079,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION,
.add_gid = mlx5_ib_add_gid,
+ .add_sub_dev = mlx5_ib_add_sub_dev,
.alloc_mr = mlx5_ib_alloc_mr,
.alloc_mr_integrity = mlx5_ib_alloc_mr_integrity,
.alloc_pd = mlx5_ib_alloc_pd,
@@ -3735,6 +4094,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.dealloc_pd = mlx5_ib_dealloc_pd,
.dealloc_ucontext = mlx5_ib_dealloc_ucontext,
.del_gid = mlx5_ib_del_gid,
+ .del_sub_dev = mlx5_ib_del_sub_dev,
.dereg_mr = mlx5_ib_dereg_mr,
.destroy_ah = mlx5_ib_destroy_ah,
.destroy_cq = mlx5_ib_destroy_cq,
@@ -3745,7 +4105,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.device_group = &mlx5_attr_group,
- .enable_driver = mlx5_ib_enable_driver,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
@@ -3921,7 +4280,6 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
.create_wq = mlx5_ib_create_wq,
.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
.destroy_wq = mlx5_ib_destroy_wq,
- .get_netdev = mlx5_ib_get_netdev,
.modify_wq = mlx5_ib_modify_wq,
INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table,
@@ -4025,7 +4383,10 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
- if (!mlx5_lag_is_active(dev->mdev))
+ if (dev->sub_dev_name) {
+ name = dev->sub_dev_name;
+ ib_mark_name_assigned_by_user(&dev->ib_dev);
+ } else if (!mlx5_lag_is_active(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
@@ -4036,6 +4397,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_mkey_cache_cleanup(dev);
mlx5r_umr_resource_cleanup(dev);
+ mlx5r_umr_cleanup(dev);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -4047,15 +4409,13 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
int ret;
- ret = mlx5r_umr_resource_init(dev);
+ ret = mlx5r_umr_init(dev);
if (ret)
return ret;
ret = mlx5_mkey_cache_init(dev);
- if (ret) {
+ if (ret)
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
- mlx5r_umr_resource_cleanup(dev);
- }
return ret;
}
@@ -4106,14 +4466,34 @@ static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
dev->mdev_events.notifier_call = mlx5_ib_event;
mlx5_notifier_register(dev->mdev, &dev->mdev_events);
+
+ mlx5r_macsec_event_register(dev);
+
return 0;
}
static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
{
+ mlx5r_macsec_event_unregister(dev);
mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
}
+void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
+ struct mlx5_data_direct_dev *dev)
+{
+ mutex_lock(&ibdev->data_direct_lock);
+ ibdev->data_direct_dev = dev;
+ mutex_unlock(&ibdev->data_direct_lock);
+}
+
+void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev)
+{
+ mutex_lock(&ibdev->data_direct_lock);
+ mlx5_ib_revoke_data_direct_mrs(ibdev);
+ ibdev->data_direct_dev = NULL;
+ mutex_unlock(&ibdev->data_direct_lock);
+}
+
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile,
int stage)
@@ -4283,6 +4663,90 @@ const struct mlx5_ib_profile raw_eth_profile = {
NULL),
};
+static const struct mlx5_ib_profile plane_profile = {
+ STAGE_CREATE(MLX5_IB_STAGE_INIT,
+ mlx5_ib_stage_init_init,
+ mlx5_ib_stage_init_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_CAPS,
+ mlx5_ib_stage_caps_init,
+ mlx5_ib_stage_caps_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB,
+ mlx5_ib_stage_non_default_cb,
+ NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_QP,
+ mlx5_init_qp_table,
+ mlx5_cleanup_qp_table),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
+ mlx5_ib_dev_res_init,
+ mlx5_ib_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_BFREG,
+ mlx5_ib_stage_bfrag_init,
+ mlx5_ib_stage_bfrag_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
+ mlx5_ib_stage_ib_reg_init,
+ mlx5_ib_stage_ib_reg_cleanup),
+};
+
+static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent,
+ enum rdma_nl_dev_type type,
+ const char *name)
+{
+ struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane;
+ enum rdma_link_layer ll;
+ int ret;
+
+ if (mparent->smi_dev)
+ return ERR_PTR(-EEXIST);
+
+ ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev,
+ port_type));
+ if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane ||
+ ll != IB_LINK_LAYER_INFINIBAND ||
+ !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mplane = ib_alloc_device(mlx5_ib_dev, ib_dev);
+ if (!mplane)
+ return ERR_PTR(-ENOMEM);
+
+ mplane->port = kcalloc(mparent->num_plane * mparent->num_ports,
+ sizeof(*mplane->port), GFP_KERNEL);
+ if (!mplane->port) {
+ ret = -ENOMEM;
+ goto fail_kcalloc;
+ }
+
+ mplane->ib_dev.type = type;
+ mplane->mdev = mparent->mdev;
+ mplane->num_ports = mparent->num_plane;
+ mplane->sub_dev_name = name;
+ mplane->ib_dev.phys_port_cnt = mplane->num_ports;
+
+ ret = __mlx5_ib_add(mplane, &plane_profile);
+ if (ret)
+ goto fail_ib_add;
+
+ mparent->smi_dev = mplane;
+ return &mplane->ib_dev;
+
+fail_ib_add:
+ kfree(mplane->port);
+fail_kcalloc:
+ ib_dealloc_device(&mplane->ib_dev);
+ return ERR_PTR(ret);
+}
+
+static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev)
+{
+ struct mlx5_ib_dev *mdev = to_mdev(sub_dev);
+
+ to_mdev(sub_dev->parent)->smi_dev = NULL;
+ __mlx5_ib_remove(mdev, mdev->profile, MLX5_IB_STAGE_MAX);
+}
+
static int mlx5r_mp_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
@@ -4360,15 +4824,23 @@ static int mlx5r_probe(struct auxiliary_device *adev,
dev = ib_alloc_device(mlx5_ib_dev, ib_dev);
if (!dev)
return -ENOMEM;
+
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
+ ret = mlx5_ib_get_plane_num(mdev, &dev->num_plane);
+ if (ret)
+ goto fail;
+ }
+
dev->port = kcalloc(num_ports, sizeof(*dev->port),
GFP_KERNEL);
if (!dev->port) {
- ib_dealloc_device(&dev->ib_dev);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
dev->mdev = mdev;
dev->num_ports = num_ports;
+ dev->ib_dev.phys_port_cnt = num_ports;
if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
profile = &raw_eth_profile;
@@ -4376,14 +4848,17 @@ static int mlx5r_probe(struct auxiliary_device *adev,
profile = &pf_profile;
ret = __mlx5_ib_add(dev, profile);
- if (ret) {
- kfree(dev->port);
- ib_dealloc_device(&dev->ib_dev);
- return ret;
- }
+ if (ret)
+ goto fail_ib_add;
auxiliary_set_drvdata(adev, dev);
return 0;
+
+fail_ib_add:
+ kfree(dev->port);
+fail:
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
}
static void mlx5r_remove(struct auxiliary_device *adev)
@@ -4443,17 +4918,23 @@ static int __init mlx5_ib_init(void)
ret = mlx5r_rep_init();
if (ret)
goto rep_err;
+ ret = mlx5_data_direct_driver_register();
+ if (ret)
+ goto dd_err;
ret = auxiliary_driver_register(&mlx5r_mp_driver);
if (ret)
goto mp_err;
ret = auxiliary_driver_register(&mlx5r_driver);
if (ret)
goto drv_err;
+
return 0;
drv_err:
auxiliary_driver_unregister(&mlx5r_mp_driver);
mp_err:
+ mlx5_data_direct_driver_unregister();
+dd_err:
mlx5r_rep_cleanup();
rep_err:
mlx5_ib_qp_event_cleanup();
@@ -4465,6 +4946,7 @@ qp_event_err:
static void __exit mlx5_ib_cleanup(void)
{
+ mlx5_data_direct_driver_unregister();
auxiliary_driver_unregister(&mlx5r_driver);
auxiliary_driver_unregister(&mlx5r_mp_driver);
mlx5r_rep_cleanup();
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 96ffbbaf0a73..af321f6ef7f5 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -32,7 +32,6 @@
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
-#include <linux/jiffies.h>
/*
* Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
@@ -94,202 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
return 0;
return page_size;
}
-
-#define WR_ID_BF 0xBF
-#define WR_ID_END 0xBAD
-#define TEST_WC_NUM_WQES 255
-#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
-static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
- bool signaled)
-{
- struct mlx5_ib_qp *qp = to_mqp(ibqp);
- struct mlx5_wqe_ctrl_seg *ctrl;
- struct mlx5_bf *bf = &qp->bf;
- __be32 mmio_wqe[16] = {};
- unsigned long flags;
- unsigned int idx;
- int i;
-
- if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
- return -EIO;
-
- spin_lock_irqsave(&qp->sq.lock, flags);
-
- idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);
-
- memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
- ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
- ctrl->opmod_idx_opcode =
- cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
- ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
- (qp->trans_qp.base.mqp.qpn << 8));
-
- qp->sq.wrid[idx] = wr_id;
- qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
- qp->sq.wqe_head[idx] = qp->sq.head + 1;
- qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
- MLX5_SEND_WQE_BB);
- qp->sq.w_list[idx].next = qp->sq.cur_post;
- qp->sq.head++;
-
- memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
- ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
- MLX5_WQE_CTRL_CQ_UPDATE;
-
- /* Make sure that descriptors are written before
- * updating doorbell record and ringing the doorbell
- */
- wmb();
-
- qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
-
- /* Make sure doorbell record is visible to the HCA before
- * we hit doorbell
- */
- wmb();
- for (i = 0; i < 8; i++)
- mlx5_write64(&mmio_wqe[i * 2],
- bf->bfreg->map + bf->offset + i * 8);
- io_stop_wc();
-
- bf->offset ^= bf->buf_size;
-
- spin_unlock_irqrestore(&qp->sq.lock, flags);
-
- return 0;
-}
-
-static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
-{
- int ret;
- struct ib_wc wc = {};
- unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
-
- do {
- ret = ib_poll_cq(cq, 1, &wc);
- if (ret < 0 || wc.status)
- return ret < 0 ? ret : -EINVAL;
- if (ret)
- break;
- } while (!time_after(jiffies, end));
-
- if (!ret)
- return -ETIMEDOUT;
-
- if (wc.wr_id != WR_ID_BF)
- ret = 0;
-
- return ret;
-}
-
-static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
-{
- int err, i;
-
- for (i = 0; i < TEST_WC_NUM_WQES; i++) {
- err = post_send_nop(dev, qp, WR_ID_BF, false);
- if (err)
- return err;
- }
-
- return post_send_nop(dev, qp, WR_ID_END, true);
-}
-
-int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
-{
- struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
- int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
- struct ib_qp_init_attr qp_init_attr = {
- .cap = { .max_send_wr = TEST_WC_NUM_WQES },
- .qp_type = IB_QPT_UD,
- .sq_sig_type = IB_SIGNAL_REQ_WR,
- .create_flags = MLX5_IB_QP_CREATE_WC_TEST,
- };
- struct ib_qp_attr qp_attr = { .port_num = 1 };
- struct ib_device *ibdev = &dev->ib_dev;
- struct ib_qp *qp;
- struct ib_cq *cq;
- struct ib_pd *pd;
- int ret;
-
- if (!MLX5_CAP_GEN(dev->mdev, bf))
- return 0;
-
- if (!dev->mdev->roce.roce_en &&
- port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
- if (mlx5_core_is_pf(dev->mdev))
- dev->wc_support = arch_can_pci_mmap_wc();
- return 0;
- }
-
- ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
- if (ret)
- goto print_err;
-
- if (!dev->wc_bfreg.wc)
- goto out1;
-
- pd = ib_alloc_pd(ibdev, 0);
- if (IS_ERR(pd)) {
- ret = PTR_ERR(pd);
- goto out1;
- }
-
- cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
- if (IS_ERR(cq)) {
- ret = PTR_ERR(cq);
- goto out2;
- }
-
- qp_init_attr.recv_cq = cq;
- qp_init_attr.send_cq = cq;
- qp = ib_create_qp(pd, &qp_init_attr);
- if (IS_ERR(qp)) {
- ret = PTR_ERR(qp);
- goto out3;
- }
-
- qp_attr.qp_state = IB_QPS_INIT;
- ret = ib_modify_qp(qp, &qp_attr,
- IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
- IB_QP_QKEY);
- if (ret)
- goto out4;
-
- qp_attr.qp_state = IB_QPS_RTR;
- ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
- if (ret)
- goto out4;
-
- qp_attr.qp_state = IB_QPS_RTS;
- ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
- if (ret)
- goto out4;
-
- ret = test_wc_do_send(dev, qp);
- if (ret < 0)
- goto out4;
-
- ret = test_wc_poll_cq_result(dev, cq);
- if (ret > 0) {
- dev->wc_support = true;
- ret = 0;
- }
-
-out4:
- ib_destroy_qp(qp);
-out3:
- ib_destroy_cq(cq);
-out2:
- ib_dealloc_pd(pd);
-out1:
- mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
-print_err:
- if (ret)
- mlx5_ib_err(
- dev,
- "Error %d while trying to test write-combining support\n",
- ret);
- return ret;
-}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 9c33d960af3c..23fd72f7f63d 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -26,6 +26,7 @@
#include "srq.h"
#include "qp.h"
+#include "macsec.h"
#define mlx5_ib_dbg(_dev, format, arg...) \
dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
@@ -62,17 +63,6 @@ __mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
return GENMASK(largest_pg_shift, pgsz_shift);
}
-/*
- * For mkc users, instead of a page_offset the command has a start_iova which
- * specifies both the page_offset and the on-the-wire IOVA
- */
-#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova) \
- ib_umem_find_best_pgsz(umem, \
- __mlx5_log_page_size_to_bitmap( \
- __mlx5_bit_sz(typ, log_pgsz_fld), \
- pgsz_shift), \
- iova)
-
static __always_inline unsigned long
__mlx5_page_offset_to_bitmask(unsigned int page_offset_bits,
unsigned int offset_shift)
@@ -114,6 +104,19 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
__mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
page_offset_quantized)
+static inline unsigned long
+mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ /*
+ * mkeys used for dmabuf are fixed at PAGE_SIZE because we must be able
+ * to hold any sgl after a move operation. Ideally the mkc page size
+ * could be changed at runtime to be optimal, but right now the driver
+ * cannot do that.
+ */
+ return ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
+ umem_dmabuf->umem.iova);
+}
+
enum {
MLX5_IB_MMAP_OFFSET_START = 9,
MLX5_IB_MMAP_OFFSET_END = 255,
@@ -340,7 +343,6 @@ struct mlx5_ib_flow_db {
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
-#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1)
struct wr_list {
u16 opcode;
@@ -627,6 +629,8 @@ enum mlx5_mkey_type {
MLX5_MKEY_MR = 1,
MLX5_MKEY_MW,
MLX5_MKEY_INDIRECT_DEVX,
+ MLX5_MKEY_NULL,
+ MLX5_MKEY_IMPLICIT_CHILD,
};
struct mlx5r_cache_rb_key {
@@ -642,9 +646,10 @@ struct mlx5_ib_mkey {
unsigned int ndescs;
struct wait_queue_head wait;
refcount_t usecount;
- /* User Mkey must hold either a rb_key or a cache_ent. */
+ /* Cacheable user Mkey must hold either a rb_key or a cache_ent. */
struct mlx5r_cache_rb_key rb_key;
struct mlx5_cache_ent *cache_ent;
+ u8 cacheable : 1;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -668,6 +673,8 @@ struct mlx5_ib_mr {
struct mlx5_ib_mkey mmkey;
struct ib_umem *umem;
+ /* The mr is data direct related */
+ u8 data_direct :1;
union {
/* Used only by kernel MRs (umem == NULL) */
@@ -705,6 +712,11 @@ struct mlx5_ib_mr {
} odp_destroy;
struct ib_odp_counters odp_stats;
bool is_odp_implicit;
+ /* The affiliated data-direct crossed MR */
+ struct mlx5_ib_mr *dd_crossed_mr;
+ struct list_head dd_node;
+ u8 revoked :1;
+ struct mlx5_ib_mkey null_mmkey;
};
};
};
@@ -750,12 +762,29 @@ struct umr_common {
*/
struct mutex lock;
unsigned int state;
+ /* Protects from repeat UMR QP creation */
+ struct mutex init_lock;
+};
+
+#define NUM_MKEYS_PER_PAGE \
+ ((PAGE_SIZE - sizeof(struct list_head)) / sizeof(u32))
+
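+/*
+ * Free mkeys are kept in page-sized chunks linked through the trailing
+ * list_head; e.g. a 4 KiB page on a 64-bit build holds 1020 mkeys.
+ */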
+struct mlx5_mkeys_page {
+ u32 mkeys[NUM_MKEYS_PER_PAGE];
+ struct list_head list;
+};
+static_assert(sizeof(struct mlx5_mkeys_page) == PAGE_SIZE);
+
+struct mlx5_mkeys_queue {
+ struct list_head pages_list;
+ u32 num_pages;
+ unsigned long ci;
+ spinlock_t lock; /* sync list ops */
};
struct mlx5_cache_ent {
- struct xarray mkeys;
- unsigned long stored;
- unsigned long reserved;
+ struct mlx5_mkeys_queue mkeys_queue;
+ u32 pending;
char name[4];
@@ -765,6 +794,7 @@ struct mlx5_cache_ent {
u8 is_tmp:1;
u8 disabled:1;
u8 fill_to_high_water:1;
+ u8 tmp_cleanup_scheduled:1;
/*
* - limit is the low water mark for stored mkeys, 2* limit is the
@@ -796,7 +826,6 @@ struct mlx5_mkey_cache {
struct mutex rb_lock;
struct dentry *fs_root;
unsigned long last_add;
- struct delayed_work remove_ent_dwork;
};
struct mlx5_ib_port_resources {
@@ -804,13 +833,20 @@ struct mlx5_ib_port_resources {
struct work_struct pkey_change_work;
};
+struct mlx5_data_direct_resources {
+ u32 pdn;
+ u32 mkey;
+};
+
struct mlx5_ib_resources {
struct ib_cq *c0;
+ struct mutex cq_lock;
u32 xrcdn0;
u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
+ struct mutex srq_lock;
struct mlx5_ib_port_resources ports[2];
};
@@ -852,8 +888,6 @@ struct mlx5_roce {
/* Protect mlx5_ib_get_netdev from invoking dev_hold() with a NULL
* netdev pointer
*/
- rwlock_t netdev_lock;
- struct net_device *netdev;
struct notifier_block nb;
struct netdev_net_notifier nn;
struct notifier_block mdev_nb;
@@ -870,6 +904,9 @@ struct mlx5_ib_port {
struct mlx5_ib_dbg_cc_params *dbg_cc_params;
struct mlx5_roce roce;
struct mlx5_eswitch_rep *rep;
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_reserved_gids *reserved_gids;
+#endif
};
struct mlx5_ib_dbg_param {
@@ -1086,10 +1123,20 @@ struct mlx5_special_mkeys {
__be32 terminate_scatter_list_mkey;
};
+struct mlx5_macsec {
+ struct mutex lock; /* Protects mlx5_macsec internal contexts */
+ struct list_head macsec_devices_list;
+ struct notifier_block blocking_events_nb;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
+ struct mlx5_data_direct_dev *data_direct_dev;
+ /* protect accessing data_direct_dev */
+ struct mutex data_direct_lock;
struct notifier_block mdev_events;
+ struct notifier_block lag_events;
int num_ports;
/* serialize update of capability mask
*/
@@ -1097,7 +1144,6 @@ struct mlx5_ib_dev {
u8 ib_active:1;
u8 is_rep:1;
u8 lag_active:1;
- u8 wc_support:1;
u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
@@ -1120,10 +1166,10 @@ struct mlx5_ib_dev {
/* protect resources needed as part of reset flow */
spinlock_t reset_flow_resource_lock;
struct list_head qp_list;
+ struct list_head data_direct_mr_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
- struct mlx5_sq_bfreg wc_bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
@@ -1145,6 +1191,15 @@ struct mlx5_ib_dev {
u16 pkey_table_len;
u8 lag_ports;
struct mlx5_special_mkeys mkeys;
+ struct mlx5_data_direct_resources ddr;
+
+#ifdef CONFIG_MLX5_MACSEC
+ struct mlx5_macsec macsec;
+#endif
+
+ u8 num_plane;
+ struct mlx5_ib_dev *smi_dev;
+ const char *sub_dev_name;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1243,6 +1298,8 @@ to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
struct mlx5_user_mmap_entry, rdma_entry);
}
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev);
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
@@ -1282,7 +1339,7 @@ int mlx5_ib_read_wqe_rq(struct mlx5_ib_qp *qp, int wqe_index, void *buffer,
int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer,
size_t buflen, size_t *bc);
int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
@@ -1295,7 +1352,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start,
u64 length, u64 virt_addr,
int fd, int access_flags,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int mlx5_ib_advise_mr(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
u32 flags,
@@ -1306,7 +1363,6 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr);
struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
@@ -1348,7 +1404,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props);
void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas,
u64 access_flags);
-void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct ib_cq *ibcq);
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev);
void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev);
@@ -1376,6 +1431,10 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs);
+void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev,
+ struct mlx5_data_direct_dev *dev);
+void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
@@ -1483,6 +1542,7 @@ extern const struct uapi_definition mlx5_ib_devx_defs[];
extern const struct uapi_definition mlx5_ib_flow_defs[];
extern const struct uapi_definition mlx5_ib_qos_defs[];
extern const struct uapi_definition mlx5_ib_std_types_defs[];
+extern const struct uapi_definition mlx5_ib_create_cq_defs[];
static inline int is_qp1(enum ib_qp_type qp_type)
{
@@ -1583,8 +1643,6 @@ static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey)
wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
}
-int mlx5_ib_test_wc(struct mlx5_ib_dev *dev);
-
static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev)
{
/*
@@ -1648,4 +1706,29 @@ static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev,
return access_flags & IB_ACCESS_RELAXED_ORDERING;
}
+int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr);
+
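+/*
+ * Each group of dev->num_ports consecutive plane ports shares one native
+ * port, e.g. with dev->num_ports == 2, plane ports 1 and 2 map to port 1.
+ */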
+static inline u32 smi_to_native_portnum(struct mlx5_ib_dev *dev, u32 port)
+{
+ return (port - 1) / dev->num_ports + 1;
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+static __always_inline unsigned long
+mlx5_umem_mkc_find_best_pgsz(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+ u64 iova)
+{
+ int page_size_bits =
+ MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5) ? 6 : 5;
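+ /* 5-bit log_page_size allows page shifts 0..31, 6-bit allows 0..63 */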
+ unsigned long bitmap =
+ __mlx5_log_page_size_to_bitmap(page_size_bits, 0);
+
+ return ib_umem_find_best_pgsz(umem, bitmap, iova);
+}
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2017ede100a6..45d9dc9c6c8f 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -43,18 +43,22 @@
#include "dm.h"
#include "mlx5_ib.h"
#include "umr.h"
+#include "data_direct.h"
enum {
MAX_PENDING_REG_MR = 8,
};
+#define MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS 4
#define MLX5_UMR_ALIGN 2048
static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate);
+ unsigned int page_size, bool populate,
+ int access_mode);
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
struct ib_pd *pd)
@@ -143,110 +147,47 @@ static void create_mkey_warn(struct mlx5_ib_dev *dev, int status, void *out)
mlx5_cmd_out_err(dev->mdev, MLX5_CMD_OP_CREATE_MKEY, 0, out);
}
-static int push_mkey_locked(struct mlx5_cache_ent *ent, bool limit_pendings,
- void *to_store)
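+/*
+ * Free mkeys now live in a LIFO queue of page-sized arrays;
+ * mkeys_queue.ci counts the stored mkeys and indexes into the tail page.
+ */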
+static int push_mkey_locked(struct mlx5_cache_ent *ent, u32 mkey)
{
- XA_STATE(xas, &ent->mkeys, 0);
- void *curr;
+ unsigned long tmp = ent->mkeys_queue.ci % NUM_MKEYS_PER_PAGE;
+ struct mlx5_mkeys_page *page;
- if (limit_pendings &&
- (ent->reserved - ent->stored) > MAX_PENDING_REG_MR)
- return -EAGAIN;
-
- while (1) {
- /*
- * This is cmpxchg (NULL, XA_ZERO_ENTRY) however this version
- * doesn't transparently unlock. Instead we set the xas index to
- * the current value of reserved every iteration.
- */
- xas_set(&xas, ent->reserved);
- curr = xas_load(&xas);
- if (!curr) {
- if (to_store && ent->stored == ent->reserved)
- xas_store(&xas, to_store);
- else
- xas_store(&xas, XA_ZERO_ENTRY);
- if (xas_valid(&xas)) {
- ent->reserved++;
- if (to_store) {
- if (ent->stored != ent->reserved)
- __xa_store(&ent->mkeys,
- ent->stored,
- to_store,
- GFP_KERNEL);
- ent->stored++;
- queue_adjust_cache_locked(ent);
- WRITE_ONCE(ent->dev->cache.last_add,
- jiffies);
- }
- }
- }
- xa_unlock_irq(&ent->mkeys);
-
- /*
- * Notice xas_nomem() must always be called as it cleans
- * up any cached allocation.
- */
- if (!xas_nomem(&xas, GFP_KERNEL))
- break;
- xa_lock_irq(&ent->mkeys);
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ if (ent->mkeys_queue.ci >=
+ ent->mkeys_queue.num_pages * NUM_MKEYS_PER_PAGE) {
+ page = kzalloc(sizeof(*page), GFP_ATOMIC);
+ if (!page)
+ return -ENOMEM;
+ ent->mkeys_queue.num_pages++;
+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
+ } else {
+ page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
}
- xa_lock_irq(&ent->mkeys);
- if (xas_error(&xas))
- return xas_error(&xas);
- if (WARN_ON(curr))
- return -EINVAL;
- return 0;
-}
-
-static int push_mkey(struct mlx5_cache_ent *ent, bool limit_pendings,
- void *to_store)
-{
- int ret;
-
- xa_lock_irq(&ent->mkeys);
- ret = push_mkey_locked(ent, limit_pendings, to_store);
- xa_unlock_irq(&ent->mkeys);
- return ret;
-}
-
-static void undo_push_reserve_mkey(struct mlx5_cache_ent *ent)
-{
- void *old;
-
- ent->reserved--;
- old = __xa_erase(&ent->mkeys, ent->reserved);
- WARN_ON(old);
-}
-static void push_to_reserved(struct mlx5_cache_ent *ent, u32 mkey)
-{
- void *old;
-
- old = __xa_store(&ent->mkeys, ent->stored, xa_mk_value(mkey), 0);
- WARN_ON(old);
- ent->stored++;
+ page->mkeys[tmp] = mkey;
+ ent->mkeys_queue.ci++;
+ return 0;
}
-static u32 pop_stored_mkey(struct mlx5_cache_ent *ent)
+static int pop_mkey_locked(struct mlx5_cache_ent *ent)
{
- void *old, *xa_mkey;
-
- ent->stored--;
- ent->reserved--;
+ unsigned long tmp = (ent->mkeys_queue.ci - 1) % NUM_MKEYS_PER_PAGE;
+ struct mlx5_mkeys_page *last_page;
+ u32 mkey;
- if (ent->stored == ent->reserved) {
- xa_mkey = __xa_erase(&ent->mkeys, ent->stored);
- WARN_ON(!xa_mkey);
- return (u32)xa_to_value(xa_mkey);
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ last_page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ mkey = last_page->mkeys[tmp];
+ last_page->mkeys[tmp] = 0;
+ ent->mkeys_queue.ci--;
+ if (ent->mkeys_queue.num_pages > 1 && !tmp) {
+ list_del(&last_page->list);
+ ent->mkeys_queue.num_pages--;
+ kfree(last_page);
}
-
- xa_mkey = __xa_store(&ent->mkeys, ent->stored, XA_ZERO_ENTRY,
- GFP_KERNEL);
- WARN_ON(!xa_mkey || xa_is_err(xa_mkey));
- old = __xa_erase(&ent->mkeys, ent->reserved);
- WARN_ON(old);
- return (u32)xa_to_value(xa_mkey);
+ return mkey;
}
static void create_mkey_callback(int status, struct mlx5_async_work *context)
@@ -260,10 +201,10 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
if (status) {
create_mkey_warn(dev, status, mkey_out->out);
kfree(mkey_out);
- xa_lock_irqsave(&ent->mkeys, flags);
- undo_push_reserve_mkey(ent);
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ ent->pending--;
WRITE_ONCE(dev->fill_delay, 1);
- xa_unlock_irqrestore(&ent->mkeys, flags);
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
mod_timer(&dev->delay_timer, jiffies + HZ);
return;
}
@@ -272,11 +213,12 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context)
MLX5_GET(create_mkey_out, mkey_out->out, mkey_index));
WRITE_ONCE(dev->cache.last_add, jiffies);
- xa_lock_irqsave(&ent->mkeys, flags);
- push_to_reserved(ent, mkey_out->mkey);
+ spin_lock_irqsave(&ent->mkeys_queue.lock, flags);
+ push_mkey_locked(ent, mkey_out->mkey);
+ ent->pending--;
/* If we are doing fill_to_high_water then keep going. */
queue_adjust_cache_locked(ent);
- xa_unlock_irqrestore(&ent->mkeys, flags);
+ spin_unlock_irqrestore(&ent->mkeys_queue.lock, flags);
kfree(mkey_out);
}
@@ -301,12 +243,14 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs)
static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc)
{
- set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd);
+ set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0,
+ ent->dev->umrc.pd);
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3);
MLX5_SET(mkc, mkc, access_mode_4_2,
(ent->rb_key.access_mode >> 2) & 0x7);
+ MLX5_SET(mkc, mkc, ma_translation_mode, !!ent->rb_key.ats);
MLX5_SET(mkc, mkc, translations_octword_size,
get_mkc_octo_size(ent->rb_key.access_mode,
@@ -332,24 +276,28 @@ static int add_keys(struct mlx5_cache_ent *ent, unsigned int num)
set_cache_mkc(ent, mkc);
async_create->ent = ent;
- err = push_mkey(ent, true, NULL);
- if (err)
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->pending >= MAX_PENDING_REG_MR) {
+ err = -EAGAIN;
goto free_async_create;
+ }
+ ent->pending++;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
err = mlx5_ib_create_mkey_cb(async_create);
if (err) {
mlx5_ib_warn(ent->dev, "create mkey failed %d\n", err);
- goto err_undo_reserve;
+ goto err_create_mkey;
}
}
return 0;
-err_undo_reserve:
- xa_lock_irq(&ent->mkeys);
- undo_push_reserve_mkey(ent);
- xa_unlock_irq(&ent->mkeys);
+err_create_mkey:
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->pending--;
free_async_create:
+ spin_unlock_irq(&ent->mkeys_queue.lock);
kfree(async_create);
return err;
}
@@ -382,36 +330,36 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
{
u32 mkey;
- lockdep_assert_held(&ent->mkeys.xa_lock);
- if (!ent->stored)
+ lockdep_assert_held(&ent->mkeys_queue.lock);
+ if (!ent->mkeys_queue.ci)
return;
- mkey = pop_stored_mkey(ent);
- xa_unlock_irq(&ent->mkeys);
+ mkey = pop_mkey_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
mlx5_core_destroy_mkey(ent->dev->mdev, mkey);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
}
static int resize_available_mrs(struct mlx5_cache_ent *ent, unsigned int target,
bool limit_fill)
- __acquires(&ent->mkeys) __releases(&ent->mkeys)
+ __acquires(&ent->mkeys_queue.lock) __releases(&ent->mkeys_queue.lock)
{
int err;
- lockdep_assert_held(&ent->mkeys.xa_lock);
+ lockdep_assert_held(&ent->mkeys_queue.lock);
while (true) {
if (limit_fill)
target = ent->limit * 2;
- if (target == ent->reserved)
+ if (target == ent->pending + ent->mkeys_queue.ci)
return 0;
- if (target > ent->reserved) {
- u32 todo = target - ent->reserved;
+ if (target > ent->pending + ent->mkeys_queue.ci) {
+ u32 todo = target - (ent->pending + ent->mkeys_queue.ci);
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
err = add_keys(ent, todo);
if (err == -EAGAIN)
usleep_range(3000, 5000);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (err) {
if (err != -EAGAIN)
return err;
@@ -439,7 +387,7 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
* cannot free MRs that are in use. Compute the target value for stored
* mkeys.
*/
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (target < ent->in_use) {
err = -EINVAL;
goto err_unlock;
@@ -452,12 +400,12 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
err = resize_available_mrs(ent, target, false);
if (err)
goto err_unlock;
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
return count;
err_unlock:
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
return err;
}
@@ -468,7 +416,8 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- err = snprintf(lbuf, sizeof(lbuf), "%ld\n", ent->stored + ent->in_use);
+ err = snprintf(lbuf, sizeof(lbuf), "%ld\n",
+ ent->mkeys_queue.ci + ent->in_use);
if (err < 0)
return err;
@@ -497,10 +446,10 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
* Upon set we immediately fill the cache to the high water mark implied
* by the limit.
*/
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
ent->limit = var;
err = resize_available_mrs(ent, 0, true);
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
if (err)
return err;
return count;
@@ -536,9 +485,9 @@ static bool someone_adding(struct mlx5_mkey_cache *cache)
mutex_lock(&cache->rb_lock);
for (node = rb_first(&cache->rb_root); node; node = rb_next(node)) {
ent = rb_entry(node, struct mlx5_cache_ent, node);
- xa_lock_irq(&ent->mkeys);
- ret = ent->stored < ent->limit;
- xa_unlock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ret = ent->mkeys_queue.ci < ent->limit;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
if (ret) {
mutex_unlock(&cache->rb_lock);
return true;
@@ -555,26 +504,26 @@ static bool someone_adding(struct mlx5_mkey_cache *cache)
*/
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
{
- lockdep_assert_held(&ent->mkeys.xa_lock);
+ lockdep_assert_held(&ent->mkeys_queue.lock);
if (ent->disabled || READ_ONCE(ent->dev->fill_delay) || ent->is_tmp)
return;
- if (ent->stored < ent->limit) {
+ if (ent->mkeys_queue.ci < ent->limit) {
ent->fill_to_high_water = true;
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
} else if (ent->fill_to_high_water &&
- ent->reserved < 2 * ent->limit) {
+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit) {
/*
* Once we start populating due to hitting a low water mark,
* continue until we pass the high water mark.
*/
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
- } else if (ent->stored == 2 * ent->limit) {
+ } else if (ent->mkeys_queue.ci == 2 * ent->limit) {
ent->fill_to_high_water = false;
- } else if (ent->stored > 2 * ent->limit) {
+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
/* Queue deletion of excess entries */
ent->fill_to_high_water = false;
- if (ent->stored != ent->reserved)
+ if (ent->pending)
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(1000));
else
@@ -582,21 +531,37 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
}
}
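
The branches above form a textbook hysteresis: filling starts when the stored count drops below the limit (low water mark) and continues until stored plus pending creations reach twice the limit (high water mark); shrinking starts only above the high water mark and is deferred while creations are still in flight. A hedged sketch that condenses the decision, using limit = 16 as a worked example (fill below 16, keep filling to 32, shrink above 32):

	/* Sketch only - condenses the branches of queue_adjust_cache_locked(). */
	enum cache_action { FILL_NOW, KEEP_FILLING, SHRINK_LATER, SHRINK_NOW, STEADY };

	static enum cache_action cache_decide(u32 ci, u32 pending, u32 limit,
					      bool fill_to_high_water)
	{
		if (ci < limit)
			return FILL_NOW;		/* below low water mark */
		if (fill_to_high_water && ci + pending < 2 * limit)
			return KEEP_FILLING;		/* up to high water mark */
		if (ci > 2 * limit)
			return pending ? SHRINK_LATER : SHRINK_NOW;
		return STEADY;
	}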
+static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
+{
+ u32 mkey;
+
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ while (ent->mkeys_queue.ci) {
+ mkey = pop_mkey_locked(ent);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ mlx5_core_destroy_mkey(dev->mdev, mkey);
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ }
+ ent->tmp_cleanup_scheduled = false;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+}
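
clean_keys() above is the standard shape for draining a spinlock-protected queue when destroying each element can sleep: pop under the lock, drop the lock around the blocking destroy, retake it for the next iteration. A generic sketch, with fifo_pop()/destroy_item() as assumed placeholders:

	#include <linux/spinlock.h>
	#include <linux/types.h>

	struct mkey_fifo {
		spinlock_t lock;
		unsigned int count;
	};

	u32 fifo_pop(struct mkey_fifo *q);	/* assumed; caller holds q->lock */
	void destroy_item(u32 item);		/* assumed; may sleep */

	static void drain_fifo(struct mkey_fifo *q)
	{
		spin_lock_irq(&q->lock);
		while (q->count) {
			u32 item = fifo_pop(q);

			spin_unlock_irq(&q->lock);
			destroy_item(item);
			spin_lock_irq(&q->lock);
		}
		spin_unlock_irq(&q->lock);
	}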
+
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
struct mlx5_ib_dev *dev = ent->dev;
struct mlx5_mkey_cache *cache = &dev->cache;
int err;
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (ent->disabled)
goto out;
- if (ent->fill_to_high_water && ent->reserved < 2 * ent->limit &&
+ if (ent->fill_to_high_water &&
+ ent->mkeys_queue.ci + ent->pending < 2 * ent->limit &&
!READ_ONCE(dev->fill_delay)) {
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
err = add_keys(ent, 1);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (ent->disabled)
goto out;
if (err) {
@@ -614,7 +579,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
msecs_to_jiffies(1000));
}
}
- } else if (ent->stored > 2 * ent->limit) {
+ } else if (ent->mkeys_queue.ci > 2 * ent->limit) {
bool need_delay;
/*
@@ -629,11 +594,11 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
* the garbage collection work to try to run in the next cycle, in
* order to free CPU resources to other tasks.
*/
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
need_delay = need_resched() || someone_adding(cache) ||
!time_after(jiffies,
READ_ONCE(cache->last_add) + 300 * HZ);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
if (ent->disabled)
goto out;
if (need_delay) {
@@ -644,7 +609,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
queue_adjust_cache_locked(ent);
}
out:
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
static void delayed_cache_work_func(struct work_struct *work)
@@ -652,7 +617,11 @@ static void delayed_cache_work_func(struct work_struct *work)
struct mlx5_cache_ent *ent;
ent = container_of(work, struct mlx5_cache_ent, dwork.work);
- __cache_work_func(ent);
+ /* temp entries are never filled, only cleaned */
+ if (ent->is_tmp)
+ clean_keys(ent->dev, ent);
+ else
+ __cache_work_func(ent);
}
static int cache_ent_key_cmp(struct mlx5r_cache_rb_key key1,
@@ -696,10 +665,8 @@ static int mlx5_cache_ent_insert(struct mlx5_mkey_cache *cache,
new = &((*new)->rb_left);
if (cmp < 0)
new = &((*new)->rb_right);
- if (cmp == 0) {
- mutex_unlock(&cache->rb_lock);
+ if (cmp == 0)
return -EEXIST;
- }
}
/* Add new node and rebalance tree. */
@@ -715,6 +682,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
{
struct rb_node *node = dev->cache.rb_root.rb_node;
struct mlx5_cache_ent *cur, *smallest = NULL;
+ u64 ndescs_limit;
int cmp;
/*
@@ -733,10 +701,18 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
return cur;
}
+ /*
+	 * Limit the usage of mkeys larger than twice the required size while
+	 * also allowing the use of the smallest cache entry for small MRs.
+ */
+ ndescs_limit = max_t(u64, rb_key.ndescs * 2,
+ MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS);
+
return (smallest &&
smallest->rb_key.access_mode == rb_key.access_mode &&
smallest->rb_key.access_flags == rb_key.access_flags &&
- smallest->rb_key.ats == rb_key.ats) ?
+ smallest->rb_key.ats == rb_key.ats &&
+ smallest->rb_key.ndescs <= ndescs_limit) ?
smallest :
NULL;
}
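
Concretely, for a request of rb_key.ndescs = 100 descriptors (and assuming the persistent-entry minimum is below that):

	ndescs_limit = max(2 * 100, MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS) = 200

so the smallest matching entry is reused only if it holds at most 200 descriptors, bounding the wasted translation space to 2x the request while still letting tiny MRs land in the minimum-size entry.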
@@ -752,28 +728,30 @@ static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
if (!mr)
return ERR_PTR(-ENOMEM);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
ent->in_use++;
- if (!ent->stored) {
+ if (!ent->mkeys_queue.ci) {
queue_adjust_cache_locked(ent);
ent->miss++;
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
err = create_cache_mkey(ent, &mr->mmkey.key);
if (err) {
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
ent->in_use--;
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
kfree(mr);
return ERR_PTR(err);
}
} else {
- mr->mmkey.key = pop_stored_mkey(ent);
+ mr->mmkey.key = pop_mkey_locked(ent);
queue_adjust_cache_locked(ent);
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
mr->mmkey.cache_ent = ent;
mr->mmkey.type = MLX5_MKEY_MR;
+ mr->mmkey.rb_key = ent->rb_key;
+ mr->mmkey.cacheable = true;
init_waitqueue_head(&mr->mmkey.wait);
return mr;
}
@@ -819,21 +797,6 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
return _mlx5_mr_cache_alloc(dev, ent, access_flags);
}
-static void clean_keys(struct mlx5_ib_dev *dev, struct mlx5_cache_ent *ent)
-{
- u32 mkey;
-
- cancel_delayed_work(&ent->dwork);
- xa_lock_irq(&ent->mkeys);
- while (ent->stored) {
- mkey = pop_stored_mkey(ent);
- xa_unlock_irq(&ent->mkeys);
- mlx5_core_destroy_mkey(dev->mdev, mkey);
- xa_lock_irq(&ent->mkeys);
- }
- xa_unlock_irq(&ent->mkeys);
-}
-
static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
if (!mlx5_debugfs_root || dev->is_rep)
@@ -859,7 +822,7 @@ static void mlx5_mkey_cache_debugfs_add_ent(struct mlx5_ib_dev *dev,
dir = debugfs_create_dir(ent->name, dev->cache.fs_root);
debugfs_create_file("size", 0600, dir, ent, &size_fops);
debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
- debugfs_create_ulong("cur", 0400, dir, &ent->stored);
+ debugfs_create_ulong("cur", 0400, dir, &ent->mkeys_queue.ci);
debugfs_create_u32("miss", 0600, dir, &ent->miss);
}
@@ -881,6 +844,31 @@ static void delay_time_func(struct timer_list *t)
WRITE_ONCE(dev->fill_delay, 0);
}
+static int mlx5r_mkeys_init(struct mlx5_cache_ent *ent)
+{
+ struct mlx5_mkeys_page *page;
+
+ page = kzalloc(sizeof(*page), GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&ent->mkeys_queue.pages_list);
+ spin_lock_init(&ent->mkeys_queue.lock);
+ list_add_tail(&page->list, &ent->mkeys_queue.pages_list);
+ ent->mkeys_queue.num_pages++;
+ return 0;
+}
+
+static void mlx5r_mkeys_uninit(struct mlx5_cache_ent *ent)
+{
+ struct mlx5_mkeys_page *page;
+
+ WARN_ON(ent->mkeys_queue.ci || ent->mkeys_queue.num_pages > 1);
+ page = list_last_entry(&ent->mkeys_queue.pages_list,
+ struct mlx5_mkeys_page, list);
+ list_del(&page->list);
+ kfree(page);
+}
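
The mkeys now live in a paged FIFO guarded by a plain spinlock rather than an xarray. The real definitions are in mlx5_ib.h, outside this diff; inferred from the fields used here, the shape is roughly:

	#include <linux/list.h>
	#include <linux/spinlock.h>
	#include <linux/types.h>

	/* Inferred sketch - see mlx5_ib.h for the actual definitions. */
	struct mlx5_mkeys_page {
		struct list_head list;
		u32 mkeys[64];			/* per-page capacity is a guess */
	};

	struct mlx5_mkeys_queue {
		struct list_head pages_list;	/* of struct mlx5_mkeys_page */
		u32 num_pages;
		unsigned long ci;		/* number of stored mkeys */
		spinlock_t lock;		/* protects the whole queue */
	};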
+
struct mlx5_cache_ent *
mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
struct mlx5r_cache_rb_key rb_key,
@@ -894,7 +882,9 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
if (!ent)
return ERR_PTR(-ENOMEM);
- xa_init_flags(&ent->mkeys, XA_FLAGS_LOCK_IRQ);
+ ret = mlx5r_mkeys_init(ent);
+ if (ret)
+ goto mkeys_err;
ent->rb_key = rb_key;
ent->dev = dev;
ent->is_tmp = !persistent_entry;
@@ -902,10 +892,8 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
ret = mlx5_cache_ent_insert(&dev->cache, ent);
- if (ret) {
- kfree(ent);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto ent_insert_err;
if (persistent_entry) {
if (rb_key.access_mode == MLX5_MKC_ACCESS_MODE_KSM)
@@ -921,42 +909,14 @@ mlx5r_cache_create_ent_locked(struct mlx5_ib_dev *dev,
ent->limit = 0;
mlx5_mkey_cache_debugfs_add_ent(dev, ent);
- } else {
- mod_delayed_work(ent->dev->cache.wq,
- &ent->dev->cache.remove_ent_dwork,
- msecs_to_jiffies(30 * 1000));
}
return ent;
-}
-
-static void remove_ent_work_func(struct work_struct *work)
-{
- struct mlx5_mkey_cache *cache;
- struct mlx5_cache_ent *ent;
- struct rb_node *cur;
-
- cache = container_of(work, struct mlx5_mkey_cache,
- remove_ent_dwork.work);
- mutex_lock(&cache->rb_lock);
- cur = rb_last(&cache->rb_root);
- while (cur) {
- ent = rb_entry(cur, struct mlx5_cache_ent, node);
- cur = rb_prev(cur);
- mutex_unlock(&cache->rb_lock);
-
- xa_lock_irq(&ent->mkeys);
- if (!ent->is_tmp) {
- xa_unlock_irq(&ent->mkeys);
- mutex_lock(&cache->rb_lock);
- continue;
- }
- xa_unlock_irq(&ent->mkeys);
-
- clean_keys(ent->dev, ent);
- mutex_lock(&cache->rb_lock);
- }
- mutex_unlock(&cache->rb_lock);
+ent_insert_err:
+ mlx5r_mkeys_uninit(ent);
+mkeys_err:
+ kfree(ent);
+ return ERR_PTR(ret);
}
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
@@ -974,7 +934,6 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
mutex_init(&dev->slow_path_mutex);
mutex_init(&dev->cache.rb_lock);
dev->cache.rb_root = RB_ROOT;
- INIT_DELAYED_WORK(&dev->cache.remove_ent_dwork, remove_ent_work_func);
cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
if (!cache->wq) {
mlx5_ib_warn(dev, "failed to create work queue\n");
@@ -986,7 +945,7 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
mlx5_mkey_cache_debugfs_init(dev);
mutex_lock(&cache->rb_lock);
for (i = 0; i <= mkey_cache_max_order(dev); i++) {
- rb_key.ndescs = 1 << (i + 2);
+ rb_key.ndescs = MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS << i;
ent = mlx5r_cache_create_ent_locked(dev, rb_key, true);
if (IS_ERR(ent)) {
ret = PTR_ERR(ent);
@@ -1001,9 +960,9 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
mutex_unlock(&cache->rb_lock);
for (node = rb_first(root); node; node = rb_next(node)) {
ent = rb_entry(node, struct mlx5_cache_ent, node);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
queue_adjust_cache_locked(ent);
- xa_unlock_irq(&ent->mkeys);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
}
return 0;
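
The persistent entries therefore scale from the configurable minimum instead of the old fixed series. Assuming MLX5_MR_CACHE_PERSISTENT_ENTRY_MIN_DESCS is 8 (its definition is outside this hunk), the per-order descriptor counts become:

	i:    0    1    2    3   ...
	old:  4    8   16   32   ...	/* 1 << (i + 2) */
	new:  8   16   32   64   ...	/* MIN_DESCS << i */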
@@ -1024,25 +983,34 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
if (!dev->cache.wq)
return;
- cancel_delayed_work_sync(&dev->cache.remove_ent_dwork);
mutex_lock(&dev->cache.rb_lock);
for (node = rb_first(root); node; node = rb_next(node)) {
ent = rb_entry(node, struct mlx5_cache_ent, node);
- xa_lock_irq(&ent->mkeys);
+ spin_lock_irq(&ent->mkeys_queue.lock);
ent->disabled = true;
- xa_unlock_irq(&ent->mkeys);
- cancel_delayed_work_sync(&ent->dwork);
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ cancel_delayed_work(&ent->dwork);
}
+ mutex_unlock(&dev->cache.rb_lock);
+
+ /*
+	 * After all entries are disabled and can no longer reschedule on the
+	 * WQ, flush it together with all async commands.
+ */
+ flush_workqueue(dev->cache.wq);
mlx5_mkey_cache_debugfs_cleanup(dev);
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
+ /* At this point all entries are disabled and have no concurrent work. */
+ mutex_lock(&dev->cache.rb_lock);
node = rb_first(root);
while (node) {
ent = rb_entry(node, struct mlx5_cache_ent, node);
node = rb_next(node);
clean_keys(dev, ent);
rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
kfree(ent);
}
mutex_unlock(&dev->cache.rb_lock);
@@ -1076,6 +1044,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
MLX5_SET(mkc, mkc, length64, 1);
set_mkc_access_pd_addr_fields(mkc, acc | IB_ACCESS_RELAXED_ORDERING, 0,
pd);
+ MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats));
err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
if (err)
@@ -1140,12 +1109,10 @@ static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem,
static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct ib_umem *umem, u64 iova,
- int access_flags)
+ int access_flags, int access_mode)
{
- struct mlx5r_cache_rb_key rb_key = {
- .access_mode = MLX5_MKC_ACCESS_MODE_MTT,
- };
struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5r_cache_rb_key rb_key = {};
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
unsigned int page_size;
@@ -1153,11 +1120,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
else
- page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size,
- 0, iova);
+ page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
if (WARN_ON(!page_size))
return ERR_PTR(-EINVAL);
+ rb_key.access_mode = access_mode;
rb_key.ndescs = ib_umem_num_dma_blocks(umem, page_size);
rb_key.ats = mlx5_umem_needs_ats(dev, umem, access_flags);
rb_key.access_flags = get_unchangeable_access_flags(dev, access_flags);
@@ -1168,11 +1135,12 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
*/
if (!ent) {
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(pd, umem, iova, access_flags, page_size, false);
+ mr = reg_create(pd, umem, iova, access_flags, page_size, false, access_mode);
mutex_unlock(&dev->slow_path_mutex);
if (IS_ERR(mr))
return mr;
mr->mmkey.rb_key = rb_key;
+ mr->mmkey.cacheable = true;
return mr;
}
@@ -1188,13 +1156,71 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
+static struct ib_mr *
+reg_create_crossing_vhca_mr(struct ib_pd *pd, u64 iova, u64 length, int access_flags,
+ u32 crossed_lkey)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int access_mode = MLX5_MKC_ACCESS_MODE_CROSSING;
+ struct mlx5_ib_mr *mr;
+ void *mkc;
+ int inlen;
+ u32 *in;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev->mdev, crossing_vhca_mkey))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_1;
+ }
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, crossing_target_vhca_id,
+ MLX5_CAP_GEN(dev->mdev, vhca_id));
+ MLX5_SET(mkc, mkc, translations_octword_size, crossed_lkey);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
+ MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
+
+	/* For this crossing mkey the IOVA must be 0 and the length must be IOVA + length */
+ set_mkc_access_pd_addr_fields(mkc, access_flags, 0, pd);
+ MLX5_SET64(mkc, mkc, len, iova + length);
+
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);
+ if (err)
+ goto err_2;
+
+ mr->mmkey.type = MLX5_MKEY_MR;
+ set_mr_fields(dev, mr, length, access_flags, iova);
+ mr->ibmr.pd = pd;
+ kvfree(in);
+ mlx5_ib_dbg(dev, "crossing mkey = 0x%x\n", mr->mmkey.key);
+
+ return &mr->ibmr;
+err_2:
+ kvfree(in);
+err_1:
+ kfree(mr);
+ return ERR_PTR(err);
+}
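
Two details in reg_create_crossing_vhca_mr() are easy to misread; both are inferences from the setters above, not spelled out in the patch:

	/*
	 * - In CROSSING access mode the translations_octword_size field is
	 *   repurposed to carry the lkey being crossed into on the target
	 *   vhca (crossed_lkey), not a translation-table size.
	 * - Programming len = iova + length with addr = 0 keeps the
	 *   application's original IOVA valid when going through this mkey.
	 */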
+
/*
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
*/
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate)
+ unsigned int page_size, bool populate,
+ int access_mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr;
@@ -1203,7 +1229,9 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
int inlen;
u32 *in;
int err;
- bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
+ bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)) &&
+ (access_mode == MLX5_MKC_ACCESS_MODE_MTT);
+ bool ksm_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
if (!page_size)
return ERR_PTR(-EINVAL);
@@ -1226,7 +1254,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
if (populate) {
- if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) {
+ if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND || ksm_mode)) {
err = -EINVAL;
goto err_2;
}
@@ -1235,20 +1263,29 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
}
/* The pg_access bit allows setting the access flags
- * in the page list submitted with the command. */
+ * in the page list submitted with the command.
+ */
MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
set_mkc_access_pd_addr_fields(mkc, access_flags, iova,
populate ? pd : dev->umrc.pd);
+	/* In a data direct flow, overwrite the pdn field with the internal kernel PD */
+ if (umem->is_dmabuf && ksm_mode)
+ MLX5_SET(mkc, mkc, pd, dev->ddr.pdn);
+
MLX5_SET(mkc, mkc, free, !populate);
- MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode_1_0, access_mode);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET64(mkc, mkc, len, umem->length);
MLX5_SET(mkc, mkc, bsf_octword_size, 0);
- MLX5_SET(mkc, mkc, translations_octword_size,
- get_octo_len(iova, umem->length, mr->page_shift));
+ if (ksm_mode)
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift) * 2);
+ else
+ MLX5_SET(mkc, mkc, translations_octword_size,
+ get_octo_len(iova, umem->length, mr->page_shift));
MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
if (mlx5_umem_needs_ats(dev, umem, access_flags))
MLX5_SET(mkc, mkc, ma_translation_mode, 1);
@@ -1361,6 +1398,7 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_PATTERN_SW_ICM:
+ case MLX5_IB_UAPI_DM_TYPE_ENCAP_SW_ICM:
if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
return ERR_PTR(-EINVAL);
@@ -1384,13 +1422,15 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
- mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
+ mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT);
} else {
- unsigned int page_size = mlx5_umem_find_best_pgsz(
- umem, mkc, log_page_size, 0, iova);
+ unsigned int page_size =
+ mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
mutex_lock(&dev->slow_path_mutex);
- mr = reg_create(pd, umem, iova, access_flags, page_size, true);
+ mr = reg_create(pd, umem, iova, access_flags, page_size,
+ true, MLX5_MKC_ACCESS_MODE_MTT);
mutex_unlock(&dev->slow_path_mutex);
}
if (IS_ERR(mr)) {
@@ -1453,7 +1493,8 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
if (IS_ERR(odp))
return ERR_CAST(odp);
- mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags);
+ mr = alloc_cacheable_mr(pd, &odp->umem, iova, access_flags,
+ MLX5_MKC_ACCESS_MODE_MTT);
if (IS_ERR(mr)) {
ib_umem_release(&odp->umem);
return ERR_CAST(mr);
@@ -1481,6 +1522,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem *umem;
+ int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
return ERR_PTR(-EOPNOTSUPP);
@@ -1488,6 +1530,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, iova, length, access_flags);
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
+
if (access_flags & IB_ACCESS_ON_DEMAND)
return create_user_odp_mr(pd, start, length, iova, access_flags,
udata);
@@ -1516,31 +1562,31 @@ static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = {
.move_notify = mlx5_ib_dmabuf_invalidate_cb,
};
-struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
- u64 length, u64 virt_addr,
- int fd, int access_flags,
- struct ib_udata *udata)
+static struct ib_mr *
+reg_user_mr_dmabuf(struct ib_pd *pd, struct device *dma_device,
+ u64 offset, u64 length, u64 virt_addr,
+ int fd, int access_flags, int access_mode)
{
+ bool pinned_mode = (access_mode == MLX5_MKC_ACCESS_MODE_KSM);
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
struct ib_umem_dmabuf *umem_dmabuf;
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
- !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
- return ERR_PTR(-EOPNOTSUPP);
-
- mlx5_ib_dbg(dev,
- "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
- offset, virt_addr, length, fd, access_flags);
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
- /* dmabuf requires xlt update via umr to work. */
- if (!mlx5r_umr_can_load_pas(dev, length))
- return ERR_PTR(-EINVAL);
+ if (!pinned_mode)
+ umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev,
+ offset, length, fd,
+ access_flags,
+ &mlx5_ib_dmabuf_attach_ops);
+ else
+ umem_dmabuf = ib_umem_dmabuf_get_pinned_with_dma_device(&dev->ib_dev,
+ dma_device, offset, length,
+ fd, access_flags);
- umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
- access_flags,
- &mlx5_ib_dmabuf_attach_ops);
if (IS_ERR(umem_dmabuf)) {
mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n",
PTR_ERR(umem_dmabuf));
@@ -1548,7 +1594,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
}
mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr,
- access_flags);
+ access_flags, access_mode);
if (IS_ERR(mr)) {
ib_umem_release(&umem_dmabuf->umem);
return ERR_CAST(mr);
@@ -1558,9 +1604,13 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages);
umem_dmabuf->private = mr;
- err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
- if (err)
- goto err_dereg_mr;
+ if (!pinned_mode) {
+ err = mlx5r_store_odp_mkey(dev, &mr->mmkey);
+ if (err)
+ goto err_dereg_mr;
+ } else {
+ mr->data_direct = true;
+ }
err = mlx5_ib_init_dmabuf_mr(mr);
if (err)
@@ -1568,10 +1618,101 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
return &mr->ibmr;
err_dereg_mr:
- mlx5_ib_dereg_mr(&mr->ibmr, NULL);
+ __mlx5_ib_dereg_mr(&mr->ibmr);
return ERR_PTR(err);
}
+static struct ib_mr *
+reg_user_mr_dmabuf_by_data_direct(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_data_direct_dev *data_direct_dev;
+ struct ib_mr *crossing_mr;
+ struct ib_mr *crossed_mr;
+ int ret = 0;
+
+	/* Per HW behaviour, the IOVA must be page aligned in KSM mode */
+ if (!PAGE_ALIGNED(virt_addr) || (access_flags & IB_ACCESS_ON_DEMAND))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&dev->data_direct_lock);
+ data_direct_dev = dev->data_direct_dev;
+ if (!data_direct_dev) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* The device's 'data direct mkey' was created without RO flags to
+ * simplify things and allow for a single mkey per device.
+	 * Since RO is not mandatory, mask it out accordingly.
+ */
+ access_flags &= ~IB_ACCESS_RELAXED_ORDERING;
+ crossed_mr = reg_user_mr_dmabuf(pd, &data_direct_dev->pdev->dev,
+ offset, length, virt_addr, fd,
+ access_flags, MLX5_MKC_ACCESS_MODE_KSM);
+ if (IS_ERR(crossed_mr)) {
+ ret = PTR_ERR(crossed_mr);
+ goto end;
+ }
+
+ mutex_lock(&dev->slow_path_mutex);
+ crossing_mr = reg_create_crossing_vhca_mr(pd, virt_addr, length, access_flags,
+ crossed_mr->lkey);
+ mutex_unlock(&dev->slow_path_mutex);
+ if (IS_ERR(crossing_mr)) {
+ __mlx5_ib_dereg_mr(crossed_mr);
+ ret = PTR_ERR(crossing_mr);
+ goto end;
+ }
+
+ list_add_tail(&to_mmr(crossed_mr)->dd_node, &dev->data_direct_mr_list);
+ to_mmr(crossing_mr)->dd_crossed_mr = to_mmr(crossed_mr);
+ to_mmr(crossing_mr)->data_direct = true;
+end:
+ mutex_unlock(&dev->data_direct_lock);
+ return ret ? ERR_PTR(ret) : crossing_mr;
+}
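
Putting the two registrations together, a data-direct MR ends up as a two-level mkey chain (schematic only):

	/*
	 * application lkey/rkey
	 *   -> crossing mkey (MLX5_MKC_ACCESS_MODE_CROSSING, this vhca)
	 *        -> crossed_mr->lkey (KSM mkey from reg_user_mr_dmabuf(),
	 *           pinned dma-buf mapped for the data-direct device)
	 *             -> dma-buf pages
	 */

Teardown unwinds the chain in the opposite order, which is what dereg_crossing_data_direct_mr() further down does.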
+
+struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
+ u64 length, u64 virt_addr,
+ int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ int mlx5_access_flags = 0;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) ||
+ !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS)) {
+ err = uverbs_get_flags32(&mlx5_access_flags, attrs,
+ MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS,
+ MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ mlx5_ib_dbg(dev,
+ "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x, mlx5_access_flags 0x%x\n",
+ offset, virt_addr, length, fd, access_flags, mlx5_access_flags);
+
+ /* dmabuf requires xlt update via umr to work. */
+ if (!mlx5r_umr_can_load_pas(dev, length))
+ return ERR_PTR(-EINVAL);
+
+ if (mlx5_access_flags & MLX5_IB_UAPI_REG_DMABUF_ACCESS_DATA_DIRECT)
+ return reg_user_mr_dmabuf_by_data_direct(pd, offset, length, virt_addr,
+ fd, access_flags);
+
+ return reg_user_mr_dmabuf(pd, pd->device->dma_device,
+ offset, length, virt_addr,
+ fd, access_flags, MLX5_MKC_ACCESS_MODE_MTT);
+}
+
/*
* True if the change in access flags can be done via UMR, only some access
* flags can be updated.
@@ -1583,7 +1724,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
unsigned int diffs = current_access_flags ^ target_access_flags;
if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING |
+ IB_ACCESS_REMOTE_ATOMIC))
return false;
return mlx5r_umr_can_reconfig(dev, current_access_flags,
target_access_flags);
@@ -1602,8 +1744,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
- *page_size =
- mlx5_umem_find_best_pgsz(new_umem, mkc, log_page_size, 0, iova);
+ *page_size = mlx5_umem_mkc_find_best_pgsz(dev, new_umem, iova);
if (WARN_ON(!*page_size))
return false;
return (mr->mmkey.cache_ent->rb_key.ndescs) >=
@@ -1666,7 +1807,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
struct mlx5_ib_mr *mr = to_mmr(ib_mr);
int err;
- if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
+ if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || mr->data_direct)
return ERR_PTR(-EOPNOTSUPP);
mlx5_ib_dbg(
@@ -1766,6 +1907,11 @@ mlx5_alloc_priv_descs(struct ib_device *device,
int ret;
add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+ if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) {
+ int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size));
+
+ add_size = min_t(int, end - size, add_size);
+ }
mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
if (!mr->descs_alloc)
@@ -1789,7 +1935,7 @@ err:
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
- if (!mr->umem && mr->descs) {
+ if (!mr->umem && !mr->data_direct && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -1809,7 +1955,7 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
int ret;
if (mr->mmkey.cache_ent) {
- xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
mr->mmkey.cache_ent->in_use--;
goto end;
}
@@ -1823,7 +1969,7 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
return -EOPNOTSUPP;
}
mr->mmkey.cache_ent = ent;
- xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
mutex_unlock(&cache->rb_lock);
goto end;
}
@@ -1835,16 +1981,70 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
return PTR_ERR(ent);
mr->mmkey.cache_ent = ent;
- xa_lock_irq(&mr->mmkey.cache_ent->mkeys);
+ spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
end:
- ret = push_mkey_locked(mr->mmkey.cache_ent, false,
- xa_mk_value(mr->mmkey.key));
- xa_unlock_irq(&mr->mmkey.cache_ent->mkeys);
+ ret = push_mkey_locked(mr->mmkey.cache_ent, mr->mmkey.key);
+ spin_unlock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
return ret;
}
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+static int mlx5_ib_revoke_data_direct_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
+ int err;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+ mr->revoked = true;
+ err = mlx5r_umr_revoke_mr(mr);
+ if (WARN_ON(err))
+ return err;
+
+ ib_umem_dmabuf_revoke(umem_dmabuf);
+ return 0;
+}
+
+void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_mr *mr, *next;
+
+ lockdep_assert_held(&dev->data_direct_lock);
+
+ list_for_each_entry_safe(mr, next, &dev->data_direct_mr_list, dd_node) {
+ list_del(&mr->dd_node);
+ mlx5_ib_revoke_data_direct_mr(mr);
+ }
+}
+
+static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+
+ if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+ ent = mr->mmkey.cache_ent;
+		/* Upon storing in a clean temp entry, schedule its cleanup */
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
+ mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
+ msecs_to_jiffies(30 * 1000));
+ ent->tmp_cleanup_scheduled = true;
+ }
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ return 0;
+ }
+
+ if (ent) {
+ spin_lock_irq(&ent->mkeys_queue.lock);
+ ent->in_use--;
+ mr->mmkey.cache_ent = NULL;
+ spin_unlock_irq(&ent->mkeys_queue.lock);
+ }
+ return destroy_mkey(dev, mr);
+}
+
+static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
struct mlx5_ib_mr *mr = to_mmr(ibmr);
struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
@@ -1889,16 +2089,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
}
/* Stop DMA */
- if (mr->umem && mlx5r_umr_can_load_pas(dev, mr->umem->length))
- if (mlx5r_umr_revoke_mr(mr) ||
- cache_ent_find_and_store(dev, mr))
- mr->mmkey.cache_ent = NULL;
-
- if (!mr->mmkey.cache_ent) {
- rc = destroy_mkey(to_mdev(mr->ibmr.device), mr);
- if (rc)
- return rc;
- }
+ rc = mlx5_revoke_mr(mr);
+ if (rc)
+ return rc;
if (mr->umem) {
bool is_odp = is_odp_mr(mr);
@@ -1918,9 +2111,40 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
return 0;
}
+static int dereg_crossing_data_direct_mr(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *mr)
+{
+ struct mlx5_ib_mr *dd_crossed_mr = mr->dd_crossed_mr;
+ int ret;
+
+ ret = __mlx5_ib_dereg_mr(&mr->ibmr);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dev->data_direct_lock);
+ if (!dd_crossed_mr->revoked)
+ list_del(&dd_crossed_mr->dd_node);
+
+ ret = __mlx5_ib_dereg_mr(&dd_crossed_mr->ibmr);
+ mutex_unlock(&dev->data_direct_lock);
+ return ret;
+}
+
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+ struct mlx5_ib_mr *mr = to_mmr(ibmr);
+ struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+
+ if (mr->data_direct)
+ return dereg_crossing_data_direct_mr(dev, mr);
+
+ return __mlx5_ib_dereg_mr(ibmr);
+}
+
static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
int access_mode, int page_shift)
{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
void *mkc;
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
@@ -1933,6 +2157,9 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
MLX5_SET(mkc, mkc, umr_en, 1);
MLX5_SET(mkc, mkc, log_page_size, page_shift);
+ if (access_mode == MLX5_MKC_ACCESS_MODE_PA ||
+ access_mode == MLX5_MKC_ACCESS_MODE_MTT)
+ MLX5_SET(mkc, mkc, ma_translation_mode, MLX5_CAP_GEN(dev->mdev, ats));
}
static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4a04cbc5b78a..4b37446758fd 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -45,7 +45,7 @@
/* Contains the details of a pagefault. */
struct mlx5_pagefault {
u32 bytes_committed;
- u32 token;
+ u64 token;
u8 event_subtype;
u8 type;
union {
@@ -74,6 +74,14 @@ struct mlx5_pagefault {
u32 rdma_op_len;
u64 rdma_va;
} rdma;
+ struct {
+ u64 va;
+ u32 mkey;
+ u32 fault_byte_count;
+ u32 prefetch_before_byte_count;
+ u32 prefetch_after_byte_count;
+ u8 flags;
+ } memory;
};
struct mlx5_ib_pf_eq *eq;
@@ -99,13 +107,20 @@ static u64 mlx5_imr_ksm_entries;
static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
struct mlx5_ib_mr *imr, int flags)
{
+ struct mlx5_core_dev *dev = mr_to_mdev(imr)->mdev;
struct mlx5_klm *end = pklm + nentries;
+ int step = MLX5_CAP_ODP(dev, mem_page_fault) ? MLX5_IMR_MTT_SIZE : 0;
+ __be32 key = MLX5_CAP_ODP(dev, mem_page_fault) ?
+ cpu_to_be32(imr->null_mmkey.key) :
+ mr_to_mdev(imr)->mkeys.null_mkey;
+ u64 va =
+ MLX5_CAP_ODP(dev, mem_page_fault) ? idx * MLX5_IMR_MTT_SIZE : 0;
if (flags & MLX5_IB_UPD_XLT_ZAP) {
- for (; pklm != end; pklm++, idx++) {
+ for (; pklm != end; pklm++, idx++, va += step) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
- pklm->key = mr_to_mdev(imr)->mkeys.null_mkey;
- pklm->va = 0;
+ pklm->key = key;
+ pklm->va = cpu_to_be64(va);
}
return;
}
@@ -129,7 +144,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
*/
lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
- for (; pklm != end; pklm++, idx++) {
+ for (; pklm != end; pklm++, idx++, va += step) {
struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
@@ -137,8 +152,8 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
} else {
- pklm->key = mr_to_mdev(imr)->mkeys.null_mkey;
- pklm->va = 0;
+ pklm->key = key;
+ pklm->va = cpu_to_be64(va);
}
}
}
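
The hedged net effect of the populate_klm() changes: with the mem_page_fault capability, each unmapped KLM slot points at the implicit MR's private null mkey at its own offset, so a memory page fault identifies exactly which child range was touched:

	/*
	 * memory scheme: key = imr->null_mmkey.key, va = idx * MLX5_IMR_MTT_SIZE
	 * legacy scheme: key = global null_mkey,    va = 0 (step == 0)
	 */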
@@ -217,6 +232,9 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
return;
xa_erase(&imr->implicit_children, idx);
+ if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
@@ -332,46 +350,46 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
else
dev->odp_max_size = BIT_ULL(MLX5_MAX_UMR_SHIFT + PAGE_SHIFT);
- if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, ud_odp_caps.srq_receive))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.send))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.receive))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.write))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.read))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.atomic))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
- if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, rc_odp_caps.srq_receive))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.send))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.receive))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.write))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.read))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.atomic))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
- if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive))
+ if (MLX5_CAP_ODP_SCHEME(dev->mdev, xrc_odp_caps.srq_receive))
caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
@@ -388,13 +406,29 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
pfault->wqe.wq_num : pfault->token;
u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {};
+ void *info;
int err;
MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
- MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
- MLX5_SET(page_fault_resume_in, in, token, pfault->token);
- MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
- MLX5_SET(page_fault_resume_in, in, error, !!error);
+
+ if (pfault->event_subtype == MLX5_PFAULT_SUBTYPE_MEMORY) {
+ info = MLX5_ADDR_OF(page_fault_resume_in, in,
+ page_fault_info.mem_page_fault_info);
+ MLX5_SET(mem_page_fault_info, info, fault_token_31_0,
+ pfault->token & 0xffffffff);
+ MLX5_SET(mem_page_fault_info, info, fault_token_47_32,
+ (pfault->token >> 32) & 0xffff);
+ MLX5_SET(mem_page_fault_info, info, error, !!error);
+ } else {
+ info = MLX5_ADDR_OF(page_fault_resume_in, in,
+ page_fault_info.trans_page_fault_info);
+ MLX5_SET(trans_page_fault_info, info, page_fault_type,
+ pfault->type);
+ MLX5_SET(trans_page_fault_info, info, fault_token,
+ pfault->token);
+ MLX5_SET(trans_page_fault_info, info, wq_number, wq_num);
+ MLX5_SET(trans_page_fault_info, info, error, !!error);
+ }
err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in);
if (err)
@@ -468,6 +502,16 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
}
xa_unlock(&imr->implicit_children);
+ if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
+ if (xa_is_err(ret)) {
+ ret = ERR_PTR(xa_err(ret));
+ xa_erase(&imr->implicit_children, idx);
+ goto out_mr;
+ }
+ mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD;
+ }
mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr);
return mr;
@@ -478,6 +522,57 @@ out_mr:
return ret;
}
+/*
+ * When using memory scheme ODP, implicit MRs can't use the reserved null
+ * mkey; instead each implicit MR needs a private null mkey of its own to
+ * receive its page faults on.
+ * The null mkey is created with properties that trigger a page fault on
+ * every access and with all relevant access flags set.
+ */
+static int alloc_implicit_mr_null_mkey(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_mr *imr,
+ struct mlx5_ib_pd *pd)
+{
+ size_t inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + 64;
+ void *mkc;
+ u32 *in;
+ int err;
+
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 4);
+ MLX5_SET(create_mkey_in, in, pg_access, 1);
+
+ mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+ MLX5_SET(mkc, mkc, a, 1);
+ MLX5_SET(mkc, mkc, rw, 1);
+ MLX5_SET(mkc, mkc, rr, 1);
+ MLX5_SET(mkc, mkc, lw, 1);
+ MLX5_SET(mkc, mkc, lr, 1);
+ MLX5_SET(mkc, mkc, free, 0);
+ MLX5_SET(mkc, mkc, umr_en, 0);
+ MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
+
+ MLX5_SET(mkc, mkc, translations_octword_size, 4);
+ MLX5_SET(mkc, mkc, log_page_size, 61);
+ MLX5_SET(mkc, mkc, length64, 1);
+ MLX5_SET(mkc, mkc, pd, pd->pdn);
+ MLX5_SET64(mkc, mkc, start_addr, 0);
+ MLX5_SET(mkc, mkc, qpn, 0xffffff);
+
+ err = mlx5_core_create_mkey(dev->mdev, &imr->null_mmkey.key, in, inlen);
+ if (err)
+ goto free_in;
+
+ imr->null_mmkey.type = MLX5_MKEY_NULL;
+
+free_in:
+ kfree(in);
+ return err;
+}
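
The odd-looking constants are what let this mkey cover everything while staying tiny. A back-of-the-envelope check (the 8-byte MTT entry size is an inference, not stated in this patch):

	/*
	 * length64 = 1, start_addr = 0 -> the mkey spans the whole 2^64 space
	 * log_page_size = 61           -> 2^64 / 2^61 = 8 translation entries
	 * 8 entries * 8 bytes = 64 bytes = 4 octwords
	 *                              -> translations_octword_size = 4
	 */

Every access thus falls inside the mkey and, since nothing is ever mapped, raises a page fault attributable to the owning implicit MR.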
+
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags)
{
@@ -510,6 +605,16 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->is_odp_implicit = true;
xa_init(&imr->implicit_children);
+ if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
+ err = alloc_implicit_mr_null_mkey(dev, imr, pd);
+ if (err)
+ goto out_mr;
+
+ err = mlx5r_store_odp_mkey(dev, &imr->null_mmkey);
+ if (err)
+ goto out_mr;
+ }
+
err = mlx5r_umr_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
@@ -544,6 +649,14 @@ void mlx5_ib_free_odp_mr(struct mlx5_ib_mr *mr)
xa_erase(&mr->implicit_children, idx);
mlx5_ib_dereg_mr(&mtt->ibmr, NULL);
}
+
+ if (mr->null_mmkey.key) {
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->null_mmkey.key));
+
+ mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev,
+ mr->null_mmkey.key);
+ }
}
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
@@ -693,7 +806,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem);
u32 xlt_flags = 0;
int err;
- unsigned int page_size;
+ unsigned long page_size;
if (flags & MLX5_PF_FLAGS_ENABLE)
xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
@@ -705,14 +818,15 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
return err;
}
- page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc,
- log_page_size, 0,
- umem_dmabuf->umem.iova);
- if (unlikely(page_size < PAGE_SIZE)) {
+ page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf);
+ if (!page_size) {
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
- err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
+ if (mr->data_direct)
+ err = mlx5r_umr_update_data_direct_ksm_pas(mr, xlt_flags);
+ else
+ err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);
@@ -735,24 +849,31 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
* >0: Number of pages mapped
*/
static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
- u32 *bytes_mapped, u32 flags)
+ u32 *bytes_mapped, u32 flags, bool permissive_fault)
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- if (unlikely(io_virt < mr->ibmr.iova))
+ if (unlikely(io_virt < mr->ibmr.iova) && !permissive_fault)
return -EFAULT;
if (mr->umem->is_dmabuf)
return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags);
if (!odp->is_implicit_odp) {
+ u64 offset = io_virt < mr->ibmr.iova ? 0 : io_virt - mr->ibmr.iova;
u64 user_va;
- if (check_add_overflow(io_virt - mr->ibmr.iova,
- (u64)odp->umem.address, &user_va))
+ if (check_add_overflow(offset, (u64)odp->umem.address,
+ &user_va))
return -EFAULT;
- if (unlikely(user_va >= ib_umem_end(odp) ||
- ib_umem_end(odp) - user_va < bcnt))
+
+ if (permissive_fault) {
+ if (user_va < ib_umem_start(odp))
+ user_va = ib_umem_start(odp);
+ if ((user_va + bcnt) > ib_umem_end(odp))
+ bcnt = ib_umem_end(odp) - user_va;
+ } else if (unlikely(user_va >= ib_umem_end(odp) ||
+ ib_umem_end(odp) - user_va < bcnt))
return -EFAULT;
return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped,
flags);
@@ -799,6 +920,27 @@ static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key)
return mmkey->key == key;
}
+static struct mlx5_ib_mkey *find_odp_mkey(struct mlx5_ib_dev *dev, u32 key)
+{
+ struct mlx5_ib_mkey *mmkey;
+
+ xa_lock(&dev->odp_mkeys);
+ mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
+ if (!mmkey) {
+ mmkey = ERR_PTR(-ENOENT);
+ goto out;
+ }
+ if (!mkey_is_eq(mmkey, key)) {
+ mmkey = ERR_PTR(-EFAULT);
+ goto out;
+ }
+ refcount_inc(&mmkey->usecount);
+out:
+ xa_unlock(&dev->odp_mkeys);
+
+ return mmkey;
+}
+
/*
* Handle a single data segment in a page-fault WQE or RDMA region.
*
@@ -826,32 +968,24 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
io_virt += *bytes_committed;
bcnt -= *bytes_committed;
-
next_mr:
- xa_lock(&dev->odp_mkeys);
- mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
- if (!mmkey) {
- xa_unlock(&dev->odp_mkeys);
- mlx5_ib_dbg(
- dev,
- "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
- if (bytes_mapped)
- *bytes_mapped += bcnt;
- /*
- * The user could specify a SGL with multiple lkeys and only
- * some of them are ODP. Treat the non-ODP ones as fully
- * faulted.
- */
- ret = 0;
- goto end;
- }
- refcount_inc(&mmkey->usecount);
- xa_unlock(&dev->odp_mkeys);
-
- if (!mkey_is_eq(mmkey, key)) {
- mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
- ret = -EFAULT;
+ mmkey = find_odp_mkey(dev, key);
+ if (IS_ERR(mmkey)) {
+ ret = PTR_ERR(mmkey);
+ if (ret == -ENOENT) {
+ mlx5_ib_dbg(
+ dev,
+ "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
+ if (bytes_mapped)
+ *bytes_mapped += bcnt;
+ /*
+ * The user could specify a SGL with multiple lkeys and
+ * only some of them are ODP. Treat the non-ODP ones as
+ * fully faulted.
+ */
+ ret = 0;
+ }
goto end;
}
@@ -859,7 +993,7 @@ next_mr:
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
- ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
+ ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false);
if (ret < 0)
goto end;
@@ -946,7 +1080,7 @@ next_mr:
}
end:
- if (mmkey)
+ if (!IS_ERR(mmkey))
mlx5r_deref_odp_mkey(mmkey);
while (head) {
frame = head;
@@ -1268,7 +1402,7 @@ read_user:
if (ret)
mlx5_ib_err(
dev,
- "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %x\n",
+ "Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %llx\n",
ret, wqe_index, pfault->token);
resolve_page_fault:
@@ -1327,13 +1461,13 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
} else if (ret < 0 || pages_in_range(address, length) > ret) {
mlx5_ib_page_fault_resume(dev, pfault, 1);
if (ret != -ENOENT)
- mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
+ mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%llx, type: 0x%x\n",
ret, pfault->token, pfault->type);
return;
}
mlx5_ib_page_fault_resume(dev, pfault, 0);
- mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%llx, type: 0x%x, prefetch_activated: %d\n",
pfault->token, pfault->type,
prefetch_activated);
@@ -1349,12 +1483,80 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
prefetch_len,
&bytes_committed, NULL);
if (ret < 0 && ret != -EAGAIN) {
- mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
+ mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%llx, address: 0x%.16llx, length = 0x%.16x\n",
ret, pfault->token, address, prefetch_len);
}
}
}
+#define MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST BIT(7)
+static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
+{
+ u64 prefetch_va =
+ pfault->memory.va - pfault->memory.prefetch_before_byte_count;
+ size_t prefetch_size = pfault->memory.prefetch_before_byte_count +
+ pfault->memory.fault_byte_count +
+ pfault->memory.prefetch_after_byte_count;
+ struct mlx5_ib_mkey *mmkey;
+ struct mlx5_ib_mr *mr, *child_mr;
+ int ret = 0;
+
+ mmkey = find_odp_mkey(dev, pfault->memory.mkey);
+ if (IS_ERR(mmkey))
+ goto err;
+
+ switch (mmkey->type) {
+ case MLX5_MKEY_IMPLICIT_CHILD:
+ child_mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ mr = child_mr->parent;
+ break;
+ case MLX5_MKEY_NULL:
+ mr = container_of(mmkey, struct mlx5_ib_mr, null_mmkey);
+ break;
+ default:
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ break;
+ }
+
+	/* If prefetch fails, handle only the demanded page fault */
+ ret = pagefault_mr(mr, prefetch_va, prefetch_size, NULL, 0, true);
+ if (ret < 0) {
+ ret = pagefault_mr(mr, pfault->memory.va,
+ pfault->memory.fault_byte_count, NULL, 0,
+ true);
+ if (ret < 0)
+ goto err;
+ }
+
+ mlx5_update_odp_stats(mr, faults, ret);
+ mlx5r_deref_odp_mkey(mmkey);
+
+ if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST)
+ mlx5_ib_page_fault_resume(dev, pfault, 0);
+
+ mlx5_ib_dbg(
+ dev,
+ "PAGE FAULT completed %s. token 0x%llx, mkey: 0x%x, va: 0x%llx, byte_count: 0x%x\n",
+ pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST ?
+ "" :
+ "without resume cmd",
+ pfault->token, pfault->memory.mkey, pfault->memory.va,
+ pfault->memory.fault_byte_count);
+
+ return;
+
+err:
+ if (!IS_ERR(mmkey))
+ mlx5r_deref_odp_mkey(mmkey);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
+ mlx5_ib_dbg(
+ dev,
+ "PAGE FAULT error. token 0x%llx, mkey: 0x%x, va: 0x%llx, byte_count: 0x%x, err: %d\n",
+ pfault->token, pfault->memory.mkey, pfault->memory.va,
+ pfault->memory.fault_byte_count, ret);
+}
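
A worked example of the window handled above: with va = 0x10000, prefetch_before_byte_count = 0x2000, fault_byte_count = 0x1000 and prefetch_after_byte_count = 0x3000, the handler first attempts

	pagefault_mr(mr, 0xe000, 0x6000, NULL, 0, true);

and only if that fails retries the strict demand range

	pagefault_mr(mr, 0x10000, 0x1000, NULL, 0, true);

with permissive_fault set in both calls, so a window poking past the MR bounds is clamped rather than rejected with -EFAULT.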
+
static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
{
u8 event_subtype = pfault->event_subtype;
@@ -1366,6 +1568,9 @@ static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfaul
case MLX5_PFAULT_SUBTYPE_RDMA:
mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
break;
+ case MLX5_PFAULT_SUBTYPE_MEMORY:
+ mlx5_ib_mr_memory_pfault_handler(dev, pfault);
+ break;
default:
mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
event_subtype);
@@ -1384,6 +1589,7 @@ static void mlx5_ib_eqe_pf_action(struct work_struct *work)
mempool_free(pfault, eq->pool);
}
+#define MEMORY_SCHEME_PAGE_FAULT_GRANULARITY 4096
static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
{
struct mlx5_eqe_page_fault *pf_eqe;
@@ -1400,15 +1606,12 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
pf_eqe = &eqe->data.page_fault;
pfault->event_subtype = eqe->sub_type;
- pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
-
- mlx5_ib_dbg(eq->dev,
- "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
- eqe->sub_type, pfault->bytes_committed);
switch (eqe->sub_type) {
case MLX5_PFAULT_SUBTYPE_RDMA:
/* RDMA based event */
+ pfault->bytes_committed =
+ be32_to_cpu(pf_eqe->rdma.bytes_committed);
pfault->type =
be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
pfault->token =
@@ -1422,10 +1625,12 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
be32_to_cpu(pf_eqe->rdma.rdma_op_len);
pfault->rdma.rdma_va =
be64_to_cpu(pf_eqe->rdma.rdma_va);
- mlx5_ib_dbg(eq->dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
- pfault->type, pfault->token,
- pfault->rdma.r_key);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x, type:0x%x, token: 0x%06llx, r_key: 0x%08x\n",
+ eqe->sub_type, pfault->bytes_committed,
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
mlx5_ib_dbg(eq->dev,
"PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
pfault->rdma.rdma_op_len,
@@ -1434,6 +1639,8 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
case MLX5_PFAULT_SUBTYPE_WQE:
/* WQE based event */
+ pfault->bytes_committed =
+ be32_to_cpu(pf_eqe->wqe.bytes_committed);
pfault->type =
(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
pfault->token =
@@ -1445,11 +1652,47 @@ static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
be16_to_cpu(pf_eqe->wqe.wqe_index);
pfault->wqe.packet_size =
be16_to_cpu(pf_eqe->wqe.packet_length);
- mlx5_ib_dbg(eq->dev,
- "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
- pfault->type, pfault->token,
- pfault->wqe.wq_num,
- pfault->wqe.wqe_index);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x, type:0x%x, token: 0x%06llx, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ eqe->sub_type, pfault->bytes_committed,
+ pfault->type, pfault->token, pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_MEMORY:
+ /* Memory based event */
+ pfault->bytes_committed = 0;
+ pfault->token =
+ be32_to_cpu(pf_eqe->memory.token31_0) |
+ ((u64)be16_to_cpu(pf_eqe->memory.token47_32)
+ << 32);
+ pfault->memory.va = be64_to_cpu(pf_eqe->memory.va);
+ pfault->memory.mkey = be32_to_cpu(pf_eqe->memory.mkey);
+ pfault->memory.fault_byte_count = (be32_to_cpu(
+ pf_eqe->memory.demand_fault_pages) >> 12) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.prefetch_before_byte_count =
+ be16_to_cpu(
+ pf_eqe->memory.pre_demand_fault_pages) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.prefetch_after_byte_count =
+ be16_to_cpu(
+ pf_eqe->memory.post_demand_fault_pages) *
+ MEMORY_SCHEME_PAGE_FAULT_GRANULARITY;
+ pfault->memory.flags = pf_eqe->memory.flags;
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, token: 0x%06llx, mkey: 0x%06x, fault_byte_count: 0x%06x, va: 0x%016llx, flags: 0x%02x\n",
+ eqe->sub_type, pfault->token,
+ pfault->memory.mkey,
+ pfault->memory.fault_byte_count,
+ pfault->memory.va, pfault->memory.flags);
+ mlx5_ib_dbg(
+ eq->dev,
+ "PAGE_FAULT: prefetch size: before: 0x%06x, after 0x%06x\n",
+ pfault->memory.prefetch_before_byte_count,
+ pfault->memory.prefetch_after_byte_count);
break;
default:
@@ -1712,7 +1955,7 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
for (i = 0; i < work->num_sge; ++i) {
ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
work->frags[i].length, &bytes_mapped,
- work->pf_flags);
+ work->pf_flags, false);
if (ret <= 0)
continue;
mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
@@ -1763,7 +2006,7 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
if (IS_ERR(mr))
return PTR_ERR(mr);
ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
- &bytes_mapped, pf_flags);
+ &bytes_mapped, pf_flags, false);
if (ret < 0) {
mlx5r_deref_odp_mkey(&mr->mmkey);
return ret;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 78b96bfb4e6a..e39b1a101e97 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
- else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
- qp->bf.bfreg = &dev->wc_bfreg;
else
qp->bf.bfreg = &dev->bfreg;
@@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
return;
}
- if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
- /*
- * Special case, if condition didn't meet, it won't be error,
- * just different in-kernel flow.
- */
- *flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
- return;
- }
mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
}
@@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
IB_QP_CREATE_PCI_WRITE_END_PADDING,
MLX5_CAP_GEN(mdev, end_pad), qp);
- process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
- qp_type != MLX5_IB_QPT_REG_UMR, qp);
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
true, qp);
@@ -3097,7 +3085,6 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
switch (qp->type) {
case MLX5_IB_QPT_DCT:
err = create_dct(dev, pd, qp, params);
- rdma_restrack_no_track(&qp->ibqp.res);
break;
case MLX5_IB_QPT_DCI:
err = create_dci(dev, pd, qp, params);
@@ -3109,9 +3096,9 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = mlx5_ib_create_gsi(pd, qp, params->attr);
break;
case MLX5_IB_QPT_HW_GSI:
- case MLX5_IB_QPT_REG_UMR:
rdma_restrack_no_track(&qp->ibqp.res);
fallthrough;
+ case MLX5_IB_QPT_REG_UMR:
default:
if (params->udata)
err = create_user_qp(dev, pd, qp, params);
@@ -3247,6 +3234,10 @@ int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
enum ib_qp_type type;
int err;
+ err = mlx5_ib_dev_res_srq_init(dev);
+ if (err)
+ return err;
+
err = check_qp_type(dev, attr, &type);
if (err)
return err;
@@ -3436,7 +3427,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
if (rate == IB_RATE_PORT_CURRENT)
return 0;
- if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS)
return -EINVAL;
stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support);
@@ -4045,6 +4036,30 @@ static unsigned int get_tx_affinity(struct ib_qp *qp,
return tx_affinity;
}
+static int __mlx5_ib_qp_set_raw_qp_counter(struct mlx5_ib_qp *qp, u32 set_id,
+ struct mlx5_core_dev *mdev)
+{
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {};
+ void *rqc;
+
+ if (!qp->rq.wqe_cnt)
+ return 0;
+
+ MLX5_SET(modify_rq_in, in, rq_state, rq->state);
+ MLX5_SET(modify_rq_in, in, uid, to_mpd(qp->ibqp.pd)->uid);
+
+ rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
+ MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
+
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id, set_id);
+
+ return mlx5_core_modify_rq(mdev, rq->base.mqp.qpn, in);
+}
+
static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
struct rdma_counter *counter)
{
@@ -4060,6 +4075,9 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
else
set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1);
+ if (mqp->type == IB_QPT_RAW_PACKET)
+ return __mlx5_ib_qp_set_raw_qp_counter(mqp, set_id, dev->mdev);
+
base = &mqp->trans_qp.base;
MLX5_SET(rts2rts_qp_in, in, opcode, MLX5_CMD_OP_RTS2RTS_QP);
MLX5_SET(rts2rts_qp_in, in, qpn, base->mqp.qpn);
@@ -4199,7 +4217,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
/* todo implement counter_index functionality */
- if (is_sqp(qp->type))
+ if (dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI && is_qp0(qp->type)) {
+ MLX5_SET(ads, pri_path, vhca_port_num,
+ smi_to_native_portnum(dev, qp->port));
+ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)
+ MLX5_SET(ads, pri_path, plane_index, qp->port);
+ } else if (is_sqp(qp->type))
MLX5_SET(ads, pri_path, vhca_port_num, qp->port);
if (attr_mask & IB_QP_PORT)
@@ -4583,10 +4606,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
return true;
- /* Internal QP used for wc testing, with NOPs in wq */
- if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
- return true;
-
return false;
}
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index d9cf6982d645..d3dcc272200a 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -249,7 +249,8 @@ int mlx5_qpc_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
if (err)
goto err_cmd;
- mlx5_debug_qp_add(dev->mdev, qp);
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_debug_qp_add(dev->mdev, qp);
return 0;
@@ -307,7 +308,8 @@ int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
{
u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
- mlx5_debug_qp_remove(dev->mdev, qp);
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_debug_qp_remove(dev->mdev, qp);
destroy_resource_common(dev, qp);
@@ -504,7 +506,9 @@ int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
spin_lock_init(&table->lock);
INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
xa_init(&table->dct_xa);
- mlx5_qp_debugfs_init(dev->mdev);
+
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_qp_debugfs_init(dev->mdev);
table->nb.notifier_call = rsc_event_notifier;
mlx5_notifier_register(dev->mdev, &table->nb);
@@ -517,7 +521,8 @@ void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev)
struct mlx5_qp_table *table = &dev->qp_table;
mlx5_notifier_unregister(dev->mdev, &table->nb);
- mlx5_qp_debugfs_cleanup(dev->mdev);
+ if (dev->ib_dev.type != RDMA_DEVICE_TYPE_SMI)
+ mlx5_qp_debugfs_cleanup(dev->mdev);
}
int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index 4ac429e72004..affcf8fe943c 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -156,6 +156,34 @@ static int fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ibcq)
return fill_res_raw(msg, dev, MLX5_SGMT_TYPE_PRM_QUERY_CQ, cq->mcq.cqn);
}
+static int fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ibqp)
+{
+ struct mlx5_ib_qp *qp = to_mqp(ibqp);
+ int ret;
+
+ if (qp->type < IB_QPT_DRIVER)
+ return 0;
+
+ switch (qp->type) {
+ case MLX5_IB_QPT_REG_UMR:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE,
+ "REG_UMR");
+ break;
+ case MLX5_IB_QPT_DCT:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCT");
+ break;
+ case MLX5_IB_QPT_DCI:
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, "DCI");
+ break;
+ default:
+ return 0;
+ }
+ if (ret)
+ return ret;
+
+ return nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, IB_QPT_DRIVER);
+}
+
static int fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ibqp)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -168,6 +196,7 @@ static const struct ib_device_ops restrack_ops = {
.fill_res_cq_entry_raw = fill_res_cq_entry_raw,
.fill_res_mr_entry = fill_res_mr_entry,
.fill_res_mr_entry_raw = fill_res_mr_entry_raw,
+ .fill_res_qp_entry = fill_res_qp_entry,
.fill_res_qp_entry_raw = fill_res_qp_entry_raw,
.fill_stat_mr_entry = fill_stat_mr_entry,
};
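
The restrack hunk above reports driver-specific QP subtypes ("REG_UMR", "DCT", "DCI") over rdma netlink and downgrades the generic type to IB_QPT_DRIVER. A minimal sketch of that fill pattern, with an invented subtype argument, assuming the standard nla_put helpers:

#include <net/netlink.h>
#include <rdma/ib_verbs.h>
#include <uapi/rdma/rdma_netlink.h>

/* Sketch only: how a fill_res_qp_entry-style callback tags a driver QP. */
static int example_fill_qp_subtype(struct sk_buff *msg, const char *subtype)
{
	int ret;

	ret = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUBTYPE, subtype);
	if (ret)
		return ret;	/* message buffer is full */

	/* Generic type is reported as IB_QPT_DRIVER, as in the hunk above. */
	return nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, IB_QPT_DRIVER);
}
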
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index a056ea835da5..bcb6b324af50 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -199,20 +199,27 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
int err;
struct mlx5_srq_attr in = {};
__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+ __u32 max_sge_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq) /
+ sizeof(struct mlx5_wqe_data_seg);
if (init_attr->srq_type != IB_SRQT_BASIC &&
init_attr->srq_type != IB_SRQT_XRC &&
init_attr->srq_type != IB_SRQT_TM)
return -EOPNOTSUPP;
- /* Sanity check SRQ size before proceeding */
- if (init_attr->attr.max_wr >= max_srq_wqes) {
- mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
- init_attr->attr.max_wr,
- max_srq_wqes);
+	/* Sanity check SRQ and SGE size before proceeding */
+ if (init_attr->attr.max_wr >= max_srq_wqes ||
+ init_attr->attr.max_sge > max_sge_sz) {
+		mlx5_ib_dbg(dev, "max_wr %d, wr_cap %d, max_sge %d, sge_cap %d\n",
+ init_attr->attr.max_wr, max_srq_wqes,
+ init_attr->attr.max_sge, max_sge_sz);
return -EINVAL;
}
+ err = mlx5_ib_dev_res_cq_init(dev);
+ if (err)
+ return err;
+
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c
index bbfcce3bdc84..bdb568411091 100644
--- a/drivers/infiniband/hw/mlx5/std_types.c
+++ b/drivers/infiniband/hw/mlx5/std_types.c
@@ -10,6 +10,7 @@
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
+#include "data_direct.h"
#define UVERBS_MODULE_NAME mlx5_ib
#include <rdma/uverbs_named_ioctl.h>
@@ -111,6 +112,23 @@ out:
return err;
}
+static int fill_multiport_info(struct mlx5_ib_dev *dev, u32 port_num,
+ struct mlx5_ib_uapi_query_port *info)
+{
+ struct mlx5_core_dev *mdev;
+
+ mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL);
+ if (!mdev)
+ return -EINVAL;
+
+ info->vport_vhca_id = MLX5_CAP_GEN(mdev, vhca_id);
+ info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_VHCA_ID;
+
+ mlx5_ib_put_native_port_mdev(dev, port_num);
+
+ return 0;
+}
+
static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_uapi_query_port *info)
{
@@ -177,12 +195,60 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_QUERY_PORT)(
ret = fill_switchdev_info(dev, port_num, &info);
if (ret)
return ret;
+ } else if (mlx5_core_mp_enabled(dev->mdev)) {
+ ret = fill_multiport_info(dev, port_num, &info);
+ if (ret)
+ return ret;
}
return uverbs_copy_to_struct_or_zero(attrs, MLX5_IB_ATTR_QUERY_PORT, &info,
sizeof(info));
}
+static int UVERBS_HANDLER(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_data_direct_dev *data_direct_dev;
+ struct mlx5_ib_ucontext *c;
+ struct mlx5_ib_dev *dev;
+ int out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH);
+ u32 dev_path_len;
+ char *dev_path;
+ int ret;
+
+ c = to_mucontext(ib_uverbs_get_ucontext(attrs));
+ if (IS_ERR(c))
+ return PTR_ERR(c);
+ dev = to_mdev(c->ibucontext.device);
+ mutex_lock(&dev->data_direct_lock);
+ data_direct_dev = dev->data_direct_dev;
+ if (!data_direct_dev) {
+ ret = -ENODEV;
+ goto end;
+ }
+
+ dev_path = kobject_get_path(&data_direct_dev->device->kobj, GFP_KERNEL);
+ if (!dev_path) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ dev_path_len = strlen(dev_path) + 1;
+ if (dev_path_len > out_len) {
+ ret = -ENOSPC;
+ goto end;
+ }
+
+ ret = uverbs_copy_to(attrs, MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH, dev_path,
+ dev_path_len);
+ kfree(dev_path);
+
+end:
+ mutex_unlock(&dev->data_direct_lock);
+ return ret;
+}
+
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_QUERY_PORT,
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_QUERY_PORT_PORT_NUM,
@@ -193,9 +259,17 @@ DECLARE_UVERBS_NAMED_METHOD(
reg_c0),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH,
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_GET_DATA_DIRECT_SYSFS_PATH,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_MANDATORY));
+
ADD_UVERBS_METHODS(mlx5_ib_device,
UVERBS_OBJECT_DEVICE,
- &UVERBS_METHOD(MLX5_IB_METHOD_QUERY_PORT));
+ &UVERBS_METHOD(MLX5_IB_METHOD_QUERY_PORT),
+ &UVERBS_METHOD(MLX5_IB_METHOD_GET_DATA_DIRECT_SYSFS_PATH));
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_PD_QUERY,
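
The new GET_DATA_DIRECT_SYSFS_PATH handler above relies on kobject_get_path() returning a kmalloc'ed, NUL-terminated path (or NULL on allocation failure), which is why it sizes the copy as strlen() + 1 and frees the buffer afterwards. A minimal sketch of that contract, with an invented caller:

#include <linux/errno.h>
#include <linux/kobject.h>
#include <linux/printk.h>
#include <linux/slab.h>

static int example_report_path(struct kobject *kobj)
{
	/* Returns a kmalloc'ed path the caller owns, or NULL on OOM. */
	char *path = kobject_get_path(kobj, GFP_KERNEL);

	if (!path)
		return -ENOMEM;
	pr_info("object lives at %s\n", path);
	kfree(path);
	return 0;
}
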
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 234bf30db731..887fd6fa3ba9 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -135,22 +135,28 @@ static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr init_attr = {};
- struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- int ret;
+ int ret = 0;
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- return PTR_ERR(pd);
- }
+
+ /*
+ * UMR qp is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (dev->umrc.qp)
+ return 0;
+
+ mutex_lock(&dev->umrc.init_lock);
+ /* First user allocates the UMR resources. Skip if already allocated. */
+ if (dev->umrc.qp)
+ goto unlock;
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
- goto destroy_pd;
+ goto unlock;
}
init_attr.send_cq = cq;
@@ -160,7 +166,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
init_attr.cap.max_send_sge = 1;
init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
init_attr.port_num = 1;
- qp = ib_create_qp(pd, &init_attr);
+ qp = ib_create_qp(dev->umrc.pd, &init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
@@ -171,22 +177,22 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
if (ret)
goto destroy_qp;
- dev->umrc.qp = qp;
dev->umrc.cq = cq;
- dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
mutex_init(&dev->umrc.lock);
dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
+ dev->umrc.qp = qp;
+ mutex_unlock(&dev->umrc.init_lock);
return 0;
destroy_qp:
ib_destroy_qp(qp);
destroy_cq:
ib_free_cq(cq);
-destroy_pd:
- ib_dealloc_pd(pd);
+unlock:
+ mutex_unlock(&dev->umrc.init_lock);
return ret;
}
@@ -194,8 +200,34 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
return;
+ mutex_destroy(&dev->umrc.lock);
+	/* After device init, the UMR CQ/QP are not unset during the device's lifetime. */
ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
+}
+
+int mlx5r_umr_init(struct mlx5_ib_dev *dev)
+{
+ struct ib_pd *pd;
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ return PTR_ERR(pd);
+ }
+ dev->umrc.pd = pd;
+
+ mutex_init(&dev->umrc.init_lock);
+
+ return 0;
+}
+
+void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!dev->umrc.pd)
+ return;
+
+ mutex_destroy(&dev->umrc.init_lock);
ib_dealloc_pd(dev->umrc.pd);
}
@@ -332,8 +364,8 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
WARN_ON_ONCE(1);
mlx5_ib_warn(dev,
- "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs\n",
- umr_context.status);
+ "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n",
+ umr_context.status, mkey);
mutex_lock(&umrc->lock);
err = mlx5r_umr_recover(dev);
mutex_unlock(&umrc->lock);
@@ -603,44 +635,47 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
-/*
- * Send the DMA list to the HW for a normal MR using UMR.
- * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
- * flag may be used.
- */
-int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+static int
+_mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags, bool dd)
{
+ size_t ent_size = dd ? sizeof(struct mlx5_ksm) : sizeof(struct mlx5_mtt);
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
struct ib_block_iter biter;
+ struct mlx5_ksm *cur_ksm;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
- struct mlx5_mtt *mtt;
size_t final_size;
+ void *curr_entry;
struct ib_sge sg;
+ void *entry;
u64 offset = 0;
int err = 0;
- if (WARN_ON(mr->umem->is_odp))
- return -EINVAL;
-
- mtt = mlx5r_umr_create_xlt(
- dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
- sizeof(*mtt), flags);
- if (!mtt)
+ entry = mlx5r_umr_create_xlt(dev, &sg,
+ ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
+ ent_size, flags);
+ if (!entry)
return -ENOMEM;
orig_sg_length = sg.length;
-
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
+ if (dd) {
+ /* Use the data direct internal kernel PD */
+ MLX5_SET(mkc, &wqe.mkey_seg, pd, dev->ddr.pdn);
+ cur_ksm = entry;
+ } else {
+ cur_mtt = entry;
+ }
+
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
- cur_mtt = mtt;
+ curr_entry = entry;
rdma_umem_for_each_dma_block(mr->umem, &biter, BIT(mr->page_shift)) {
- if (cur_mtt == (void *)mtt + sg.length) {
+ if (curr_entry == entry + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
@@ -652,23 +687,31 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
DMA_TO_DEVICE);
offset += sg.length;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
-
- cur_mtt = mtt;
+ if (dd)
+ cur_ksm = entry;
+ else
+ cur_mtt = entry;
}
- cur_mtt->ptag =
- cpu_to_be64(rdma_block_iter_dma_address(&biter) |
- MLX5_IB_MTT_PRESENT);
-
- if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
- cur_mtt->ptag = 0;
-
- cur_mtt++;
+ if (dd) {
+ cur_ksm->va = cpu_to_be64(rdma_block_iter_dma_address(&biter));
+ cur_ksm->key = cpu_to_be32(dev->ddr.mkey);
+ cur_ksm++;
+ curr_entry = cur_ksm;
+ } else {
+ cur_mtt->ptag =
+ cpu_to_be64(rdma_block_iter_dma_address(&biter) |
+ MLX5_IB_MTT_PRESENT);
+ if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
+ cur_mtt->ptag = 0;
+ cur_mtt++;
+ curr_entry = cur_mtt;
+ }
}
- final_size = (void *)cur_mtt - (void *)mtt;
+ final_size = curr_entry - entry;
sg.length = ALIGN(final_size, MLX5_UMR_FLEX_ALIGNMENT);
- memset(cur_mtt, 0, sg.length - final_size);
+ memset(curr_entry, 0, sg.length - final_size);
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
@@ -676,10 +719,32 @@ int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
err:
sg.length = orig_sg_length;
- mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
+ mlx5r_umr_unmap_free_xlt(dev, entry, &sg);
return err;
}
+int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ /* No invalidation flow is expected */
+ if (WARN_ON(!mr->umem->is_dmabuf) || (flags & MLX5_IB_UPD_XLT_ZAP))
+ return -EINVAL;
+
+ return _mlx5r_umr_update_mr_pas(mr, flags, true);
+}
+
+/*
+ * Send the DMA list to the HW for a normal MR using UMR.
+ * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
+ * flag may be used.
+ */
+int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
+{
+ if (WARN_ON(mr->umem->is_odp))
+ return -EINVAL;
+
+ return _mlx5r_umr_update_mr_pas(mr, flags, false);
+}
+
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
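
The umr.c hunks above defer CQ/QP creation to the first user, with dev->umrc.qp doing double duty as the "already initialized" flag: it is tested locklessly, rechecked under umrc.init_lock, and stored only after setup completes. A generic sketch of that double-checked pattern, with invented names and explicit acquire/release ordering (the driver open-codes it, relying on the QP pointer being published last):

#include <linux/atomic.h>
#include <linux/mutex.h>

struct lazy_res {
	struct mutex init_lock;
	void *obj;			/* non-NULL only once fully set up */
};

static void *lazy_get(struct lazy_res *r, void *(*make)(void))
{
	void *obj = smp_load_acquire(&r->obj);	/* lock-free fast path */

	if (obj)
		return obj;

	mutex_lock(&r->init_lock);
	if (!r->obj)				/* recheck under the lock */
		smp_store_release(&r->obj, make());
	obj = r->obj;
	mutex_unlock(&r->init_lock);
	return obj;
}
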
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 3799bb758e49..4a02c9b5aad8 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -16,6 +16,9 @@
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
+int mlx5r_umr_init(struct mlx5_ib_dev *dev);
+void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev);
+
static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
size_t length)
{
@@ -92,6 +95,7 @@ int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
+int mlx5r_umr_update_data_direct_ksm_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c
index df1d1b0a3ef7..9947feb7fb8a 100644
--- a/drivers/infiniband/hw/mlx5/wr.c
+++ b/drivers/infiniband/hw/mlx5/wr.c
@@ -78,7 +78,7 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
*/
copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
left);
- memcpy(eseg->inline_hdr.start, pdata, copysz);
+ memcpy(eseg->inline_hdr.data, pdata, copysz);
stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
sizeof(eseg->inline_hdr.start) + copysz, 16);
*size += stride / 16;
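
The wr.c change above redirects the memcpy from inline_hdr.start to inline_hdr.data: with CONFIG_FORTIFY_SOURCE, copying more than two bytes through the fixed start[2] member trips the fortified memcpy(), while a flexible-array alias carries no compile-time bound. A sketch of the union-layout idiom, assuming the DECLARE_FLEX_ARRAY helper from <linux/stddef.h>:

#include <linux/stddef.h>
#include <linux/types.h>

struct example_inline_hdr {
	__be16 sz;
	union {
		u8 start[2];			/* fixed-size legacy view */
		DECLARE_FLEX_ARRAY(u8, data);	/* open-ended view for memcpy */
	};
};
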
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index f330ce895d88..8fe0cef7e2be 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -635,7 +635,7 @@ void mthca_free_mailbox(struct mthca_dev *dev, struct mthca_mailbox *mailbox)
int mthca_SYS_EN(struct mthca_dev *dev)
{
- u64 out;
+ u64 out = 0;
int ret;
ret = mthca_cmd_imm(dev, 0, &out, 0, 0, CMD_SYS_EN, CMD_TIME_CLASS_D);
@@ -1955,7 +1955,7 @@ int mthca_WRITE_MGM(struct mthca_dev *dev, int index,
int mthca_MGID_HASH(struct mthca_dev *dev, struct mthca_mailbox *mailbox,
u16 *hash)
{
- u64 imm;
+ u64 imm = 0;
int err;
err = mthca_cmd_imm(dev, mailbox->dma, &imm, 0, 0, CMD_MGID_HASH,
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index b54bc8865dae..1ab268b77096 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -382,7 +382,7 @@ static int mthca_init_icm(struct mthca_dev *mdev,
struct mthca_init_hca_param *init_hca,
u64 icm_size)
{
- u64 aux_pages;
+ u64 aux_pages = 0;
int err;
err = mthca_SET_ICM_SIZE(mdev, icm_size, &aux_pages);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.h b/drivers/infiniband/hw/mthca/mthca_memfree.h
index f9a2e65e2ff5..61d5bbba293a 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.h
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.h
@@ -68,7 +68,7 @@ struct mthca_icm_table {
int lowmem;
int coherent;
struct mutex mutex;
- struct mthca_icm *icm[];
+ struct mthca_icm *icm[] __counted_by(num_icm);
};
struct mthca_icm_iter {
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index c46df53f26cf..6a1e2e79ddc3 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -53,8 +53,8 @@
static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
struct mthca_dev *mdev = to_mdev(ibdev);
@@ -121,8 +121,8 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
static int mthca_query_port(struct ib_device *ibdev,
u32 port, struct ib_port_attr *props)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -217,8 +217,8 @@ out:
static int mthca_query_pkey(struct ib_device *ibdev,
u32 port, u16 index, u16 *pkey)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -246,8 +246,8 @@ static int mthca_query_pkey(struct ib_device *ibdev,
static int mthca_query_gid(struct ib_device *ibdev, u32 port,
int index, union ib_gid *gid)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -574,8 +574,9 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata)
static int mthca_create_cq(struct ib_cq *ibcq,
const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct mthca_create_cq ucmd;
@@ -989,8 +990,8 @@ static const struct attribute_group mthca_attr_group = {
static int mthca_init_node_data(struct mthca_dev *dev)
{
- struct ib_smp *in_mad = NULL;
- struct ib_smp *out_mad = NULL;
+ struct ib_smp *in_mad;
+ struct ib_smp *out_mad;
int err = -ENOMEM;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
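
Several drivers in this section (mthca, ocrdma, qedr, usnic, pvrdma) switch .create_cq from taking struct ib_udata to struct uverbs_attr_bundle; the old udata view is recovered from the bundle in one line. A minimal sketch of the converted callback, with a hypothetical example_init_cq standing in for the driver's existing flow:

#include <rdma/ib_verbs.h>
#include <rdma/uverbs_ioctl.h>

static int example_init_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
{
	return 0;	/* stand-in for the driver's existing udata flow */
}

static int example_create_cq(struct ib_cq *ibcq,
			     const struct ib_cq_init_attr *attr,
			     struct uverbs_attr_bundle *attrs)
{
	struct ib_udata *udata = &attrs->driver_udata;

	if (attr->flags)
		return -EOPNOTSUPP;

	return example_init_cq(ibcq, attr->cqe, udata);
}
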
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 5f831e3bdbad..0834416cb3f8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -46,7 +46,7 @@
static struct dentry *ocrdma_dbgfs_dir;
-static int ocrdma_add_stat(char *start, char *pcur,
+static noinline_for_stack int ocrdma_add_stat(char *start, char *pcur,
char *name, u64 count)
{
char buff[128] = {0};
@@ -99,7 +99,7 @@ void ocrdma_release_stats_resources(struct ocrdma_dev *dev)
kfree(mem->debugfs_mem);
}
-static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_resource_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -216,7 +216,7 @@ static char *ocrdma_resource_stats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_rx_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_rx_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -284,7 +284,7 @@ static u64 ocrdma_sysfs_rcv_data(struct ocrdma_dev *dev)
rx_stats->roce_frame_bytes_hi))/4;
}
-static char *ocrdma_tx_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_tx_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -358,7 +358,7 @@ static u64 ocrdma_sysfs_xmit_data(struct ocrdma_dev *dev)
tx_stats->read_rsp_bytes_hi))/4;
}
-static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -391,7 +391,7 @@ static char *ocrdma_wqe_stats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_db_errstats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -412,7 +412,7 @@ static char *ocrdma_db_errstats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -438,7 +438,7 @@ static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
struct ocrdma_rdma_stats_resp *rdma_stats =
@@ -462,7 +462,7 @@ static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev)
return stats;
}
-static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
{
int i;
char *pstats = dev->stats_mem.debugfs_mem;
@@ -480,7 +480,7 @@ static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev)
return dev->stats_mem.debugfs_mem;
}
-static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
{
int i;
char *pstats = dev->stats_mem.debugfs_mem;
@@ -498,7 +498,7 @@ static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev)
return dev->stats_mem.debugfs_mem;
}
-static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
+static noinline_for_stack char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
{
char *stats = dev->stats_mem.debugfs_mem, *pcur;
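
Each ocrdma stats helper above keeps a sizable formatting buffer on the stack; noinline_for_stack stops the compiler from inlining them all into one oversized frame in the common caller. A minimal sketch of the idea, with invented names:

#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/types.h>

/* Out of line, so the 128-byte buffer lives only for this call. */
static noinline_for_stack int example_add_stat(char *pcur, const char *name,
					       u64 count)
{
	char buff[128] = {0};

	snprintf(buff, sizeof(buff), "%s: %llu\n", name, count);
	return sprintf(pcur, "%s", buff);
}
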
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 58f994341e9a..979de8f8df14 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -963,8 +963,9 @@ err:
}
int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
@@ -1277,7 +1278,7 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
qp->sq.max_sges = attrs->cap.max_send_sge;
qp->rq.max_sges = attrs->cap.max_recv_sge;
qp->state = OCRDMA_QPS_RST;
- qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
+ qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
}
static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index f860b7fcef33..0644346d8d98 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -70,7 +70,7 @@ int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index a51fc6854984..259303b9907c 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -447,7 +447,8 @@ qedr_addr4_resolve(struct qedr_dev *dev,
struct rtable *rt = NULL;
int rc = 0;
- rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0);
+ rt = ip_route_output(&init_net, dst_ip, src_ip, 0, 0,
+ RT_SCOPE_UNIVERSE);
if (IS_ERR(rt)) {
DP_ERR(dev, "ip_route_output returned error\n");
return -EINVAL;
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 05307c1488b8..859f66a51bd2 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -354,7 +354,6 @@ int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs,
/* the GSI CQ is handled by the driver so remove it from the FW */
qedr_destroy_gsi_cq(dev, attrs);
dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI;
- dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI;
DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index d745ce9dc88a..568a5b18803f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -900,8 +900,9 @@ int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
}
int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
struct qedr_ucontext *ctx = rdma_udata_to_drv_context(
udata, struct qedr_ucontext, ibucontext);
@@ -1358,7 +1359,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
qp->prev_wqe_size = 0;
- qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
+ qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
qp->dev = dev;
if (qedr_qp_has_sq(qp)) {
qedr_reset_qp_hwq_info(&qp->sq);
@@ -1879,8 +1880,17 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
/* RQ - read access only (0) */
rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr,
ureq.rq_len, true, 0, alloc_and_init);
- if (rc)
+ if (rc) {
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
+ if (rdma_protocol_roce(&dev->ibdev, 1)) {
+ qedr_free_pbl(dev, &qp->usq.pbl_info,
+ qp->usq.pbl_tbl);
+ } else {
+ kfree(qp->usq.pbl_tbl);
+ }
return rc;
+ }
}
memset(&in_params, 0, sizeof(in_params));
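
The qedr fix above plugs a leak: when RQ setup fails, the SQ umem and PBL pinned just before it must be released on the way out. A generic sketch of that unwind ordering, with invented names:

struct example_qp { int dummy; };

static int example_init_sq(struct example_qp *qp) { return 0; }
static int example_init_rq(struct example_qp *qp) { return 0; }
static void example_release_sq(struct example_qp *qp) { }

static int example_setup_queues(struct example_qp *qp)
{
	int rc;

	rc = example_init_sq(qp);
	if (rc)
		return rc;

	rc = example_init_rq(qp);
	if (rc) {
		/* Undo everything the SQ path acquired before bailing out. */
		example_release_sq(qp);
		return rc;
	}

	return 0;
}
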
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 081753df79ef..5731458abb06 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -52,7 +52,7 @@ int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata);
int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata);
int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int qedr_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *attrs,
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 26c615772be3..8ee4edd7883c 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1359,7 +1359,6 @@ static inline u32 qib_get_rcvhdrtail(const struct qib_ctxtdata *rcd)
* sysfs interface.
*/
-extern const char ib_qib_version[];
extern const struct attribute_group qib_attr_group;
extern const struct attribute_group *qib_attr_port_groups[];
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index bf3fa12fe935..4fcbef99e400 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -44,12 +44,6 @@
#include "qib.h"
-/*
- * The size has to be longer than this string, so we can append
- * board/chip information to it in the init code.
- */
-const char ib_qib_version[] = QIB_DRIVER_VERSION "\n";
-
DEFINE_MUTEX(qib_mutex); /* general driver use */
unsigned qib_ibmtu;
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index ef85bc8d9384..29e4c59aa23b 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2244,13 +2244,15 @@ static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct qib_ctxtdata *rcd = ctxt_fp(iocb->ki_filp);
struct qib_user_sdma_queue *pq = fp->pq;
- if (!from->user_backed || !from->nr_segs || !pq)
+ if (!user_backed_iter(from) || !from->nr_segs || !pq)
return -EINVAL;
return qib_user_sdma_writev(rcd, pq, iter_iov(from), from->nr_segs);
}
-static struct class *qib_class;
+static const struct class qib_class = {
+ .name = "ipath",
+};
static dev_t qib_dev;
int qib_cdev_init(int minor, const char *name,
@@ -2281,7 +2283,7 @@ int qib_cdev_init(int minor, const char *name,
goto err_cdev;
}
- device = device_create(qib_class, NULL, dev, NULL, "%s", name);
+ device = device_create(&qib_class, NULL, dev, NULL, "%s", name);
if (!IS_ERR(device))
goto done;
ret = PTR_ERR(device);
@@ -2325,9 +2327,8 @@ int __init qib_dev_init(void)
goto done;
}
- qib_class = class_create("ipath");
- if (IS_ERR(qib_class)) {
- ret = PTR_ERR(qib_class);
+ ret = class_register(&qib_class);
+ if (ret) {
pr_err("Could not create device class (err %d)\n", -ret);
unregister_chrdev_region(qib_dev, QIB_NMINORS);
}
@@ -2338,10 +2339,8 @@ done:
void qib_dev_cleanup(void)
{
- if (qib_class) {
- class_destroy(qib_class);
- qib_class = NULL;
- }
+ if (class_is_registered(&qib_class))
+ class_unregister(&qib_class);
unregister_chrdev_region(qib_dev, QIB_NMINORS);
}
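
The qib_file_ops.c hunks above replace the heap-allocated class_create() handle with a statically defined struct class plus class_register()/class_unregister(), the pattern the driver core now prefers. A minimal sketch, assuming the constified class API:

#include <linux/device/class.h>
#include <linux/init.h>

static const struct class example_class = {
	.name = "example",
};

static int __init example_init(void)
{
	return class_register(&example_class);
}

static void __exit example_exit(void)
{
	if (class_is_registered(&example_class))
		class_unregister(&example_class);
}
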
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index a973905afd13..b27791029fa9 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -64,9 +64,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_blocks = 0;
- inode->i_atime = current_time(inode);
- inode->i_mtime = inode->i_atime;
- inode->i_ctime = inode->i_atime;
+ simple_inode_init_ts(inode);
+
inode->i_private = data;
if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
@@ -440,6 +439,7 @@ static int remove_device_files(struct super_block *sb,
return PTR_ERR(dir);
}
simple_recursive_removal(dir, NULL);
+ dput(dir);
return 0;
}
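
The qib_fs.c hunk above collapses three timestamp assignments into simple_inode_init_ts(), which stamps atime, mtime and ctime with the current time in one call. A tiny sketch:

#include <linux/fs.h>

static void example_fill_inode(struct inode *inode, void *data)
{
	/* Sets atime, mtime and ctime to the current time at once. */
	simple_inode_init_ts(inode);
	inode->i_private = data;
}
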
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 6af57067c32e..78dfe98ebcf7 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -3281,7 +3281,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd)
qib_free_irq(dd);
dd->msi_lo = 0;
- if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_LEGACY) < 0)
+ if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_INTX) < 0)
qib_dev_err(dd, "Failed to enable INTx\n");
qib_setup_7220_interrupt(dd);
return 1;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 9d2dd135b784..9db29916e35a 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -3471,8 +3471,7 @@ try_intx:
pci_irq_vector(dd->pcidev, msixnum),
ret);
qib_7322_free_irq(dd);
- pci_alloc_irq_vectors(dd->pcidev, 1, 1,
- PCI_IRQ_LEGACY);
+ pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_INTX);
goto try_intx;
}
dd->cspec->msix_entries[msixnum].arg = arg;
@@ -5143,7 +5142,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd)
qib_devinfo(dd->pcidev,
"MSIx interrupt not detected, trying INTx interrupts\n");
qib_7322_free_irq(dd);
- if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_LEGACY) < 0)
+ if (pci_alloc_irq_vectors(dd->pcidev, 1, 1, PCI_IRQ_INTX) < 0)
qib_dev_err(dd, "Failed to enable INTx\n");
qib_setup_7322_interrupt(dd, 0);
return 1;
@@ -6127,7 +6126,7 @@ static int setup_txselect(const char *str, const struct kernel_param *kp)
TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
return -EINVAL;
}
- strncpy(txselect_list, str, ARRAY_SIZE(txselect_list) - 1);
+ strscpy(txselect_list, str, sizeof(txselect_list));
xa_for_each(&qib_dev_table, index, dd)
if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
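
The setup_txselect() hunk above swaps strncpy() for strscpy(), which always NUL-terminates the destination and reports truncation, where strncpy() could leave the buffer unterminated. A minimal sketch:

#include <linux/printk.h>
#include <linux/string.h>

static char txselect_buf[64];

static void example_set(const char *str)
{
	/* strscpy() NUL-terminates and returns -E2BIG on truncation. */
	if (strscpy(txselect_buf, str, sizeof(txselect_buf)) < 0)
		pr_warn("example: input truncated\n");
}
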
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 33667becd52b..4100656fe9a3 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -581,12 +581,9 @@ static int qib_create_workqueues(struct qib_devdata *dd)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (!ppd->qib_wq) {
- char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */
-
- snprintf(wq_name, sizeof(wq_name), "qib%d_%d",
- dd->unit, pidx);
- ppd->qib_wq = alloc_ordered_workqueue(wq_name,
- WQ_MEM_RECLAIM);
+ ppd->qib_wq = alloc_ordered_workqueue("qib%d_%d",
+ WQ_MEM_RECLAIM,
+ dd->unit, pidx);
if (!ppd->qib_wq)
goto wq_error;
}
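
alloc_ordered_workqueue() accepts a printf-style format, so the hand-built name buffer the old qib code used is unnecessary, as the hunk above shows. A one-function sketch:

#include <linux/workqueue.h>

static struct workqueue_struct *example_make_wq(int unit, int port)
{
	/* The name is formatted in place; no temporary buffer needed. */
	return alloc_ordered_workqueue("qib%d_%d", WQ_MEM_RECLAIM,
				       unit, port);
}
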
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 47bf64ace05c..58c1d62d341b 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -210,7 +210,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent)
}
if (dd->flags & QIB_HAS_INTX)
- flags |= PCI_IRQ_LEGACY;
+ flags |= PCI_IRQ_INTX;
maxvec = (nent && *nent) ? *nent : 1;
nvec = pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags);
if (nvec < 0)
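
PCI_IRQ_LEGACY was renamed PCI_IRQ_INTX, which is all the qib and pvrdma interrupt hunks in this section change. A minimal allocation sketch with the usual MSI-X-first preference:

#include <linux/pci.h>

static int example_setup_irqs(struct pci_dev *pdev, int want)
{
	int nvec;

	/* Prefer MSI-X, then MSI, then INTx (formerly PCI_IRQ_LEGACY). */
	nvec = pci_alloc_irq_vectors(pdev, 1, want,
				     PCI_IRQ_MSIX | PCI_IRQ_MSI |
				     PCI_IRQ_INTX);
	return nvec < 0 ? nvec : 0;
}
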
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 41c272980f91..53ec7510e4eb 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -585,13 +585,7 @@ static ssize_t hca_type_show(struct device *device,
static DEVICE_ATTR_RO(hca_type);
static DEVICE_ATTR(board_id, 0444, hca_type_show, NULL);
-static ssize_t version_show(struct device *device,
- struct device_attribute *attr, char *buf)
-{
- /* The string printed here is already newline-terminated. */
- return sysfs_emit(buf, "%s", (char *)ib_qib_version);
-}
-static DEVICE_ATTR_RO(version);
+static DEVICE_STRING_ATTR_RO(version, 0444, QIB_DRIVER_VERSION);
static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
@@ -721,7 +715,7 @@ static struct attribute *qib_attributes[] = {
&dev_attr_hw_rev.attr,
&dev_attr_hca_type.attr,
&dev_attr_board_id.attr,
- &dev_attr_version.attr,
+ &dev_attr_version.attr.attr,
&dev_attr_nctxts.attr,
&dev_attr_nfreectxts.attr,
&dev_attr_serial.attr,
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 0080f0be72fe..5fcb41970ad9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1551,7 +1551,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->dev.parent = &dd->pcidev->dev;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
- "Intel Infiniband HCA %s", init_utsname()->nodename);
+ "Intel Infiniband HCA %.42s", init_utsname()->nodename);
/*
* Fill in rvt info object.
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 07548fac1d8e..408fe1ba74b9 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -303,8 +303,6 @@ int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
-void qib_rc_rnr_retry(unsigned long arg);
-
void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr);
@@ -312,8 +310,6 @@ int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr);
void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
-void mr_rcu_callback(struct rcu_head *list);
-
void qib_migrate_qp(struct rvt_qp *qp);
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 6289238cc5af..217af34e82b3 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -577,7 +577,7 @@ out_unlock:
}
int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
if (attr->flags)
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 6ca9ee0dddbe..53f53f2d53be 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -56,7 +56,7 @@ int usnic_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata);
int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 84e0f41e7dfa..f948b76f984d 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -443,11 +443,11 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev)
if (!pd)
return ERR_PTR(-ENOMEM);
- pd->domain = domain = iommu_domain_alloc(dev->bus);
- if (!domain) {
+ pd->domain = domain = iommu_paging_domain_alloc(dev);
+ if (IS_ERR(domain)) {
usnic_err("Failed to allocate IOMMU domain");
kfree(pd);
- return ERR_PTR(-ENOMEM);
+ return ERR_CAST(domain);
}
iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 5a9acf941510..70d51d919d12 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -77,7 +77,7 @@ struct usnic_uiom_reg {
struct usnic_uiom_chunk {
struct list_head list;
int nents;
- struct scatterlist page_list[];
+ struct scatterlist page_list[] __counted_by(nents);
};
struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev);
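
The __counted_by(nents) annotations added above (here and in mthca_memfree.h) let the compiler and fortified routines bounds-check the flexible array against the recorded element count. A sketch of the allocation idiom that goes with it, assuming struct_size() from <linux/overflow.h>:

#include <linux/overflow.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

struct example_chunk {
	int nents;
	struct scatterlist page_list[] __counted_by(nents);
};

static struct example_chunk *example_chunk_alloc(int nents)
{
	struct example_chunk *c;

	c = kzalloc(struct_size(c, page_list, nents), GFP_KERNEL);
	if (c)
		c->nents = nents;	/* set the counter before any indexing */
	return c;
}
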
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index 6aa40bd2fd52..b3df6eb9b8ef 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -94,13 +94,14 @@ int pvrdma_req_notify_cq(struct ib_cq *ibcq,
* pvrdma_create_cq - create completion queue
* @ibcq: Allocated CQ
* @attr: completion queue attributes
- * @udata: user data
+ * @attrs: uverbs attribute bundle
*
* @return: 0 on success
*/
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata)
+ struct uverbs_attr_bundle *attrs)
{
+ struct ib_udata *udata = &attrs->driver_udata;
struct ib_device *ibdev = ibcq->device;
int entries = attr->cqe;
struct pvrdma_dev *dev = to_vdev(ibdev);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 343288b02792..768aad364c89 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -531,7 +531,7 @@ static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
PCI_IRQ_MSIX);
if (ret < 0) {
ret = pci_alloc_irq_vectors(pdev, 1, 1,
- PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+ PCI_IRQ_MSI | PCI_IRQ_INTX);
if (ret < 0)
return ret;
}
@@ -1021,10 +1021,8 @@ err_free_intrs:
pvrdma_free_irq(dev);
pci_free_irq_vectors(pdev);
err_free_cq_ring:
- if (dev->netdev) {
- dev_put(dev->netdev);
- dev->netdev = NULL;
- }
+ dev_put(dev->netdev);
+ dev->netdev = NULL;
pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
@@ -1064,10 +1062,8 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
flush_workqueue(event_wq);
- if (dev->netdev) {
- dev_put(dev->netdev);
- dev->netdev = NULL;
- }
+ dev_put(dev->netdev);
+ dev->netdev = NULL;
/* Unregister ib device */
ib_unregister_device(&dev->ib_dev);
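
The pvrdma hunks above drop the NULL checks because dev_put(NULL) is a no-op, so the guard was redundant. A two-line sketch of the simplified teardown:

#include <linux/netdevice.h>

static void example_drop_netdev(struct net_device **ndev)
{
	dev_put(*ndev);		/* dev_put(NULL) is safe */
	*ndev = NULL;
}
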
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 78807b23d831..4b9edc03d73d 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -375,7 +375,7 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
int sg_nents, unsigned int *sg_offset);
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
- struct ib_udata *udata);
+ struct uverbs_attr_bundle *attrs);
int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);