From 19d6214ad6dfffda1a5bdc2b34ea75ba45a1a60a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 29 Jul 2022 13:33:38 -0300 Subject: IB/mlx5: Call io_stop_wc() after writing to WC MMIO This new function is defined only on ARM and serves to guarantee a barrier in the WC operation. The barrier means that another run of this loop will not combine with the stores this loop created. On x86 this is happening implicitly because of the spin_unlock(). Link: https://lore.kernel.org/r/0-v1-c5dade92f363+11-mlx5_io_stop_wc_jgg@nvidia.com Suggested-by: Pavel Shamis Signed-off-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 6191aa833ac2..6b29e9ca323e 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -152,6 +152,7 @@ static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, for (i = 0; i < 8; i++) mlx5_write64(&mmio_wqe[i * 2], bf->bfreg->map + bf->offset + i * 8); + io_stop_wc(); bf->offset ^= bf->buf_size; -- cgit v1.2.3 From 13ad1125b941a5f257d9d3ae70485773abd34792 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Sun, 31 Jul 2022 11:26:36 +0300 Subject: RDMA/mlx5: Don't compare mkey tags in DEVX indirect mkey According to the ib spec: If the CI supports the Base Memory Management Extensions defined in this specification, the L_Key format must consist of: 24 bit index in the most significant bits of the R_Key, and 8 bit key in the least significant bits of the R_Key Through a successful Allocate L_Key verb invocation, the CI must let the consumer own the key portion of the returned R_Key Therefore, when creating a mkey using DEVX, the consumer is allowed to change the key part. The kernel should compare only the index part of a R_Key to determine equality with another R_Key. Adding capability in order not to break backward compatibility. 
Fixes: 534fd7aac56a ("IB/mlx5: Manage indirection mkey upon DEVX flow for ODP") Link: https://lore.kernel.org/r/3d669aacea85a3a15c3b3b953b3eaba3f80ef9be.1659255945.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 3 +++ drivers/infiniband/hw/mlx5/odp.c | 3 ++- include/uapi/rdma/mlx5-abi.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a174a0eee8dc..7c40efae96a3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1826,6 +1826,9 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, if (MLX5_CAP_GEN(dev->mdev, drain_sigerr)) resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS; + resp->comp_mask |= + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG; + return 0; } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index e305bf1dc6c2..901a8b030236 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -795,7 +795,8 @@ static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key) { if (!mmkey) return false; - if (mmkey->type == MLX5_MKEY_MW) + if (mmkey->type == MLX5_MKEY_MW || + mmkey->type == MLX5_MKEY_INDIRECT_DEVX) return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key); return mmkey->key == key; } diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 86be4a92b67b..a96b7d2770e1 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -104,6 +104,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS = 1UL << 4, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG = 1UL << 5, }; enum mlx5_user_cmds_supp_uhw { -- cgit v1.2.3 From 5a93929d9f9a1d82946ddd49e260b6dd1756ad6d Mon Sep 17 00:00:00 2001 From: Santosh 
Pradhan Date: Thu, 18 Aug 2022 12:52:39 +0200 Subject: RDMA/rtrs-clt: Add event tracing support Add event tracing mechanism for the following routines: - rtrs_clt_reconnect_work() - rtrs_clt_close_conns() - rtrs_rdma_error_recovery() How to use: 1. Load the rtrs_client module 2. cd /sys/kernel/debug/tracing 3. If all the events need to be enabled: echo 1 > events/rtrs_clt/enable 4. OR only specific routine/event needs to be enabled e.g. echo 1 > events/rtrs_clt/rtrs_clt_close_conns/enable 5. cat trace 6. Run some workload which can trigger rtrs_clt_close_conns() Link: https://lore.kernel.org/r/20220818105240.110234-2-haris.iqbal@ionos.com Signed-off-by: Santosh Pradhan Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/Makefile | 5 +- drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c | 15 +++++ drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h | 86 ++++++++++++++++++++++++++++ drivers/infiniband/ulp/rtrs/rtrs-clt.c | 7 +++ 4 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h diff --git a/drivers/infiniband/ulp/rtrs/Makefile b/drivers/infiniband/ulp/rtrs/Makefile index 3898509be270..1fdf918b37eb 100644 --- a/drivers/infiniband/ulp/rtrs/Makefile +++ b/drivers/infiniband/ulp/rtrs/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-or-later +CFLAGS_rtrs-clt-trace.o = -I$(src) + rtrs-client-y := rtrs-clt.o \ rtrs-clt-stats.o \ - rtrs-clt-sysfs.o + rtrs-clt-sysfs.o \ + rtrs-clt-trace.o rtrs-server-y := rtrs-srv.o \ rtrs-srv-stats.o \ diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c new file mode 100644 index 000000000000..f14fa1f36ce8 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE.
All rights reserved. + */ +#include "rtrs.h" +#include "rtrs-clt.h" + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "rtrs-clt-trace.h" diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h new file mode 100644 index 000000000000..7738e2676855 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-trace.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rtrs_clt + +#if !defined(_TRACE_RTRS_CLT_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RTRS_CLT_H + +#include <linux/tracepoint.h> + +struct rtrs_clt_path; +struct rtrs_clt_sess; + +TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTING); +TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTING_ERR); +TRACE_DEFINE_ENUM(RTRS_CLT_RECONNECTING); +TRACE_DEFINE_ENUM(RTRS_CLT_CONNECTED); +TRACE_DEFINE_ENUM(RTRS_CLT_CLOSING); +TRACE_DEFINE_ENUM(RTRS_CLT_CLOSED); +TRACE_DEFINE_ENUM(RTRS_CLT_DEAD); + +#define show_rtrs_clt_state(x) \ + __print_symbolic(x, \ + { RTRS_CLT_CONNECTING, "CONNECTING" }, \ + { RTRS_CLT_CONNECTING_ERR, "CONNECTING_ERR" }, \ + { RTRS_CLT_RECONNECTING, "RECONNECTING" }, \ + { RTRS_CLT_CONNECTED, "CONNECTED" }, \ + { RTRS_CLT_CLOSING, "CLOSING" }, \ + { RTRS_CLT_CLOSED, "CLOSED" }, \ + { RTRS_CLT_DEAD, "DEAD" }) + +DECLARE_EVENT_CLASS(rtrs_clt_conn_class, + TP_PROTO(struct rtrs_clt_path *clt_path), + + TP_ARGS(clt_path), + + TP_STRUCT__entry( + __field(int, state) + __field(int, reconnect_attempts) + __field(int, max_reconnect_attempts) + __field(int, fail_cnt) + __field(int, success_cnt) + __array(char, sessname, NAME_MAX) + ), + + TP_fast_assign( + struct rtrs_clt_sess *clt = clt_path->clt; + + __entry->state = clt_path->state; + __entry->reconnect_attempts = clt_path->reconnect_attempts; + __entry->max_reconnect_attempts =
clt->max_reconnect_attempts; + __entry->fail_cnt = clt_path->stats->reconnects.fail_cnt; + __entry->success_cnt = clt_path->stats->reconnects.successful_cnt; + memcpy(__entry->sessname, kobject_name(&clt_path->kobj), NAME_MAX); + ), + + TP_printk("RTRS-CLT: sess='%s' state=%s attempts='%d' max-attempts='%d' fail='%d' success='%d'", + __entry->sessname, + show_rtrs_clt_state(__entry->state), + __entry->reconnect_attempts, + __entry->max_reconnect_attempts, + __entry->fail_cnt, + __entry->success_cnt + ) +); + +#define DEFINE_CLT_CONN_EVENT(name) \ +DEFINE_EVENT(rtrs_clt_conn_class, rtrs_##name, \ + TP_PROTO(struct rtrs_clt_path *clt_path), \ + TP_ARGS(clt_path)) + +DEFINE_CLT_CONN_EVENT(clt_reconnect_work); +DEFINE_CLT_CONN_EVENT(clt_close_conns); +DEFINE_CLT_CONN_EVENT(rdma_error_recovery); + +#endif /* _TRACE_RTRS_CLT_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE rtrs-clt-trace +#include <trace/define_trace.h> + + diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index baecde41d126..5219bb10777a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -16,6 +16,7 @@ #include "rtrs-clt.h" #include "rtrs-log.h" +#include "rtrs-clt-trace.h" #define RTRS_CONNECT_TIMEOUT_MS 30000 /* @@ -302,6 +303,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) { struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); + trace_rtrs_rdma_error_recovery(clt_path); + if (rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTED, RTRS_CLT_RECONNECTING)) { @@ -1942,6 +1945,8 @@ static int rtrs_rdma_conn_rejected(struct rtrs_clt_con *con, void rtrs_clt_close_conns(struct rtrs_clt_path *clt_path, bool wait) { + trace_rtrs_clt_close_conns(clt_path); + if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSING, NULL)) queue_work(rtrs_wq, &clt_path->close_work); if (wait) @@ -2648,6 +2653,8 @@ static void rtrs_clt_reconnect_work(struct work_struct *work)
reconnect_dwork); clt = clt_path->clt; + trace_rtrs_clt_reconnect_work(clt_path); + if (READ_ONCE(clt_path->state) != RTRS_CLT_RECONNECTING) return; -- cgit v1.2.3 From c16762b7bf54d37ee441885279c4cd49e412ec5b Mon Sep 17 00:00:00 2001 From: Santosh Pradhan Date: Thu, 18 Aug 2022 12:52:40 +0200 Subject: RDMA/rtrs-srv: Add event tracing support Add event tracing mechanism for the following routine: - send_io_resp_imm() How to use: 1. Load the rtrs_server module 2. cd /sys/kernel/debug/tracing 3. If all the events need to be enabled: echo 1 > events/rtrs_srv/enable 4. OR only specific routine/event needs to be enabled e.g. echo 1 > events/rtrs_srv/send_io_resp_imm/enable 5. cat trace 6. Run some I/O workload which can trigger send_io_resp_imm() Link: https://lore.kernel.org/r/20220818105240.110234-3-haris.iqbal@ionos.com Signed-off-by: Santosh Pradhan Signed-off-by: Jack Wang Signed-off-by: Md Haris Iqbal Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/Makefile | 5 +- drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c | 16 +++++ drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h | 88 ++++++++++++++++++++++++++++ drivers/infiniband/ulp/rtrs/rtrs-srv.c | 8 +-- drivers/infiniband/ulp/rtrs/rtrs-srv.h | 5 ++ 5 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c create mode 100644 drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h diff --git a/drivers/infiniband/ulp/rtrs/Makefile b/drivers/infiniband/ulp/rtrs/Makefile index 1fdf918b37eb..5227e7788e1f 100644 --- a/drivers/infiniband/ulp/rtrs/Makefile +++ b/drivers/infiniband/ulp/rtrs/Makefile @@ -7,9 +7,12 @@ rtrs-client-y := rtrs-clt.o \ rtrs-clt-sysfs.o \ rtrs-clt-trace.o +CFLAGS_rtrs-srv-trace.o = -I$(src) + rtrs-server-y := rtrs-srv.o \ rtrs-srv-stats.o \ - rtrs-srv-sysfs.o + rtrs-srv-sysfs.o \ + rtrs-srv-trace.o rtrs-core-y := rtrs.o diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c new file mode 100644 index
000000000000..29ca59ceb0dd --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. + */ +#include "rtrs.h" +#include "rtrs-pri.h" +#include "rtrs-srv.h" + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "rtrs-srv-trace.h" diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h new file mode 100644 index 000000000000..587d3e033081 --- /dev/null +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-trace.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * RDMA Network Block Driver + * + * Copyright (c) 2022 1&1 IONOS SE. All rights reserved. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rtrs_srv + +#if !defined(_TRACE_RTRS_SRV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RTRS_SRV_H + +#include <linux/tracepoint.h> + +struct rtrs_srv_op; +struct rtrs_srv_con; +struct rtrs_srv_path; + +TRACE_DEFINE_ENUM(RTRS_SRV_CONNECTING); +TRACE_DEFINE_ENUM(RTRS_SRV_CONNECTED); +TRACE_DEFINE_ENUM(RTRS_SRV_CLOSING); +TRACE_DEFINE_ENUM(RTRS_SRV_CLOSED); + +#define show_rtrs_srv_state(x) \ + __print_symbolic(x, \ + { RTRS_SRV_CONNECTING, "CONNECTING" }, \ + { RTRS_SRV_CONNECTED, "CONNECTED" }, \ + { RTRS_SRV_CLOSING, "CLOSING" }, \ + { RTRS_SRV_CLOSED, "CLOSED" }) + +TRACE_EVENT(send_io_resp_imm, + TP_PROTO(struct rtrs_srv_op *id, + bool need_inval, + bool always_invalidate, + int errno), + + TP_ARGS(id, need_inval, always_invalidate, errno), + + TP_STRUCT__entry( + __field(u8, dir) + __field(bool, need_inval) + __field(bool, always_invalidate) + __field(u32, msg_id) + __field(int, wr_cnt) + __field(u32, signal_interval) + __field(int, state) + __field(int, errno) + __array(char, sessname, NAME_MAX) + ), + + TP_fast_assign( + struct rtrs_srv_con *con = id->con; + struct rtrs_path *s =
con->c.path; + struct rtrs_srv_path *srv_path = to_srv_path(s); + + __entry->dir = id->dir; + __entry->state = srv_path->state; + __entry->errno = errno; + __entry->need_inval = need_inval; + __entry->always_invalidate = always_invalidate; + __entry->msg_id = id->msg_id; + __entry->wr_cnt = atomic_read(&con->c.wr_cnt); + __entry->signal_interval = s->signal_interval; + memcpy(__entry->sessname, kobject_name(&srv_path->kobj), NAME_MAX); + ), + + TP_printk("sess='%s' state='%s' dir=%s err='%d' inval='%d' glob-inval='%d' msgid='%u' wrcnt='%d' sig-interval='%u'", + __entry->sessname, + show_rtrs_srv_state(__entry->state), + __print_symbolic(__entry->dir, + { READ, "READ" }, + { WRITE, "WRITE" }), + __entry->errno, + __entry->need_inval, + __entry->always_invalidate, + __entry->msg_id, + __entry->wr_cnt, + __entry->signal_interval + ) +); + +#endif /* _TRACE_RTRS_SRV_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE rtrs-srv-trace +#include <trace/define_trace.h> + + diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 34c03bde5064..22e6f991946c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -16,6 +16,7 @@ #include "rtrs-log.h" #include #include +#include "rtrs-srv-trace.h" MODULE_DESCRIPTION("RDMA Transport Server"); MODULE_LICENSE("GPL"); @@ -57,11 +58,6 @@ static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c) return container_of(c, struct rtrs_srv_con, c); } -static inline struct rtrs_srv_path *to_srv_path(struct rtrs_path *s) -{ - return container_of(s, struct rtrs_srv_path, s); -} - static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, enum rtrs_srv_state new_state) { @@ -375,6 +371,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, } } + trace_send_io_resp_imm(id, need_inval, always_invalidate, errno); + if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; diff --git
a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 186a63c217df..2f8a638e36fa 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -91,6 +91,11 @@ struct rtrs_srv_path { struct rtrs_srv_stats *stats; }; +static inline struct rtrs_srv_path *to_srv_path(struct rtrs_path *s) +{ + return container_of(s, struct rtrs_srv_path, s); +} + struct rtrs_srv_sess { struct list_head paths_list; int paths_up; -- cgit v1.2.3 From b722d3e63fcc95674bd4dd92bbbfb3bd9de12380 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Thu, 18 Aug 2022 12:53:53 +0200 Subject: RDMA/rtrs-clt: Output sg index when warning on Output the sg index, so it's a bit easier to debug. Signed-off-by: Jack Wang Reviewed-by: Aleksei Marov Link: https://lore.kernel.org/r/20220818105355.110344-2-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 60fa0b0160f4..ed324b47d93a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -175,7 +175,7 @@ int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, * length error */ for (i = 0; i < num_sge; i++) - if (WARN_ON(sge[i].length == 0)) + if (WARN_ONCE(sge[i].length == 0, "sg %d is zero length\n", i)) return -EINVAL; return rtrs_post_send(con->qp, head, &wr.wr, tail); -- cgit v1.2.3 From dc13fbf79ec8f983fc398cd200ed12973f390957 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 18 Aug 2022 17:04:49 +0300 Subject: RDMA/efa: Support CQ receive entries with source GID Add a parameter for create CQ admin command to set source address on receive completion descriptors. Report capability for this feature through query device verb.
Link: https://lore.kernel.org/r/20220818140449.414-1-mrgolin@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Yossi Leybovich Signed-off-by: Daniel Kranzdorf Signed-off-by: Michael Margolin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 6 +- drivers/infiniband/hw/efa/efa_com_cmd.c | 5 +- drivers/infiniband/hw/efa/efa_com_cmd.h | 3 +- drivers/infiniband/hw/efa/efa_io_defs.h | 289 ++++++++++++++++++++++++ drivers/infiniband/hw/efa/efa_verbs.c | 11 +- include/uapi/rdma/efa-abi.h | 4 +- 6 files changed, 312 insertions(+), 6 deletions(-) create mode 100644 drivers/infiniband/hw/efa/efa_io_defs.h diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 0b0b93b529f3..d4b9226088bd 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -444,7 +444,10 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : cq_entry_size_words - size of CQ entry in * 32-bit words, valid values: 4, 8. - * 7:5 : reserved7 - MBZ + * 5 : set_src_addr - If set, source address will be + * filled on RX completions from unknown senders. + * Requires 8 words CQ entry size. 
+ * 7:6 : reserved7 - MBZ */ u8 cq_caps_2; @@ -980,6 +983,7 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5) /* create_cq_resp */ #define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index fb405da4e1db..8f8885e002ba 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -168,7 +168,10 @@ int efa_com_create_cq(struct efa_com_dev *edev, EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); create_cmd.eqn = params->eqn; } - + if (params->set_src_addr) { + EFA_SET(&create_cmd.cq_caps_2, + EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1); + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, &create_cmd.cq_ba.mem_addr_low); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index c33010bbf9e8..0898ad5bc340 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -75,7 +75,8 @@ struct efa_com_create_cq_params { u16 uarn; u16 eqn; u8 entry_size_in_bytes; - bool interrupt_mode_enabled; + u8 interrupt_mode_enabled : 1; + u8 set_src_addr : 1; }; struct efa_com_create_cq_result { diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h new file mode 100644 index 000000000000..17ba8984b11e --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_io_defs.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_IO_H_ +#define _EFA_IO_H_ + +#define EFA_IO_TX_DESC_NUM_BUFS 2 +#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 +#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 +#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 + +enum efa_io_queue_type { + /* send queue (of a QP) */ + EFA_IO_SEND_QUEUE = 1, + /* recv queue (of a QP) */ + EFA_IO_RECV_QUEUE = 2, +}; + +enum efa_io_send_op_type { + /* send message */ + EFA_IO_SEND = 0, + /* RDMA read */ + EFA_IO_RDMA_READ = 1, +}; + +enum efa_io_comp_status { + /* Successful completion */ + EFA_IO_COMP_STATUS_OK = 0, + /* Flushed during QP destroy */ + EFA_IO_COMP_STATUS_FLUSHED = 1, + /* Internal QP error */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2, + /* Bad operation type */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3, + /* Bad AH */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4, + /* LKEY not registered or does not match IOVA */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5, + /* Message too long */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6, + /* Destination ENI is down or does not run EFA */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7, + /* Connection was reset by remote side */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8, + /* Bad dest QP number (QP does not exist or is in error state) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9, + /* Destination resource not ready (no WQEs posted on RQ) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10, + /* Receiver SGL too short */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11, + /* Unexpected status returned by responder */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12, + /* Unresponsive remote - detected locally */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13, +}; + +struct efa_io_tx_meta_desc { + /* Verbs-generated Request ID */ + u16 req_id; + + /* + * control flags + * 3:0 : op_type - operation type: send/rdma/fast mem + * ops/etc + * 4 : has_imm - immediate_data field carries valid + * data. 
+ * 5 : inline_msg - inline mode - inline message data + * follows this descriptor (no buffer descriptors). + * Note that it is different from immediate data + * 6 : meta_extension - Extended metadata. MBZ + * 7 : meta_desc - Indicates metadata descriptor. + * Must be set. + */ + u8 ctrl1; + + /* + * control flags + * 0 : phase + * 1 : reserved25 - MBZ + * 2 : first - Indicates first descriptor in + * transaction. Must be set. + * 3 : last - Indicates last descriptor in + * transaction. Must be set. + * 4 : comp_req - Indicates whether completion should + * be posted, after packet is transmitted. Valid only + * for the first descriptor + * 7:5 : reserved29 - MBZ + */ + u8 ctrl2; + + u16 dest_qp_num; + + /* + * If inline_msg bit is set, length of inline message in bytes, + * otherwise length of SGL (number of buffers). + */ + u16 length; + + /* + * immediate data: if has_imm is set, then this field is included + * within Tx message and reported in remote Rx completion. + */ + u32 immediate_data; + + u16 ah; + + u16 reserved; + + /* Queue key */ + u32 qkey; + + u8 reserved2[12]; +}; + +/* + * Tx queue buffer descriptor, for any transport type. Preceded by metadata + * descriptor. 
+ */ +struct efa_io_tx_buf_desc { + /* length in bytes */ + u32 length; + + /* + * 23:0 : lkey - local memory translation key + * 31:24 : reserved - MBZ + */ + u32 lkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_remote_mem_addr { + /* length in bytes */ + u32 length; + + /* remote memory translation key */ + u32 rkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_rdma_req { + /* Remote memory address */ + struct efa_io_remote_mem_addr remote_mem; + + /* Local memory address */ + struct efa_io_tx_buf_desc local_mem[1]; +}; + +/* + * Tx WQE, composed of tx meta descriptors followed by either tx buffer + * descriptors or inline data + */ +struct efa_io_tx_wqe { + /* TX meta */ + struct efa_io_tx_meta_desc meta; + + union { + /* Send buffer descriptors */ + struct efa_io_tx_buf_desc sgl[2]; + + u8 inline_data[32]; + + /* RDMA local and remote memory addresses */ + struct efa_io_rdma_req rdma_req; + } data; +}; + +/* + * Rx buffer descriptor; RX WQE is composed of one or more RX buffer + * descriptors. + */ +struct efa_io_rx_desc { + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer Pointer[63:32] */ + u32 buf_addr_hi; + + /* Verbs-generated request id. */ + u16 req_id; + + /* Length in bytes. */ + u16 length; + + /* + * LKey and control flags + * 23:0 : lkey + * 29:24 : reserved - MBZ + * 30 : first - Indicates first descriptor in WQE + * 31 : last - Indicates last descriptor in WQE + */ + u32 lkey_ctrl; +}; + +/* Common IO completion descriptor */ +struct efa_io_cdesc_common { + /* + * verbs-generated request ID, as provided in the completed tx or rx + * descriptor. 
+ */ + u16 req_id; + + u8 status; + + /* + * flags + * 0 : phase - Phase bit + * 2:1 : q_type - enum efa_io_queue_type: send/recv + * 3 : has_imm - indicates that immediate data is + * present - for RX completions only + * 7:4 : reserved28 - MBZ + */ + u8 flags; + + /* local QP number */ + u16 qp_num; + + /* Transferred length */ + u16 length; +}; + +/* Tx completion descriptor */ +struct efa_io_tx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; +}; + +/* Rx Completion Descriptor */ +struct efa_io_rx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; + + /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */ + u16 ah; + + u16 src_qp_num; + + /* Immediate data */ + u32 imm; +}; + +/* Extended Rx Completion Descriptor */ +struct efa_io_rx_cdesc_ex { + /* Base RX completion info */ + struct efa_io_rx_cdesc rx_cdesc_base; + + /* + * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is + * enabled. + */ + u8 src_addr[16]; +}; + +/* tx_meta_desc */ +#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0) +#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4) +#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5) +#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6) +#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7) +#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0) +#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2) +#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3) +#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4) + +/* tx_buf_desc */ +#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0) + +/* rx_desc */ +#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0) +#define EFA_IO_RX_DESC_FIRST_MASK BIT(30) +#define EFA_IO_RX_DESC_LAST_MASK BIT(31) + +/* cdesc_common */ +#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0) +#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) +#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) + +#endif /* _EFA_IO_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c 
b/drivers/infiniband/hw/efa/efa_verbs.c index ecfe70eb5efb..31454643f8c5 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include @@ -15,6 +15,7 @@ #include #include "efa.h" +#include "efa_io_defs.h" enum { EFA_MMAP_DMA_PAGE = 0, @@ -242,6 +243,7 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID; if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; @@ -1064,6 +1066,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); int entries = attr->cqe; + bool set_src_addr; int err; ibdev_dbg(ibdev, "create_cq entries %d\n", entries); @@ -1109,7 +1112,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (!cmd.cq_entry_size) { + set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID); + if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) && + (set_src_addr || + cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) { ibdev_dbg(ibdev, "Invalid entry size [%u]\n", cmd.cq_entry_size); err = -EINVAL; @@ -1138,6 +1144,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + params.set_src_addr = set_src_addr; if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { cq->eq = efa_vec2eq(dev, attr->comp_vector); params.eqn = cq->eq->eeq.eqn; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 08035ccf1fff..163ac79556d6 
100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef EFA_ABI_USER_H @@ -54,6 +54,7 @@ struct efa_ibv_alloc_pd_resp { enum { EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, + EFA_CREATE_CQ_WITH_SGID = 1 << 1, }; struct efa_ibv_create_cq { @@ -118,6 +119,7 @@ enum { EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, + EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID = 1 << 3, }; struct efa_ibv_ex_query_device_resp { -- cgit v1.2.3 From 2c34bb6dea481fa11048e26ffd1ce7400dbc2105 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:00:18 +0200 Subject: IB: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. 
Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Link: https://lore.kernel.org/r/20220818210018.6841-1-wsa+renesas@sang-engineering.com Signed-off-by: Wolfram Sang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma_configfs.c | 2 +- drivers/infiniband/core/device.c | 4 ++-- drivers/infiniband/hw/bnxt_re/main.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- drivers/infiniband/hw/qib/qib_iba7322.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 4 ++-- drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 4 ++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- include/rdma/rdma_vt.h | 2 +- 12 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index de8a2d5d741c..7b68b3ea979f 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -292,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group, goto fail; } - strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); + strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); config_group_init_type_name(&cma_dev_group->ports_group, "ports", &cma_ports_group_type); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d275db195f1a..ae60c73babcc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -422,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) return ret; } - strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX); ret = rename_compat_devs(ibdev); downgrade_write(&devices_rwsem); @@ -1217,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name) ret = -ENFILE; goto out; } - 
strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); + strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, &last_id, GFP_KERNEL); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 3d6834d3d4fb..8c0c80a8d338 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -725,7 +725,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) /* ib device init */ ibdev->node_type = RDMA_NODE_IB_CA; - strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", + strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA", strlen(BNXT_RE_DESC) + 5); ibdev->phys_port_cnt = 1; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 629beff053ad..f5f9269fdc16 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -965,7 +965,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); - strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); + strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); uctxt->jkey = generate_jkey(current_uid()); hfi1_stats.sps_ctxts++; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 6988f6f21bde..ec4f316a28e1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1801,7 +1801,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ib_set_device_ops(ibdev, &hfi1_dev_ops); - strlcpy(ibdev->node_desc, init_utsname()->nodename, + strscpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 
bdf5ed38de22..f330ce895d88 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1252,7 +1252,7 @@ static void get_board_id(void *vsd, char *board_id) if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { - strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + strscpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); } else { /* * The board ID is a string but the firmware byte diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 265a581133dc..56f06c68f31a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1363,7 +1363,7 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & OCRDMA_HBA_ATTRB_PTNUM_MASK) >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; - strlcpy(dev->model_number, + strscpy(dev->model_number, hba_attribs->controller_model_number, sizeof(dev->model_number)); } diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 6861c6384f18..9d2dd135b784 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2124,7 +2124,7 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, + strscpy(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index a09ca21f7dff..8af99b18d361 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -65,10 +65,10 @@ static void ipoib_get_drvinfo(struct net_device *netdev, ib_get_device_fw_str(priv->ca, 
drvinfo->fw_version); - strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), + strscpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), sizeof(drvinfo->bus_info)); - strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); + strscpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); } static int ipoib_get_coalesce(struct net_device *dev, diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c index 42d557dff19d..29b3d8fce3f5 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -124,8 +124,8 @@ static struct vnic_stats vnic_gstrings_stats[] = { static void vnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); - strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent), + strscpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent), sizeof(drvinfo->bus_info)); } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 21cbe30d526f..c1f0566bf6a0 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2300,7 +2300,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, goto free_recv_ring; } - strlcpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); + strscpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx", be64_to_cpu(*(__be64 *)nexus->i_port_id), be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2dafd7dbe893..c429d6ddb129 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -445,7 +445,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, * to work by setting the name manually here. 
*/ dev_set_name(&rdi->ibdev.dev, fmt, name, unit); - strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); + strscpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); } /** -- cgit v1.2.3 From ca7ef7adad979648da5006152320caa71b746134 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Tue, 23 Aug 2022 02:51:31 +0000 Subject: IB/mlx5: Remove duplicate header inclusion related to ODP rdma/ib_umem.h and rdma/ib_verbs.h are included by rdma/ib_umem_odp.h. This patch removes the redundant entries. Link: https://lore.kernel.org/r/20220823025131.862811-1-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 2 -- drivers/infiniband/hw/mlx5/main.c | 3 +-- drivers/infiniband/hw/mlx5/mem.c | 1 - drivers/infiniband/hw/mlx5/mr.c | 2 -- drivers/infiniband/hw/mlx5/odp.c | 1 - 5 files changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 186ed8859920..c459c4d011cf 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -43,8 +43,6 @@ #include #include -#include -#include #include #include "uverbs.h" diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7c40efae96a3..e5b5310f6768 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -46,7 +46,6 @@ #include #include #include -#include #define UVERBS_MODULE_NAME mlx5_ib #include diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 6b29e9ca323e..96ffbbaf0a73 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -30,7 +30,6 @@ * SOFTWARE. 
*/ -#include #include "mlx5_ib.h" #include diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 129d531bd01b..bfec9bc3cdd8 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -39,9 +39,7 @@ #include #include #include -#include #include -#include #include "dm.h" #include "mlx5_ib.h" #include "umr.h" diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 901a8b030236..bc97958818bb 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include #include #include #include -- cgit v1.2.3 From 40b4b79c866ffc1414a3989cc480263e76f28589 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:49 +0800 Subject: RDMA/hns: Remove redundant DFX file and DFX ops structure There is no need to use a dedicated DFX file and DFX structure to manage the interface of the query queue context. Link: https://lore.kernel.org/r/20220822104455.2311053-2-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/Makefile | 2 +- drivers/infiniband/hw/hns/hns_roce_device.h | 10 ++------ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 35 ++++++++++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 --- drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c | 34 ------------------------- drivers/infiniband/hw/hns/hns_roce_main.c | 6 ++++- drivers/infiniband/hw/hns/hns_roce_restrack.c | 35 +++++++++----------------- 7 files changed, 50 insertions(+), 75 deletions(-) delete mode 100644 drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index 9f04f25d9631..a7d259238305 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -10,6 +10,6 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o
hns_roce_srq.o hns_roce_restrack.o ifdef CONFIG_INFINIBAND_HNS_HIP08 -hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs) +hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f848eedc6a23..103d50564b89 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -849,11 +849,6 @@ struct hns_roce_caps { enum cong_type cong_type; }; -struct hns_roce_dfx_hw { - int (*query_cqc_info)(struct hns_roce_dev *hr_dev, u32 cqn, - int *buffer); -}; - enum hns_roce_device_state { HNS_ROCE_DEVICE_STATE_INITED, HNS_ROCE_DEVICE_STATE_RST_DOWN, @@ -899,6 +894,7 @@ struct hns_roce_hw { int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); + int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; @@ -960,7 +956,6 @@ struct hns_roce_dev { void *priv; struct workqueue_struct *irq_workq; struct work_struct ecc_work; - const struct hns_roce_dfx_hw *dfx; u32 func_num; u32 is_vf; u32 cong_algo_tmpl_id; @@ -1228,8 +1223,7 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); -int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct ib_cq *ib_cq); +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 
cbdafaac678a..979cd57a72fb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5774,6 +5774,35 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) return ret; } +static int hns_roce_v2_query_cqc(struct hns_roce_dev *hr_dev, u32 cqn, + void *buffer) +{ + struct hns_roce_v2_cq_context *context; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, + HNS_ROCE_CMD_QUERY_CQC, cqn); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd when querying CQ, ret = %d.\n", + ret); + goto err_mailbox; + } + + memcpy(buffer, context, sizeof(*context)); + +err_mailbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static void hns_roce_irq_work_handle(struct work_struct *work) { struct hns_roce_work *irq_work = @@ -6575,10 +6604,6 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(eq_table->eq); } -static const struct hns_roce_dfx_hw hns_roce_dfx_hw_v2 = { - .query_cqc_info = hns_roce_v2_query_cqc_info, -}; - static const struct ib_device_ops hns_roce_v2_dev_ops = { .destroy_qp = hns_roce_v2_destroy_qp, .modify_cq = hns_roce_v2_modify_cq, @@ -6619,6 +6644,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .init_eq = hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, .write_srqc = hns_roce_v2_write_srqc, + .query_cqc = hns_roce_v2_query_cqc, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; @@ -6650,7 +6676,6 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, hr_dev->is_vf = id->driver_data; hr_dev->dev = &handle->pdev->dev; hr_dev->hw = &hns_roce_hw_v2; - hr_dev->dfx = &hns_roce_dfx_hw_v2; hr_dev->sdb_offset = ROCEE_DB_SQ_L_0_REG; hr_dev->odb_offset = hr_dev->sdb_offset; diff 
--git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index f96debac30fe..49ec29973ed7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1462,9 +1462,6 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; -int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, - int *buffer); - static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c deleted file mode 100644 index f7a75a7cda74..000000000000 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) -// Copyright (c) 2019 Hisilicon Limited. - -#include "hnae3.h" -#include "hns_roce_device.h" -#include "hns_roce_cmd.h" -#include "hns_roce_hw_v2.h" - -int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, - int *buffer) -{ - struct hns_roce_v2_cq_context *cq_context; - struct hns_roce_cmd_mailbox *mailbox; - int ret; - - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); - - cq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_CQC, - cqn); - if (ret) { - dev_err(hr_dev->dev, "QUERY cqc cmd process error\n"); - goto err_mailbox; - } - - memcpy(buffer, cq_context, sizeof(*cq_context)); - -err_mailbox: - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return ret; -} diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c8af4ebd7cbd..caf73e8f4bbe 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -515,7 +515,6 @@ static const struct ib_device_ops hns_roce_dev_ops = { .destroy_ah = hns_roce_destroy_ah, .destroy_cq = hns_roce_destroy_cq, .disassociate_ucontext = 
hns_roce_disassociate_ucontext, - .fill_res_cq_entry = hns_roce_fill_res_cq_entry, .get_dma_mr = hns_roce_get_dma_mr, .get_link_layer = hns_roce_get_link_layer, .get_port_immutable = hns_roce_port_immutable, @@ -566,6 +565,10 @@ static const struct ib_device_ops hns_roce_dev_xrcd_ops = { INIT_RDMA_OBJ_SIZE(ib_xrcd, hns_roce_xrcd, ibxrcd), }; +static const struct ib_device_ops hns_roce_dev_restrack_ops = { + .fill_res_cq_entry = hns_roce_fill_res_cq_entry, +}; + static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { int ret; @@ -605,6 +608,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); ib_set_device_ops(ib_dev, &hns_roce_dev_ops); + ib_set_device_ops(ib_dev, &hns_roce_dev_restrack_ops); for (i = 0; i < hr_dev->caps.num_ports; i++) { if (!hr_dev->iboe.netdevs[i]) continue; diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 24a154d64630..83417be15d3f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -55,45 +55,34 @@ err: return -EMSGSIZE; } -int hns_roce_fill_res_cq_entry(struct sk_buff *msg, - struct ib_cq *ib_cq) +int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct hns_roce_v2_cq_context *context; + struct hns_roce_v2_cq_context context; struct nlattr *table_attr; int ret; - if (!hr_dev->dfx->query_cqc_info) + if (!hr_dev->hw->query_cqc) return -EINVAL; - context = kzalloc(sizeof(struct hns_roce_v2_cq_context), GFP_KERNEL); - if (!context) - return -ENOMEM; - - ret = hr_dev->dfx->query_cqc_info(hr_dev, hr_cq->cqn, (int *)context); + ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context); if (ret) - goto err; + return -EINVAL; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); - if (!table_attr) { - ret = 
-EMSGSIZE; - goto err; - } + if (!table_attr) + return -EMSGSIZE; - if (hns_roce_fill_cq(msg, context)) { - ret = -EMSGSIZE; - goto err_cancel_table; - } + if (hns_roce_fill_cq(msg, &context)) + goto err; nla_nest_end(msg, table_attr); - kfree(context); return 0; -err_cancel_table: - nla_nest_cancel(msg, table_attr); err: - kfree(context); - return ret; + nla_nest_cancel(msg, table_attr); + + return -EMSGSIZE; } -- cgit v1.2.3 From eb00b9a08b9dbb0aad7c59d113f35206c7ac2eac Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:50 +0800 Subject: RDMA/hns: Add or remove CQ's restrack attributes Remove the restrack attributes from the queue context held by ROCEE, and add the restrack attributes from the queue information maintained by the driver. For example: $ rdma res show cq dev hns_0 cqn 14 -dd -jp [ { "ifindex": 4, "ifname": "hns_0", "cqn": 14, "cqe": 127, "users": 1, "adaptive-moderation": false, "ctxn": 8, "pid": 1524, "comm": "ib_send_bw" }, "drv_cq_depth": 128, "drv_cons_index": 0, "drv_cqe_size": 32, "drv_arm_sn": 1 } Link: https://lore.kernel.org/r/20220822104455.2311053-3-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_restrack.c | 67 ++++----------------------- 1 file changed, 10 insertions(+), 57 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 83417be15d3f..2e8299784bc2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -9,72 +9,25 @@ #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" -static int hns_roce_fill_cq(struct sk_buff *msg, - struct hns_roce_v2_cq_context *context) -{ - if (rdma_nl_put_driver_u32(msg, "state", - hr_reg_read(context, CQC_ARM_ST))) - - goto err; - - if (rdma_nl_put_driver_u32(msg, "ceqn", - hr_reg_read(context, CQC_CEQN))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "cqn", -
hr_reg_read(context, CQC_CQN))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "hopnum", - hr_reg_read(context, CQC_CQE_HOP_NUM))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "pi", - hr_reg_read(context, CQC_CQ_PRODUCER_IDX))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "ci", - hr_reg_read(context, CQC_CQ_CONSUMER_IDX))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "coalesce", - hr_reg_read(context, CQC_CQ_MAX_CNT))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "period", - hr_reg_read(context, CQC_CQ_PERIOD))) - goto err; - - if (rdma_nl_put_driver_u32(msg, "cnt", - hr_reg_read(context, CQC_CQE_CNT))) - goto err; - - return 0; - -err: - return -EMSGSIZE; -} - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct hns_roce_v2_cq_context context; struct nlattr *table_attr; - int ret; - - if (!hr_dev->hw->query_cqc) - return -EINVAL; - - ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context); - if (ret) - return -EINVAL; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); if (!table_attr) return -EMSGSIZE; - if (hns_roce_fill_cq(msg, &context)) + if (rdma_nl_put_driver_u32(msg, "cq_depth", hr_cq->cq_depth)) + goto err; + + if (rdma_nl_put_driver_u32(msg, "cons_index", hr_cq->cons_index)) + goto err; + + if (rdma_nl_put_driver_u32(msg, "cqe_size", hr_cq->cqe_size)) + goto err; + + if (rdma_nl_put_driver_u32(msg, "arm_sn", hr_cq->arm_sn)) goto err; nla_nest_end(msg, table_attr); -- cgit v1.2.3 From f2b070f36d1bb4e4c2290f5bab52cb1f2dc82cf9 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:51 +0800 Subject: RDMA/hns: Support CQ's restrack raw ops for hns driver The CQ raw restrack attributes come from the queue context maintained by the ROCEE. 
For example: $ rdma res show cq dev hns_0 cqn 14 -dd -jp -r [ { "ifindex": 4, "ifname": "hns_0", "data": [ 1,0,0,0,7,0,0,0,0,0,0,0,0,82,6,0,0,82,6,0,0,82,6,0, 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0, 6,0,0,0,0,0,0,0 ] } ] Link: https://lore.kernel.org/r/20220822104455.2311053-4-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_restrack.c | 39 +++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 103d50564b89..c73adc0d3555 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1224,6 +1224,7 @@ void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); +int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index caf73e8f4bbe..1b66ed45350e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -567,6 +567,7 @@ static const struct ib_device_ops hns_roce_dev_xrcd_ops = { static const struct ib_device_ops hns_roce_dev_restrack_ops = { .fill_res_cq_entry = hns_roce_fill_res_cq_entry, + .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw, }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 2e8299784bc2..3f9c2f9dfdf6 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -9,6 +9,8 @@ #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" +#define MAX_ENTRY_NUM 256 + int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -39,3 +41,40 @@ err: return -EMSGSIZE; } + +int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + struct hns_roce_v2_cq_context context; + u32 data[MAX_ENTRY_NUM] = {}; + int offset = 0; + int ret; + + if (!hr_dev->hw->query_cqc) + return -EINVAL; + + ret = hr_dev->hw->query_cqc(hr_dev, hr_cq->cqn, &context); + if (ret) + return -EINVAL; + + data[offset++] = hr_reg_read(&context, CQC_CQ_ST); + data[offset++] = hr_reg_read(&context, CQC_SHIFT); + data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE); + data[offset++] = hr_reg_read(&context, CQC_CQE_CNT); + data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX); + data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX); + data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN); + data[offset++] = hr_reg_read(&context, CQC_ARM_ST); + data[offset++] = hr_reg_read(&context, CQC_CMD_SN); + data[offset++] = hr_reg_read(&context, CQC_CEQN); + data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT); + data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD); + data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM); + data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ); + data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ); + + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + + return ret; +} -- cgit v1.2.3 From e198d65d76e9232afb92fee5c3b361bfa411859d Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:52 +0800 Subject: RDMA/hns: Support QP's restrack ops for hns driver The QP restrack attributes come from the 
queue information maintained by the driver. For example: $ rdma res show qp link hns_0 lqpn 41 -jp -dd [ { "ifindex": 4, "ifname": "hns_0", "port": 1, "lqpn": 41, "rqpn": 40, "type": "RC", "state": "RTR", "rq-psn": 12474738, "sq-psn": 0, "path-mig-state": "ARMED", "pdn": 9, "pid": 1523, "comm": "ib_send_bw" }, "drv_sq_wqe_cnt": 128, "drv_sq_max_gs": 1, "drv_rq_wqe_cnt": 512, "drv_rq_max_gs": 2, "drv_ext_sge_sge_cnt": 0 } Link: https://lore.kernel.org/r/20220822104455.2311053-5-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_restrack.c | 34 +++++++++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c73adc0d3555..7578c0c6313b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1225,6 +1225,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); +int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1b66ed45350e..87442027b808 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -568,6 +568,7 @@ static const struct ib_device_ops hns_roce_dev_xrcd_ops = { static const struct ib_device_ops hns_roce_dev_restrack_ops = { .fill_res_cq_entry = hns_roce_fill_res_cq_entry, .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw, + .fill_res_qp_entry = 
hns_roce_fill_res_qp_entry, }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 3f9c2f9dfdf6..e8fef37f810d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -78,3 +78,37 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) return ret; } + +int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp) +{ + struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp); + struct nlattr *table_attr; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + if (rdma_nl_put_driver_u32_hex(msg, "sq_wqe_cnt", hr_qp->sq.wqe_cnt)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "sq_max_gs", hr_qp->sq.max_gs)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "rq_wqe_cnt", hr_qp->rq.wqe_cnt)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "rq_max_gs", hr_qp->rq.max_gs)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "ext_sge_sge_cnt", hr_qp->sge.sge_cnt)) + goto err; + + nla_nest_end(msg, table_attr); + + return 0; + +err: + nla_nest_cancel(msg, table_attr); + + return -EMSGSIZE; +} -- cgit v1.2.3 From 3e89d78b21a88120f6a858391faba97f2878266e Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:53 +0800 Subject: RDMA/hns: Support QP's restrack raw ops for hns driver The QP raw restrack attributes come from the queue context maintained by the ROCEE. 
For example: $ rdma res show qp link hns_0 -jp -dd -r [ { "ifindex": 4, "ifname": "hns_0", "data": [ 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0, 5,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,255,156,0,0,63,156,0,0, 7,0,0,0,1,0,0,0,9,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,63,156,0, 0,0,0,0,0 ] } ] Link: https://lore.kernel.org/r/20220822104455.2311053-6-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 +++--- drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_restrack.c | 56 +++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7578c0c6313b..e0395870b819 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -895,6 +895,7 @@ struct hns_roce_hw { void (*cleanup_eq)(struct hns_roce_dev *hr_dev); int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer); + int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; @@ -1226,6 +1227,7 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev); int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp); +int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git 
a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 979cd57a72fb..319de9a4d2ef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5307,9 +5307,8 @@ static int to_ib_qp_st(enum hns_roce_v2_qp_state state) return (state < ARRAY_SIZE(map)) ? map[state] : -1; } -static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_v2_qp_context *hr_context) +static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, u32 qpn, + void *buffer) { struct hns_roce_cmd_mailbox *mailbox; int ret; @@ -5319,11 +5318,11 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, return PTR_ERR(mailbox); ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC, - hr_qp->qpn); + qpn); if (ret) goto out; - memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz); + memcpy(buffer, mailbox->buf, hr_dev->caps.qpc_sz); out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -5353,7 +5352,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, goto done; } - ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); + ret = hns_roce_v2_query_qpc(hr_dev, hr_qp->qpn, &context); if (ret) { ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; @@ -6645,6 +6644,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .cleanup_eq = hns_roce_v2_cleanup_eq_table, .write_srqc = hns_roce_v2_write_srqc, .query_cqc = hns_roce_v2_query_cqc, + .query_qpc = hns_roce_v2_query_qpc, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 87442027b808..17bc73c108f2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -569,6 +569,7 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = { .fill_res_cq_entry = 
hns_roce_fill_res_cq_entry, .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw, .fill_res_qp_entry = hns_roce_fill_res_qp_entry, + .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw, }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index e8fef37f810d..9bafc627864b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -112,3 +112,59 @@ err: return -EMSGSIZE; } + +int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp); + struct hns_roce_v2_qp_context context; + u32 data[MAX_ENTRY_NUM] = {}; + int offset = 0; + int ret; + + if (!hr_dev->hw->query_qpc) + return -EINVAL; + + ret = hr_dev->hw->query_qpc(hr_dev, hr_qp->qpn, &context); + if (ret) + return -EINVAL; + + data[offset++] = hr_reg_read(&context, QPC_QP_ST); + data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE); + data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG); + data[offset++] = hr_reg_read(&context, QPC_SRQ_EN); + data[offset++] = hr_reg_read(&context, QPC_SRQN); + data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD); + data[offset++] = hr_reg_read(&context, QPC_TX_CQN); + data[offset++] = hr_reg_read(&context, QPC_RX_CQN); + data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX); + data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN); + data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX); + data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT); + data[offset++] = hr_reg_read(&context, QPC_RQWS); + data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT); + data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT); + data[offset++] = 
hr_reg_read(&context, QPC_SQ_HOP_NUM); + data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM); + data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM); + data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ); + data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ); + data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT); + data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT); + data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN); + data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN); + data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX); + data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR); + data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR); + data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR); + data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR); + data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX); + data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR); + + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + + return ret; +} -- cgit v1.2.3 From dc9981ef17c6ac371d098c574dcc2ad3de68f567 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:54 +0800 Subject: RDMA/hns: Support MR's restrack ops for hns driver The MR restrack attributes come from the queue information maintained by the driver. 
For example: $ rdma res show mr dev hns_0 mrn 6 -dd -jp [ { "ifindex": 4, "ifname": "hns_0", "mrn": 6, "rkey": "300", "lkey": "300", "mrlen": 131072, "pdn": 8, "pid": 1524, "comm": "ib_send_bw" }, "drv_pbl_hop_num": 2, "drv_ba_pg_shift": 14, "drv_buf_pg_shift": 12 } Link: https://lore.kernel.org/r/20220822104455.2311053-7-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_restrack.c | 30 +++++++++++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e0395870b819..30a67bc70f1a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1228,6 +1228,7 @@ int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp); int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp); +int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 17bc73c108f2..ff4386b5c064 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -570,6 +570,7 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = { .fill_res_cq_entry_raw = hns_roce_fill_res_cq_entry_raw, .fill_res_qp_entry = hns_roce_fill_res_qp_entry, .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw, + .fill_res_mr_entry = hns_roce_fill_res_mr_entry, }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) diff --git 
a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 9bafc627864b..84f942e19743 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -168,3 +168,33 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) return ret; } + +int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr) +{ + struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr); + struct nlattr *table_attr; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + return -EMSGSIZE; + + if (rdma_nl_put_driver_u32_hex(msg, "pbl_hop_num", hr_mr->pbl_hop_num)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "ba_pg_shift", + hr_mr->pbl_mtr.hem_cfg.ba_pg_shift)) + goto err; + + if (rdma_nl_put_driver_u32_hex(msg, "buf_pg_shift", + hr_mr->pbl_mtr.hem_cfg.buf_pg_shift)) + goto err; + + nla_nest_end(msg, table_attr); + + return 0; + +err: + nla_nest_cancel(msg, table_attr); + + return -EMSGSIZE; +} -- cgit v1.2.3 From 3d67e7e236adb4965ff9834bb7125686ecf9654a Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 22 Aug 2022 18:44:55 +0800 Subject: RDMA/hns: Support MR's restrack raw ops for hns driver The MR raw restrack attributes come from the queue context maintained by the ROCEE. 
For example: $ rdma res show mr dev hns_0 mrn 6 -dd -jp -r [ { "ifindex": 4, "ifname": "hns_0", "data": [ 1,0,0,0,2,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,32,0,0,0,2,0,0,0, 2,0,0,0,0,0,0,0 ] } ] Link: https://lore.kernel.org/r/20220822104455.2311053-8-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 30 ++++++++++++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 ++- drivers/infiniband/hw/hns/hns_roce_main.c | 1 + drivers/infiniband/hw/hns/hns_roce_restrack.c | 31 +++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 30a67bc70f1a..1bcecc5589fa 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -896,6 +896,7 @@ struct hns_roce_hw { int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer); int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); + int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; @@ -1229,6 +1230,7 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq); int hns_roce_fill_res_qp_entry(struct sk_buff *msg, struct ib_qp *ib_qp); int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp); int hns_roce_fill_res_mr_entry(struct sk_buff *msg, struct ib_mr *ib_mr); +int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr); struct hns_user_mmap_entry * hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, size_t length, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 319de9a4d2ef..fa78b141dff2 
100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5802,6 +5802,35 @@ err_mailbox: return ret; } +static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key, + void *buffer) +{ + struct hns_roce_v2_mpt_entry *context; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT, + key_to_hw_index(key)); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to process cmd when querying MPT, ret = %d.\n", + ret); + goto err_mailbox; + } + + memcpy(buffer, context, sizeof(*context)); + +err_mailbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static void hns_roce_irq_work_handle(struct work_struct *work) { struct hns_roce_work *irq_work = @@ -6645,6 +6674,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .write_srqc = hns_roce_v2_write_srqc, .query_cqc = hns_roce_v2_query_cqc, .query_qpc = hns_roce_v2_query_qpc, + .query_mpt = hns_roce_v2_query_mpt, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 49ec29973ed7..ae29780dd63a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -758,7 +758,8 @@ struct hns_roce_v2_mpt_entry { #define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) #define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) #define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) -#define MPT_LEN MPT_FIELD_LOC(191, 128) +#define MPT_LEN_L MPT_FIELD_LOC(159, 128) +#define MPT_LEN_H MPT_FIELD_LOC(191, 160) #define MPT_LKEY MPT_FIELD_LOC(223, 192) #define MPT_VA MPT_FIELD_LOC(287, 224) #define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c 
b/drivers/infiniband/hw/hns/hns_roce_main.c index ff4386b5c064..9de3a522980a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -571,6 +571,7 @@ static const struct ib_device_ops hns_roce_dev_restrack_ops = { .fill_res_qp_entry = hns_roce_fill_res_qp_entry, .fill_res_qp_entry_raw = hns_roce_fill_res_qp_entry_raw, .fill_res_mr_entry = hns_roce_fill_res_mr_entry, + .fill_res_mr_entry_raw = hns_roce_fill_res_mr_entry_raw, }; static int hns_roce_register_device(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 84f942e19743..989a2af2e938 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -198,3 +198,34 @@ err: return -EMSGSIZE; } + +int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device); + struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr); + struct hns_roce_v2_mpt_entry context; + u32 data[MAX_ENTRY_NUM] = {}; + int offset = 0; + int ret; + + if (!hr_dev->hw->query_mpt) + return -EINVAL; + + ret = hr_dev->hw->query_mpt(hr_dev, hr_mr->key, &context); + if (ret) + return -EINVAL; + + data[offset++] = hr_reg_read(&context, MPT_ST); + data[offset++] = hr_reg_read(&context, MPT_PD); + data[offset++] = hr_reg_read(&context, MPT_LKEY); + data[offset++] = hr_reg_read(&context, MPT_LEN_L); + data[offset++] = hr_reg_read(&context, MPT_LEN_H); + data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE); + data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM); + data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ); + data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ); + + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + + return ret; +} -- cgit v1.2.3 From c8e4c23976554fb9dda1658bd1a3914b202815cd Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 Aug 2022 
14:38:57 -0700 Subject: RDMA/srp: Rework the srp_add_port() error path device_register() always calls device_initialize() so calling device_del() is safe even if device_register() fails. Implement the following advice from the comment block above device_register(): "NOTE: _Never_ directly free @dev after calling this function, even if it returned an error! Always use put_device() to give up the reference initialized in this function instead." Keep the kfree() call in the error path since srp_release_dev() does not free the host. Link: https://lore.kernel.org/r/20220825213900.864587-2-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 7720ea270ed8..8fd6a88f7a9c 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3909,20 +3909,19 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) port); if (device_register(&host->dev)) - goto free_host; + goto put_host; if (device_create_file(&host->dev, &dev_attr_add_target)) - goto err_class; + goto put_host; if (device_create_file(&host->dev, &dev_attr_ibdev)) - goto err_class; + goto put_host; if (device_create_file(&host->dev, &dev_attr_port)) - goto err_class; + goto put_host; return host; -err_class: - device_unregister(&host->dev); - -free_host: +put_host: + device_del(&host->dev); + put_device(&host->dev); kfree(host); return NULL; -- cgit v1.2.3 From 0766fcaa1e06d5b5b04f734b788c1556022a9051 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 Aug 2022 14:38:58 -0700 Subject: RDMA/srp: Remove the srp_host.released completion Move the kfree(host) calls into srp_release_dev(). Convert a device_unregister() call into a device_del() and a put_device() call. Remove the host->released completion object.
This patch prepares for handling dev_set_name() failure in srp_add_port(). Link: https://lore.kernel.org/r/20220825213900.864587-3-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 14 +++++--------- drivers/infiniband/ulp/srp/ib_srp.h | 1 - 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8fd6a88f7a9c..1d3a15e63732 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3178,7 +3178,7 @@ static void srp_release_dev(struct device *dev) struct srp_host *host = container_of(dev, struct srp_host, dev); - complete(&host->released); + kfree(host); } static struct class srp_class = { @@ -3898,7 +3898,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) INIT_LIST_HEAD(&host->target_list); spin_lock_init(&host->target_lock); - init_completion(&host->released); mutex_init(&host->add_target_mutex); host->srp_dev = device; host->port = port; @@ -3922,8 +3921,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) put_host: device_del(&host->dev); put_device(&host->dev); - kfree(host); - return NULL; } @@ -4029,12 +4026,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) srp_dev = client_data; list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { - device_unregister(&host->dev); /* - * Wait for the sysfs entry to go away, so that no new - * target ports can be created. + * Remove the add_target sysfs entry so that no new target ports + * can be created. */ - wait_for_completion(&host->released); + device_del(&host->dev); /* * Remove all target ports. 
@@ -4052,7 +4048,7 @@ static void srp_remove_one(struct ib_device *device, void *client_data) */ flush_workqueue(srp_remove_wq); - kfree(host); + put_device(&host->dev); } ib_dealloc_pd(srp_dev->pd); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 55a575e2cace..493e7fd1913e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -124,7 +124,6 @@ struct srp_host { struct device dev; struct list_head target_list; spinlock_t target_lock; - struct completion released; struct list_head list; struct mutex add_target_mutex; }; -- cgit v1.2.3 From 351e458f725da8106eba920f3cdecf39a0e31136 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 Aug 2022 14:38:59 -0700 Subject: RDMA/srp: Handle dev_set_name() failure Instead of ignoring dev_set_name() failure, handle dev_set_name() failure. Convert a device_register() call into device_initialize() and device_add() calls. Link: https://lore.kernel.org/r/20220825213900.864587-4-bvanassche@acm.org Reported-by: Bo Liu Signed-off-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1d3a15e63732..3f31a0eef1ef 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3902,12 +3902,13 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) host->srp_dev = device; host->port = port; + device_initialize(&host->dev); host->dev.class = &srp_class; host->dev.parent = device->dev->dev.parent; - dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev), - port); - - if (device_register(&host->dev)) + if (dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev), + port)) + goto put_host; + if (device_add(&host->dev)) goto put_host; if (device_create_file(&host->dev, &dev_attr_add_target)) goto 
put_host; -- cgit v1.2.3 From b8a9c18c2f39bd84b8240b744b666114f7d62054 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 25 Aug 2022 14:39:00 -0700 Subject: RDMA/srp: Use the attribute group mechanism for sysfs attributes Simplify the SRP driver by using the attribute group mechanism instead of calling device_create_file() explicitly. Link: https://lore.kernel.org/r/20220825213900.864587-5-bvanassche@acm.org Signed-off-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3f31a0eef1ef..1e777b2043d6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3181,8 +3181,13 @@ static void srp_release_dev(struct device *dev) kfree(host); } +static struct attribute *srp_class_attrs[]; + +ATTRIBUTE_GROUPS(srp_class); + static struct class srp_class = { .name = "infiniband_srp", + .dev_groups = srp_class_groups, .dev_release = srp_release_dev }; @@ -3888,6 +3893,13 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR_RO(port); +static struct attribute *srp_class_attrs[] = { + &dev_attr_add_target.attr, + &dev_attr_ibdev.attr, + &dev_attr_port.attr, + NULL +}; + static struct srp_host *srp_add_port(struct srp_device *device, u8 port) { struct srp_host *host; @@ -3910,12 +3922,6 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) goto put_host; if (device_add(&host->dev)) goto put_host; - if (device_create_file(&host->dev, &dev_attr_add_target)) - goto put_host; - if (device_create_file(&host->dev, &dev_attr_ibdev)) - goto put_host; - if (device_create_file(&host->dev, &dev_attr_port)) - goto put_host; return host; -- cgit v1.2.3 From 05195dcb43504e381bf383e837fc935aac4258cc Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 26 Aug 2022 22:32:15 +0800 
Subject: RDMA/core: Remove 'device' argument from rdma_build_skb() The 'device' argument has never been used since rdma_build_skb() was introduced, so remove it. Link: https://lore.kernel.org/r/20220826143215.18111-1-linyunsheng@huawei.com Signed-off-by: Yunsheng Lin Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/lag.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/lag.c b/drivers/infiniband/core/lag.c index 7063e41eaf26..c77d7d2559a1 100644 --- a/drivers/infiniband/core/lag.c +++ b/drivers/infiniband/core/lag.c @@ -7,8 +7,7 @@ #include #include -static struct sk_buff *rdma_build_skb(struct ib_device *device, - struct net_device *netdev, +static struct sk_buff *rdma_build_skb(struct net_device *netdev, struct rdma_ah_attr *ah_attr, gfp_t flags) { @@ -86,7 +85,7 @@ static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, struct net_device *slave; struct sk_buff *skb; - skb = rdma_build_skb(device, master, ah_attr, flags); + skb = rdma_build_skb(master, ah_attr, flags); if (!skb) return ERR_PTR(-ENOMEM); -- cgit v1.2.3 From d4ecb56e86bf3bb2e5ef99e353f892d325b43174 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 29 Aug 2022 10:23:35 +0900 Subject: RDMA/rxe: Remove an unused member from struct rxe_mr Commit 1e75550648da ("Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"") brought back the member 'va' to struct rxe_mr. However, it is actually used by nobody and thus can be removed.
Fixes: 1e75550648da ("Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"") Link: https://lore.kernel.org/r/20220829012335.1212697-1-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_mr.c | 1 - drivers/infiniband/sw/rxe/rxe_verbs.c | 1 - drivers/infiniband/sw/rxe/rxe_verbs.h | 1 - 3 files changed, 3 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 850b80f5ad8b..814116ec4778 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -180,7 +180,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, mr->access = access; mr->length = length; mr->iova = iova; - mr->va = start; mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; mr->type = IB_MR_TYPE_USER; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index e264cf69bf55..9ebe9decad34 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1007,7 +1007,6 @@ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); - mr->va = ibmr->iova; mr->iova = ibmr->iova; mr->length = ibmr->length; mr->page_shift = ilog2(ibmr->page_size); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 96af3e054f4d..a51819d0c345 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -305,7 +305,6 @@ struct rxe_mr { u32 rkey; enum rxe_mr_state state; enum ib_mr_type type; - u64 va; u64 iova; size_t length; u32 offset; -- cgit v1.2.3 From 6edd86a2d20e702f49dfd59786da14c35495c784 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 26 Aug 2022 16:11:17 +0800 Subject: RDMA/rtrs: Remove 'dir' argument from rnbd_srv_rdma_ev Since process_{read,write} already prints direction info if 
ctx->ops.rdma_ev fails, no need to pass 'dir'. Link: https://lore.kernel.org/r/20220826081117.21687-1-guoqing.jiang@linux.dev Signed-off-by: Guoqing Jiang Signed-off-by: Leon Romanovsky --- drivers/block/rnbd/rnbd-srv.c | 11 +++++------ drivers/infiniband/ulp/rtrs/rtrs-srv.c | 4 ++-- drivers/infiniband/ulp/rtrs/rtrs.h | 3 +-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index 5e08da277ddf..d07ff3ba560c 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -359,10 +359,9 @@ static int process_msg_sess_info(struct rnbd_srv_session *srv_sess, const void *msg, size_t len, void *data, size_t datalen); -static int rnbd_srv_rdma_ev(void *priv, - struct rtrs_srv_op *id, int dir, - void *data, size_t datalen, const void *usr, - size_t usrlen) +static int rnbd_srv_rdma_ev(void *priv, struct rtrs_srv_op *id, + void *data, size_t datalen, + const void *usr, size_t usrlen) { struct rnbd_srv_session *srv_sess = priv; const struct rnbd_msg_hdr *hdr = usr; @@ -388,8 +387,8 @@ static int rnbd_srv_rdma_ev(void *priv, datalen); break; default: - pr_warn("Received unexpected message type %d with dir %d from session %s\n", - type, dir, srv_sess->sessname); + pr_warn("Received unexpected message type %d from session %s\n", + type, srv_sess->sessname); return -EINVAL; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 22e6f991946c..f0cac27dc965 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1022,7 +1022,7 @@ static void process_read(struct rtrs_srv_con *con, usr_len = le16_to_cpu(msg->usr_len); data_len = off - usr_len; data = page_address(srv->chunks[buf_id]); - ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len, + ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len, data + data_len, usr_len); if (ret) { @@ -1075,7 +1075,7 @@ static void process_write(struct 
rtrs_srv_con *con, usr_len = le16_to_cpu(req->usr_len); data_len = off - usr_len; data = page_address(srv->chunks[buf_id]); - ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len, + ret = ctx->ops.rdma_ev(srv->priv, id, data, data_len, data + data_len, usr_len); if (ret) { rtrs_err_rl(s, diff --git a/drivers/infiniband/ulp/rtrs/rtrs.h b/drivers/infiniband/ulp/rtrs/rtrs.h index 5e57a7ccc7fb..b48b53a7c143 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.h +++ b/drivers/infiniband/ulp/rtrs/rtrs.h @@ -139,7 +139,6 @@ struct rtrs_srv_ops { * @priv: Private data set by rtrs_srv_set_sess_priv() * @id: internal RTRS operation id - * @dir: READ/WRITE * @data: Pointer to (bidirectional) rdma memory area: * - in case of %RTRS_SRV_RDMA_EV_RECV contains * data sent by the client @@ -151,7 +150,7 @@ struct rtrs_srv_ops { * @usrlen: Size of the user message */ int (*rdma_ev)(void *priv, - struct rtrs_srv_op *id, int dir, + struct rtrs_srv_op *id, void *data, size_t datalen, const void *usr, size_t usrlen); /** -- cgit v1.2.3 From 91a3f14ec953f3224215dc867001b9a201785740 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 19 Aug 2022 12:08:57 +0300 Subject: IB/cm: Remove the service_mask parameter from ib_cm_listen() Remove the service_mask parameter of ib_cm_listen(), as all callers use 0. Link: https://lore.kernel.org/r/20220819090859.957943-2-markzhang@nvidia.com Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 8 ++------ drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- include/rdma/ib_cm.h | 7 +------ 4 files changed, 6 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b985e0d9bc05..b59f864b3d79 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1185,12 +1185,8 @@ static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, * and service ID resolution requests. 
The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id) { struct cm_id_private *cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -1203,7 +1199,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) goto out; } - ret = cm_init_listen(cm_id_priv, service_id, service_mask); + ret = cm_init_listen(cm_id_priv, service_id, 0); if (ret) goto out; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index fd9d7f2c4d64..ebb35b809f26 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -884,8 +884,8 @@ int ipoib_cm_dev_open(struct net_device *dev) goto err_cm; } - ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), - 0); + ret = ib_cm_listen(priv->cm.id, + cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num)); if (ret) { pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name, IPOIB_CM_IETF_ID | priv->qp->qp_num); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index c1f0566bf6a0..9450c609bf3b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3191,7 +3191,7 @@ static int srpt_add_one(struct ib_device *device) * if this HCA is gone bad and replaced by different HCA */ ret = sdev->cm_id ? 
- ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0) : + ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid)) : 0; if (ret < 0) { pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret, diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index e23eb357b761..fbf260c1b1df 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -340,13 +340,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, - __be64 service_mask); +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id); struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, -- cgit v1.2.3 From a461b746c5768b9b3001045cff2d508346f5f789 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 19 Aug 2022 12:08:58 +0300 Subject: IB/cm: remove cm_id_priv->id.service_mask and service_mask parameter of cm_init_listen() The service_mask is always ~cpu_to_be64(0), so the result is always a NOP when it is &'d with a service_id. Remove it for simplicity. 
Link: https://lore.kernel.org/r/20220819090859.957943-3-markzhang@nvidia.com Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 28 ++++++++-------------------- include/rdma/ib_cm.h | 1 - 2 files changed, 8 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b59f864b3d79..84bb10799467 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -617,7 +617,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; - __be64 service_mask = cm_id_priv->id.service_mask; unsigned long flags; spin_lock_irqsave(&cm.lock, flags); @@ -625,8 +624,7 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); - if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id) && + if ((service_id == cur_cm_id_priv->id.service_id) && (cm_id_priv->id.device == cur_cm_id_priv->id.device)) { /* * Sharing an ib_cm_id with different handlers is not @@ -670,8 +668,7 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); - if ((cm_id_priv->id.service_mask & service_id) == - cm_id_priv->id.service_id && + if ((service_id == cm_id_priv->id.service_id) && (cm_id_priv->id.device == device)) { refcount_inc(&cm_id_priv->refcount); return cm_id_priv; @@ -1158,22 +1155,17 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) } EXPORT_SYMBOL(ib_destroy_cm_id); -static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, - __be64 service_mask) +static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id) { - service_mask = service_mask ? 
service_mask : ~cpu_to_be64(0); - service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; - if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + if (service_id == IB_CM_ASSIGN_SERVICE_ID) cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - } else { + else cm_id_priv->id.service_id = service_id; - cm_id_priv->id.service_mask = service_mask; - } + return 0; } @@ -1199,7 +1191,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id) goto out; } - ret = cm_init_listen(cm_id_priv, service_id, 0); + ret = cm_init_listen(cm_id_priv, service_id); if (ret) goto out; @@ -1247,7 +1239,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, if (IS_ERR(cm_id_priv)) return ERR_CAST(cm_id_priv); - err = cm_init_listen(cm_id_priv, service_id, 0); + err = cm_init_listen(cm_id_priv, service_id); if (err) { ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); @@ -1518,7 +1510,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, } } cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( @@ -2075,7 +2066,6 @@ static int cm_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); @@ -3482,7 +3472,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = 
param->max_cm_retries; if (cm_id->state != IB_CM_IDLE) { @@ -3557,7 +3546,6 @@ static int cm_sidr_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = sidr_req_msg->hdr.tid; wc = work->mad_recv_wc->wc; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index fbf260c1b1df..8dae5847020a 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -294,7 +294,6 @@ struct ib_cm_id { void *context; struct ib_device *device; __be64 service_id; - __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ enum ib_cm_lap_state lap_state; /* internal CM/debug use */ __be32 local_id; -- cgit v1.2.3 From 637ff8ea00a20dd731110c9cdbef0e41c050607d Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 19 Aug 2022 12:08:59 +0300 Subject: IB/cm: Refactor cm_insert_listen() and cm_find_listen() Move the device and service_id match code at the top of cm_insert_listen() and cm_find_listen() into the final else branch. 
Link: https://lore.kernel.org/r/20220819090859.957943-4-markzhang@nvidia.com Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 84bb10799467..d7410ee2ade7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -624,8 +624,16 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); - if ((service_id == cur_cm_id_priv->id.service_id) && - (cm_id_priv->id.device == cur_cm_id_priv->id.device)) { + + if (cm_id_priv->id.device < cur_cm_id_priv->id.device) + link = &(*link)->rb_left; + else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) + link = &(*link)->rb_right; + else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_left; + else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) + link = &(*link)->rb_right; + else { /* * Sharing an ib_cm_id with different handlers is not * supported @@ -641,17 +649,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, spin_unlock_irqrestore(&cm.lock, flags); return cur_cm_id_priv; } - - if (cm_id_priv->id.device < cur_cm_id_priv->id.device) - link = &(*link)->rb_left; - else if (cm_id_priv->id.device > cur_cm_id_priv->id.device) - link = &(*link)->rb_right; - else if (be64_lt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_left; - else if (be64_gt(service_id, cur_cm_id_priv->id.service_id)) - link = &(*link)->rb_right; - else - link = &(*link)->rb_right; } cm_id_priv->listen_sharecount++; rb_link_node(&cm_id_priv->service_node, parent, link); @@ -668,11 +665,7 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, 
service_node); - if ((service_id == cm_id_priv->id.service_id) && - (cm_id_priv->id.device == device)) { - refcount_inc(&cm_id_priv->refcount); - return cm_id_priv; - } + if (device < cm_id_priv->id.device) node = node->rb_left; else if (device > cm_id_priv->id.device) @@ -681,8 +674,10 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, node = node->rb_left; else if (be64_gt(service_id, cm_id_priv->id.service_id)) node = node->rb_right; - else - node = node->rb_right; + else { + refcount_inc(&cm_id_priv->refcount); + return cm_id_priv; + } } return NULL; } -- cgit v1.2.3 From bfb3bde95479e7072839564ec90dbf5d00bfb9b1 Mon Sep 17 00:00:00 2001 From: Wenpeng Liang Date: Mon, 29 Aug 2022 18:50:21 +0800 Subject: RDMA/hns: Remove redundant member doorbell_qpn of struct hns_roce_qp The value of doorbell_qpn is always equal to qpn on current hardware versions. So remove it. Link: https://lore.kernel.org/r/20220829105021.1427804-5-liangwenpeng@huawei.com Signed-off-by: Wenpeng Liang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 3 --- 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1bcecc5589fa..6fb6080d2506 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -599,7 +599,6 @@ struct hns_roce_qp { struct hns_roce_db rdb; struct hns_roce_db sdb; unsigned long en_flags; - u32 doorbell_qpn; enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index fa78b141dff2..437d5dd4e648 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -637,7 +637,7 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, } else 
{ struct hns_roce_v2_db sq_db = {}; - hr_reg_write(&sq_db, DB_TAG, qp->doorbell_qpn); + hr_reg_write(&sq_db, DB_TAG, qp->qpn); hr_reg_write(&sq_db, DB_CMD, HNS_ROCE_V2_SQ_DB); hr_reg_write(&sq_db, DB_PI, qp->sq.head); hr_reg_write(&sq_db, DB_SL, qp->sl); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 48d3616a6d71..52ba194d7ae3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -218,7 +218,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->ibqp.qp_type == IB_QPT_GSI) { num = 1; - hr_qp->doorbell_qpn = 1; } else { mutex_lock(&qp_table->bank_mutex); bankid = get_least_load_bankid_for_qp(qp_table->bank); @@ -234,8 +233,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) qp_table->bank[bankid].inuse++; mutex_unlock(&qp_table->bank_mutex); - - hr_qp->doorbell_qpn = (u32)num; } hr_qp->qpn = num; -- cgit v1.2.3 From a625ca30eff806395175ebad3ac1399014bdb280 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 21 Aug 2022 21:16:13 -0400 Subject: RDMA/rxe: Fix "kernel NULL pointer dereference" error When rxe_queue_init in the function rxe_qp_init_req fails, both qp->req.task.func and qp->req.task.arg are not initialized. Because of creation of qp fails, the function rxe_create_qp will call rxe_qp_do_cleanup to handle allocated resource. Before calling __rxe_do_task, both qp->req.task.func and qp->req.task.arg should be checked. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20220822011615.805603-2-yanjun.zhu@linux.dev Reported-by: syzbot+ab99dc4c6e961eed8b8e@syzkaller.appspotmail.com Signed-off-by: Zhu Yanjun Reviewed-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 516bf9b95e48..fda03f9f03ed 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -797,7 +797,9 @@ static void rxe_qp_do_cleanup(struct work_struct *work) rxe_cleanup_task(&qp->comp.task); /* flush out any receive wr's or pending requests */ - __rxe_do_task(&qp->req.task); + if (qp->req.task.func) + __rxe_do_task(&qp->req.task); + if (qp->sq.queue) { __rxe_do_task(&qp->comp.task); __rxe_do_task(&qp->req.task); -- cgit v1.2.3 From 548ce2e66725dcba4e27d1e8ac468d5dd17fd509 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 21 Aug 2022 21:16:14 -0400 Subject: RDMA/rxe: Fix the error caused by qp->sk When sock_create_kern in the function rxe_qp_init_req fails, qp->sk is set to NULL. Then the function rxe_create_qp will call rxe_qp_do_cleanup to handle allocated resource. Before handling qp->sk, this variable should be checked. 
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20220822011615.805603-3-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index fda03f9f03ed..d776dfda43b1 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -835,8 +835,10 @@ static void rxe_qp_do_cleanup(struct work_struct *work) free_rd_atomic_resources(qp); - kernel_sock_shutdown(qp->sk, SHUT_RDWR); - sock_release(qp->sk); + if (qp->sk) { + kernel_sock_shutdown(qp->sk, SHUT_RDWR); + sock_release(qp->sk); + } } /* called when the last reference to the qp is dropped */ -- cgit v1.2.3 From f07853582d1f6ed282f8d9a0b1209a87dd761f58 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Sun, 21 Aug 2022 21:16:15 -0400 Subject: RDMA/rxe: Remove the unused variable obj The member variable obj in struct rxe_task is not needed. So remove it to save memory. 
Link: https://lore.kernel.org/r/20220822011615.805603-4-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Li Zhijian Reviewed-by: Bob Pearson Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_qp.c | 6 +++--- drivers/infiniband/sw/rxe/rxe_task.c | 3 +-- drivers/infiniband/sw/rxe/rxe_task.h | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index d776dfda43b1..1dcbeacb3122 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -242,9 +242,9 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->req_pkts); - rxe_init_task(rxe, &qp->req.task, qp, + rxe_init_task(&qp->req.task, qp, rxe_requester, "req"); - rxe_init_task(rxe, &qp->comp.task, qp, + rxe_init_task(&qp->comp.task, qp, rxe_completer, "comp"); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ @@ -292,7 +292,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(rxe, &qp->resp.task, qp, + rxe_init_task(&qp->resp.task, qp, rxe_responder, "resp"); qp->resp.opcode = OPCODE_NONE; diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 2248cf33d776..ec2b7de1c497 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -94,10 +94,9 @@ void rxe_do_task(struct tasklet_struct *t) task->ret = ret; } -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *), char *name) { - task->obj = obj; task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 11d183fd3338..7f612a1c68a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -19,7 
+19,6 @@ enum { * called again. */ struct rxe_task { - void *obj; struct tasklet_struct tasklet; int state; spinlock_t state_lock; /* spinlock for task state */ @@ -35,7 +34,7 @@ struct rxe_task { * arg => parameter to pass to fcn * func => function to call until it returns != 0 */ -int rxe_init_task(void *obj, struct rxe_task *task, +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *), char *name); /* cleanup task */ -- cgit v1.2.3 From 2c02249fcbfc066bd33e2a7375c7006d4cb367f6 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Mon, 29 Aug 2022 16:12:18 +0900 Subject: RDMA/rxe: Delete error messages triggered by incoming Read requests An incoming Read request causes multiple Read responses. If a user MR to copy data from is unavailable or responder cannot send a reply, then the error messages can be printed for each response attempt, resulting in message overflow. Link: https://lore.kernel.org/r/20220829071218.1639065-1-matsuda-daisuke@fujitsu.com Signed-off-by: Daisuke Matsuda Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_resp.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index b36ec5c4d5e0..7c336db5cb54 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -809,10 +809,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (!skb) return RESPST_ERR_RNR; - err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_FROM_MR_OBJ); - if (err) - pr_err("Failed copying memory\n"); + rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + payload, RXE_FROM_MR_OBJ); if (mr) rxe_put(mr); @@ -823,10 +821,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, } err = rxe_xmit_packet(qp, &ack_pkt, skb); - if (err) { - pr_err("Failed sending RDMA reply.\n"); + if (err) return RESPST_ERR_RNR; - } res->read.va += payload; res->read.resid -= payload; -- cgit v1.2.3 
From fc5e1acf6ade49da06c6a74b0c3fa903e0c9503a Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Wed, 31 Aug 2022 12:30:48 -0400 Subject: RDMA/siw: Add missing Kconfig selections The SoftiWARP Kconfig is missing "select" for CRYPTO and CRYPTO_CRC32C. In addition, it improperly "depends on" LIBCRC32C, this should be a "select", similar to net/sctp and others. As a dependency, SIW fails to appear in generic configurations. Link: https://lore.kernel.org/r/d366bf02-3271-754f-fc68-1a84016d0e19@talpey.com Signed-off-by: Tom Talpey Acked-by: Bernard Metzler Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/Kconfig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index 1b5105cbabae..81b70a3eeb87 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -1,7 +1,10 @@ config RDMA_SIW tristate "Software RDMA over TCP/IP (iWARP) driver" - depends on INET && INFINIBAND && LIBCRC32C + depends on INET && INFINIBAND depends on INFINIBAND_VIRT_DMA + select LIBCRC32C + select CRYPTO + select CRYPTO_CRC32C help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. It enables a system with a -- cgit v1.2.3 From b021d82e2503e3704672221bfa3028f30e749cc5 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 29 Aug 2022 12:04:12 +0300 Subject: IB/mlx5: Support querying eswitch functions from DEVX Query eswitch functions returns information of the external host PF(if it exists). It can be used to check if DEVX is running on ECPF. 
Reviewed-by: Erez Shitrit Reviewed-by: Saeed Mahameed Signed-off-by: Bodong Wang Link: https://lore.kernel.org/r/4265925178ab3224dc1d3e3784bb312d808edca5.1661763785.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/devx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 2a2a9e9afc9d..adefff89fb39 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -907,6 +907,7 @@ static bool devx_is_whitelist_cmd(void *in) case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: case MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return true; default: return false; @@ -962,6 +963,7 @@ static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev) case MLX5_CMD_OP_QUERY_CONG_PARAMS: case MLX5_CMD_OP_QUERY_CONG_STATISTICS: case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return true; default: return false; -- cgit v1.2.3 From e58f889e293e6bd13ae2b48208a4d0d15592bf5a Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Thu, 1 Sep 2022 07:42:09 +0000 Subject: RDMA/hfi1: Remove the unneeded result variable Return the value of set_link_state() directly instead of storing it in another redundant variable.
Reported-by: Zeal Robot Signed-off-by: ye xingchen Link: https://lore.kernel.org/r/20220901074209.313004-1-ye.xingchen@zte.com.cn Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/verbs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ec4f316a28e1..e6e17984553c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1447,12 +1447,10 @@ static int shut_down_port(struct rvt_dev_info *rdi, u32 port_num) struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); struct hfi1_devdata *dd = dd_from_dev(verbs_dev); struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; - int ret; set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, OPA_LINKDOWN_REASON_UNKNOWN); - ret = set_link_state(ppd, HLS_DN_DOWNDEF); - return ret; + return set_link_state(ppd, HLS_DN_DOWNDEF); } static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, -- cgit v1.2.3 From 2aa9e4a2c3db065672fe530fb594a8e31f5672f6 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 2 Sep 2022 18:19:20 +0800 Subject: RDMA/rtrs: Update comments for MAX_SESS_QUEUE_DEPTH The maximum queue_depth should be 65535 per check_module_params, also update other relevant comments. Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20220902101922.26273-2-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-pri.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index ac0df734eba8..a2420eecaf5a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -26,11 +26,10 @@ /* * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) * and the minimum chunk size is 4096 (2^12). - * So the maximum sess_queue_depth is 65536 (2^16) in theory. 
- But mempool_create, create_qp and ib_post_send fail with - "cannot allocate memory" error if sess_queue_depth is too big. + * So the maximum sess_queue_depth is 65535 (2^16 - 1) in theory + * since queue_depth in rtrs_msg_conn_rsp is defined as le16. * Therefore the pratical max value of sess_queue_depth is - * somewhere between 1 and 65534 and it depends on the system. + * somewhere between 1 and 65535 and it depends on the system. */ #define MAX_SESS_QUEUE_DEPTH 65535 -- cgit v1.2.3 From 57eb9382370e768fc13e9f3bbdca5579f14ffe83 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 2 Sep 2022 18:19:21 +0800 Subject: RDMA/rtrs-clt: Break the loop once one path is connected No need to iterate all paths after finding one connected path. Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20220902101922.26273-3-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 5219bb10777a..c29eccdb4fd2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -54,7 +54,10 @@ static inline bool rtrs_clt_is_connected(const struct rtrs_clt_sess *clt) rcu_read_lock(); list_for_each_entry_rcu(clt_path, &clt->paths_list, s.entry) - connected |= READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED; + if (READ_ONCE(clt_path->state) == RTRS_CLT_CONNECTED) { + connected = true; + break; + } rcu_read_unlock(); return connected; -- cgit v1.2.3 From db77d84cfe3608eac938302f8f7178e44415bcba Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Sat, 3 Sep 2022 12:02:52 +0800 Subject: RDMA/rtrs-clt: Kill xchg_paths Let's call try_cmpxchg directly for the same purpose.
Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20220903040252.29397-1-guoqing.jiang@linux.dev Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index c29eccdb4fd2..d252676c7889 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2220,17 +2220,6 @@ static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path) } } -static inline bool xchg_paths(struct rtrs_clt_path __rcu **rcu_ppcpu_path, - struct rtrs_clt_path *clt_path, - struct rtrs_clt_path *next) -{ - struct rtrs_clt_path **ppcpu_path; - - /* Call cmpxchg() without sparse warnings */ - ppcpu_path = (typeof(ppcpu_path))rcu_ppcpu_path; - return clt_path == cmpxchg(ppcpu_path, clt_path, next); -} - static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path) { struct rtrs_clt_sess *clt = clt_path->clt; @@ -2305,7 +2294,8 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path) * We race with IO code path, which also changes pointer, * thus we have to be careful not to overwrite it. */ - if (xchg_paths(ppcpu_path, clt_path, next)) + if (try_cmpxchg((struct rtrs_clt_path **)ppcpu_path, &clt_path, + next)) /* * @ppcpu_path was successfully replaced with @next, * that means that someone could also pick up the -- cgit v1.2.3 From e2edba67fcd514f92401e073a624fbdeb37ce0db Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 7 Sep 2022 02:48:20 +0000 Subject: RDMA/rxe: use %u to print u32 variables struct ib_qp_cap { u32 max_send_wr; u32 max_recv_wr; u32 max_send_sge; u32 max_recv_sge; u32 max_inline_data; ... 
To avoid getting a negative value from dmesg: [410580.579965] rdma_rxe: invalid send sge = 65535 > 32 [410580.583818] rdma_rxe: invalid send wr = -1 > 1048576 [410582.771323] rdma_rxe: invalid recv sge = 65535 > 32 [410582.775310] rdma_rxe: invalid recv wr = -1 > 1048576 Signed-off-by: Li Zhijian Link: https://lore.kernel.org/r/1662518901-2-1-git-send-email-lizhijian@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_qp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 1dcbeacb3122..ad7f06f4beb0 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid send wr = %d > %d\n", + pr_warn("invalid send wr = %u > %d\n", cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_warn("invalid send sge = %d > %d\n", + pr_warn("invalid send sge = %u > %d\n", cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid recv wr = %d > %d\n", + pr_warn("invalid recv wr = %u > %d\n", cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_warn("invalid recv sge = %d > %d\n", + pr_warn("invalid recv sge = %u > %d\n", cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_warn("invalid max inline data = %d > %d\n", + pr_warn("invalid max inline data = %u > %d\n", cap->max_inline_data, rxe->max_inline_data); goto err1; } -- cgit v1.2.3 From 415a04844aff46384c4264ad687f15579fac8f7e Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 7 Sep 2022 02:48:21 +0000 Subject: RDMA/rxe: convert pr_warn to pr_debug They could be 
triggered by user APIs with invalid parameters. Signed-off-by: Li Zhijian Link: https://lore.kernel.org/r/1662518901-2-2-git-send-email-lizhijian@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_qp.c | 45 +++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index ad7f06f4beb0..a62bab88415c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,34 +19,34 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid send wr = %u > %d\n", - cap->max_send_wr, rxe->attr.max_qp_wr); + pr_debug("invalid send wr = %u > %d\n", + cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_warn("invalid send sge = %u > %d\n", - cap->max_send_sge, rxe->attr.max_send_sge); + pr_debug("invalid send sge = %u > %d\n", + cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_warn("invalid recv wr = %u > %d\n", - cap->max_recv_wr, rxe->attr.max_qp_wr); + pr_debug("invalid recv wr = %u > %d\n", + cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_warn("invalid recv sge = %u > %d\n", - cap->max_recv_sge, rxe->attr.max_recv_sge); + pr_debug("invalid recv sge = %u > %d\n", + cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_warn("invalid max inline data = %u > %d\n", - cap->max_inline_data, rxe->max_inline_data); + pr_debug("invalid max inline data = %u > %d\n", + cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - pr_warn("missing cq\n"); 
+ pr_debug("missing cq\n"); goto err1; } @@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_warn("invalid port = %d\n", port_num); + pr_debug("invalid port = %d\n", port_num); goto err1; } port = &rxe->port; if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_warn("GSI QP exists for port %d\n", port_num); + pr_debug("GSI QP exists for port %d\n", port_num); goto err1; } } @@ -402,7 +402,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_warn("invalid mask or state for qp\n"); + pr_debug("invalid mask or state for qp\n"); goto err1; } @@ -416,7 +416,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_warn("invalid port %d\n", attr->port_num); + pr_debug("invalid port %d\n", attr->port_num); goto err1; } } @@ -431,12 +431,12 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_warn("invalid alt port %d\n", attr->alt_port_num); + pr_debug("invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_warn("invalid QP alt timeout %d > 31\n", - attr->alt_timeout); + pr_debug("invalid QP alt timeout %d > 31\n", + attr->alt_timeout); goto err1; } } @@ -457,17 +457,16 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - pr_warn("invalid max_rd_atomic %d > %d\n", - attr->max_rd_atomic, - rxe->attr.max_qp_rd_atom); + pr_debug("invalid max_rd_atomic %d > %d\n", + attr->max_rd_atomic, + rxe->attr.max_qp_rd_atom); goto err1; } } if (mask & IB_QP_TIMEOUT) { if 
(attr->timeout > 31) { - pr_warn("invalid QP timeout %d > 31\n", - attr->timeout); + pr_debug("invalid QP timeout %d > 31\n", attr->timeout); goto err1; } } -- cgit v1.2.3 From e866025b3b1557f9bf6ab1770f297fe6d90e0417 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Thu, 8 Sep 2022 17:30:58 +0900 Subject: RDMA/mlx5: Remove duplicate assignment in umr_rereg_pas() The same value is assigned to 'mr->ibmr.length' twice. Remove the redundant one. Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20220908083058.3993700-1-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bfec9bc3cdd8..4fcb653b35bb 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1400,7 +1400,6 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd, upd_flags |= MLX5_IB_UPD_XLT_ACCESS; } - mr->ibmr.length = new_umem->length; mr->ibmr.iova = iova; mr->ibmr.length = new_umem->length; mr->page_shift = order_base_2(page_size); -- cgit v1.2.3 From 7f51a961f8c6b84752a48e950074a8c4a0808d91 Mon Sep 17 00:00:00 2001 From: Sindhu-Devale Date: Wed, 7 Sep 2022 14:13:23 -0500 Subject: RDMA/irdma: Align AE id codes to correct flush code and event A number of asynchronous event (AE) ids were not aligned to the correct flush_code and event_type. Fix these up so that the correct IBV error and event codes are returned to application. Also, add handling for new AE ids like IRDMA_AE_INVALID_REQUEST to return the correct WC error code.
Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Sindhu-Devale Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20220907191324.1173-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/defs.h | 1 + drivers/infiniband/hw/irdma/hw.c | 51 ++++++++++++++++++++++--------------- drivers/infiniband/hw/irdma/type.h | 1 + drivers/infiniband/hw/irdma/user.h | 1 + drivers/infiniband/hw/irdma/utils.c | 3 +++ drivers/infiniband/hw/irdma/verbs.c | 2 ++ 6 files changed, 38 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index e03e03082a5f..c1906cab5c8a 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -314,6 +314,7 @@ enum irdma_cqp_op_type { #define IRDMA_AE_IB_REMOTE_ACCESS_ERROR 0x020d #define IRDMA_AE_IB_REMOTE_OP_ERROR 0x020e #define IRDMA_AE_WQE_LSMM_TOO_LONG 0x0220 +#define IRDMA_AE_INVALID_REQUEST 0x0223 #define IRDMA_AE_DDP_INVALID_MSN_GAP_IN_MSN 0x0301 #define IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER 0x0303 #define IRDMA_AE_DDP_UBE_INVALID_DDP_VERSION 0x0304 diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 4f132c6fb653..ab246447520b 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -138,59 +138,68 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; switch (info->ae_id) { - case IRDMA_AE_AMP_UNALLOCATED_STAG: case IRDMA_AE_AMP_BOUNDS_VIOLATION: case IRDMA_AE_AMP_INVALID_STAG: - qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; - fallthrough; + case IRDMA_AE_AMP_RIGHTS_VIOLATION: + case IRDMA_AE_AMP_UNALLOCATED_STAG: case IRDMA_AE_AMP_BAD_PD: - case IRDMA_AE_UDA_XMIT_BAD_PD: + case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_AMP_BAD_STAG_KEY: + case IRDMA_AE_AMP_BAD_STAG_INDEX: + case IRDMA_AE_AMP_TO_WRAP: + case 
IRDMA_AE_PRIV_OPERATION_DENIED: qp->flush_code = FLUSH_PROT_ERR; + qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; - case IRDMA_AE_AMP_BAD_QP: + case IRDMA_AE_UDA_XMIT_BAD_PD: case IRDMA_AE_WQE_UNEXPECTED_OPCODE: qp->flush_code = FLUSH_LOC_QP_OP_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; + break; + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: + case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: + case IRDMA_AE_UDA_L4LEN_INVALID: + case IRDMA_AE_DDP_UBE_INVALID_MO: + case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: + qp->flush_code = FLUSH_LOC_LEN_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; - case IRDMA_AE_AMP_BAD_STAG_KEY: - case IRDMA_AE_AMP_BAD_STAG_INDEX: - case IRDMA_AE_AMP_TO_WRAP: - case IRDMA_AE_AMP_RIGHTS_VIOLATION: case IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS: - case IRDMA_AE_PRIV_OPERATION_DENIED: - case IRDMA_AE_IB_INVALID_REQUEST: case IRDMA_AE_IB_REMOTE_ACCESS_ERROR: qp->flush_code = FLUSH_REM_ACCESS_ERR; qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; case IRDMA_AE_LLP_SEGMENT_TOO_SMALL: - case IRDMA_AE_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER: - case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: - case IRDMA_AE_UDA_XMIT_DGRAM_TOO_SHORT: - case IRDMA_AE_UDA_L4LEN_INVALID: + case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: case IRDMA_AE_ROCE_RSP_LENGTH_ERROR: - qp->flush_code = FLUSH_LOC_LEN_ERR; + case IRDMA_AE_IB_REMOTE_OP_ERROR: + qp->flush_code = FLUSH_REM_OP_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_LCE_QP_CATASTROPHIC: qp->flush_code = FLUSH_FATAL_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; - case IRDMA_AE_DDP_UBE_INVALID_MO: case IRDMA_AE_IB_RREQ_AND_Q1_FULL: - case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: qp->flush_code = FLUSH_GENERAL_ERR; break; case IRDMA_AE_LLP_TOO_MANY_RETRIES: qp->flush_code = FLUSH_RETRY_EXC_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: case 
IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: qp->flush_code = FLUSH_MW_BIND_ERR; + qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR; break; - case IRDMA_AE_IB_REMOTE_OP_ERROR: - qp->flush_code = FLUSH_REM_OP_ERR; + case IRDMA_AE_IB_INVALID_REQUEST: + qp->flush_code = FLUSH_REM_INV_REQ_ERR; + qp->event_type = IRDMA_QP_EVENT_REQ_ERR; break; default: - qp->flush_code = FLUSH_FATAL_ERR; + qp->flush_code = FLUSH_GENERAL_ERR; + qp->event_type = IRDMA_QP_EVENT_CATASTROPHIC; break; } } diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 9e7b8ecb137a..517d41a1c289 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -98,6 +98,7 @@ enum irdma_term_mpa_errors { enum irdma_qp_event_type { IRDMA_QP_EVENT_CATASTROPHIC, IRDMA_QP_EVENT_ACCESS_ERR, + IRDMA_QP_EVENT_REQ_ERR, }; enum irdma_hw_stats_index_32b { diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index ddd0ebbdd7d5..2ef61923c926 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -103,6 +103,7 @@ enum irdma_flush_opcode { FLUSH_FATAL_ERR, FLUSH_RETRY_EXC_ERR, FLUSH_MW_BIND_ERR, + FLUSH_REM_INV_REQ_ERR, }; enum irdma_cmpl_status { diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index fdf4cc88cb91..dac939c51f1d 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2476,6 +2476,9 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event) case IRDMA_QP_EVENT_ACCESS_ERR: ibevent.event = IB_EVENT_QP_ACCESS_ERR; break; + case IRDMA_QP_EVENT_REQ_ERR: + ibevent.event = IB_EVENT_QP_REQ_ERR; + break; } ibevent.device = iwqp->ibqp.device; ibevent.element.qp = &iwqp->ibqp; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9b07b8af2997..f3925f11d281 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3308,6 +3308,8 
@@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode return IB_WC_RETRY_EXC_ERR; case FLUSH_MW_BIND_ERR: return IB_WC_MW_BIND_ERR; + case FLUSH_REM_INV_REQ_ERR: + return IB_WC_REM_INV_REQ_ERR; case FLUSH_FATAL_ERR: default: return IB_WC_FATAL_ERR; -- cgit v1.2.3 From 34acb833cc83bdea912a160ff99b537e62bb4cf3 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Wed, 7 Sep 2022 14:13:24 -0500 Subject: RDMA/irdma: Validate udata inlen and outlen Currently ib_copy_from_udata and ib_copy_to_udata could underfill the request and response buffer if the user-space passes an undersized value for udata->inlen or udata->outlen respectively [1] This could lead to undesirable behavior. Zero initing the buffer only goes as far as preventing using the buffer uninitialized. Validate udata->inlen and udata->outlen passed from user-space to ensure they are at least the required minimum size. [1] https://lore.kernel.org/linux-rdma/MWHPR11MB0029F37D40D9D4A993F8F549E9D79@MWHPR11MB0029.namprd11.prod.outlook.com/ Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Reported-by: Dan Carpenter Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20220907191324.1173-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 67 +++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index f3925f11d281..ba403cc25aa9 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -296,13 +296,19 @@ static void irdma_alloc_push_page(struct irdma_qp *iwqp) static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { +#define IRDMA_ALLOC_UCTX_MIN_REQ_LEN offsetofend(struct irdma_alloc_ucontext_req, rsvd8) +#define IRDMA_ALLOC_UCTX_MIN_RESP_LEN offsetofend(struct irdma_alloc_ucontext_resp, rsvd) struct ib_device *ibdev = uctx->device; 
struct irdma_device *iwdev = to_iwdev(ibdev); - struct irdma_alloc_ucontext_req req; + struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); struct irdma_uk_attrs *uk_attrs; + if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || + udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) + return -EINVAL; + if (ib_copy_from_udata(&req, udata, min(sizeof(req), udata->inlen))) return -EINVAL; @@ -314,7 +320,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; /* GEN_1 legacy support with libi40iw */ - if (udata->outlen < sizeof(uresp)) { + if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) return -EOPNOTSUPP; @@ -386,6 +392,7 @@ static void irdma_dealloc_ucontext(struct ib_ucontext *context) */ static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) { +#define IRDMA_ALLOC_PD_MIN_RESP_LEN offsetofend(struct irdma_alloc_pd_resp, rsvd) struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; @@ -395,6 +402,9 @@ static int irdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) u32 pd_id = 0; int err; + if (udata && udata->outlen < IRDMA_ALLOC_PD_MIN_RESP_LEN) + return -EINVAL; + err = irdma_alloc_rsrc(rf, rf->allocated_pds, rf->max_pd, &pd_id, &rf->next_pd); if (err) @@ -811,12 +821,14 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { +#define IRDMA_CREATE_QP_MIN_REQ_LEN offsetofend(struct irdma_create_qp_req, user_compl_ctx) +#define IRDMA_CREATE_QP_MIN_RESP_LEN offsetofend(struct irdma_create_qp_resp, rsvd) struct ib_pd *ibpd = ibqp->pd; struct irdma_pd *iwpd = to_iwpd(ibpd); struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req; + struct 
irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; int err_code; @@ -833,6 +845,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (err_code) return err_code; + if (udata && (udata->inlen < IRDMA_CREATE_QP_MIN_REQ_LEN || + udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) + return -EINVAL; + sq_size = init_attr->cap.max_send_wr; rq_size = init_attr->cap.max_recv_wr; @@ -1117,6 +1133,8 @@ static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { +#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) +#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_pd *iwpd = to_iwpd(ibqp->pd); struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; @@ -1135,6 +1153,13 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, roce_info = &iwqp->roce_info; udp_info = &iwqp->udp_info; + if (udata) { + /* udata inlen/outlen can be 0 when supporting legacy libi40iw */ + if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || + (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) + return -EINVAL; + } + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; @@ -1371,7 +1396,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); - if (udata) { + if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; @@ -1423,7 +1448,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else { iwqp->ibqp_state = attr->qp_state; } - if (udata && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { + if (udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext 
*ucontext; ucontext = rdma_udata_to_drv_context(udata, @@ -1463,6 +1488,8 @@ exit: int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { +#define IRDMA_MODIFY_QP_MIN_REQ_LEN offsetofend(struct irdma_modify_qp_req, rq_flush) +#define IRDMA_MODIFY_QP_MIN_RESP_LEN offsetofend(struct irdma_modify_qp_resp, push_valid) struct irdma_qp *iwqp = to_iwqp(ibqp); struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_dev *dev = &iwdev->rf->sc_dev; @@ -1477,6 +1504,13 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, int err; unsigned long flags; + if (udata) { + /* udata inlen/outlen can be 0 when supporting legacy libi40iw */ + if ((udata->inlen && udata->inlen < IRDMA_MODIFY_QP_MIN_REQ_LEN) || + (udata->outlen && udata->outlen < IRDMA_MODIFY_QP_MIN_RESP_LEN)) + return -EINVAL; + } + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; @@ -1562,7 +1596,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, case IB_QPS_RESET: if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); - if (udata) { + if (udata && udata->inlen) { if (ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen))) return -EINVAL; @@ -1659,7 +1693,7 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, } } } - if (attr_mask & IB_QP_STATE && udata && + if (attr_mask & IB_QP_STATE && udata && udata->outlen && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) { struct irdma_ucontext *ucontext; @@ -1794,6 +1828,7 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) static int irdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { +#define IRDMA_RESIZE_CQ_MIN_REQ_LEN offsetofend(struct irdma_resize_cq_req, user_cq_buffer) struct irdma_cq *iwcq = to_iwcq(ibcq); struct irdma_sc_dev *dev = iwcq->sc_cq.dev; struct irdma_cqp_request *cqp_request; @@ -1816,6 +1851,9 @@ static 
int irdma_resize_cq(struct ib_cq *ibcq, int entries, IRDMA_FEATURE_CQ_RESIZE)) return -EOPNOTSUPP; + if (udata && udata->inlen < IRDMA_RESIZE_CQ_MIN_REQ_LEN) + return -EINVAL; + if (entries > rf->max_cqe) return -EINVAL; @@ -1948,6 +1986,8 @@ static int irdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { +#define IRDMA_CREATE_CQ_MIN_REQ_LEN offsetofend(struct irdma_create_cq_req, user_cq_buf) +#define IRDMA_CREATE_CQ_MIN_RESP_LEN offsetofend(struct irdma_create_cq_resp, cq_size) struct ib_device *ibdev = ibcq->device; struct irdma_device *iwdev = to_iwdev(ibdev); struct irdma_pci_f *rf = iwdev->rf; @@ -1966,6 +2006,11 @@ static int irdma_create_cq(struct ib_cq *ibcq, err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) return err_code; + + if (udata && (udata->inlen < IRDMA_CREATE_CQ_MIN_REQ_LEN || + udata->outlen < IRDMA_CREATE_CQ_MIN_RESP_LEN)) + return -EINVAL; + err_code = irdma_alloc_rsrc(rf, rf->allocated_cqs, rf->max_cq, &cq_num, &rf->next_cq); if (err_code) @@ -2743,6 +2788,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, u64 virt, int access, struct ib_udata *udata) { +#define IRDMA_MEM_REG_MIN_REQ_LEN offsetofend(struct irdma_mem_reg_req, sq_pages) struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_ucontext *ucontext; struct irdma_pble_alloc *palloc; @@ -2760,6 +2806,9 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, if (len > iwdev->rf->sc_dev.hw_attrs.max_mr_size) return ERR_PTR(-EINVAL); + if (udata->inlen < IRDMA_MEM_REG_MIN_REQ_LEN) + return ERR_PTR(-EINVAL); + region = ib_umem_get(pd->device, start, len, access); if (IS_ERR(region)) { @@ -4291,12 +4340,16 @@ static int irdma_create_user_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr, struct ib_udata *udata) { +#define IRDMA_CREATE_AH_MIN_RESP_LEN offsetofend(struct irdma_create_ah_resp, rsvd) struct irdma_ah *ah = container_of(ibah, 
struct irdma_ah, ibah); struct irdma_device *iwdev = to_iwdev(ibah->pd->device); struct irdma_create_ah_resp uresp; struct irdma_ah *parent_ah; int err; + if (udata && udata->outlen < IRDMA_CREATE_AH_MIN_RESP_LEN) + return -EINVAL; + err = irdma_setup_ah(ibah, attr); if (err) return err; -- cgit v1.2.3 From 67d8f59bdcc2a34bcae2becb6e2fdd81ec18990f Mon Sep 17 00:00:00 2001 From: wangjianli Date: Thu, 8 Sep 2022 21:18:24 +0800 Subject: RDMA/hfi1: fix repeated words in comments Delete the redundant word 'to'. Signed-off-by: wangjianli Link: https://lore.kernel.org/r/20220908131824.41106-1-wangjianli@cdjrlc.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/chip.c | 2 +- drivers/infiniband/hw/hfi1/firmware.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index f1245c94ae26..ebe970f76232 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8753,7 +8753,7 @@ static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, /* * When writing a LCB CSR, out_data contains the full value to - * to be written, while in_data contains the relative LCB + * be written, while in_data contains the relative LCB * address in 7:0. Do the work here, rather than the caller, * of distrubting the write data to where it needs to go: * diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index aa15a5cc7cf3..1d77514ebbee 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1114,7 +1114,7 @@ static void turn_off_spicos(struct hfi1_devdata *dd, int flags) * Reset all of the fabric serdes for this HFI in preparation to take the * link to Polling. * - * To do a reset, we need to write to to the serdes registers. Unfortunately, + * To do a reset, we need to write to the serdes registers. 
Unfortunately, * the fabric serdes download to the other HFI on the ASIC will have turned * off the firmware validation on this HFI. This means we can't write to the * registers to reset the serdes. Work around this by performing a complete -- cgit v1.2.3 From 7eff36527195cf434dc8f9ddc7bedc0254d0d835 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Thu, 8 Sep 2022 21:20:36 +0800 Subject: RDMA/qib: fix repeated words in comments Delete the redundant word 'to'. Signed-off-by: wangjianli Link: https://lore.kernel.org/r/20220908132036.42355-1-wangjianli@cdjrlc.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/qib/qib_pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index cb2a02d671e2..692b64efad97 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -295,7 +295,7 @@ void qib_free_irq(struct qib_devdata *dd) * Setup pcie interrupt stuff again after a reset. I'd like to just call * pci_enable_msi() again for msi, but when I do that, * the MSI enable bit doesn't get set in the command word, and - * we switch to to a different interrupt vector, which is confusing, + * we switch to a different interrupt vector, which is confusing, * so I instead just do it all inline. Perhaps somehow can tie this * into the PCIe hotplug support at some point */ -- cgit v1.2.3 From 6dbe4a8dead84de474483910b02ec9e6a10fc1a9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 8 Sep 2022 16:31:39 -0700 Subject: RDMA/srp: Fix srp_abort() Fix the code for converting a SCSI command pointer into an SRP request pointer. 
Cc: Xiao Yang Fixes: ad215aaea4f9 ("RDMA/srp: Make struct scsi_cmnd and struct srp_request adjacent") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220908233139.3042628-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1e777b2043d6..9d593445d436 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2788,7 +2788,7 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun, static int srp_abort(struct scsi_cmnd *scmnd) { struct srp_target_port *target = host_to_target(scmnd->device->host); - struct srp_request *req = (struct srp_request *) scmnd->host_scribble; + struct srp_request *req = scsi_cmd_priv(scmnd); u32 tag; u16 ch_idx; struct srp_rdma_ch *ch; @@ -2796,8 +2796,6 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req) - return SUCCESS; tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmnd)); ch_idx = blk_mq_unique_tag_to_hwq(tag); if (WARN_ON_ONCE(ch_idx >= target->ch_count)) -- cgit v1.2.3 From 95f911d94995861311d78c77acb91af1ad6b8cc5 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 9 Sep 2022 17:38:19 +0800 Subject: RDMA/erdma: Eliminate unnecessary casting for erdma_post_cmd_wait erdma_post_cmd_wait does not use the 'u64 *req' input parameter directly. So it is better to define it as 'void *req', which lets us eliminate the casts when calling erdma_post_cmd_wait.
Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20220909093822.33868-2-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma.h | 2 +- drivers/infiniband/hw/erdma/erdma_cmdq.c | 2 +- drivers/infiniband/hw/erdma/erdma_eq.c | 7 ++----- drivers/infiniband/hw/erdma/erdma_qp.c | 6 ++---- drivers/infiniband/hw/erdma/erdma_verbs.c | 17 ++++++----------- 5 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 2aae635c1c8d..07bcd688fdb7 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -269,7 +269,7 @@ void erdma_finish_cmdq_init(struct erdma_dev *dev); void erdma_cmdq_destroy(struct erdma_dev *dev); void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op); -int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size, +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1); void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq); diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index 57da0c670472..c8f93dc11449 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -441,7 +441,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op); } -int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size, +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1) { struct erdma_comp_wait *comp_wait; diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 8f2d094e0227..09ddedb5c1b5 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -229,9 +229,7 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.db_dma_addr_l = 
lower_32_bits(db_info_dma_addr); req.db_dma_addr_h = upper_32_bits(db_info_dma_addr); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, - sizeof(struct erdma_cmdq_create_eq_req), - NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) @@ -281,8 +279,7 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) req.qtype = ERDMA_EQ_TYPE_CEQ; req.vector_idx = ceqn + 1; - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) return; diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 72f08171a28a..5d5827fd959f 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -105,8 +105,7 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len; req.recv_nxt = tp->rcv_nxt; - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, @@ -124,8 +123,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index a7a3d42e2016..32fe418843a6 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -102,7 +102,7 @@ static int create_qp_cmd(struct erdma_dev 
*dev, struct erdma_qp *qp) req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; } - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), &resp0, + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1); if (!err) qp->attrs.cookie = @@ -151,8 +151,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) } post_cmd: - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq) @@ -202,8 +201,7 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq) req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; } - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_alloc_idx(struct erdma_resource_cb *res_cb) @@ -976,8 +974,7 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); - ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (ret) return ret; @@ -1002,8 +999,7 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) CMDQ_OPCODE_DESTROY_CQ); req.cqn = cq->cqn; - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) return err; @@ -1040,8 +1036,7 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) return err; -- cgit v1.2.3 From 93aea72cc53c87de3bae3fe554f9836d8b4a0386 
Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 9 Sep 2022 17:38:20 +0800 Subject: RDMA/erdma: Remove redundant includes Many of erdma's includes are redundant, because they are already included indirectly by kernel headers or custom headers. So we remove all the unnecessary direct-includes. Besides, add linux/pci.h to erdma.h because it's also used in the file. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20220909093822.33868-3-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma.h | 1 + drivers/infiniband/hw/erdma/erdma_cm.c | 8 -------- drivers/infiniband/hw/erdma/erdma_cmdq.c | 6 ------ drivers/infiniband/hw/erdma/erdma_cq.c | 3 --- drivers/infiniband/hw/erdma/erdma_eq.c | 6 ------ drivers/infiniband/hw/erdma/erdma_main.c | 9 --------- drivers/infiniband/hw/erdma/erdma_qp.c | 9 --------- drivers/infiniband/hw/erdma/erdma_verbs.c | 7 ------- drivers/infiniband/hw/erdma/erdma_verbs.h | 8 -------- 9 files changed, 1 insertion(+), 56 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 07bcd688fdb7..cc5e4eb3a21e 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index f13f16479eca..74f6348f240a 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -10,15 +10,7 @@ /* Copyright (c) 2008-2019, IBM Corporation */ /* Copyright (c) 2017, Open Grid Computing, Inc. 
*/ -#include -#include -#include -#include #include -#include - -#include -#include #include "erdma.h" #include "erdma_cm.h" diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index c8f93dc11449..6ebfa6989b11 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -4,13 +4,7 @@ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ -#include -#include -#include - #include "erdma.h" -#include "erdma_hw.h" -#include "erdma_verbs.h" static void arm_cmdq_cq(struct erdma_cmdq *cmdq) { diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index 751c7f9f0de7..2f7390de35d7 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -4,9 +4,6 @@ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ -#include - -#include "erdma_hw.h" #include "erdma_verbs.h" static void *get_next_valid_cqe(struct erdma_cq *cq) diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 09ddedb5c1b5..ed54130d924b 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -4,12 +4,6 @@ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ -#include -#include -#include - -#include "erdma.h" -#include "erdma_hw.h" #include "erdma_verbs.h" #define MAX_POLL_CHUNK_SIZE 16 diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 07e743d24847..6d3e02ba9e77 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -4,21 +4,12 @@ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ -#include -#include -#include -#include #include -#include -#include #include #include -#include -#include #include "erdma.h" #include "erdma_cm.h" -#include "erdma_hw.h" #include "erdma_verbs.h" MODULE_AUTHOR("Cheng Xu "); diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 5d5827fd959f..9f12d683150a 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -6,15 +6,6 @@ /* Authors: Bernard Metzler */ /* Copyright (c) 2008-2019, IBM Corporation */ -#include -#include -#include -#include - -#include -#include - -#include "erdma.h" #include "erdma_cm.h" #include "erdma_verbs.h" diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 32fe418843a6..c99e296a3e05 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -9,21 +9,14 @@ /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */ -#include -#include -#include -#include #include #include #include #include -#include -#include #include #include "erdma.h" #include "erdma_cm.h" -#include "erdma_hw.h" #include "erdma_verbs.h" static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp) diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index c7baddb1f292..fe93e1ac9674 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -7,15 +7,7 @@ #ifndef __ERDMA_VERBS_H__ #define __ERDMA_VERBS_H__ -#include - -#include -#include -#include - #include "erdma.h" -#include "erdma_cm.h" -#include "erdma_hw.h" /* RDMA Capability. 
*/ #define ERDMA_MAX_PD (128 * 1024) -- cgit v1.2.3 From 13f42e5166bc73786d21b5fae13ff89e67dcbe8b Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 9 Sep 2022 17:38:21 +0800 Subject: RDMA/erdma: Make hardware internal opcodes invisible to driver Some opcodes are used in hardware internally, and driver does not care about them. So, we change them to reserved opcodes in driver. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20220909093822.33868-4-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_cq.c | 1 - drivers/infiniband/hw/erdma/erdma_hw.h | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index 2f7390de35d7..58e0dc5c75d1 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -59,7 +59,6 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM, [ERDMA_OP_RECV_INV] = IB_WC_RECV, [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [ERDMA_OP_INVALIDATE] = IB_WC_LOCAL_INV, [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV, [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND, [ERDMA_OP_REG_MR] = IB_WC_REG_MR, diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index b210c49c669f..3004cf3ac481 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -450,13 +450,13 @@ enum erdma_opcode { ERDMA_OP_RECV_IMM = 5, ERDMA_OP_RECV_INV = 6, - ERDMA_OP_REQ_ERR = 7, - ERDMA_OP_READ_RESPONSE = 8, + ERDMA_OP_RSVD0 = 7, + ERDMA_OP_RSVD1 = 8, ERDMA_OP_WRITE_WITH_IMM = 9, - ERDMA_OP_RECV_ERR = 10, + ERDMA_OP_RSVD2 = 10, + ERDMA_OP_RSVD3 = 11, - ERDMA_OP_INVALIDATE = 11, ERDMA_OP_RSP_SEND_IMM = 12, ERDMA_OP_SEND_WITH_INV = 13, -- cgit v1.2.3 From 4b46a6079d2f8a9aa23c96227dfdb8692ac10421 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 9 Sep 2022 10:29:43 +0800 
Subject: RDMA/srpt: Use flex array destination for memcpy() In preparation for FORTIFY_SOURCE performing run-time destination buffer bounds checking for memcpy(), specify the destination output buffer explicitly, instead of asking memcpy() to write past the end of what looked like a fixed-size object. Notice that srp_rsp[] is a pointer to a structure that contains flexible-array member data[]: struct srp_rsp { ... __be32 sense_data_len; __be32 resp_data_len; u8 data[]; }; link: https://github.com/KSPP/linux/issues/201 Signed-off-by: Hangyu Hua Link: https://lore.kernel.org/r/20220909022943.8896-1-hbh25y@gmail.com Reviewed-by: Bart Van Assche Reviewed-by: Gustavo A. R. Silva Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 9450c609bf3b..3c3fae738c3e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1421,7 +1421,7 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID; srp_rsp->sense_data_len = cpu_to_be32(sense_data_len); - memcpy(srp_rsp + 1, sense_data, sense_data_len); + memcpy(srp_rsp->data, sense_data, sense_data_len); } return sizeof(*srp_rsp) + sense_data_len; -- cgit v1.2.3 From 0227f4d0d15433c34f5dca68817c0d12ca244feb Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Sun, 11 Sep 2022 17:23:25 +0800 Subject: IB/hfi1: remove rc_only_opcode and uc_only_opcode declarations rc_only_opcode and uc_only_opcode have been removed since commit b374e060cc2a ("IB/hfi1: Consolidate pio control masks into single definition"), so remove them. 
Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20220911092325.3216513-1-cuigaosheng1@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hfi1/verbs.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 38565532d654..7f30f32b34dc 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -391,9 +391,6 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait); int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send); -extern const u32 rc_only_opcode; -extern const u32 uc_only_opcode; - int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, -- cgit v1.2.3 From 754209850df8367c954ac1de7671c7430b1f342c Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 20 Sep 2022 10:12:02 +0200 Subject: RDMA/siw: Always consume all skbuf data in sk_data_ready() upcall. For header and trailer/padding processing, siw did not consume new skb data until minimum amount present to fill current header or trailer structure, including potential payload padding. Not consuming any data during upcall may cause a receive stall, since tcp_read_sock() is not upcalling again if no new data arrive. A NFSoRDMA client got stuck at RDMA Write reception of unaligned payload, if the current skb did contain only the expected 3 padding bytes, but not the 4 bytes CRC trailer. Expecting 4 more bytes already arrived in another skb, and not consuming those 3 bytes in the current upcall left the Write incomplete, waiting for the CRC forever. 
Fixes: 8b6a361b8c48 ("rdma/siw: receive path") Reported-by: Olga Kornievskaia Tested-by: Olga Kornievskaia Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20220920081202.223629-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_qp_rx.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 875ea6f1b04a..fd721cc19682 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -961,27 +961,28 @@ out: static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx) { struct sk_buff *skb = srx->skb; + int avail = min(srx->skb_new, srx->fpdu_part_rem); u8 *tbuf = (u8 *)&srx->trailer.crc - srx->pad; __wsum crc_in, crc_own = 0; siw_dbg_qp(qp, "expected %d, available %d, pad %u\n", srx->fpdu_part_rem, srx->skb_new, srx->pad); - if (srx->skb_new < srx->fpdu_part_rem) - return -EAGAIN; - - skb_copy_bits(skb, srx->skb_offset, tbuf, srx->fpdu_part_rem); + skb_copy_bits(skb, srx->skb_offset, tbuf, avail); - if (srx->mpa_crc_hd && srx->pad) - crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); + srx->skb_new -= avail; + srx->skb_offset += avail; + srx->skb_copied += avail; + srx->fpdu_part_rem -= avail; - srx->skb_new -= srx->fpdu_part_rem; - srx->skb_offset += srx->fpdu_part_rem; - srx->skb_copied += srx->fpdu_part_rem; + if (srx->fpdu_part_rem) + return -EAGAIN; if (!srx->mpa_crc_hd) return 0; + if (srx->pad) + crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad); /* * CRC32 is computed, transmitted and received directly in NBO, * so there's never a reason to convert byte order. @@ -1083,10 +1084,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) * completely received. 
*/ if (iwarp_pktinfo[opcode].hdr_len > sizeof(struct iwarp_ctrl_tagged)) { - bytes = iwarp_pktinfo[opcode].hdr_len - MIN_DDP_HDR; + int hdrlen = iwarp_pktinfo[opcode].hdr_len; - if (srx->skb_new < bytes) - return -EAGAIN; + bytes = min_t(int, hdrlen - MIN_DDP_HDR, srx->skb_new); skb_copy_bits(skb, srx->skb_offset, (char *)c_hdr + srx->fpdu_part_rcvd, bytes); @@ -1096,6 +1096,9 @@ static int siw_get_hdr(struct siw_rx_stream *srx) srx->skb_new -= bytes; srx->skb_offset += bytes; srx->skb_copied += bytes; + + if (srx->fpdu_part_rcvd < hdrlen) + return -EAGAIN; } /* -- cgit v1.2.3 From a3c278807a459e6f50afee6971cabe74cccfb490 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 20 Sep 2022 10:25:03 +0200 Subject: RDMA/siw: Fix QP destroy to wait for all references dropped. Delay QP destroy completion until all siw references to QP are dropped. The calling RDMA core will free QP structure after successful return from siw_destroy_qp() call, so siw must not hold any remaining reference to the QP upon return. A use-after-free was encountered in xfstest generic/460, while testing NFSoRDMA. Here, after a TCP connection drop by peer, the triggered siw_cm_work_handler got delayed until after QP destroy call, referencing a QP which had already been freed.
Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Reported-by: Olga Kornievskaia Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20220920082503.224189-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw.h | 1 + drivers/infiniband/sw/siw/siw_qp.c | 2 +- drivers/infiniband/sw/siw/siw_verbs.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index df03d84c6868..2f3a9cda3850 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -418,6 +418,7 @@ struct siw_qp { struct ib_qp base_qp; struct siw_device *sdev; struct kref ref; + struct completion qp_free; struct list_head devq; int tx_cpu; struct siw_qp_attrs attrs; diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 7e01f2438afc..e6f634971228 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -1342,6 +1342,6 @@ void siw_free_qp(struct kref *ref) vfree(qp->orq); siw_put_tx_cpu(qp->tx_cpu); - + complete(&qp->qp_free); atomic_dec(&sdev->num_qp); } diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 8dedae7ae79e..3e814cfb298c 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -480,6 +480,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, list_add_tail(&qp->devq, &sdev->qp_list); spin_unlock_irqrestore(&sdev->lock, flags); + init_completion(&qp->qp_free); + return 0; err_out_xa: @@ -624,6 +626,7 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata) qp->scq = qp->rcq = NULL; siw_qp_put(qp); + wait_for_completion(&qp->qp_free); return 0; } -- cgit v1.2.3 From 9bdb9350f3808bbff229167acb55cf0a3bd8f2ca Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 9 Sep 2022 17:38:22 +0800 Subject: RDMA/erdma: Support dynamic mtu Hardware now support jumbo frame for 
RDMA. So we introduce a new CMDQ message to support mtu change notification. Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20220909093822.33868-5-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma.h | 1 + drivers/infiniband/hw/erdma/erdma_hw.h | 6 ++++++ drivers/infiniband/hw/erdma/erdma_main.c | 8 +++++++- drivers/infiniband/hw/erdma/erdma_verbs.c | 11 +++++++++++ drivers/infiniband/hw/erdma/erdma_verbs.h | 1 + 5 files changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index cc5e4eb3a21e..730783fbc894 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -197,6 +197,7 @@ struct erdma_dev { struct erdma_devattr attrs; /* physical port state (only one port per device) */ enum ib_port_state state; + u32 mtu; /* cmdq and aeq use the same msix vector */ struct erdma_irq comm_irq; diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 3004cf3ac481..e788887732e1 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -153,6 +153,7 @@ enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_CREATE_EQ = 0, CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, + CMDQ_OPCODE_CONF_MTU = 3, }; /* cmdq-SQE HDR */ @@ -190,6 +191,11 @@ struct erdma_cmdq_destroy_eq_req { u8 qtype; }; +struct erdma_cmdq_config_mtu_req { + u64 hdr; + u32 mtu; +}; + /* create_cq cfg0 */ #define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) #define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20) diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 6d3e02ba9e77..49778bb294ae 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -34,10 +34,15 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, dev->state = IB_PORT_DOWN; 
erdma_port_event(dev, IB_EVENT_PORT_ERR); break; + case NETDEV_CHANGEMTU: + if (dev->mtu != netdev->mtu) { + erdma_set_mtu(dev, netdev->mtu); + dev->mtu = netdev->mtu; + } + break; case NETDEV_REGISTER: case NETDEV_UNREGISTER: case NETDEV_CHANGEADDR: - case NETDEV_CHANGEMTU: case NETDEV_GOING_DOWN: case NETDEV_CHANGE: default: @@ -95,6 +100,7 @@ static int erdma_device_register(struct erdma_dev *dev) if (ret) return ret; + dev->mtu = dev->netdev->mtu; addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); ret = ib_register_device(ibdev, "erdma_%d", &dev->pdev->dev); diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index c99e296a3e05..3d7966617588 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -1436,6 +1436,17 @@ err_out_xa: return ret; } +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) +{ + struct erdma_cmdq_config_mtu_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_CONF_MTU); + req.mtu = mtu; + + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) { struct ib_event event; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index fe93e1ac9674..ab6380635e9e 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -330,5 +330,6 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); #endif -- cgit v1.2.3 From bf9a9928510a03e445fa4f54bdc0b8e71f4c0067 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:00 +0300 Subject: RDMA/core: Rename rdma_route.num_paths field 
to num_pri_alt_paths This field means the total number of primary and alternate paths, i.e.: 0 - No primary nor alternate path is available; 1 - Only primary path is available; 2 - Both primary and alternate path are available. Rename it to avoid confusion, since with follow-up patches the primary path will support multiple path records. Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/cbe424de63a56207870d70c5edce7c68e45f429e.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 18 +++++++++--------- drivers/infiniband/core/ucma.c | 10 +++++----- include/rdma/rdma_cm.h | 7 ++++++- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 46d06678dfbe..91e72a76d95e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2241,14 +2241,14 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, goto err; rt = &id->route; - rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; - rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec), - GFP_KERNEL); + rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ?
2 : 1; + rt->path_rec = kmalloc_array(rt->num_pri_alt_paths, + sizeof(*rt->path_rec), GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *path; - if (rt->num_paths == 2) + if (rt->num_pri_alt_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { @@ -2826,7 +2826,7 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec, route = &work->id->id.route; if (!status) { - route->num_paths = 1; + route->num_pri_alt_paths = 1; *route->path_rec = *path_rec; } else { work->old_state = RDMA_CM_ROUTE_QUERY; @@ -3081,7 +3081,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id, dev_put(ndev); } - id->route.num_paths = 1; + id->route.num_pri_alt_paths = 1; return 0; err_free: @@ -3214,7 +3214,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err1; } - route->num_paths = 1; + route->num_pri_alt_paths = 1; ndev = cma_iboe_set_path_rec_l2_fields(id_priv); if (!ndev) { @@ -3274,7 +3274,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; - route->num_paths = 0; + route->num_pri_alt_paths = 0; err1: kfree(work); return ret; @@ -4265,7 +4265,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, } req.primary_path = &route->path_rec[0]; - if (route->num_paths == 2) + if (route->num_pri_alt_paths == 2) req.alternate_path = &route->path_rec[1]; req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 9d6ac9dff39a..bf42650f125b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -754,8 +754,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, { struct rdma_dev_addr *dev_addr; - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: dev_addr = &route->addr.dev_addr; 
rdma_addr_get_dgid(dev_addr, @@ -781,8 +781,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); @@ -921,7 +921,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, if (!resp) return -ENOMEM; - resp->num_paths = ctx->cm_id->route.num_paths; + resp->num_paths = ctx->cm_id->route.num_pri_alt_paths; for (i = 0, out_len -= sizeof(*resp); i < resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 5b18e2e36ee6..81916039ee24 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -52,7 +52,12 @@ struct rdma_addr { struct rdma_route { struct rdma_addr addr; struct sa_path_rec *path_rec; - int num_paths; + /* + * 0 - No primary nor alternate path is available + * 1 - Only primary path is available + * 2 - Both primary and alternate path are available + */ + int num_pri_alt_paths; }; struct rdma_conn_param { -- cgit v1.2.3 From 5a3749493394276449cfc4efb417ed267edbd480 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:01 +0300 Subject: RDMA/cma: Multiple path records support with netlink channel Support receiving inbound and outbound IB path records (along with GMP PathRecord) from user-space service through the RDMA netlink channel. The LIDs in these 3 PRs can be used in this way: 1. GMP PR: used as the standard local/remote LIDs; 2. DLID of outbound PR: Used as the "dlid" field for outbound traffic; 3. DLID of inbound PR: Used as the "dlid" field for outbound traffic in responder side. This is aimed to support adaptive routing. 
With current IB routing solution when a packet goes out it's assigned with a fixed DLID per target, meaning a fixed router will be used. The LIDs in inbound/outbound path records can be used to identify group of routers that allow communication with another subnet's entity. With them packets from an inter-subnet connection may travel through any router in the set to reach the target. As confirmed with Jason, when sending a netlink request, kernel uses LS_RESOLVE_PATH_USE_ALL so that the service knows kernel supports multiple PRs. Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/2fa2b6c93c4c16c8915bac3cfc4f27be1d60519d.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 70 +++++++-- drivers/infiniband/core/sa_query.c | 235 ++++++++++++++++++++---------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 3 +- include/rdma/rdma_cm.h | 6 + 6 files changed, 231 insertions(+), 87 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 91e72a76d95e..a3efc462305d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2026,6 +2026,8 @@ static void _destroy_id(struct rdma_id_private *id_priv, cma_id_put(id_priv->id.context); kfree(id_priv->id.route.path_rec); + kfree(id_priv->id.route.path_rec_inbound); + kfree(id_priv->id.route.path_rec_outbound); put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); @@ -2817,26 +2819,72 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer) } EXPORT_SYMBOL(rdma_set_min_rnr_timer); +static void route_set_path_rec_inbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_inbound) { + route->path_rec_inbound = + kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL); + if (!route->path_rec_inbound) + return; + } + + 
*route->path_rec_inbound = *path_rec; +} + +static void route_set_path_rec_outbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_outbound) { + route->path_rec_outbound = + kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL); + if (!route->path_rec_outbound) + return; + } + + *route->path_rec_outbound = *path_rec; +} + static void cma_query_handler(int status, struct sa_path_rec *path_rec, - void *context) + int num_prs, void *context) { struct cma_work *work = context; struct rdma_route *route; + int i; route = &work->id->id.route; - if (!status) { - route->num_pri_alt_paths = 1; - *route->path_rec = *path_rec; - } else { - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; - work->event.status = status; - pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", - status); + if (status) + goto fail; + + for (i = 0; i < num_prs; i++) { + if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP)) + *route->path_rec = path_rec[i]; + else if (path_rec[i].flags & IB_PATH_INBOUND) + route_set_path_rec_inbound(work, &path_rec[i]); + else if (path_rec[i].flags & IB_PATH_OUTBOUND) + route_set_path_rec_outbound(work, &path_rec[i]); + } + if (!route->path_rec) { + status = -EINVAL; + goto fail; } + route->num_pri_alt_paths = 1; + queue_work(cma_wq, &work->work); + return; + +fail: + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; + pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. 
status %d\n", + status); queue_work(cma_wq, &work->work); } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 003e504feca2..0de83d9a4985 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "sa.h" #include "core_priv.h" @@ -104,7 +105,8 @@ struct ib_sa_device { }; struct ib_sa_query { - void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); + void (*callback)(struct ib_sa_query *sa_query, int status, + int num_prs, struct ib_sa_mad *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; @@ -116,6 +118,12 @@ struct ib_sa_query { u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ + + /* A separate buffer to save pathrecords of a response, as in cases + * like IB/netlink, mulptiple pathrecords are supported, so that + * mad->data is not large enough to hold them + */ + void *resp_pr_data; }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 @@ -123,7 +131,8 @@ struct ib_sa_query { #define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_path_query { - void (*callback)(int, struct sa_path_rec *, void *); + void (*callback)(int status, struct sa_path_rec *rec, + int num_paths, void *context); void *context; struct ib_sa_query sa_query; struct sa_path_rec *conv_pr; @@ -712,7 +721,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && sa_rec->reversible != 0) - query->path_use = LS_RESOLVE_PATH_USE_GMP; + query->path_use = LS_RESOLVE_PATH_USE_ALL; else query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL; header->path_use = query->path_use; @@ -865,50 +874,81 @@ static void send_handler(struct ib_mad_agent *agent, static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, const struct nlmsghdr *nlh) { + struct ib_path_rec_data *srec, 
*drec; + struct ib_sa_path_query *path_query; struct ib_mad_send_wc mad_send_wc; - struct ib_sa_mad *mad = NULL; const struct nlattr *head, *curr; - struct ib_path_rec_data *rec; - int len, rem; + struct ib_sa_mad *mad = NULL; + int len, rem, num_prs = 0; u32 mask = 0; int status = -EIO; - if (query->callback) { - head = (const struct nlattr *) nlmsg_data(nlh); - len = nlmsg_len(nlh); - switch (query->path_use) { - case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: - mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; - break; + if (!query->callback) + goto out; - case LS_RESOLVE_PATH_USE_ALL: - case LS_RESOLVE_PATH_USE_GMP: - default: - mask = IB_PATH_PRIMARY | IB_PATH_GMP | - IB_PATH_BIDIRECTIONAL; - break; + path_query = container_of(query, struct ib_sa_path_query, sa_query); + mad = query->mad_buf->mad; + if (!path_query->conv_pr && + (be16_to_cpu(mad->mad_hdr.attr_id) == IB_SA_ATTR_PATH_REC)) { + /* Need a larger buffer for possible multiple PRs */ + query->resp_pr_data = kvcalloc(RDMA_PRIMARY_PATH_MAX_REC_NUM, + sizeof(*drec), GFP_KERNEL); + if (!query->resp_pr_data) { + query->callback(query, -ENOMEM, 0, NULL); + return; } - nla_for_each_attr(curr, head, len, rem) { - if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) { - rec = nla_data(curr); - /* - * Get the first one. In the future, we may - * need to get up to 6 pathrecords. 
- */ - if ((rec->flags & mask) == mask) { - mad = query->mad_buf->mad; - mad->mad_hdr.method |= - IB_MGMT_METHOD_RESP; - memcpy(mad->data, rec->path_rec, - sizeof(rec->path_rec)); - status = 0; - break; - } - } + } + + head = (const struct nlattr *) nlmsg_data(nlh); + len = nlmsg_len(nlh); + switch (query->path_use) { + case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: + mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; + break; + + case LS_RESOLVE_PATH_USE_ALL: + mask = IB_PATH_PRIMARY; + break; + + case LS_RESOLVE_PATH_USE_GMP: + default: + mask = IB_PATH_PRIMARY | IB_PATH_GMP | + IB_PATH_BIDIRECTIONAL; + break; + } + + drec = (struct ib_path_rec_data *)query->resp_pr_data; + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD) + continue; + + srec = nla_data(curr); + if ((srec->flags & mask) != mask) + continue; + + status = 0; + if (!drec) { + memcpy(mad->data, srec->path_rec, + sizeof(srec->path_rec)); + num_prs = 1; + break; } - query->callback(query, status, mad); + + memcpy(drec, srec, sizeof(*drec)); + drec++; + num_prs++; + if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM) + break; } + if (!status) + mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; + + query->callback(query, status, num_prs, mad); + kvfree(query->resp_pr_data); + query->resp_pr_data = NULL; + +out: mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_SUCCESS; send_handler(query->mad_buf->mad_agent, &mad_send_wc); @@ -1411,41 +1451,90 @@ static int opa_pr_query_possible(struct ib_sa_client *client, return PR_IB_SUPPORTED; } +static void ib_sa_pr_callback_single(struct ib_sa_path_query *query, + int status, struct ib_sa_mad *mad) +{ + struct sa_path_rec rec = {}; + + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(&rec); + + if (query->conv_pr) { + struct sa_path_rec opa; + + memset(&opa, 0, sizeof(struct sa_path_rec)); + sa_convert_path_ib_to_opa(&opa, &rec); + 
query->callback(status, &opa, 1, query->context); + } else { + query->callback(status, &rec, 1, query->context); + } +} + +/** + * ib_sa_pr_callback_multiple() - Parse path records then do callback. + * + * In a multiple-PR case the PRs are saved in "query->resp_pr_data" + * (instead of"mad->data") and with "ib_path_rec_data" structure format, + * so that rec->flags can be set to indicate the type of PR. + * This is valid only in IB fabric. + */ +static void ib_sa_pr_callback_multiple(struct ib_sa_path_query *query, + int status, int num_prs, + struct ib_path_rec_data *rec_data) +{ + struct sa_path_rec *rec; + int i; + + rec = kvcalloc(num_prs, sizeof(*rec), GFP_KERNEL); + if (!rec) { + query->callback(-ENOMEM, NULL, 0, query->context); + return; + } + + for (i = 0; i < num_prs; i++) { + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + rec_data[i].path_rec, rec + i); + rec[i].rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(rec + i); + rec[i].flags = rec_data[i].flags; + } + + query->callback(status, rec, num_prs, query->context); + kvfree(rec); +} + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); + struct sa_path_rec rec; - if (mad) { - struct sa_path_rec rec; - - if (sa_query->flags & IB_SA_QUERY_OPA) { - ib_unpack(opa_path_rec_table, - ARRAY_SIZE(opa_path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_OPA; - query->callback(status, &rec, query->context); - } else { - ib_unpack(path_rec_table, - ARRAY_SIZE(path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_dmac_zero(&rec); - - if (query->conv_pr) { - struct sa_path_rec opa; + if (!mad || !num_prs) { + query->callback(status, NULL, 0, query->context); + return; + } - memset(&opa, 0, sizeof(struct sa_path_rec)); - sa_convert_path_ib_to_opa(&opa, &rec); - 
query->callback(status, &opa, query->context); - } else { - query->callback(status, &rec, query->context); - } + if (sa_query->flags & IB_SA_QUERY_OPA) { + if (num_prs != 1) { + query->callback(-EINVAL, NULL, 0, query->context); + return; } - } else - query->callback(status, NULL, query->context); + + ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_OPA; + query->callback(status, &rec, num_prs, query->context); + } else { + if (!sa_query->resp_pr_data) + ib_sa_pr_callback_single(query, status, mad); + else + ib_sa_pr_callback_multiple(query, status, num_prs, + sa_query->resp_pr_data); + } } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) @@ -1489,7 +1578,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + int num_paths, void *context), void *context, struct ib_sa_query **sa_query) { @@ -1588,7 +1677,7 @@ err1: EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = @@ -1680,7 +1769,7 @@ err1: /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_paths, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = @@ -1790,7 +1879,7 @@ static void ib_classportinfo_cb(void *context) } static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { unsigned long flags; @@ -1966,13 +2055,13 @@ static void send_handler(struct ib_mad_agent *agent, /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: - query->callback(query, -ETIMEDOUT, NULL); + query->callback(query, -ETIMEDOUT, 0, NULL); break; case IB_WC_WR_FLUSH_ERR: - 
query->callback(query, -EINTR, NULL); + query->callback(query, -EINTR, 0, NULL); break; default: - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); break; } @@ -2000,10 +2089,10 @@ static void recv_handler(struct ib_mad_agent *mad_agent, if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? - -EINVAL : 0, + -EINVAL : 0, 1, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); } ib_free_recv_mad(mad_recv_wc); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a4904371e2db..ac25fc80fb33 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -742,7 +742,7 @@ void ipoib_flush_paths(struct net_device *dev) static void path_rec_completion(int status, struct sa_path_rec *pathrec, - void *path_ptr) + int num_prs, void *path_ptr) { struct ipoib_path *path = path_ptr; struct net_device *dev = path->dev; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9d593445d436..d01102db4fd4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -699,7 +699,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, static void srp_path_rec_completion(int status, struct sa_path_rec *pathrec, - void *ch_ptr) + int num_paths, void *ch_ptr) { struct srp_rdma_ch *ch = ch_ptr; struct srp_target_port *target = ch->target; diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 3634d4cc7a56..e930bec33b31 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -186,6 +186,7 @@ struct sa_path_rec { struct sa_path_rec_opa opa; }; enum sa_path_rec_type rec_type; + u32 flags; }; static inline enum ib_gid_type @@ -413,7 +414,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, ib_sa_comp_mask comp_mask, 
unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + int num_prs, void *context), void *context, struct ib_sa_query **query); struct ib_sa_multicast { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 81916039ee24..cdc7cafab572 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -49,9 +49,15 @@ struct rdma_addr { struct rdma_dev_addr dev_addr; }; +#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3 struct rdma_route { struct rdma_addr addr; struct sa_path_rec *path_rec; + + /* Optional path records of primary path */ + struct sa_path_rec *path_rec_inbound; + struct sa_path_rec *path_rec_outbound; + /* * 0 - No primary nor alternate path is available * 1 - Only primary path is available -- cgit v1.2.3 From b7d95040c13f61a4a6a859c5355faf583eff9658 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:02 +0300 Subject: RDMA/cm: Use SLID in the work completion as the DLID in responder side The responder should always use WC's SLID as the dlid, to follow the IB SPEC section "13.5.4.2 COMMON RESPONSE ACTIONS": A responder always takes the following actions in constructing a response packet: - The SLID of the received packet is used as the DLID in the response packet. 
Fixes: ac3a949fb2ff ("IB/CM: Set appropriate slid and dlid when handling CM request") Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/cd17c240231e059d2fc07c17dfe555d548b917eb.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index d7410ee2ade7..ade82752f9f7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1614,14 +1614,13 @@ static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num, static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { u32 lid; if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) { - sa_path_set_dlid(primary_path, - IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, - req_msg)); + sa_path_set_dlid(primary_path, wc->slid); sa_path_set_slid(primary_path, IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg)); @@ -1658,7 +1657,8 @@ static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg, static void cm_format_paths_from_req(struct cm_req_msg *req_msg, struct sa_path_rec *primary_path, - struct sa_path_rec *alt_path) + struct sa_path_rec *alt_path, + struct ib_wc *wc) { primary_path->dgid = *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg); @@ -1716,7 +1716,7 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg, if (sa_path_is_roce(alt_path)) alt_path->roce.route_resolved = false; } - cm_format_path_lid_from_req(req_msg, primary_path, alt_path); + cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc); } static u16 cm_get_bth_pkey(struct cm_work *work) @@ -2129,7 +2129,7 @@ static int cm_req_handler(struct cm_work *work) if (cm_req_has_alt_path(req_msg)) work->path[1].rec_type = work->path[0].rec_type; 
cm_format_paths_from_req(req_msg, &work->path[0], - &work->path[1]); + &work->path[1], work->mad_recv_wc->wc); if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) sa_path_set_dmac(&work->path[0], cm_id_priv->av.ah_attr.roce.dmac); -- cgit v1.2.3 From eb8336dbe373edd1ad6061c543e4ba6ea60f6cc9 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:03 +0300 Subject: RDMA/cm: Use DLID from inbound/outbound PathRecords as the datapath DLID In inter-subnet cases, when inbound/outbound PRs are available, outbound_PR.dlid is used as the requestor's datapath DLID and inbound_PR.dlid is used as the responder's DLID. The inbound_PR.dlid is passed to responder side with the "ConnectReq.Primary_Local_Port_LID" field. With this solution the PERMISSIVE_LID is no longer used in Primary Local LID field. Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/b3f6cac685bce9dde37c610be82e2c19d9e51d9e.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 25 +++++++++++++++++++++++-- drivers/infiniband/core/cma.c | 2 ++ include/rdma/ib_cm.h | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index ade82752f9f7..1f9938a2c475 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -175,6 +175,7 @@ struct cm_device { struct cm_av { struct cm_port *port; struct rdma_ah_attr ah_attr; + u16 dlid_datapath; u16 pkey_index; u8 timeout; }; @@ -1304,6 +1305,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, struct sa_path_rec *pri_path = param->primary_path; struct sa_path_rec *alt_path = param->alternate_path; bool pri_ext = false; + __be16 lid; if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA) pri_ext = opa_is_extended_lid(pri_path->opa.dlid, @@ -1363,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, htons(ntohl(sa_path_get_dlid( pri_path))))); } else { + + if 
(param->primary_path_inbound) { + lid = param->primary_path_inbound->ib.dlid; + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(lid)); + } else + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + /* Work-around until there's a way to obtain remote LID info */ - IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, - be16_to_cpu(IB_LID_PERMISSIVE)); IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, be16_to_cpu(IB_LID_PERMISSIVE)); } @@ -1520,6 +1529,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->primary_path_outbound) + cm_id_priv->av.dlid_datapath = + be16_to_cpu(param->primary_path_outbound->ib.dlid); + if (param->alternate_path) cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); @@ -2154,6 +2167,10 @@ static int cm_req_handler(struct cm_work *work) NULL, 0); goto rejected; } + if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB) + cm_id_priv->av.dlid_datapath = + IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg); + if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, &cm_id_priv->alt_av); @@ -4113,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; + if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) && + cm_id_priv->av.dlid_datapath && + (cm_id_priv->av.dlid_datapath != 0xffff)) + qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a3efc462305d..7eacb23165fc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4313,6 +4313,8 @@ static int cma_connect_ib(struct 
rdma_id_private *id_priv, } req.primary_path = &route->path_rec[0]; + req.primary_path_inbound = route->path_rec_inbound; + req.primary_path_outbound = route->path_rec_outbound; if (route->num_pri_alt_paths == 2) req.alternate_path = &route->path_rec[1]; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 8dae5847020a..a2ac62b4a6cf 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -348,6 +348,8 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, struct ib_cm_req_param { struct sa_path_rec *primary_path; + struct sa_path_rec *primary_path_inbound; + struct sa_path_rec *primary_path_outbound; struct sa_path_rec *alternate_path; const struct ib_gid_attr *ppath_sgid_attr; __be64 service_id; -- cgit v1.2.3 From 241f9a27e0fc0eaf23e3d52c8450f10648cd11f1 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Wed, 21 Sep 2022 17:08:43 +0900 Subject: IB: Set IOVA/LENGTH on IB_MR in core/uverbs layers Set 'iova' and 'length' on ib_mr in ib_uverbs and ib_core layers to let all drivers have the members filled. Also, this commit removes redundancy in the respective drivers. Previously, commit 04c0a5fcfcf65 ("IB/uverbs: Set IOVA on IB MR in uverbs layer") changed to set 'iova', but seems to have missed 'length' and the ib_core layer at that time. 
Fixes: 04c0a5fcfcf65 ("IB/uverbs: Set IOVA on IB MR in uverbs layer") Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20220921080844.1616883-1-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_cmd.c | 5 ++++- drivers/infiniband/core/verbs.c | 2 ++ drivers/infiniband/hw/hns/hns_roce_mr.c | 1 - drivers/infiniband/hw/mlx4/mr.c | 1 - 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 046376bd68e2..4796f6a8828c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -739,6 +739,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->iova = cmd.hca_va; + mr->length = cmd.length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_set_name(&mr->res, NULL); @@ -861,8 +862,10 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) mr->pd = new_pd; atomic_inc(&new_pd->usecnt); } - if (cmd.flags & IB_MR_REREG_TRANS) + if (cmd.flags & IB_MR_REREG_TRANS) { mr->iova = cmd.hca_va; + mr->length = cmd.length; + } } memset(&resp, 0, sizeof(resp)); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e54b3f1b730e..f8964c8cf0ad 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2149,6 +2149,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); + mr->iova = virt_addr; + mr->length = length; rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); rdma_restrack_parent_name(&mr->res, &pd->res); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 867972c2a894..dedfa56f5773 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -249,7 +249,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, 
u64 start, u64 length, goto err_alloc_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; return &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 04a67b481608..a40bf58bcdd3 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_mr; mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; - mr->ibmr.length = length; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; -- cgit v1.2.3 From 954afc5a8fd85745a27536e064eebaa34abf9a19 Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Wed, 21 Sep 2022 17:08:44 +0900 Subject: RDMA/rxe: Use members of generic struct in rxe_mr rxe_mr and ib_mr have interchangeable members. Remove device specific members and use ones in the generic struct. Both 'iova' and 'length' are filled in ib_uverbs or ib_core layer after MR registration. Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20220921080844.1616883-2-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_mr.c | 10 ++++------ drivers/infiniband/sw/rxe/rxe_mw.c | 6 +++--- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 +--- drivers/infiniband/sw/rxe/rxe_verbs.h | 2 -- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 814116ec4778..6b0c2e7b8145 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -32,8 +32,8 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) case IB_MR_TYPE_USER: case IB_MR_TYPE_MEM_REG: - if (iova < mr->iova || length > mr->length || - iova > mr->iova + mr->length - length) + if (iova < mr->ibmr.iova || length > mr->ibmr.length || + iova > mr->ibmr.iova + mr->ibmr.length - length) return -EFAULT; return 0; @@ -178,8 +178,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, 
u64 iova, mr->ibmr.pd = &pd->ibpd; mr->umem = umem; mr->access = access; - mr->length = length; - mr->iova = iova; mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; mr->type = IB_MR_TYPE_USER; @@ -221,7 +219,7 @@ err1: static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out, size_t *offset_out) { - size_t offset = iova - mr->iova + mr->offset; + size_t offset = iova - mr->ibmr.iova + mr->offset; int map_index; int buf_index; u64 length; @@ -604,7 +602,7 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) mr->access = access; mr->lkey = key; mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0; - mr->iova = wqe->wr.wr.reg.mr->iova; + mr->ibmr.iova = wqe->wr.wr.reg.mr->iova; mr->state = RXE_MR_STATE_VALID; return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 104993801a80..902b7df7aaed 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -114,15 +114,15 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, /* C10-75 */ if (mw->access & IB_ZERO_BASED) { - if (unlikely(wqe->wr.wr.mw.length > mr->length)) { + if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { pr_err_once( "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } } else { - if (unlikely((wqe->wr.wr.mw.addr < mr->iova) || + if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > - (mr->iova + mr->length)))) { + (mr->ibmr.iova + mr->ibmr.length)))) { pr_err_once( "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 9ebe9decad34..da1c484798dd 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1007,11 +1007,9 @@ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); 
- mr->iova = ibmr->iova; - mr->length = ibmr->length; mr->page_shift = ilog2(ibmr->page_size); mr->page_mask = ibmr->page_size - 1; - mr->offset = mr->iova & mr->page_mask; + mr->offset = ibmr->iova & mr->page_mask; return n; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index a51819d0c345..5f5cbfcb3569 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -305,8 +305,6 @@ struct rxe_mr { u32 rkey; enum rxe_mr_state state; enum ib_mr_type type; - u64 iova; - size_t length; u32 offset; int access; -- cgit v1.2.3 From b05398aff9ad9dc701b261183a5d756165d28b51 Mon Sep 17 00:00:00 2001 From: Mikhael Goikhman Date: Wed, 21 Sep 2022 11:03:07 +0300 Subject: RDMA/srp: Support more than 255 rdma ports Currently ib_srp module does not support devices with more than 256 ports. Switch from u8 to u32 to fix the problem. Fixes: 1fb7f8973f51 ("RDMA: Support more than 255 rdma ports") Reviewed-by: Shay Drory Signed-off-by: Mikhael Goikhman Link: https://lore.kernel.org/r/7d80d8844f1abb3a54170b7259f0a02be38080a6.1663747327.git.leonro@nvidia.com Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 12 ++++++------ drivers/infiniband/ulp/srp/ib_srp.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d01102db4fd4..66ff61e54fa9 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2988,7 +2988,7 @@ static ssize_t local_ib_port_show(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - return sysfs_emit(buf, "%d\n", target->srp_host->port); + return sysfs_emit(buf, "%u\n", target->srp_host->port); } static DEVICE_ATTR_RO(local_ib_port); @@ -3886,7 +3886,7 @@ static ssize_t port_show(struct device *dev, struct device_attribute *attr, { struct srp_host *host = 
container_of(dev, struct srp_host, dev); - return sysfs_emit(buf, "%d\n", host->port); + return sysfs_emit(buf, "%u\n", host->port); } static DEVICE_ATTR_RO(port); @@ -3898,7 +3898,7 @@ static struct attribute *srp_class_attrs[] = { NULL }; -static struct srp_host *srp_add_port(struct srp_device *device, u8 port) +static struct srp_host *srp_add_port(struct srp_device *device, u32 port) { struct srp_host *host; @@ -3915,7 +3915,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port) device_initialize(&host->dev); host->dev.class = &srp_class; host->dev.parent = device->dev->dev.parent; - if (dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev), + if (dev_set_name(&host->dev, "srp-%s-%u", dev_name(&device->dev->dev), port)) goto put_host; if (device_add(&host->dev)) @@ -3937,7 +3937,7 @@ static void srp_rename_dev(struct ib_device *device, void *client_data) list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) { char name[IB_DEVICE_NAME_MAX + 8]; - snprintf(name, sizeof(name), "srp-%s-%d", + snprintf(name, sizeof(name), "srp-%s-%u", dev_name(&device->dev), host->port); device_rename(&host->dev, name); } @@ -3949,7 +3949,7 @@ static int srp_add_one(struct ib_device *device) struct ib_device_attr *attr = &device->attrs; struct srp_host *host; int mr_page_shift; - unsigned int p; + u32 p; u64 max_pages_per_mr; unsigned int flags = 0; diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 493e7fd1913e..00b0068fda20 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -120,7 +120,7 @@ struct srp_device { */ struct srp_host { struct srp_device *srp_dev; - u8 port; + u32 port; struct device dev; struct list_head target_list; spinlock_t target_lock; -- cgit v1.2.3 From b300729b77b0b746c4f898332705672eb50d3297 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Sep 2022 14:22:35 +0300 Subject: RDMA/core: Clean up a variable name in 
ib_create_srq_user() "&srq->pd->usecnt" and "&pd->usecnt" are different names for the same reference count. Use "&pd->usecnt" consistently for both the increment and decrement. Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YyxFe3Pm0uzRuBkQ@kili Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f8964c8cf0ad..26b021f43ba4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1038,7 +1038,7 @@ struct ib_srq *ib_create_srq_user(struct ib_pd *pd, ret = pd->device->ops.create_srq(srq, srq_init_attr, udata); if (ret) { rdma_restrack_put(&srq->res); - atomic_dec(&srq->pd->usecnt); + atomic_dec(&pd->usecnt); if (srq->srq_type == IB_SRQT_XRC && srq->ext.xrc.xrcd) atomic_dec(&srq->ext.xrc.xrcd->usecnt); if (ib_srq_has_cq(srq->srq_type)) -- cgit v1.2.3 From f994ae0a143485fcc02ebf17a329239430306b6c Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Mon, 1 Aug 2022 06:23:30 +0000 Subject: RDMA/rxe: Add send_common_ack() helper Most of the code in send_ack() and send_atomic_ack() is duplicated; move it to a new helper, send_common_ack(). In the newer IBA spec, some opcodes must be acknowledged with a zero-length read response; with this new helper, we can easily implement that later.
Link: https://lore.kernel.org/r/1659335010-2-1-git-send-email-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 43 ++++++++++++++---------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 7c336db5cb54..ed5a09e86417 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1024,50 +1024,41 @@ finish: return RESPST_CLEANUP; } -static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) + +static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, + int opcode, const char *msg) { - int err = 0; + int err; struct rxe_pkt_info ack_pkt; struct sk_buff *skb; - skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE, - 0, psn, syndrome); - if (!skb) { - err = -ENOMEM; - goto err1; - } + skb = prepare_ack_packet(qp, &ack_pkt, opcode, 0, psn, syndrome); + if (!skb) + return -ENOMEM; err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending ack\n"); + pr_err_ratelimited("Failed sending %s\n", msg); -err1: return err; } -static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +static int send_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) { - int err = 0; - struct rxe_pkt_info ack_pkt; - struct sk_buff *skb; - - skb = prepare_ack_packet(qp, &ack_pkt, IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, - 0, psn, syndrome); - if (!skb) { - err = -ENOMEM; - goto out; - } + return send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ACKNOWLEDGE, "ACK"); +} - err = rxe_xmit_packet(qp, &ack_pkt, skb); - if (err) - pr_err_ratelimited("Failed sending atomic ack\n"); +static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE, "ATOMIC ACK"); /* have to clear this since it is used to trigger * long read replies */ qp->resp.res = NULL; -out: - 
return err; + return ret; } static enum resp_states acknowledge(struct rxe_qp *qp, -- cgit v1.2.3 From 58651bbb30f87dab474eff31ab564391aa6ea1f3 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Fri, 5 Aug 2022 13:31:54 -0500 Subject: RDMA/rxe: Set pd early in mr alloc routines Move setting of pd in mr objects ahead of any possible errors so that it will always be set in rxe_mr_cleanup() to avoid seg faults when rxe_put(mr_pd(mr)) is called. Fixes: cf40367961d8 ("RDMA/rxe: Move mr cleanup code to rxe_mr_cleanup()") Link: https://lore.kernel.org/r/20220805183153.32007-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 6 +++--- drivers/infiniband/sw/rxe/rxe_mr.c | 11 ++++------- drivers/infiniband/sw/rxe/rxe_verbs.c | 12 +++++++----- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 22f6cc31d1d6..c2a5c8814a48 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -64,10 +64,10 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); /* rxe_mr.c */ u8 rxe_get_next_key(u32 last_key); -void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr); -int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, +void rxe_mr_init_dma(int access, struct rxe_mr *mr); +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr); -int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr); +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr); int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, enum rxe_mr_copy_dir dir); int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 6b0c2e7b8145..502e9ada99b3 100644 --- 
a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -103,17 +103,16 @@ err1: return -ENOMEM; } -void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr) +void rxe_mr_init_dma(int access, struct rxe_mr *mr) { rxe_mr_init(access, mr); - mr->ibmr.pd = &pd->ibpd; mr->access = access; mr->state = RXE_MR_STATE_VALID; mr->type = IB_MR_TYPE_DMA; } -int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, +int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr) { struct rxe_map **map; @@ -125,7 +124,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int err; int i; - umem = ib_umem_get(pd->ibpd.device, start, length, access); + umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { pr_warn("%s: Unable to pin memory region err = %d\n", __func__, (int)PTR_ERR(umem)); @@ -175,7 +174,6 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, } } - mr->ibmr.pd = &pd->ibpd; mr->umem = umem; mr->access = access; mr->offset = ib_umem_offset(umem); @@ -194,7 +192,7 @@ err_out: return err; } -int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) +int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) { int err; @@ -205,7 +203,6 @@ int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr) if (err) goto err1; - mr->ibmr.pd = &pd->ibpd; mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; mr->type = IB_MR_TYPE_MEM_REG; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index da1c484798dd..3d37216609e4 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -903,7 +903,9 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) return ERR_PTR(-ENOMEM); rxe_get(pd); - rxe_mr_init_dma(pd, access, mr); + mr->ibmr.pd = ibpd; + + rxe_mr_init_dma(access, mr); rxe_finalize(mr); return 
&mr->ibmr; @@ -928,8 +930,9 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, rxe_get(pd); + mr->ibmr.pd = ibpd; - err = rxe_mr_init_user(pd, start, length, iova, access, mr); + err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) goto err3; @@ -938,7 +941,6 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, return &mr->ibmr; err3: - rxe_put(pd); rxe_cleanup(mr); err2: return ERR_PTR(err); @@ -962,8 +964,9 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, } rxe_get(pd); + mr->ibmr.pd = ibpd; - err = rxe_mr_init_fast(pd, max_num_sg, mr); + err = rxe_mr_init_fast(max_num_sg, mr); if (err) goto err2; @@ -972,7 +975,6 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, return &mr->ibmr; err2: - rxe_put(pd); rxe_cleanup(mr); err1: return ERR_PTR(err); -- cgit v1.2.3 From fda5d0cf8aef12f0a4f714a96a4b2fce039a3e55 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 25 Aug 2022 17:14:47 -0500 Subject: RDMA/rxe: Fix resize_finish() in rxe_queue.c Currently in resize_finish() in rxe_queue.c there is a loop which copies the entries in the original queue into a newly allocated queue. The termination logic for this loop is incorrect. The call to queue_next_index() updates cons but has no effect on whether the queue is empty. So if the queue starts out empty nothing is copied but if it is not then the loop will run forever. This patch changes the loop to compare the value of cons to the original producer index. 
Fixes: ae6e843fe08d0 ("RDMA/rxe: Add memory barriers to kernel queues") Link: https://lore.kernel.org/r/20220825221446.6512-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Reviewed-by: Li Zhijian Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_queue.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index dbd4971039c0..d6dbf5a0058d 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -112,23 +112,25 @@ static int resize_finish(struct rxe_queue *q, struct rxe_queue *new_q, unsigned int num_elem) { enum queue_type type = q->type; + u32 new_prod; u32 prod; u32 cons; if (!queue_empty(q, q->type) && (num_elem < queue_count(q, type))) return -EINVAL; - prod = queue_get_producer(new_q, type); + new_prod = queue_get_producer(new_q, type); + prod = queue_get_producer(q, type); cons = queue_get_consumer(q, type); - while (!queue_empty(q, type)) { - memcpy(queue_addr_from_index(new_q, prod), + while ((prod - cons) & q->index_mask) { + memcpy(queue_addr_from_index(new_q, new_prod), queue_addr_from_index(q, cons), new_q->elem_size); - prod = queue_next_index(new_q, prod); + new_prod = queue_next_index(new_q, new_prod); cons = queue_next_index(q, cons); } - new_q->buf->producer_index = prod; + new_q->buf->producer_index = new_prod; q->buf->consumer_index = cons; /* update private index copies */ -- cgit v1.2.3 From 4bf207d7a54d49637da94dbc00d2c025b74764d1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 1 Sep 2022 11:20:53 -0300 Subject: net/mlx5: Add IFC bits for mkey ATS Allows telling a mkey to use PCI ATS for DMA that flows through it. 
Link: https://lore.kernel.org/r/1-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4acd5610e96b..92602e33a82c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1707,7 +1707,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 steering_format_version[0x4]; u8 create_qp_start_hint[0x18]; - u8 reserved_at_460[0x3]; + u8 reserved_at_460[0x1]; + u8 ats[0x1]; + u8 reserved_at_462[0x1]; u8 log_max_uctx[0x5]; u8 reserved_at_468[0x2]; u8 ipsec_offload[0x1]; @@ -3873,7 +3875,9 @@ struct mlx5_ifc_mkc_bits { u8 lw[0x1]; u8 lr[0x1]; u8 access_mode_1_0[0x2]; - u8 reserved_at_18[0x8]; + u8 reserved_at_18[0x2]; + u8 ma_translation_mode[0x2]; + u8 reserved_at_1c[0x4]; u8 qpn[0x18]; u8 mkey_7_0[0x8]; @@ -11134,7 +11138,8 @@ struct mlx5_ifc_dealloc_memic_out_bits { struct mlx5_ifc_umem_bits { u8 reserved_at_0[0x80]; - u8 reserved_at_80[0x1b]; + u8 ats[0x1]; + u8 reserved_at_81[0x1a]; u8 log_page_size[0x5]; u8 page_offset[0x20]; -- cgit v1.2.3 From 015bda8abd3a6a77656e60b36d499c43a2c0f0a1 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 1 Sep 2022 11:20:54 -0300 Subject: RDMA/core: Add UVERBS_ATTR_RAW_FD This uses the same passing protocol as UVERBS_ATTR_FD (e.g. len = 0 data_s64 = fd), except that the FD is not required to be a uverbs object and the core code does not convert the FD to an object handle automatically. Access to the int fd is provided by uverbs_get_raw_fd().
Link: https://lore.kernel.org/r/2-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_ioctl.c | 8 ++++++++ include/rdma/uverbs_ioctl.h | 13 +++++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c index 990f0724acc6..d9799706c58e 100644 --- a/drivers/infiniband/core/uverbs_ioctl.c +++ b/drivers/infiniband/core/uverbs_ioctl.c @@ -337,6 +337,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle, break; + case UVERBS_ATTR_TYPE_RAW_FD: + if (uattr->attr_data.reserved || uattr->len != 0 || + uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX) + return -EINVAL; + /* _uverbs_get_const_signed() is the accessor */ + e->ptr_attr.data = uattr->data_s64; + break; + case UVERBS_ATTR_TYPE_IDRS_ARRAY: return uverbs_process_idrs_array(pbundle, attr_uapi, &e->objs_arr_attr, uattr, diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 23bb404aba12..9d45a5b20316 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -24,6 +24,7 @@ enum uverbs_attr_type { UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, + UVERBS_ATTR_TYPE_RAW_FD, UVERBS_ATTR_TYPE_ENUM_IN, UVERBS_ATTR_TYPE_IDRS_ARRAY, }; @@ -521,6 +522,11 @@ struct uapi_definition { .u.obj.access = _access, \ __VA_ARGS__ } }) +#define UVERBS_ATTR_RAW_FD(_attr_id, ...) \ + (&(const struct uverbs_attr_def){ \ + .id = (_attr_id), \ + .attr = { .type = UVERBS_ATTR_TYPE_RAW_FD, __VA_ARGS__ } }) + #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) 
\ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ @@ -999,4 +1005,11 @@ _uverbs_get_const_unsigned(u64 *to, uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \ _default)) +static inline int +uverbs_get_raw_fd(int *to, const struct uverbs_attr_bundle *attrs_bundle, + size_t idx) +{ + return uverbs_get_const_signed(to, attrs_bundle, idx); +} + #endif -- cgit v1.2.3 From 9af859c58d0f169ead0ed95204cdd891b0ee623a Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 1 Sep 2022 11:20:55 -0300 Subject: RDMA/mlx5: Add support for dmabuf to devx umem This is modeled after the similar EFA enablement in commit 66f4817b5712 ("RDMA/efa: Add support for dmabuf memory regions"). Like EFA there is no support for revocation so we simply call the ib_umem_dmabuf_get_pinned() to obtain a umem instead of the normal ib_umem_get(). Everything else stays the same. Link: https://lore.kernel.org/r/3-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 24 +++++++++++++++++++++--- include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index adefff89fb39..a41e8d582f5b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2183,9 +2183,25 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, if (err) return err; - obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); - if (IS_ERR(obj->umem)) - return PTR_ERR(obj->umem); + if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD)) { + struct ib_umem_dmabuf *umem_dmabuf; + int dmabuf_fd; + + err = uverbs_get_raw_fd(&dmabuf_fd, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD); + if (err) + return -EFAULT; + + umem_dmabuf = ib_umem_dmabuf_get_pinned( + &dev->ib_dev, addr, size, dmabuf_fd, access); + if (IS_ERR(umem_dmabuf)) + return PTR_ERR(umem_dmabuf); + 
obj->umem = &umem_dmabuf->umem; + } else { + obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); + if (IS_ERR(obj->umem)) + return PTR_ERR(obj->umem); + } return 0; } @@ -2835,6 +2851,8 @@ DECLARE_UVERBS_NAMED_METHOD( UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN, UVERBS_ATTR_TYPE(u64), UA_MANDATORY), + UVERBS_ATTR_RAW_FD(MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD, + UA_OPTIONAL), UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, enum ib_access_flags), UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h index 3bee490eb585..595edad03dfe 100644 --- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h +++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h @@ -174,6 +174,7 @@ enum mlx5_ib_devx_umem_reg_attrs { MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP, + MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD, }; enum mlx5_ib_devx_umem_dereg_attrs { -- cgit v1.2.3 From 72b2f7608a59727e7c2e5b11cff2749c2c080fac Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 1 Sep 2022 11:20:56 -0300 Subject: RDMA/mlx5: Enable ATS support for MRs and umems For mlx5 if ATS is enabled in the PCI config then the device will use ATS requests for only certain DMA operations. This has to be opted in by the SW side based on the mkey or umem settings. ATS slows down the PCI performance, so it should only be set in cases when it is needed. All of these cases revolve around optimizing PCI P2P transfers and avoiding bad cases where the bus just doesn't work. 
Link: https://lore.kernel.org/r/4-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 37 ++++++++++++++++++++---------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 36 +++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx5/mr.c | 5 ++++- 3 files changed, 61 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index a41e8d582f5b..2211a0be16f3 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -2160,26 +2160,17 @@ err: static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, struct uverbs_attr_bundle *attrs, - struct devx_umem *obj) + struct devx_umem *obj, u32 access_flags) { u64 addr; size_t size; - u32 access; int err; if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) || uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN)) return -EFAULT; - err = uverbs_get_flags32(&access, attrs, - MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ); - if (err) - return err; - - err = ib_check_mr_access(&dev->ib_dev, access); + err = ib_check_mr_access(&dev->ib_dev, access_flags); if (err) return err; @@ -2193,12 +2184,12 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext, return -EFAULT; umem_dmabuf = ib_umem_dmabuf_get_pinned( - &dev->ib_dev, addr, size, dmabuf_fd, access); + &dev->ib_dev, addr, size, dmabuf_fd, access_flags); if (IS_ERR(umem_dmabuf)) return PTR_ERR(umem_dmabuf); obj->umem = &umem_dmabuf->umem; } else { - obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access); + obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access_flags); if (IS_ERR(obj->umem)) return PTR_ERR(obj->umem); } @@ -2240,7 +2231,8 @@ static unsigned int devx_umem_find_best_pgsize(struct ib_umem *umem, static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, struct 
uverbs_attr_bundle *attrs, struct devx_umem *obj, - struct devx_umem_reg_cmd *cmd) + struct devx_umem_reg_cmd *cmd, + int access) { unsigned long pgsz_bitmap; unsigned int page_size; @@ -2289,6 +2281,9 @@ static int devx_umem_reg_cmd_alloc(struct mlx5_ib_dev *dev, MLX5_SET(umem, umem, page_offset, ib_umem_dma_offset(obj->umem, page_size)); + if (mlx5_umem_needs_ats(dev, obj->umem, access)) + MLX5_SET(umem, umem, ats, 1); + mlx5_ib_populate_pas(obj->umem, page_size, mtt, (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) | MLX5_IB_MTT_READ); @@ -2306,20 +2301,30 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( struct mlx5_ib_ucontext *c = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device); + int access_flags; int err; if (!c->devx_uid) return -EINVAL; + err = uverbs_get_flags32(&access_flags, attrs, + MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS, + IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_RELAXED_ORDERING); + if (err) + return err; + obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; - err = devx_umem_get(dev, &c->ibucontext, attrs, obj); + err = devx_umem_get(dev, &c->ibucontext, attrs, obj, access_flags); if (err) goto err_obj_free; - err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd); + err = devx_umem_reg_cmd_alloc(dev, attrs, obj, &cmd, access_flags); if (err) goto err_umem_release; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 2e2ad3918385..7e2c4a378220 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1550,4 +1550,40 @@ static inline bool rt_supported(int ts_cap) return ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_REAL_TIME || ts_cap == MLX5_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; } + +/* + * PCI Peer to Peer is a trainwreck. 
If no switch is present then things + * sometimes work, depending on the pci_distance_p2p logic for excluding broken + * root complexes. However if a switch is present in the path, then things get + * really ugly depending on how the switch is setup. This table assumes that the + * root complex is strict and is validating that all req/reps are matches + * perfectly - so any scenario where it sees only half the transaction is a + * failure. + * + * CR/RR/DT ATS RO P2P + * 00X X X OK + * 010 X X fails (request is routed to root but root never sees comp) + * 011 0 X fails (request is routed to root but root never sees comp) + * 011 1 X OK + * 10X X 1 OK + * 101 X 0 fails (completion is routed to root but root didn't see req) + * 110 X 0 SLOW + * 111 0 0 SLOW + * 111 1 0 fails (completion is routed to root but root didn't see req) + * 111 1 1 OK + * + * Unfortunately we cannot reliably know if a switch is present or what the + * CR/RR/DT ACS settings are, as in a VM that is all hidden. Assume that + * CR/RR/DT is 111 if the ATS cap is enabled and follow the last three rows. + * + * For now assume if the umem is a dma_buf then it is P2P. + */ +static inline bool mlx5_umem_needs_ats(struct mlx5_ib_dev *dev, + struct ib_umem *umem, int access_flags) +{ + if (!MLX5_CAP_GEN(dev->mdev, ats) || !umem->is_dmabuf) + return false; + return access_flags & IB_ACCESS_RELAXED_ORDERING; +} + #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4fcb653b35bb..410cc5fd2523 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -935,7 +935,8 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, * cache then synchronously create an uncached one. 
*/ if (!ent || ent->limit == 0 || - !mlx5r_umr_can_reconfig(dev, 0, access_flags)) { + !mlx5r_umr_can_reconfig(dev, 0, access_flags) || + mlx5_umem_needs_ats(dev, umem, access_flags)) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(pd, umem, iova, access_flags, page_size, false); mutex_unlock(&dev->slow_path_mutex); @@ -1016,6 +1017,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, MLX5_SET(mkc, mkc, translations_octword_size, get_octo_len(iova, umem->length, mr->page_shift)); MLX5_SET(mkc, mkc, log_page_size, mr->page_shift); + if (mlx5_umem_needs_ats(dev, umem, access_flags)) + MLX5_SET(mkc, mkc, ma_translation_mode, 1); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(iova, umem->length, mr->page_shift)); -- cgit v1.2.3 From 6c5e683925cf19d36033f3e9e9d90755f034614e Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 13 Sep 2022 17:27:17 -0500 Subject: RDMA/rxe: Remove redundant num_sge fields In include/uapi/rdma/rdma_user_rxe.h there are redundant copies of num_sge in the rxe_send_wr, rxe_recv_wqe, and rxe_dma_info. Only the ones in rxe_dma_info are actually used by the rxe kernel driver. The userspace would set these values, but the kernel never read them. This change has no effect on the current ABI and new or old versions of rdma-core operate correctly with new or old versions of the kernel rxe driver.
Link: https://lore.kernel.org/r/20220913222716.18335-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 -- include/uapi/rdma/rdma_user_rxe.h | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 3d37216609e4..88825edc7dce 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -262,7 +262,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); recv_wqe->wr_id = ibwr->wr_id; - recv_wqe->num_sge = num_sge; memcpy(recv_wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -526,7 +525,6 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; - wr->num_sge = ibwr->num_sge; wr->opcode = ibwr->opcode; wr->send_flags = ibwr->send_flags; diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index f09c5c9e3dd5..73f679dfd2df 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -74,7 +74,7 @@ struct rxe_av { struct rxe_send_wr { __aligned_u64 wr_id; - __u32 num_sge; + __u32 reserved; __u32 opcode; __u32 send_flags; union { @@ -166,7 +166,7 @@ struct rxe_send_wqe { struct rxe_recv_wqe { __aligned_u64 wr_id; - __u32 num_sge; + __u32 reserved; __u32 padding; struct rxe_dma_info dma; }; -- cgit v1.2.3 From 78657a445ca7603024348781c921f8ecaee10a49 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Sat, 24 Sep 2022 17:14:57 +0800 Subject: IB/rdmavt: Add __init/__exit annotations to module init/exit funcs Add missing __init/__exit annotations to module init/exit funcs. 
Fixes: 0194621b2253 ("IB/rdmavt: Create module framework and handle driver registration") Link: https://lore.kernel.org/r/20220924091457.52446-1-xiujianfeng@huawei.com Signed-off-by: Xiu Jianfeng Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rdmavt/vt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 59481ae39505..d61f8de7f21c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -15,7 +15,7 @@ MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("RDMA Verbs Transport Library"); -static int rvt_init(void) +static int __init rvt_init(void) { int ret = rvt_driver_cq_init(); @@ -26,7 +26,7 @@ static int rvt_init(void) } module_init(rvt_init); -static void rvt_cleanup(void) +static void __exit rvt_cleanup(void) { rvt_cq_exit(); } -- cgit v1.2.3 From d8913213ffabe64cb7cfd20d59ef12dcecb47fd7 Mon Sep 17 00:00:00 2001 From: Guofeng Yue Date: Thu, 22 Sep 2022 20:33:04 +0800 Subject: RDMA/hns: Cleanup for a spelling error of Asynchronous Fixed a spelling error for Asynchronous. 
Link: https://lore.kernel.org/r/20220922123315.3732205-2-xuhaoyue1@hisilicon.com Signed-off-by: Guofeng Yue Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 437d5dd4e648..a9dc28fd2962 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6069,7 +6069,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) /* Completion event interrupt */ int_work = hns_roce_v2_ceq_int(hr_dev, eq); else - /* Asychronous event interrupt */ + /* Asynchronous event interrupt */ int_work = hns_roce_v2_aeq_int(hr_dev, eq); return IRQ_RETVAL(int_work); -- cgit v1.2.3 From 77c3e303f691bb3d011426e5d8b5dcecd9b89c16 Mon Sep 17 00:00:00 2001 From: Guofeng Yue Date: Thu, 22 Sep 2022 20:33:05 +0800 Subject: RDMA/hns: Remove unnecessary braces for single statement blocks Braces {} are not necessary for single statement blocks. 
Link: https://lore.kernel.org/r/20220922123315.3732205-3-xuhaoyue1@hisilicon.com Signed-off-by: Guofeng Yue Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 9de3a522980a..82948ae3e52b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -846,9 +846,8 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) hns_roce_init_cq_table(hr_dev); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) hns_roce_init_srq_table(hr_dev); - } return 0; -- cgit v1.2.3 From 064fd299a70bae37a3c4d49ad6eb1766e57e4c24 Mon Sep 17 00:00:00 2001 From: Guofeng Yue Date: Thu, 22 Sep 2022 20:33:06 +0800 Subject: RDMA/hns: Remove unnecessary brackets when getting point Delete () when using & to obtain an address. 
Link: https://lore.kernel.org/r/20220922123315.3732205-4-xuhaoyue1@hisilicon.com Signed-off-by: Guofeng Yue Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a9dc28fd2962..07a9988d7505 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4613,7 +4613,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_clear(qpc_mask, QPC_DQPN); } - memcpy(&(context->dmac), dmac, sizeof(u32)); + memcpy(&context->dmac, dmac, sizeof(u32)); hr_reg_write(context, QPC_DMAC_H, *((u16 *)(&dmac[4]))); qpc_mask->dmac = 0; hr_reg_clear(qpc_mask, QPC_DMAC_H); @@ -5904,12 +5904,12 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, if (!irq_work) return; - INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); + INIT_WORK(&irq_work->work, hns_roce_irq_work_handle); irq_work->hr_dev = hr_dev; irq_work->event_type = eq->event_type; irq_work->sub_type = eq->sub_type; irq_work->queue_num = queue_num; - queue_work(hr_dev->irq_workq, &(irq_work->work)); + queue_work(hr_dev->irq_workq, &irq_work->work); } static void update_eq_db(struct hns_roce_eq *eq) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index ae29780dd63a..ca63463e7d4e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -406,6 +406,7 @@ enum hns_roce_v2_qp_state { struct hns_roce_v2_qp_context_ex { __le32 data[64]; }; + struct hns_roce_v2_qp_context { __le32 byte_4_sqpn_tst; __le32 wqe_sge_ba; -- cgit v1.2.3 From bb4874af19686019d0dafd58726ed7b4058663ca Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Thu, 22 Sep 2022 20:33:07 +0800 Subject: RDMA/hns: Remove redundant 'attr_mask' 
in modify_qp_init_to_init() The attr_mask variable is not used in the function, so remove it. Link: https://lore.kernel.org/r/20220922123315.3732205-5-xuhaoyue1@hisilicon.com Signed-off-by: Yixing Liu Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 07a9988d7505..4931f2a8a4af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4300,7 +4300,6 @@ static inline int get_pdn(struct ib_pd *ib_pd) static void modify_qp_reset_to_init(struct ib_qp *ibqp, const struct ib_qp_attr *attr, - int attr_mask, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -4364,7 +4363,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, } static void modify_qp_init_to_init(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, int attr_mask, + const struct ib_qp_attr *attr, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { @@ -5015,11 +5014,9 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, hr_dev->caps.qpc_sz); - modify_qp_reset_to_init(ibqp, attr, attr_mask, context, - qpc_mask); + modify_qp_reset_to_init(ibqp, attr, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - modify_qp_init_to_init(ibqp, attr, attr_mask, context, - qpc_mask); + modify_qp_init_to_init(ibqp, attr, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { ret = modify_qp_init_to_rtr(ibqp, attr, attr_mask, context, qpc_mask); -- cgit v1.2.3 From be1eeb667eb748391b1c8158678fe4d892187793 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 22 Sep 2022 20:33:08 +0800 Subject: RDMA/hns: Remove redundant 'bt_level' for 
hem_list_alloc_item() The 'bt_level' parameter is not used in hem_list_alloc_item(), so remove it. Link: https://lore.kernel.org/r/20220922123315.3732205-6-xuhaoyue1@hisilicon.com Signed-off-by: Yunsheng Lin Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index ce1a0d2792a3..e7c73ff14ae0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -986,7 +986,7 @@ struct hns_roce_hem_head { static struct hns_roce_hem_item * hem_list_alloc_item(struct hns_roce_dev *hr_dev, int start, int end, int count, - bool exist_bt, int bt_level) + bool exist_bt) { struct hns_roce_hem_item *hem; @@ -1195,7 +1195,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, start_aligned = (distance / step) * step + r->offset; end = min_t(int, start_aligned + step - 1, max_ofs); cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, - true, level); + true); if (!cur) { ret = -ENOMEM; goto err_exit; @@ -1247,7 +1247,7 @@ alloc_root_hem(struct hns_roce_dev *hr_dev, int unit, int *max_ba_num, /* indicate to last region */ r = ®ions[region_cnt - 1]; hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, - ba_num, true, 0); + ba_num, true); if (!hem) return ERR_PTR(-ENOMEM); @@ -1264,7 +1264,7 @@ static int alloc_fake_root_bt(struct hns_roce_dev *hr_dev, void *cpu_base, struct hns_roce_hem_item *hem; hem = hem_list_alloc_item(hr_dev, r->offset, r->offset + r->count - 1, - r->count, false, 0); + r->count, false); if (!hem) return -ENOMEM; -- cgit v1.2.3 From 29dc063596772368aa896f293f5c5aef06381712 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 22 Sep 2022 20:33:09 +0800 Subject: RDMA/hns: Remove redundant 'use_lowmem' argument from hns_roce_init_hem_table() As hns_roce_init_hem_table() is always called 
with use_lowmem being '1', and table->lowmem is set according to that argument, so remove table->lowmem too. Also, as the table->lowmem is used to indicate a dma buffer is allocated with GFP_HIGHUSER or GFP_KERNEL, and calling dma_alloc_coherent() with GFP_KERNEL seems like a common pattern. Link: https://lore.kernel.org/r/20220922123315.3732205-7-xuhaoyue1@hisilicon.com Signed-off-by: Yunsheng Lin Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_hem.c | 12 +++--------- drivers/infiniband/hw/hns/hns_roce_hem.h | 3 +-- drivers/infiniband/hw/hns/hns_roce_main.c | 20 ++++++++++---------- 4 files changed, 14 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6fb6080d2506..32cc116b3a6d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -240,7 +240,6 @@ struct hns_roce_hem_table { /* Single obj size */ unsigned long obj_size; unsigned long table_chunk_size; - int lowmem; struct mutex mutex; struct hns_roce_hem **hem; u64 **bt_l1; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index e7c73ff14ae0..e8acd2839d7d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -455,7 +455,7 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, * alloc bt space chunk for MTT/CQE. */ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size; - flag = (table->lowmem ? 
GFP_KERNEL : GFP_HIGHUSER) | __GFP_NOWARN; + flag = GFP_KERNEL | __GFP_NOWARN; table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size >> PAGE_SHIFT, size, flag); if (!table->hem[index->buf]) { @@ -588,8 +588,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size >> PAGE_SHIFT, table->table_chunk_size, - (table->lowmem ? GFP_KERNEL : - GFP_HIGHUSER) | __GFP_NOWARN); + GFP_KERNEL | __GFP_NOWARN); if (!table->hem[i]) { ret = -ENOMEM; goto out; @@ -725,9 +724,6 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, int length; int i, j; - if (!table->lowmem) - return NULL; - mutex_lock(&table->mutex); if (!hns_roce_check_whether_mhop(hr_dev, table->type)) { @@ -783,8 +779,7 @@ out: int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, - unsigned long obj_size, unsigned long nobj, - int use_lowmem) + unsigned long obj_size, unsigned long nobj) { unsigned long obj_per_chunk; unsigned long num_hem; @@ -861,7 +856,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, table->type = type; table->num_hem = num_hem; table->obj_size = obj_size; - table->lowmem = use_lowmem; mutex_init(&table->mutex); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 2d84a6b3f05d..6b888049e9a0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -111,8 +111,7 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, dma_addr_t *dma_handle); int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, u32 type, - unsigned long obj_size, unsigned long nobj, - int use_lowmem); + unsigned long obj_size, unsigned long nobj); void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table); void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c 
b/drivers/infiniband/hw/hns/hns_roce_main.c index 82948ae3e52b..498d7c28c56c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -659,7 +659,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, - hr_dev->caps.num_mtpts, 1); + hr_dev->caps.num_mtpts); if (ret) { dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); return ret; @@ -667,7 +667,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, HEM_TYPE_QPC, hr_dev->caps.qpc_sz, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { dev_err(dev, "Failed to init QP context memory, aborting.\n"); goto err_unmap_dmpt; @@ -677,7 +677,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_IRRL, hr_dev->caps.irrl_entry_sz * hr_dev->caps.max_qp_init_rdma, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { dev_err(dev, "Failed to init irrl_table memory, aborting.\n"); goto err_unmap_qp; @@ -689,7 +689,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_TRRL, hr_dev->caps.trrl_entry_sz * hr_dev->caps.max_qp_dest_rdma, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { dev_err(dev, "Failed to init trrl_table memory, aborting.\n"); @@ -699,7 +699,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table, HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz, - hr_dev->caps.num_cqs, 1); + hr_dev->caps.num_cqs); if (ret) { dev_err(dev, "Failed to init CQ context memory, aborting.\n"); goto err_unmap_trrl; @@ -709,7 +709,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, HEM_TYPE_SRQC, hr_dev->caps.srqc_entry_sz, - hr_dev->caps.num_srqs, 1); + hr_dev->caps.num_srqs); if (ret) 
{ dev_err(dev, "Failed to init SRQ context memory, aborting.\n"); @@ -722,7 +722,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, hr_dev->caps.sccc_sz, - hr_dev->caps.num_qps, 1); + hr_dev->caps.num_qps); if (ret) { dev_err(dev, "Failed to init SCC context memory, aborting.\n"); @@ -734,7 +734,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qpc_timer_table, HEM_TYPE_QPC_TIMER, hr_dev->caps.qpc_timer_entry_sz, - hr_dev->caps.num_qpc_timer, 1); + hr_dev->caps.num_qpc_timer); if (ret) { dev_err(dev, "Failed to init QPC timer memory, aborting.\n"); @@ -746,7 +746,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table, HEM_TYPE_CQC_TIMER, hr_dev->caps.cqc_timer_entry_sz, - hr_dev->caps.cqc_timer_bt_num, 1); + hr_dev->caps.cqc_timer_bt_num); if (ret) { dev_err(dev, "Failed to init CQC timer memory, aborting.\n"); @@ -758,7 +758,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) ret = hns_roce_init_hem_table(hr_dev, &hr_dev->gmv_table, HEM_TYPE_GMV, hr_dev->caps.gmv_entry_sz, - hr_dev->caps.gmv_entry_num, 1); + hr_dev->caps.gmv_entry_num); if (ret) { dev_err(dev, "failed to init gmv table memory, ret = %d\n", -- cgit v1.2.3 From 5f652387c5423a82453c5cb446a88834bf41a94b Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 22 Sep 2022 20:33:10 +0800 Subject: RDMA/hns: Remove redundant 'phy_addr' in hns_roce_hem_list_find_mtt() This parameter has never been used. Remove it to simplify the function. 
Link: https://lore.kernel.org/r/20220922123315.3732205-8-xuhaoyue1@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Yunsheng Lin Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hem.c | 7 +------ drivers/infiniband/hw/hns/hns_roce_hem.h | 2 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++-- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index e8acd2839d7d..d0b75a2234d3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1462,19 +1462,17 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, - int offset, int *mtt_cnt, u64 *phy_addr) + int offset, int *mtt_cnt) { struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; - u64 phy_base = 0; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; - phy_base = hem->dma_addr + nr * BA_BYTE_LEN; nr = hem->end + 1 - offset; break; } @@ -1483,8 +1481,5 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, if (mtt_cnt) *mtt_cnt = nr; - if (phy_addr) - *phy_addr = phy_base; - return cpu_base; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 6b888049e9a0..7d23d3c51da4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -131,7 +131,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, - int offset, int *mtt_cnt, u64 *phy_addr); + int offset, int *mtt_cnt); static inline void 
hns_roce_hem_first(struct hns_roce_hem *hem, struct hns_roce_hem_iter *iter) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index dedfa56f5773..93615f2556b2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -585,7 +585,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, while (offset < end && npage < max_count) { count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset, &count, NULL); + offset, &count); if (!mtts) return -ENOBUFS; @@ -834,7 +834,7 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, mtt_count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, start_index + total, - &mtt_count, NULL); + &mtt_count); if (!mtts || !mtt_count) goto done; -- cgit v1.2.3 From 5436272c8cf4eb420fdb3926ec07560051c8fd11 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 22 Sep 2022 20:33:11 +0800 Subject: RDMA/hns: Remove redundant 'num_mtt_segs' and 'max_extend_sg' The num_mtt_segs and max_extend_sg used to be used for HIP06, remove them since the HIP06 code has been removed. 
Link: https://lore.kernel.org/r/20220922123315.3732205-9-xuhaoyue1@hisilicon.com Signed-off-by: Yangyang Li Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 --- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 32cc116b3a6d..edd19970931d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -724,7 +724,7 @@ struct hns_roce_caps { u32 max_sq_sg; u32 max_sq_inline; u32 max_rq_sg; - u32 max_extend_sg; + u32 rsv0; u32 num_qps; u32 num_pi_qps; u32 reserved_qps; @@ -748,7 +748,7 @@ struct hns_roce_caps { int num_comp_vectors; int num_other_vectors; u32 num_mtpts; - u32 num_mtt_segs; + u32 rsv1; u32 num_srqwqe_segs; u32 num_idx_segs; int reserved_mrws; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4931f2a8a4af..f8b747cc4e79 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1966,7 +1966,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->min_cqes = HNS_ROCE_MIN_CQE_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; - caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; @@ -2185,7 +2184,6 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; - caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; @@ -2272,7 +2270,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_inline = 
le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); - caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index ca63463e7d4e..7a613cbe2ad6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -46,7 +46,6 @@ #define HNS_ROCE_V2_MAX_CQE_NUM 0x400000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 -#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V3_MAX_SQ_INLINE 0x400 #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 @@ -55,7 +54,6 @@ #define HNS_ROCE_V2_AEQE_VEC_NUM 1 #define HNS_ROCE_V2_ABNORMAL_VEC_NUM 1 #define HNS_ROCE_V2_MAX_MTPT_NUM 0x100000 -#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 @@ -1175,7 +1173,7 @@ struct hns_roce_query_pf_caps_a { __le16 max_sq_sg; __le16 max_sq_inline; __le16 max_rq_sg; - __le32 max_extend_sg; + __le32 rsv0; __le16 num_qpc_timer; __le16 num_cqc_timer; __le16 max_srq_sges; -- cgit v1.2.3 From 6649b4a1c43c6ad153c3ff0c1754a436aa6b6390 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Thu, 22 Sep 2022 20:33:12 +0800 Subject: RDMA/hns: Remove redundant 'max_srq_desc_sz' in caps The max_srq_desc_sz is defined in the code, but never used, so delete this redundant variable. 
Link: https://lore.kernel.org/r/20220922123315.3732205-10-xuhaoyue1@hisilicon.com Signed-off-by: Yangyang Li Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 -- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index edd19970931d..aa859bf30774 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -735,7 +735,7 @@ struct hns_roce_caps { u32 max_srq_sges; u32 max_sq_desc_sz; u32 max_rq_desc_sz; - u32 max_srq_desc_sz; + u32 rsv2; int max_qp_init_rdma; int max_qp_dest_rdma; u32 num_cqs; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f8b747cc4e79..31bfea15cdfc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1983,7 +1983,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ; - caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; @@ -2277,7 +2276,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; - caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; caps->cqe_sz = resp_a->cqe_sz; caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 7a613cbe2ad6..bd09109e4848 100644 --- 
a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -63,7 +63,6 @@ #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 #define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 #define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16 -#define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 @@ -1181,7 +1180,7 @@ struct hns_roce_query_pf_caps_a { u8 num_other_vectors; u8 max_sq_desc_sz; u8 max_rq_desc_sz; - u8 max_srq_desc_sz; + u8 rsv1; u8 cqe_sz; }; -- cgit v1.2.3 From 3b1f864c904915b3baebffb31ea05ee704b0df3c Mon Sep 17 00:00:00 2001 From: Luoyouming Date: Thu, 22 Sep 2022 20:33:13 +0800 Subject: RDMA/hns: Replacing 'dseg_len' by macros in fill_ext_sge_inl_data() The sge size is known to be constant, so it's unnecessary to use sizeof to calculate it. Link: https://lore.kernel.org/r/20220922123315.3732205-11-xuhaoyue1@hisilicon.com Signed-off-by: Luoyouming Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 31bfea15cdfc..e1716f100dce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -193,8 +193,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, unsigned int *sge_idx, u32 msg_len) { struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; - unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg); - unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len; + unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE; unsigned int left_len_in_pg; unsigned int idx = *sge_idx; unsigned int i = 0; @@ -222,7 +221,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, if (len <= left_len_in_pg) { memcpy(dseg, addr, len); - idx += len / dseg_len; + idx += len / HNS_ROCE_SGE_SIZE; 
i++; if (i >= wr->num_sge) @@ -237,7 +236,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, len -= left_len_in_pg; addr += left_len_in_pg; - idx += left_len_in_pg / dseg_len; + idx += left_len_in_pg / HNS_ROCE_SGE_SIZE; dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT; -- cgit v1.2.3 From 8c581c47b9ba064cc3c3ad399081c202b0b0bf78 Mon Sep 17 00:00:00 2001 From: Yixing Liu Date: Thu, 22 Sep 2022 20:33:14 +0800 Subject: RDMA/hns: Replacing magic number with macros in apply_func_caps() Replacing magic number with macros in function apply_func_caps(). Link: https://lore.kernel.org/r/20220922123315.3732205-12-xuhaoyue1@hisilicon.com Signed-off-by: Yixing Liu Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index e1716f100dce..fd4e767cd8de 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2186,8 +2186,10 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; if (!caps->num_comp_vectors) - caps->num_comp_vectors = min_t(u32, caps->eqc_bt_num - 1, - (u32)priv->handle->rinfo.num_vectors - 2); + caps->num_comp_vectors = + min_t(u32, caps->eqc_bt_num - HNS_ROCE_V2_AEQE_VEC_NUM, + (u32)priv->handle->rinfo.num_vectors - + (HNS_ROCE_V2_AEQE_VEC_NUM + HNS_ROCE_V2_ABNORMAL_VEC_NUM)); if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { caps->eqe_hop_num = HNS_ROCE_V3_EQE_HOP_NUM; -- cgit v1.2.3 From f0588567976edcb6a7f6f20a9126b40e4d2da818 Mon Sep 17 00:00:00 2001 From: Guofeng Yue Date: Thu, 22 Sep 2022 20:33:15 +0800 Subject: RDMA/hns: Unified Log Printing Style The first letter of the log information is changed to lowercase to keep the same style. 
Link: https://lore.kernel.org/r/20220922123315.3732205-13-xuhaoyue1@hisilicon.com Signed-off-by: Guofeng Yue Signed-off-by: Haoyue Xu Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 6 ++-- drivers/infiniband/hw/hns/hns_roce_hem.c | 6 ++-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 50 +++++++++++++++--------------- drivers/infiniband/hw/hns/hns_roce_main.c | 30 +++++++++--------- drivers/infiniband/hw/hns/hns_roce_mr.c | 2 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 16 +++++----- 6 files changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 8acd599ffac1..736dc2f993b4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -454,7 +454,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) hr_cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); if (!hr_cq) { - dev_warn(hr_dev->dev, "Completion event for bogus CQ 0x%06x\n", + dev_warn(hr_dev->dev, "completion event for bogus CQ 0x%06x\n", cqn); return; } @@ -475,14 +475,14 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) hr_cq = xa_load(&hr_dev->cq_table.array, cqn & (hr_dev->caps.num_cqs - 1)); if (!hr_cq) { - dev_warn(dev, "Async event for bogus CQ 0x%06x\n", cqn); + dev_warn(dev, "async event for bogus CQ 0x%06x\n", cqn); return; } if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { - dev_err(dev, "Unexpected event type 0x%x on CQ 0x%06x\n", + dev_err(dev, "unexpected event type 0x%x on CQ 0x%06x\n", event_type, cqn); return; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index d0b75a2234d3..aa8a08d1c014 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -926,7 +926,7 @@ void 
hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, if (table->hem[i]) { if (hr_dev->hw->clear_hem(hr_dev, table, i * table->table_chunk_size / table->obj_size, 0)) - dev_err(dev, "Clear HEM base address failed.\n"); + dev_err(dev, "clear HEM base address failed.\n"); hns_roce_free_hem(hr_dev, table->hem[i]); } @@ -1415,7 +1415,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, &hem_list->btm_bt); if (ret) { dev_err(hr_dev->dev, - "alloc hem trunk fail ret=%d!\n", ret); + "alloc hem trunk fail ret = %d!\n", ret); goto err_alloc; } } @@ -1424,7 +1424,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, ret = hem_list_alloc_root_bt(hr_dev, hem_list, unit, regions, region_cnt); if (ret) - dev_err(hr_dev->dev, "alloc hem root fail ret=%d!\n", ret); + dev_err(hr_dev->dev, "alloc hem root fail ret = %d!\n", ret); else return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index fd4e767cd8de..2d0192057d1a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -380,7 +380,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, if (unlikely(ibqp->qp_type != IB_QPT_RC && ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_UD)) { - ibdev_err(ibdev, "Not supported QP(0x%x)type!\n", + ibdev_err(ibdev, "not supported QP(0x%x)type!\n", ibqp->qp_type); return -EOPNOTSUPP; } else if (unlikely(hr_qp->state == IB_QPS_RESET || @@ -1405,20 +1405,20 @@ static void func_clr_hw_resetting_state(struct hns_roce_dev *hr_dev, hr_dev->dis_db = true; dev_warn(hr_dev->dev, - "Func clear is pending, device in resetting state.\n"); + "func clear is pending, device in resetting state.\n"); end = HNS_ROCE_V2_HW_RST_TIMEOUT; while (end) { if (!ops->get_hw_reset_stat(handle)) { hr_dev->is_reset = true; dev_info(hr_dev->dev, - "Func clear success after reset.\n"); + "func clear success after reset.\n"); return; } msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); 
end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; } - dev_warn(hr_dev->dev, "Func clear failed.\n"); + dev_warn(hr_dev->dev, "func clear failed.\n"); } static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev, @@ -1430,21 +1430,21 @@ static void func_clr_sw_resetting_state(struct hns_roce_dev *hr_dev, hr_dev->dis_db = true; dev_warn(hr_dev->dev, - "Func clear is pending, device in resetting state.\n"); + "func clear is pending, device in resetting state.\n"); end = HNS_ROCE_V2_HW_RST_TIMEOUT; while (end) { if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) { hr_dev->is_reset = true; dev_info(hr_dev->dev, - "Func clear success after sw reset\n"); + "func clear success after sw reset\n"); return; } msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; } - dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n"); + dev_warn(hr_dev->dev, "func clear failed because of unfinished sw reset\n"); } static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval, @@ -1457,7 +1457,7 @@ static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval, if (ops->ae_dev_reset_cnt(handle) != hr_dev->reset_cnt) { hr_dev->dis_db = true; hr_dev->is_reset = true; - dev_info(hr_dev->dev, "Func clear success after reset.\n"); + dev_info(hr_dev->dev, "func clear success after reset.\n"); return; } @@ -1474,9 +1474,9 @@ static void hns_roce_func_clr_rst_proc(struct hns_roce_dev *hr_dev, int retval, if (retval && !flag) dev_warn(hr_dev->dev, - "Func clear read failed, ret = %d.\n", retval); + "func clear read failed, ret = %d.\n", retval); - dev_warn(hr_dev->dev, "Func clear failed.\n"); + dev_warn(hr_dev->dev, "func clear failed.\n"); } static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id) @@ -1497,7 +1497,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id) ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { fclr_write_fail_flag = true; - 
dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n", + dev_err(hr_dev->dev, "func clear write failed, ret = %d.\n", ret); goto out; } @@ -5033,14 +5033,14 @@ static bool check_qp_timeout_cfg_range(struct hns_roce_dev *hr_dev, u8 *timeout) if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { if (*timeout > QP_ACK_TIMEOUT_MAX_HIP08) { ibdev_warn(&hr_dev->ib_dev, - "Local ACK timeout shall be 0 to 20.\n"); + "local ACK timeout shall be 0 to 20.\n"); return false; } *timeout += QP_ACK_TIMEOUT_OFFSET; } else if (hr_dev->pci_dev->revision > PCI_REVISION_ID_HIP08) { if (*timeout > QP_ACK_TIMEOUT_MAX) { ibdev_warn(&hr_dev->ib_dev, - "Local ACK timeout shall be 0 to 31.\n"); + "local ACK timeout shall be 0 to 31.\n"); return false; } } @@ -5543,7 +5543,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, msleep(20); } - ibdev_err(ibdev, "Query SCC clr done flag overtime.\n"); + ibdev_err(ibdev, "query SCC clr done flag overtime.\n"); ret = -ETIMEDOUT; out: @@ -5832,26 +5832,26 @@ static void hns_roce_irq_work_handle(struct work_struct *work) switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: - ibdev_info(ibdev, "Path migrated succeeded.\n"); + ibdev_info(ibdev, "path migrated succeeded.\n"); break; case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - ibdev_warn(ibdev, "Path migration failed.\n"); + ibdev_warn(ibdev, "path migration failed.\n"); break; case HNS_ROCE_EVENT_TYPE_COMM_EST: break; case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - ibdev_warn(ibdev, "Send queue drained.\n"); + ibdev_warn(ibdev, "send queue drained.\n"); break; case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - ibdev_err(ibdev, "Local work queue 0x%x catast error, sub_event type is: %d\n", + ibdev_err(ibdev, "local work queue 0x%x catast error, sub_event type is: %d\n", irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: - ibdev_err(ibdev, "Invalid request local work queue 0x%x error.\n", + ibdev_err(ibdev, "invalid request 
local work queue 0x%x error.\n", irq_work->queue_num); break; case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: - ibdev_err(ibdev, "Local access violation work queue 0x%x error, sub_event type is: %d\n", + ibdev_err(ibdev, "local access violation work queue 0x%x error, sub_event type is: %d\n", irq_work->queue_num, irq_work->sub_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: @@ -5873,7 +5873,7 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_warn(ibdev, "DB overflow.\n"); break; case HNS_ROCE_EVENT_TYPE_FLR: - ibdev_warn(ibdev, "Function level reset.\n"); + ibdev_warn(ibdev, "function level reset.\n"); break; case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: ibdev_err(ibdev, "xrc domain violation error.\n"); @@ -5992,7 +5992,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_FLR: break; default: - dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", + dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n", event_type, eq->eqn, eq->cons_index); break; } @@ -6383,7 +6383,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT, NULL, 0); if (err) - dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); + dev_err(hr_dev->dev, "failed to alloc EQE mtr, err %d\n", err); return err; } @@ -6472,7 +6472,7 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, 0, hr_dev->irq_names[j - comp_num], &eq_table->eq[j - other_num]); if (ret) { - dev_err(hr_dev->dev, "Request irq error!\n"); + dev_err(hr_dev->dev, "request irq error!\n"); goto err_request_failed; } } @@ -6894,7 +6894,7 @@ static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) dev_err(dev, "In reset process RoCE reinit failed %d.\n", ret); } else { handle->rinfo.reset_state = HNS_ROCE_STATE_RST_INITED; - dev_info(dev, "Reset done, RoCE client reinit finished.\n"); + dev_info(dev, "reset done, RoCE client reinit finished.\n"); } 
return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 498d7c28c56c..53c53c20360d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -97,7 +97,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u32 port, netdev = hr_dev->iboe.netdevs[port]; if (!netdev) { - dev_err(dev, "Can't find netdev on port(%u)!\n", port); + dev_err(dev, "can't find netdev on port(%u)!\n", port); return -ENODEV; } @@ -239,7 +239,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num, net_dev = hr_dev->iboe.netdevs[port]; if (!net_dev) { spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); - dev_err(dev, "Find netdev %u failed!\n", port); + dev_err(dev, "find netdev %u failed!\n", port); return -EINVAL; } @@ -661,7 +661,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, hr_dev->caps.num_mtpts); if (ret) { - dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); + dev_err(dev, "failed to init MTPT context memory, aborting.\n"); return ret; } @@ -669,7 +669,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_QPC, hr_dev->caps.qpc_sz, hr_dev->caps.num_qps); if (ret) { - dev_err(dev, "Failed to init QP context memory, aborting.\n"); + dev_err(dev, "failed to init QP context memory, aborting.\n"); goto err_unmap_dmpt; } @@ -679,7 +679,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.max_qp_init_rdma, hr_dev->caps.num_qps); if (ret) { - dev_err(dev, "Failed to init irrl_table memory, aborting.\n"); + dev_err(dev, "failed to init irrl_table memory, aborting.\n"); goto err_unmap_qp; } @@ -692,7 +692,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_qps); if (ret) { dev_err(dev, - "Failed to init trrl_table memory, aborting.\n"); + "failed to init trrl_table memory, aborting.\n"); goto err_unmap_irrl; } } @@ -701,7 
+701,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz, hr_dev->caps.num_cqs); if (ret) { - dev_err(dev, "Failed to init CQ context memory, aborting.\n"); + dev_err(dev, "failed to init CQ context memory, aborting.\n"); goto err_unmap_trrl; } @@ -712,7 +712,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_srqs); if (ret) { dev_err(dev, - "Failed to init SRQ context memory, aborting.\n"); + "failed to init SRQ context memory, aborting.\n"); goto err_unmap_cq; } } @@ -725,7 +725,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_qps); if (ret) { dev_err(dev, - "Failed to init SCC context memory, aborting.\n"); + "failed to init SCC context memory, aborting.\n"); goto err_unmap_srq; } } @@ -737,7 +737,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.num_qpc_timer); if (ret) { dev_err(dev, - "Failed to init QPC timer memory, aborting.\n"); + "failed to init QPC timer memory, aborting.\n"); goto err_unmap_ctx; } } @@ -749,7 +749,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) hr_dev->caps.cqc_timer_bt_num); if (ret) { dev_err(dev, - "Failed to init CQC timer memory, aborting.\n"); + "failed to init CQC timer memory, aborting.\n"); goto err_unmap_qpc_timer; } } @@ -827,13 +827,13 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar); if (ret) { - dev_err(dev, "Failed to allocate priv_uar.\n"); + dev_err(dev, "failed to allocate priv_uar.\n"); goto err_uar_table_free; } ret = hns_roce_init_qp_table(hr_dev); if (ret) { - dev_err(dev, "Failed to init qp_table.\n"); + dev_err(dev, "failed to init qp_table.\n"); goto err_uar_table_free; } @@ -910,14 +910,14 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->hw->cmq_init) { ret = hr_dev->hw->cmq_init(hr_dev); if (ret) { - dev_err(dev, "Init RoCE Command Queue failed!\n"); + dev_err(dev, "init RoCE Command 
Queue failed!\n"); return ret; } } ret = hr_dev->hw->hw_profile(hr_dev); if (ret) { - dev_err(dev, "Get RoCE engine profile failed!\n"); + dev_err(dev, "get RoCE engine profile failed!\n"); goto error_failed_cmd_init; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 93615f2556b2..845ac7d3831f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -190,7 +190,7 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) int ret; mr = kzalloc(sizeof(*mr), GFP_KERNEL); - if (mr == NULL) + if (!mr) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 52ba194d7ae3..a546e934b887 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -56,7 +56,7 @@ static void flush_work_handle(struct work_struct *work) if (test_and_clear_bit(HNS_ROCE_FLUSH_FLAG, &hr_qp->flush_flag)) { ret = hns_roce_modify_qp(&hr_qp->ibqp, &attr, attr_mask, NULL); if (ret) - dev_err(dev, "Modify QP to error state failed(%d) during CQE flush\n", + dev_err(dev, "modify QP to error state failed(%d) during CQE flush\n", ret); } @@ -105,7 +105,7 @@ void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) xa_unlock(&hr_dev->qp_table_xa); if (!qp) { - dev_warn(dev, "Async event for bogus QP %08x\n", qpn); + dev_warn(dev, "async event for bogus QP %08x\n", qpn); return; } @@ -275,7 +275,7 @@ static int hns_roce_qp_store(struct hns_roce_dev *hr_dev, ret = xa_err(xa_store_irq(xa, hr_qp->qpn, hr_qp, GFP_KERNEL)); if (ret) - dev_err(hr_dev->dev, "Failed to xa store for QPC\n"); + dev_err(hr_dev->dev, "failed to xa store for QPC\n"); else /* add QP to device's QP list for softwc */ add_qp_to_list(hr_dev, hr_qp, init_attr->send_cq, @@ -296,14 +296,14 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) /* Alloc memory for QPC 
*/ ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get QPC table\n"); + dev_err(dev, "failed to get QPC table\n"); goto err_out; } /* Alloc memory for IRRL */ ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get IRRL table\n"); + dev_err(dev, "failed to get IRRL table\n"); goto err_put_qp; } @@ -312,7 +312,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ret = hns_roce_table_get(hr_dev, &qp_table->trrl_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get TRRL table\n"); + dev_err(dev, "failed to get TRRL table\n"); goto err_put_irrl; } } @@ -322,7 +322,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); if (ret) { - dev_err(dev, "Failed to get SCC CTX table\n"); + dev_err(dev, "failed to get SCC CTX table\n"); goto err_put_trrl; } } @@ -1206,7 +1206,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); if (ret) - ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", + ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n", init_attr->qp_type, ret); return ret; -- cgit v1.2.3 From cbdae01d8b517b81ed271981395fee8ebd08ba7d Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Tue, 27 Sep 2022 10:29:19 +0800 Subject: IB/hfi1: Use skb_put_data() instead of skb_put/memcpy pair Use skb_put_data() instead of skb_put() and memcpy(), which is shorter and clear. Drop the tmp variable that is not needed any more. 
Link: https://lore.kernel.org/r/20220927022919.16902-1-shangxiaojing@huawei.com Signed-off-by: Shang XiaoJing Reviewed-by: Dennis Dalessandro Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hfi1/ipoib_rx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/ipoib_rx.c b/drivers/infiniband/hw/hfi1/ipoib_rx.c index 3afa7545242c..629691a572ef 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_rx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_rx.c @@ -11,13 +11,10 @@ static void copy_ipoib_buf(struct sk_buff *skb, void *data, int size) { - void *dst_data; - skb_checksum_none_assert(skb); skb->protocol = *((__be16 *)data); - dst_data = skb_put(skb, size); - memcpy(dst_data, data, size); + skb_put_data(skb, data, size); skb->mac_header = HFI1_IPOIB_PSEUDO_LEN; skb_pull(skb, HFI1_IPOIB_ENCAP_LEN); } -- cgit v1.2.3 From 4b83ddc0924752ebb5f99e84e00d1cb725a9aa51 Mon Sep 17 00:00:00 2001 From: Zeng Heng Date: Thu, 29 Sep 2022 11:12:00 +0800 Subject: RDMA/usnic: fix set-but-not-used variable 'flags' warning Remove unused local variable 'flags' without any logic changes. 
Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Signed-off-by: Zeng Heng Link: https://lore.kernel.org/r/20220929031200.4060891-1-zengheng4@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/usnic/usnic_uiom.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 67a1b4562dc2..67923ced6e2d 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -95,7 +95,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int ret; int off; int i; - int flags; dma_addr_t pa; unsigned int gup_flags; struct mm_struct *mm; @@ -132,8 +131,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, goto out; } - flags = IOMMU_READ | IOMMU_CACHE; - flags |= (writable) ? IOMMU_WRITE : 0; gup_flags = FOLL_WRITE; gup_flags |= (writable) ? 0 : FOLL_FORCE; cur_base = addr & PAGE_MASK; -- cgit v1.2.3 From 8ad891ed435ba24465e0650942267e90a060675f Mon Sep 17 00:00:00 2001 From: Daisuke Matsuda Date: Thu, 29 Sep 2022 17:00:23 +0900 Subject: RDMA/rxe: Remove error/warning messages from packet receiver path Incoming packets to rxe are passed from UDP layer using an encapsulation socket. If there are any clients reachable to a node, they can invoke the encapsulation handler arbitrarily by sending malicious or irrelevant packets. This can potentially cause a message overflow and a subsequent slowdown on the node. 
Signed-off-by: Daisuke Matsuda Link: https://lore.kernel.org/r/20220929080023.304242-1-matsuda-daisuke@fujitsu.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_icrc.c | 12 +--- drivers/infiniband/sw/rxe/rxe_net.c | 1 - drivers/infiniband/sw/rxe/rxe_recv.c | 106 +++++++++-------------------------- 3 files changed, 28 insertions(+), 91 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index e03af3012590..46bb07c5c4df 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -151,18 +151,8 @@ int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt) payload_size(pkt) + bth_pad(pkt)); icrc = ~icrc; - if (unlikely(icrc != pkt_icrc)) { - if (skb->protocol == htons(ETH_P_IPV6)) - pr_warn_ratelimited("bad ICRC from %pI6c\n", - &ipv6_hdr(skb)->saddr); - else if (skb->protocol == htons(ETH_P_IP)) - pr_warn_ratelimited("bad ICRC from %pI4\n", - &ip_hdr(skb)->saddr); - else - pr_warn_ratelimited("bad ICRC from unknown\n"); - + if (unlikely(icrc != pkt_icrc)) return -EINVAL; - } return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index c53f4529f098..35f327b9d4b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -145,7 +145,6 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; if (skb_linearize(skb)) { - pr_err("skb_linearize failed\n"); ib_device_put(&rxe->ib_dev); goto drop; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index f3ad7b6dbd97..434a693cd4a5 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -16,47 +16,36 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, unsigned int pkt_type; if (unlikely(!qp->valid)) - goto err1; + return -EINVAL; pkt_type = pkt->opcode & 0xe0; switch (qp_type(qp)) { case IB_QPT_RC: - if 
(unlikely(pkt_type != IB_OPCODE_RC)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_RC)) + return -EINVAL; break; case IB_QPT_UC: - if (unlikely(pkt_type != IB_OPCODE_UC)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_UC)) + return -EINVAL; break; case IB_QPT_UD: case IB_QPT_GSI: - if (unlikely(pkt_type != IB_OPCODE_UD)) { - pr_warn_ratelimited("bad qp type\n"); - goto err1; - } + if (unlikely(pkt_type != IB_OPCODE_UD)) + return -EINVAL; break; default: - pr_warn_ratelimited("unsupported qp type\n"); - goto err1; + return -EINVAL; } if (pkt->mask & RXE_REQ_MASK) { if (unlikely(qp->resp.state != QP_STATE_READY)) - goto err1; + return -EINVAL; } else if (unlikely(qp->req.state < QP_STATE_READY || - qp->req.state > QP_STATE_DRAINED)) { - goto err1; - } + qp->req.state > QP_STATE_DRAINED)) + return -EINVAL; return 0; - -err1: - return -EINVAL; } static void set_bad_pkey_cntr(struct rxe_port *port) @@ -84,26 +73,20 @@ static int check_keys(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, pkt->pkey_index = 0; if (!pkey_match(pkey, IB_DEFAULT_PKEY_FULL)) { - pr_warn_ratelimited("bad pkey = 0x%x\n", pkey); set_bad_pkey_cntr(port); - goto err1; + return -EINVAL; } if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_GSI) { u32 qkey = (qpn == 1) ? 
GSI_QKEY : qp->attr.qkey; if (unlikely(deth_qkey(pkt) != qkey)) { - pr_warn_ratelimited("bad qkey, got 0x%x expected 0x%x for qpn 0x%x\n", - deth_qkey(pkt), qkey, qpn); set_qkey_viol_cntr(port); - goto err1; + return -EINVAL; } } return 0; - -err1: - return -EINVAL; } static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, @@ -112,13 +95,10 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb = PKT_TO_SKB(pkt); if (qp_type(qp) != IB_QPT_RC && qp_type(qp) != IB_QPT_UC) - goto done; + return 0; - if (unlikely(pkt->port_num != qp->attr.port_num)) { - pr_warn_ratelimited("port %d != qp port %d\n", - pkt->port_num, qp->attr.port_num); - goto err1; - } + if (unlikely(pkt->port_num != qp->attr.port_num)) + return -EINVAL; if (skb->protocol == htons(ETH_P_IP)) { struct in_addr *saddr = @@ -126,19 +106,9 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in_addr *daddr = &qp->pri_av.dgid_addr._sockaddr_in.sin_addr; - if (ip_hdr(skb)->daddr != saddr->s_addr) { - pr_warn_ratelimited("dst addr %pI4 != qp source addr %pI4\n", - &ip_hdr(skb)->daddr, - &saddr->s_addr); - goto err1; - } - - if (ip_hdr(skb)->saddr != daddr->s_addr) { - pr_warn_ratelimited("source addr %pI4 != qp dst addr %pI4\n", - &ip_hdr(skb)->saddr, - &daddr->s_addr); - goto err1; - } + if ((ip_hdr(skb)->daddr != saddr->s_addr) || + (ip_hdr(skb)->saddr != daddr->s_addr)) + return -EINVAL; } else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *saddr = @@ -146,24 +116,12 @@ static int check_addr(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct in6_addr *daddr = &qp->pri_av.dgid_addr._sockaddr_in6.sin6_addr; - if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr))) { - pr_warn_ratelimited("dst addr %pI6 != qp source addr %pI6\n", - &ipv6_hdr(skb)->daddr, saddr); - goto err1; - } - - if (memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) { - pr_warn_ratelimited("source addr %pI6 != qp dst addr %pI6\n", - 
&ipv6_hdr(skb)->saddr, daddr); - goto err1; - } + if (memcmp(&ipv6_hdr(skb)->daddr, saddr, sizeof(*saddr)) || + memcmp(&ipv6_hdr(skb)->saddr, daddr, sizeof(*daddr))) + return -EINVAL; } -done: return 0; - -err1: - return -EINVAL; } static int hdr_check(struct rxe_pkt_info *pkt) @@ -175,24 +133,18 @@ static int hdr_check(struct rxe_pkt_info *pkt) int index; int err; - if (unlikely(bth_tver(pkt) != BTH_TVER)) { - pr_warn_ratelimited("bad tver\n"); + if (unlikely(bth_tver(pkt) != BTH_TVER)) goto err1; - } - if (unlikely(qpn == 0)) { - pr_warn_once("QP 0 not supported"); + if (unlikely(qpn == 0)) goto err1; - } if (qpn != IB_MULTICAST_QPN) { index = (qpn == 1) ? port->qp_gsi_index : qpn; qp = rxe_pool_get_index(&rxe->qp_pool, index); - if (unlikely(!qp)) { - pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn); + if (unlikely(!qp)) goto err1; - } err = check_type_state(rxe, pkt, qp); if (unlikely(err)) @@ -206,10 +158,8 @@ static int hdr_check(struct rxe_pkt_info *pkt) if (unlikely(err)) goto err2; } else { - if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) { - pr_warn_ratelimited("no grh for mcast qpn\n"); + if (unlikely((pkt->mask & RXE_GRH_MASK) == 0)) goto err1; - } } pkt->qp = qp; @@ -364,10 +314,8 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < RXE_BTH_BYTES)) goto drop; - if (rxe_chk_dgid(rxe, skb) < 0) { - pr_warn_ratelimited("failed checking dgid\n"); + if (rxe_chk_dgid(rxe, skb) < 0) goto drop; - } pkt->opcode = bth_opcode(pkt); pkt->psn = bth_psn(pkt); -- cgit v1.2.3