summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorParav Pandit <parav@mellanox.com>2018-04-01 15:08:21 +0300
committerJason Gunthorpe <jgg@mellanox.com>2018-04-03 21:33:50 -0600
commit598ff6bae689453aa894bc38f3f1bb78eb131a61 (patch)
treed002fd3de0d63c34662fd88fbc50e06db15b5abb
parentf35faa4ba9568138eea1c58abb92e8ef415dce41 (diff)
downloadlinux-598ff6bae689453aa894bc38f3f1bb78eb131a61.tar.gz
linux-598ff6bae689453aa894bc38f3f1bb78eb131a61.tar.bz2
linux-598ff6bae689453aa894bc38f3f1bb78eb131a61.zip
IB/core: Refactor GID modify code for RoCE
Code is refactored to prepare separate functions for RoCE which can do more complex operations related to reference counting, while still maintainining code readability. This includes (a) Simplification to not perform netdevice checks and modifications for IB link layer. (b) Do not add RoCE GID entry which has NULL netdevice; instead return an error. (c) If GID addition fails at provider level add_gid(), do not add the entry in the cache and keep the entry marked as INVALID. (d) Simplify and reuse the ib_cache_gid_add()/del() routines so that they can be used even for modifying default GIDs. This avoid some code duplication in modifying default GIDs. (e) find_gid() routine refers to the data entry flags to qualify a GID as valid or invalid GID rather than depending on attributes and zeroness of the GID content. (f) gid_table_reserve_default() sets the GID default attribute at beginning while setting up the GID table. There is no need to use default_gid flag in low level functions such as write_gid(), add_gid(), del_gid(), as they never need to update the DEFAULT property of the GID entry while during GID table update. As as result of this refactor, reserved GID 0:0:0:0:0:0:0:0 is no longer searchable as described below. A unicast GID entry of 0:0:0:0:0:0:0:0 is Reserved GID as per the IB spec version 1.3 section 4.1.1, point (6) whose snippet is below. "The unicast GID address 0:0:0:0:0:0:0:0 is reserved - referred to as the Reserved GID. It shall never be assigned to any endport. It shall not be used as a destination address or in a global routing header (GRH)." GID table cache now only stores valid GID entries. Before this patch, Reserved GID 0:0:0:0:0:0:0:0 was searchable in the GID table using ib_find_cached_gid_by_port() and other similar find routines. Zero GID is no longer searchable as it shall not to be present in GRH or path recored entry as described in IB spec version 1.3 section 4.1.1, point (6), section 12.7.10 and section 12.7.20. ib_cache_update() is simplified to check link layer once, use unified locking scheme for all link layers, removed temporary gid table allocation/free logic. Additionally, (a) Expand ib_gid_attr to store port and index so that GID query routines can get port and index information from the attribute structure. (b) Expand ib_gid_attr to store device as well so that in future code when GID reference counting is done, device is used to reach back to the GID table entry. Signed-off-by: Parav Pandit <parav@mellanox.com> Signed-off-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
-rw-r--r--drivers/infiniband/core/cache.c484
-rw-r--r--drivers/infiniband/core/sysfs.c18
-rw-r--r--include/rdma/ib_verbs.h5
3 files changed, 272 insertions, 235 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index e03eaf0c7527..045ca11fa135 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -59,8 +59,6 @@ struct ib_update_work {
union ib_gid zgid;
EXPORT_SYMBOL(zgid);
-static const struct ib_gid_attr zattr;
-
enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID = 1UL << 0,
GID_ATTR_FIND_MASK_NETDEV = 1UL << 1,
@@ -73,15 +71,6 @@ enum gid_table_entry_props {
GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
};
-enum gid_table_write_action {
- GID_TABLE_WRITE_ACTION_ADD,
- GID_TABLE_WRITE_ACTION_DEL,
- /* MODIFY only updates the GID table. Currently only used by
- * ib_cache_update.
- */
- GID_TABLE_WRITE_ACTION_MODIFY
-};
-
struct ib_gid_table_entry {
unsigned long props;
union ib_gid gid;
@@ -100,16 +89,13 @@ struct ib_gid_table {
* (a) Find the GID
* (b) Delete it.
*
- * Add/delete should be carried out atomically.
- * This is done by locking this mutex from multiple
- * writers. We don't need this lock for IB, as the MAD
- * layer replaces all entries. All data_vec entries
- * are locked by this lock.
**/
- struct mutex lock;
- /* This lock protects the table entries from being
- * read and written simultaneously.
+ /* Any writer to data_vec must hold this lock and the write side of
+ * rwlock. readers must hold only rwlock. All writers must be in a
+ * sleepable context.
*/
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->props. */
rwlock_t rwlock;
struct ib_gid_table_entry *data_vec;
};
@@ -163,94 +149,128 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
-/* This function expects that rwlock will be write locked in all
- * scenarios and that lock will be locked in sleep-able (RoCE)
- * scenarios.
- */
-static int write_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- enum gid_table_write_action action,
- bool default_gid)
- __releases(&table->rwlock) __acquires(&table->rwlock)
+static void del_roce_gid(struct ib_device *device, u8 port_num,
+ struct ib_gid_table *table, int ix)
{
- int ret = 0;
- struct net_device *old_net_dev;
- enum ib_gid_type old_gid_type;
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ device->name, port_num, ix,
+ table->data_vec[ix].gid.raw);
+
+ if (rdma_cap_roce_gid_table(device, port_num))
+ device->del_gid(device, port_num, ix,
+ &table->data_vec[ix].context);
+ dev_put(table->data_vec[ix].attr.ndev);
+}
- /* in rdma_cap_roce_gid_table, this funciton should be protected by a
- * sleep-able lock.
- */
+static int add_roce_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry;
+ int ix = attr->index;
+ int ret = 0;
- if (rdma_cap_roce_gid_table(ib_dev, port)) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
- /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
- * RoCE providers and thus only updates the cache.
- */
- if (action == GID_TABLE_WRITE_ACTION_ADD)
- ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
- &table->data_vec[ix].context);
- else if (action == GID_TABLE_WRITE_ACTION_DEL)
- ret = ib_dev->del_gid(ib_dev, port, ix,
- &table->data_vec[ix].context);
- write_lock_irq(&table->rwlock);
+ if (!attr->ndev) {
+ pr_err("%s NULL netdev device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- old_net_dev = table->data_vec[ix].attr.ndev;
- old_gid_type = table->data_vec[ix].attr.gid_type;
- if (old_net_dev && old_net_dev != attr->ndev)
- dev_put(old_net_dev);
- /* if modify_gid failed, just delete the old gid */
- if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
- gid = &zgid;
- attr = &zattr;
- table->data_vec[ix].context = NULL;
+ entry = &table->data_vec[ix];
+ if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
+ WARN(1, "GID table corruption device=%s port=%d index=%d\n",
+ attr->device->name, attr->port_num,
+ attr->index);
+ return -EINVAL;
}
- memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
- if (default_gid) {
- table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
- if (action == GID_TABLE_WRITE_ACTION_DEL)
- table->data_vec[ix].attr.gid_type = old_gid_type;
+ if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
+ ret = attr->device->add_gid(attr->device, attr->port_num,
+ ix, gid, attr, &entry->context);
+ if (ret) {
+ pr_err("%s GID add failed device=%s port=%d index=%d\n",
+ __func__, attr->device->name, attr->port_num,
+ attr->index);
+ goto add_err;
+ }
}
- if (table->data_vec[ix].attr.ndev &&
- table->data_vec[ix].attr.ndev != old_net_dev)
- dev_hold(table->data_vec[ix].attr.ndev);
-
- table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
+ dev_hold(attr->ndev);
+add_err:
+ if (!ret)
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ attr->device->name, attr->port_num, ix, gid->raw);
return ret;
}
-static int add_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_ADD, default_gid);
-}
+/**
+ * add_modify_gid - Add or modify GID table entry
+ *
+ * @table: GID table in which GID to be added or modified
+ * @gid: GID content
+ * @attr: Attributes of the GID
+ *
+ * Returns 0 on success or appropriate error code. It accepts zero
+ * GID addition for non RoCE ports for HCA's who report them as valid
+ * GID. However such zero GIDs are not added to the cache.
+ */
+static int add_modify_gid(struct ib_gid_table *table,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr)
+{
+ int ret;
+
+ if (rdma_protocol_roce(attr->device, attr->port_num)) {
+ ret = add_roce_gid(table, gid, attr);
+ if (ret)
+ return ret;
+ } else {
+ /*
+ * Some HCA's report multiple GID entries with only one
+ * valid GID, but remaining as zero GID.
+ * So ignore such behavior for IB link layer and don't
+ * fail the call, but don't add such entry to GID cache.
+ */
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return 0;
+ }
+
+ lockdep_assert_held(&table->lock);
+ memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
+ memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-static int modify_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, gid, attr,
- GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+ return 0;
}
-static int del_gid(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table, int ix,
- bool default_gid) {
- return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
- GID_TABLE_WRITE_ACTION_DEL, default_gid);
+/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev: IB device whose GID entry to be deleted
+ * @port: Port number of the IB device
+ * @table: GID table of the IB device for a port
+ * @ix: GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u8 port,
+ struct ib_gid_table *table, int ix)
+{
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+ write_unlock_irq(&table->rwlock);
+
+ if (rdma_protocol_roce(ib_dev, port))
+ del_roce_gid(ib_dev, port, table, ix);
+ memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+ memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
+ table->data_vec[ix].context = NULL;
}
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
const struct ib_gid_attr *val, bool default_gid,
unsigned long mask, int *pempty)
@@ -266,15 +286,32 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
i++;
+ /* find_gid() is used during GID addition where it is expected
+ * to return a free entry slot which is not duplicate.
+ * Free entry slot is requested and returned if pempty is set,
+ * so lookup free slot only if requested.
+ */
+ if (pempty && empty < 0) {
+ if (data->props & GID_TABLE_ENTRY_INVALID) {
+ /* Found an invalid (free) entry; allocate it */
+ if (data->props & GID_TABLE_ENTRY_DEFAULT) {
+ if (default_gid)
+ empty = curr_index;
+ } else {
+ empty = curr_index;
+ }
+ }
+ }
+
+ /*
+ * Additionally find_gid() is used to find valid entry during
+ * lookup operation, where validity needs to be checked. So
+ * find the empty entry first to continue to search for a free
+ * slot and ignore its INVALID flag.
+ */
if (data->props & GID_TABLE_ENTRY_INVALID)
continue;
- if (empty < 0)
- if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
- !memcmp(attr, &zattr, sizeof(*attr)) &&
- !data->props)
- empty = curr_index;
-
if (found >= 0)
continue;
@@ -310,20 +347,56 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
addrconf_ifid_eui48(&gid->raw[8], dev);
}
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
- union ib_gid *gid, struct ib_gid_attr *attr)
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr,
+ unsigned long mask, bool default_gid)
{
struct ib_gid_table *table;
- int ix;
int ret = 0;
- struct net_device *idev;
int empty;
+ int ix;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
-
+ /* Do not allow adding zero GID in support of
+ * IB spec version 1.3 section 4.1.1 point (6) and
+ * section 12.7.10 and section 12.7.20
+ */
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+
+ mutex_lock(&table->lock);
+
+ ix = find_gid(table, gid, attr, default_gid, mask, &empty);
+ if (ix >= 0)
+ goto out_unlock;
+
+ if (empty < 0) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+ attr->device = ib_dev;
+ attr->index = empty;
+ attr->port_num = port;
+ ret = add_modify_gid(table, gid, attr);
+ if (!ret)
+ dispatch_gid_change_event(ib_dev, port);
+
+out_unlock:
+ mutex_unlock(&table->lock);
+ if (ret)
+ pr_warn("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
+}
+
+int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+ union ib_gid *gid, struct ib_gid_attr *attr)
+{
+ struct net_device *idev;
+ unsigned long mask;
+ int ret;
+
if (ib_dev->get_netdev) {
idev = ib_dev->get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
@@ -340,27 +413,11 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
dev_put(idev);
}
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
-
- ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV, &empty);
- if (ix >= 0)
- goto out_unlock;
-
- if (empty < 0) {
- ret = -ENOSPC;
- goto out_unlock;
- }
-
- ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
- if (!ret)
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_NETDEV;
-out_unlock:
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
+ ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
return ret;
}
@@ -368,29 +425,32 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct ib_gid_table *table;
+ int ret = 0;
int ix;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
ix = find_gid(table, gid, attr, false,
GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_NETDEV |
- GID_ATTR_FIND_MASK_DEFAULT,
+ GID_ATTR_FIND_MASK_NETDEV,
NULL);
- if (ix < 0)
+ if (ix < 0) {
+ ret = -EINVAL;
goto out_unlock;
+ }
- if (!del_gid(ib_dev, port, table, ix, false))
- dispatch_gid_change_event(ib_dev, port);
+ del_gid(ib_dev, port, table, ix);
+ dispatch_gid_change_event(ib_dev, port);
out_unlock:
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
- return 0;
+ if (ret)
+ pr_debug("%s: can't delete gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
+ return ret;
}
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
@@ -403,16 +463,14 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- for (ix = 0; ix < table->sz; ix++)
- if (table->data_vec[ix].attr.ndev == ndev)
- if (!del_gid(ib_dev, port, table, ix,
- !!(table->data_vec[ix].props &
- GID_TABLE_ENTRY_DEFAULT)))
- deleted = true;
+ for (ix = 0; ix < table->sz; ix++) {
+ if (table->data_vec[ix].attr.ndev == ndev) {
+ del_gid(ib_dev, port, table, ix);
+ deleted = true;
+ }
+ }
- write_unlock_irq(&table->rwlock);
mutex_unlock(&table->lock);
if (deleted)
@@ -609,6 +667,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
{
struct ib_gid_table *table =
kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+ int i;
if (!table)
return NULL;
@@ -622,6 +681,11 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
+ /* Mark all entries as invalid so that allocator can allocate
+ * one of the invalid (free) entry.
+ */
+ for (i = 0; i < sz; i++)
+ table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
return table;
err_free_table:
@@ -646,16 +710,15 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
if (!table)
return;
- write_lock_irq(&table->rwlock);
+ mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
- sizeof(table->data_vec[i].gid)))
- if (!del_gid(ib_dev, port, table, i,
- table->data_vec[i].props &
- GID_ATTR_FIND_MASK_DEFAULT))
- deleted = true;
+ sizeof(table->data_vec[i].gid))) {
+ del_gid(ib_dev, port, table, i);
+ deleted = true;
+ }
}
- write_unlock_irq(&table->rwlock);
+ mutex_unlock(&table->lock);
if (deleted)
dispatch_gid_change_event(ib_dev, port);
@@ -668,9 +731,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
{
union ib_gid gid;
struct ib_gid_attr gid_attr;
- struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
+ unsigned long mask;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
@@ -679,60 +742,19 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
gid_attr.ndev = ndev;
for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
- int ix;
- union ib_gid current_gid;
- struct ib_gid_attr current_gid_attr = {};
-
if (1UL << gid_type & ~gid_type_mask)
continue;
gid_attr.gid_type = gid_type;
- mutex_lock(&table->lock);
- write_lock_irq(&table->rwlock);
- ix = find_gid(table, NULL, &gid_attr, true,
- GID_ATTR_FIND_MASK_GID_TYPE |
- GID_ATTR_FIND_MASK_DEFAULT,
- NULL);
-
- /* Coudn't find default GID location */
- if (WARN_ON(ix < 0))
- goto release;
-
- zattr_type.gid_type = gid_type;
-
- if (!__ib_cache_gid_get(ib_dev, port, ix,
- &current_gid, &current_gid_attr) &&
- mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
- !memcmp(&gid, &current_gid, sizeof(gid)) &&
- !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
- goto release;
-
- if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
- memcmp(&current_gid_attr, &zattr_type,
- sizeof(current_gid_attr))) {
- if (del_gid(ib_dev, port, table, ix, true)) {
- pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
- ix, gid.raw);
- goto release;
- } else {
- dispatch_gid_change_event(ib_dev, port);
- }
- }
-
if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
- if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
- pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
- gid.raw);
- else
- dispatch_gid_change_event(ib_dev, port);
+ mask = GID_ATTR_FIND_MASK_GID_TYPE |
+ GID_ATTR_FIND_MASK_DEFAULT;
+ __ib_cache_gid_add(ib_dev, port, &gid,
+ &gid_attr, mask, true);
+ } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
+ ib_cache_gid_del(ib_dev, port, &gid, &gid_attr);
}
-
-release:
- if (current_gid_attr.ndev)
- dev_put(current_gid_attr.ndev);
- write_unlock_irq(&table->rwlock);
- mutex_unlock(&table->lock);
}
}
@@ -1057,25 +1079,50 @@ int ib_get_cached_port_state(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_port_state);
+static int config_non_roce_gid_cache(struct ib_device *device,
+ u8 port, int gid_tbl_len)
+{
+ struct ib_gid_attr gid_attr = {};
+ struct ib_gid_table *table;
+ union ib_gid gid;
+ int ret = 0;
+ int i;
+
+ gid_attr.device = device;
+ gid_attr.port_num = port;
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
+
+ mutex_lock(&table->lock);
+ for (i = 0; i < gid_tbl_len; ++i) {
+ if (!device->query_gid)
+ continue;
+ ret = device->query_gid(device, port, i, &gid);
+ if (ret) {
+ pr_warn("query_gid failed (%d) for %s (index %d)\n",
+ ret, device->name, i);
+ goto err;
+ }
+ gid_attr.index = i;
+ add_modify_gid(table, &gid, &gid_attr);
+ }
+err:
+ mutex_unlock(&table->lock);
+ return ret;
+}
+
static void ib_cache_update(struct ib_device *device,
u8 port,
bool enforce_security)
{
struct ib_port_attr *tprops = NULL;
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
- struct ib_gid_cache {
- int table_len;
- union ib_gid table[0];
- } *gid_cache = NULL;
int i;
int ret;
struct ib_gid_table *table;
- bool use_roce_gid_table;
if (!rdma_is_port_valid(device, port))
return;
- use_roce_gid_table = rdma_protocol_roce(device, port);
table = device->cache.ports[port - rdma_start_port(device)].gid;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
@@ -1089,6 +1136,13 @@ static void ib_cache_update(struct ib_device *device,
goto err;
}
+ if (!rdma_protocol_roce(device, port)) {
+ ret = config_non_roce_gid_cache(device, port,
+ tprops->gid_tbl_len);
+ if (ret)
+ goto err;
+ }
+
pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
sizeof *pkey_cache->table, GFP_KERNEL);
if (!pkey_cache)
@@ -1096,15 +1150,6 @@ static void ib_cache_update(struct ib_device *device,
pkey_cache->table_len = tprops->pkey_tbl_len;
- if (!use_roce_gid_table) {
- gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
- sizeof(*gid_cache->table), GFP_KERNEL);
- if (!gid_cache)
- goto err;
-
- gid_cache->table_len = tprops->gid_tbl_len;
- }
-
for (i = 0; i < pkey_cache->table_len; ++i) {
ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
if (ret) {
@@ -1114,33 +1159,12 @@ static void ib_cache_update(struct ib_device *device,
}
}
- if (!use_roce_gid_table) {
- for (i = 0; i < gid_cache->table_len; ++i) {
- ret = device->query_gid(device, port, i,
- gid_cache->table + i);
- if (ret) {
- pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
- ret, device->name, i);
- goto err;
- }
- }
- }
-
write_lock_irq(&device->cache.lock);
old_pkey_cache = device->cache.ports[port -
rdma_start_port(device)].pkey;
device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
- if (!use_roce_gid_table) {
- write_lock(&table->rwlock);
- for (i = 0; i < gid_cache->table_len; i++) {
- modify_gid(device, port, table, i, gid_cache->table + i,
- &zattr, false);
- }
- write_unlock(&table->rwlock);
- }
-
device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
device->cache.ports[port - rdma_start_port(device)].port_state =
tprops->state;
@@ -1154,14 +1178,12 @@ static void ib_cache_update(struct ib_device *device,
port,
tprops->subnet_prefix);
- kfree(gid_cache);
kfree(old_pkey_cache);
kfree(tprops);
return;
err:
kfree(pkey_cache);
- kfree(gid_cache);
kfree(tprops);
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 9b0fbab41dc6..31c7efaf8e7a 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -389,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
+ union ib_gid *pgid;
union ib_gid gid;
ssize_t ret;
ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
- if (ret)
- return ret;
- return sprintf(buf, "%pI6\n", gid.raw);
+ /* If reading GID fails, it is likely due to GID entry being empty
+ * (invalid) or reserved GID in the table.
+ * User space expects to read GID table entries as long as it given
+ * index is within GID table size.
+ * Administrative/debugging tool fails to query rest of the GID entries
+ * if it hits error while querying a GID of the given index.
+ * To avoid user space throwing such error on fail to read gid, return
+ * zero GID as before. This maintains backward compatibility.
+ */
+ if (ret)
+ pgid = &zgid;
+ else
+ pgid = &gid;
+ return sprintf(buf, "%pI6\n", pgid->raw);
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a2f658125795..dc2541f13d7f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -91,8 +91,11 @@ enum ib_gid_type {
#define ROCE_V2_UDP_DPORT 4791
struct ib_gid_attr {
- enum ib_gid_type gid_type;
struct net_device *ndev;
+ struct ib_device *device;
+ enum ib_gid_type gid_type;
+ u16 index;
+ u8 port_num;
};
enum rdma_node_type {