diff options
author | Shiwu Zhang <shiwu.zhang@amd.com> | 2023-10-31 12:32:54 +0800 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-11-17 09:30:51 -0500 |
commit | 75fb313c55fa102f973c440f55dc63ffc61f3b54 (patch) | |
tree | 3d7b6424a8d9b1da1a9260f09664ae507a23ef22 | |
parent | 59e4db5375f587954eb779ac9c7888a6c81c306b (diff) | |
download | linux-stable-75fb313c55fa102f973c440f55dc63ffc61f3b54.tar.gz linux-stable-75fb313c55fa102f973c440f55dc63ffc61f3b54.tar.bz2 linux-stable-75fb313c55fa102f973c440f55dc63ffc61f3b54.zip |
drm/amdgpu: expose the connected port num info through sysfs
By catting the xgmi_port_num sysfs node, it prints out the info in the
format of <src node id>:<src port num> -> <dst node id>:<dst port num>
for one xgmi link.
For example, in case of 4 sockets fully and evenly connected setup, it
would be like as below for the first node in the hive.
01:02 -> 02:03
01:03 -> 02:02
01:07 -> 03:04
01:04 -> 03:07
01:06 -> 04:05
01:05 -> 04:06
Based on the fact that there is two xgmi links between each socket pair,
"01:02 -> 02:03" means that the current socket in question use the port 2
to connect with port 3 of the second node in the hive and so on.
v2: print out the src/dst node id for each xgmi link (lijo)
v3: replace the current_node++ with +1 to align with dst node (le)
and use the dev_err instead of pr_err (lijo)
v4: fix checkpatch warning (alex)
Signed-off-by: Shiwu Zhang <shiwu.zhang@amd.com>
Acked-by: Lijo Lazar <lijo.lazar@amd.com>
Reviewed-by: Le Ma <le.ma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index bd20cb3b9819..44d8c1a11e1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -413,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, return sysfs_emit(buf, "%s\n", buf); } +static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i, j, size = 0; + int current_node; + /* + * get the node id in the sysfs for the current socket and show + * it in the port num info output in the sysfs for easy reading. + * it is NOT the one retrieved from xgmi ta. + */ + for (i = 0; i < top->num_nodes; i++) { + if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) { + current_node = i; + break; + } + } + + for (i = 0; i < top->num_nodes; i++) { + for (j = 0; j < top->nodes[i].num_links; j++) + /* node id in sysfs starts from 1 rather than 0 so +1 here */ + size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1, + top->nodes[i].port_num[j].src_xgmi_port_num, i + 1, + top->nodes[i].port_num[j].dst_xgmi_port_num); + } + + return size; +} + #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) static ssize_t amdgpu_xgmi_show_error(struct device *dev, struct device_attribute *attr, @@ -452,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); +static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -487,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, if (ret) pr_err("failed to create xgmi_num_links\n"); + /* Create xgmi port num file if supported */ + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num); + if (ret) + dev_err(adev->dev, "failed to create xgmi_port_num\n"); + } + /* Create sysfs link to hive info folder on the first device */ if (hive->kobj.parent != (&adev->dev->kobj)) { ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj, @@ -517,6 +557,8 @@ remove_file: device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); success: return ret; @@ -533,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); if (hive->kobj.parent != (&adev->dev->kobj)) sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); |