summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/xe/xe_gt_topology.c
blob: df2042db7ee683d5c87304a81e239f00082ed2e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_gt_topology.h"

#include <generated/xe_wa_oob.h>
#include <linux/bitmap.h>
#include <linux/compiler.h>

#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_wa.h"

/*
 * Read up to XE_MAX_DSS_FUSE_REGS fuse registers, passed as trailing
 * struct xe_reg arguments, and pack their 32-bit values into @mask.
 *
 * @gt: GT whose MMIO space is read
 * @mask: destination bitmap; receives numregs * 32 bits
 * @numregs: how many of the trailing register arguments to read
 */
static void
load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
{
	u32 fuses[XE_MAX_DSS_FUSE_REGS] = {};
	va_list regs;
	int idx;

	/* Warn and clamp so the reads below never overrun the local array. */
	if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
		numregs = XE_MAX_DSS_FUSE_REGS;

	va_start(regs, numregs);
	for (idx = 0; idx < numregs; idx++) {
		struct xe_reg reg = va_arg(regs, struct xe_reg);

		fuses[idx] = xe_mmio_read32(&gt->mmio, reg);
	}
	va_end(regs);

	bitmap_from_arr32(mask, fuses, numregs * 32);
}

/*
 * Read the EU fuse register and turn it into the EU mask stored in @mask.
 *
 * @gt: GT whose MMIO space is read
 * @mask: destination bitmap; receives XE_MAX_EU_FUSE_BITS bits
 * @eu_type: set to XE_GT_EU_TYPE_SIMD16 or XE_GT_EU_TYPE_SIMD8 depending on
 *	     the graphics version checks below
 */
static void
load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 reg_val = xe_mmio_read32(&gt->mmio, XELP_EU_ENABLE);
	u32 val = 0;
	int i;

	/* The result is built in a single u32, so only one register fits. */
	BUILD_BUG_ON(XE_MAX_EU_FUSE_REGS > 1);

	/*
	 * Pre-Xe_HP platforms inverted the bit meaning (disable instead
	 * of enable).
	 */
	if (GRAPHICS_VERx100(xe) < 1250)
		reg_val = ~reg_val & XELP_EU_MASK;

	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
		/* SIMD16 EUs, one bit == one EU */
		*eu_type = XE_GT_EU_TYPE_SIMD16;
		val = reg_val;
	} else {
		/*
		 * SIMD8 EUs, one bit == 2 EU
		 *
		 * Every register bit expands to two bits of the 32-bit result,
		 * so only register bits 0..15 can be represented.  Stop there
		 * instead of shifting by 32 or more (undefined behaviour), and
		 * use an unsigned constant so that bit 15 does not overflow a
		 * signed int.  Register bits above 15 are ignored.
		 */
		*eu_type = XE_GT_EU_TYPE_SIMD8;
		for (i = 0; i < 16 && (reg_val >> i); i++)
			if (reg_val & BIT(i))
				val |= 0x3u << (2 * i);
	}

	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
}

/**
 * gen_l3_mask_from_pattern - Replicate a bit pattern according to a mask
 * @xe: device
 * @dst: destination
 * @pattern: pattern to replicate
 * @patternbits: size of the pattern, in bits
 * @mask: mask describing where to replicate the pattern
 *
 * Helper for building L3 bank masks in one generic layout on platforms
 * whose registers describe L3 nodes and banks in different ways.
 *
 * For every bit N set in the low 32 bits of @mask, a copy of @pattern
 * shifted left by N * @patternbits is OR-ed into @dst.  Bits already set
 * in @dst are kept.
 *
 * Example 1:
 * ----------
 * @pattern =    0b1111
 *                 └┬─┘
 * @patternbits =   4 (bits)
 * @mask = 0b0101
 *           ││││
 *           │││└────────────────── 0b1111 (=1×0b1111)
 *           ││└──────────── 0b0000    │   (=0×0b1111)
 *           │└────── 0b1111    │      │   (=1×0b1111)
 *           └ 0b0000    │      │      │   (=0×0b1111)
 *                │      │      │      │
 * @dst =      0b0000 0b1111 0b0000 0b1111
 *
 * Example 2:
 * ----------
 * @pattern =    0b11111111
 *                 └┬─────┘
 * @patternbits =   8 (bits)
 * @mask = 0b10
 *           ││
 *           ││
 *           ││
 *           │└────────── 0b00000000 (=0×0b11111111)
 *           └ 0b11111111      │     (=1×0b11111111)
 *                  │          │
 * @dst =      0b11111111 0b00000000
 */
static void
gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
			 xe_l3_bank_mask_t pattern, int patternbits,
			 unsigned long mask)
{
	unsigned long slot;

	/* The pattern must fit within its declared width (or be empty). */
	xe_assert(xe, find_last_bit(pattern, XE_MAX_L3_BANK_MASK_BITS) < patternbits ||
		  bitmap_empty(pattern, XE_MAX_L3_BANK_MASK_BITS));
	/* The highest replicated copy must still fit in the destination. */
	xe_assert(xe, !mask || patternbits * (__fls(mask) + 1) <= XE_MAX_L3_BANK_MASK_BITS);

	for_each_set_bit(slot, &mask, 32) {
		unsigned long offset = slot * patternbits;
		xe_l3_bank_mask_t copy = {};

		bitmap_shift_left(copy, pattern, offset,
				  XE_MAX_L3_BANK_MASK_BITS);
		bitmap_or(dst, dst, copy, XE_MAX_L3_BANK_MASK_BITS);
	}
}

/*
 * Build the L3 bank mask for @gt from the fuse registers.
 *
 * @gt: GT whose MMIO space is read
 * @l3_bank_mask: destination bitmap; left untouched when the no_media_l3
 *		  workaround applies
 *
 * Which register fields are used, and how they are expanded, depends on
 * the graphics version / platform; see the individual branches below.
 */
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 fuse3 = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);

	/*
	 * PTL platforms with media version 30.00 do not provide proper values
	 * for the media GT's L3 bank registers.  Skip the readout since we
	 * don't have any way to obtain real values.
	 *
	 * This may get re-described as an official workaround in the future,
	 * but there's no tracking number assigned yet so we use a custom
	 * OOB workaround descriptor.
	 */
	if (XE_WA(gt, no_media_l3))
		return;

	if (GRAPHICS_VER(xe) >= 20) {
		/*
		 * The XE2_GT_L3_MODE_MASK field is used directly as the
		 * per-node pattern and replicated every 4 bits for each bit
		 * set in the XE2_NODE_ENABLE_MASK field.
		 */
		xe_l3_bank_mask_t per_node = {};
		u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XE2_GT_L3_MODE_MASK, fuse3);

		bitmap_from_arr32(per_node, &bank_val, 32);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (GRAPHICS_VERx100(xe) >= 1270) {
		/*
		 * Two-step expansion: each GT_L3_EXC_MASK bit (from
		 * XEHP_FUSE4) becomes a 0b11 pair in the per-node pattern,
		 * which is then replicated every 4 bits for each bit set in
		 * MEML3_EN_MASK.
		 */
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 fuse4 = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
		u32 bank_val = REG_FIELD_GET(GT_L3_EXC_MASK, fuse4);

		bitmap_set_value8(per_mask_bit, 0x3, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 2, bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 4,
					 meml3_en);
	} else if (xe->info.platform == XE_PVC) {
		/*
		 * Two-step expansion: each XEHPC_GT_L3_MODE_MASK bit becomes
		 * a 0b1111 nibble in the per-node pattern, which is then
		 * replicated every 16 bits for each bit set in MEML3_EN_MASK.
		 */
		xe_l3_bank_mask_t per_node = {};
		xe_l3_bank_mask_t per_mask_bit = {};
		u32 meml3_en = REG_FIELD_GET(MEML3_EN_MASK, fuse3);
		u32 bank_val = REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK, fuse3);

		bitmap_set_value8(per_mask_bit, 0xf, 0);
		gen_l3_mask_from_pattern(xe, per_node, per_mask_bit, 4,
					 bank_val);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 16,
					 meml3_en);
	} else if (xe->info.platform == XE_DG2) {
		/*
		 * Each bit set in MEML3_EN_MASK contributes a full 0xff byte
		 * to the result.
		 */
		xe_l3_bank_mask_t per_node = {};
		u32 mask = REG_FIELD_GET(MEML3_EN_MASK, fuse3);

		bitmap_set_value8(per_node, 0xff, 0);
		gen_l3_mask_from_pattern(xe, l3_bank_mask, per_node, 8, mask);
	} else {
		/* 1:1 register bit to mask bit (inverted register bits) */
		u32 mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK, ~fuse3);

		bitmap_from_arr32(l3_bank_mask, &mask, 32);
	}
}

/*
 * Report how many geometry and compute DSS fuse registers the device's
 * graphics version uses.
 *
 * @xe: device
 * @geometry_regs: out, number of geometry DSS fuse registers
 * @compute_regs: out, number of compute DSS fuse registers
 */
static void
get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
{
	if (GRAPHICS_VER(xe) > 20) {
		*geometry_regs = 3;
		*compute_regs = 3;
		return;
	}

	if (GRAPHICS_VERx100(xe) == 1260) {
		/* No geometry DSS registers, two compute ones. */
		*geometry_regs = 0;
		*compute_regs = 2;
		return;
	}

	/* Everything else has a single geometry register ... */
	*geometry_regs = 1;
	/* ... and a compute register only from version 12.50 onwards. */
	*compute_regs = GRAPHICS_VERx100(xe) >= 1250 ? 1 : 0;
}

/*
 * Populate gt->fuse_topo (geometry/compute DSS masks, per-DSS EU mask and
 * EU type, L3 bank mask) from the fuse registers, then emit the result
 * through a DRM_UT_DRIVER debug printer.
 *
 * @gt: GT to initialize
 */
void
xe_gt_topology_init(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);
	struct drm_printer p;
	int num_geometry_regs, num_compute_regs;

	get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);

	/*
	 * Register counts returned shouldn't exceed the number of registers
	 * passed as parameters below.
	 */
	drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
	drm_WARN_ON(&xe->drm, num_compute_regs > 3);

	/*
	 * All three candidate registers are always passed; load_dss_mask()
	 * only reads the first num_*_regs of them.
	 */
	load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
		      num_geometry_regs,
		      XELP_GT_GEOMETRY_DSS_ENABLE,
		      XE2_GT_GEOMETRY_DSS_1,
		      XE2_GT_GEOMETRY_DSS_2);
	load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
		      XEHP_GT_COMPUTE_DSS_ENABLE,
		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
		      XE2_GT_COMPUTE_DSS_2);
	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);

	/* Dump the freshly loaded topology through the driver debug printer. */
	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");

	xe_gt_topology_dump(gt, &p);
}

/*
 * Map an EU type to the short name printed in the topology dump.
 * Yields NULL for a value that is not one of the handled enumerators.
 */
static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
{
	const char *name = NULL;

	/* No default case, so unhandled values keep the NULL above. */
	switch (eu_type) {
	case XE_GT_EU_TYPE_SIMD8:
		name = "simd8";
		break;
	case XE_GT_EU_TYPE_SIMD16:
		name = "simd16";
		break;
	}

	return name;
}

/*
 * Print the contents of gt->fuse_topo to @p, one line per item:
 * geometry DSS mask, compute DSS mask, EU mask per DSS, EU type and
 * L3 bank mask.
 *
 * @gt: GT whose topology is printed
 * @p: printer receiving the output
 */
void
xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
	/* Bitmaps are printed with %*pb; the width argument is the bit count. */
	drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.g_dss_mask);
	drm_printf(p, "dss mask (compute):  %*pb\n", XE_MAX_DSS_FUSE_BITS,
		   gt->fuse_topo.c_dss_mask);

	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
		   gt->fuse_topo.eu_mask_per_dss);
	drm_printf(p, "EU type:             %s\n",
		   eu_type_to_str(gt->fuse_topo.eu_type));

	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
		   gt->fuse_topo.l3_bank_mask);
}

/*
 * Find the index of the first DSS set in @mask.  The search begins at bit
 * groupnum * groupsize, so passing a non-zero group size and number starts
 * it at the beginning of that DSS group (e.g., gslice, cslice, etc.);
 * passing zeroes searches from bit 0.
 *
 * The value returned is whatever find_next_bit() yields for a bitmap of
 * XE_MAX_DSS_FUSE_BITS bits.
 */
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
{
	const int first_bit = groupnum * groupsize;

	return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, first_bit);
}

/*
 * Report whether @mask has no bits set within its first
 * XE_MAX_DSS_FUSE_BITS bits.
 */
bool xe_dss_mask_empty(const xe_dss_mask_t mask)
{
	return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
}

/**
 * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
 * @gt: GT to check
 * @quad: Which quadrant of the DSS space to check
 *
 * Xe_HP platforms can have up to four CCS engines, and each of them is
 * logically tied to one quarter of the possible DSS.  When a quadrant of
 * the DSS space contains no DSS at all, the CCS engine that belongs to it
 * cannot be used either.
 *
 * Geometry and compute DSS are considered together; the size of a quadrant
 * is derived from the larger of the two fuse register counts.
 *
 * Returns false if all DSS in a quadrant of the GT are fused off, else true.
 */
bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
{
	struct xe_device *xe = gt_to_xe(gt);
	int num_g_regs, num_c_regs;
	int quad_size, first_dss;
	xe_dss_mask_t combined;

	/* 32 DSS bits per fuse register, split into four equal quadrants. */
	get_num_dss_regs(xe, &num_g_regs, &num_c_regs);
	quad_size = 32 * max(num_g_regs, num_c_regs) / 4;

	bitmap_or(combined, gt->fuse_topo.g_dss_mask,
		  gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS);

	/* First DSS at or after the start of the requested quadrant. */
	first_dss = xe_dss_mask_group_ffs(combined, quad_size, quad);

	/* It only counts if it lies before the start of the next quadrant. */
	return first_dss < (quad + 1) * quad_size;
}

/*
 * Report whether bit @dss is set in the GT's geometry DSS mask.
 * @dss is passed to test_bit() as-is; no range check is done here.
 */
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.g_dss_mask);
}

/*
 * Report whether bit @dss is set in the GT's compute DSS mask.
 * @dss is passed to test_bit() as-is; no range check is done here.
 */
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
	return test_bit(dss, gt->fuse_topo.c_dss_mask);
}