From 7fed555c02f754af44f8963890b5ef8e30e97391 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:22:59 +0100 Subject: drm/i915: expose _SLICE_MASK GETPARM Enables userspace to determine the maximum number of slices that can be enabled on the device and also know what specific slices can be enabled. This information is required, for example, to be able to analyse some OA counter reports where the counter configuration depends on the HW slice configuration. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Ben Widawsky --- include/uapi/drm/i915_drm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f24a80d2d42e..25695c3d9a76 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -418,6 +418,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 +/* Query the mask of slices available for this system */ +#define I915_PARAM_SLICE_MASK 46 + typedef struct drm_i915_getparam { __s32 param; /* -- cgit v1.2.3 From f532023381df49ac00cb2d1e70df607cf534720d Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:00 +0100 Subject: drm/i915: expose _SUBSLICE_MASK GETPARM Assuming a uniform mask across all slices, this enables userspace to determine the specific sub slices can be enabled. This information is required, for example, to be able to analyse some OA counter reports where the counter configuration depends on the HW sub slice configuration. Signed-off-by: Robert Bragg Reviewed-by: Matthew Auld Signed-off-by: Lionel Landwerlin Signed-off-by: Ben Widawsky --- include/uapi/drm/i915_drm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 25695c3d9a76..464547d08173 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -421,6 +421,11 @@ typedef struct drm_i915_irq_wait { /* Query the mask of slices available for this system */ #define I915_PARAM_SLICE_MASK 46 +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + typedef struct drm_i915_getparam { __s32 param; /* -- cgit v1.2.3 From 19f81df2859eb10e92d68991cefa39f826dea013 Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Tue, 13 Jun 2017 12:23:03 +0100 Subject: drm/i915/perf: Add OA unit support for Gen 8+ Enables access to OA unit metrics for BDW, CHV, SKL and BXT which all share (more-or-less) the same OA unit design. Of particular note in comparison to Haswell: some OA unit HW config state has become per-context state and as a consequence it is somewhat more complicated to manage synchronous state changes from the cpu while there's no guarantee of what context (if any) is currently actively running on the gpu. The periodic sampling frequency which can be particularly useful for system-wide analysis (as opposed to command stream synchronised MI_REPORT_PERF_COUNT commands) is perhaps the most surprising state to have become per-context save and restored (while the OABUFFER destination is still a shared, system-wide resource). This support for gen8+ takes care to consider a number of timing challenges involved in synchronously updating per-context state primarily by programming all config state from the cpu and updating all current and saved contexts synchronously while the OA unit is still disabled. The driver intentionally avoids depending on command streamer programming to update OA state considering the lack of synchronization between the automatic loading of OACTXCONTROL state (that includes the periodic sampling state and enable state) on context restore and the parsing of any general purpose BB the driver can control. I.e. this implementation is careful to avoid the possibility of a context restore temporarily enabling any out-of-date periodic sampling state. In addition to the risk of transiently-out-of-date state being loaded automatically; there are also internal HW latencies involved in the loading of MUX configurations which would be difficult to account for from the command streamer (and we only want to enable the unit when once the MUX configuration is complete). Since the Gen8+ OA unit design no longer supports clock gating the unit off for a single given context (which effectively stopped any progress of counters while any other context was running) and instead supports tagging OA reports with a context ID for filtering on the CPU, it means we can no longer hide the system-wide progress of counters from a non-privileged application only interested in metrics for its own context. Although we could theoretically try and subtract the progress of other contexts before forwarding reports via read() we aren't in a position to filter reports captured via MI_REPORT_PERF_COUNT commands. As a result, for Gen8+, we always require the dev.i915.perf_stream_paranoid to be unset for any access to OA metrics if not root. v5: Drain submitted requests when enabling metric set to ensure no lite-restore erases the context image we just updated (Lionel) v6: In addition to drain, switch to kernel context & update all context in place (Chris) v7: Add missing mutex_unlock() if switching to kernel context fails (Matthew) v8: Simplify OA period/flex-eu-counters programming by using the batchbuffer instead of modifying ctx-image (Lionel) v9: Back to updating the context image (due to erroneous testing, batchbuffer programming the OA unit doesn't actually work) (Lionel) Pin context before updating context image (Chris) Drop MMIO programming now that we switch to a kernel context with right values in initial context image (Chris) v10: Just pin_map the contexts we want to modify or let the configuration happen on first use (Chris) v11: Update kernel context OA config through the batchbuffer rather than on the fly ctx-image update (Lionel) v12: Rework OA context registers update again by swithing away from user contexts and reconfiguring the kernel context through the batchbuffer and updating all the other contexts' context image. Also take care to lock slice/subslice configuration when OA is on. (Lionel) v13: Request rpcs updates on all engine when updating the OA config (Lionel) v14: Drop any kind of rpcs management now that we monitor sseu configuration changes in a later patch (Lionel) Remove usleep after programming the NOA configs on Gen8+, this doesn't seem to be needed (Lionel) v15: Respect coding style for block comments (Chris) v16: Add missing i915_add_request() in case we fail to emit OA configuration (Matthew) Signed-off-by: Robert Bragg Signed-off-by: Lionel Landwerlin Reviewed-by: Matthew Auld \o/ Signed-off-by: Ben Widawsky --- include/uapi/drm/i915_drm.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 464547d08173..15bc9f78ba4d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1316,13 +1316,18 @@ struct drm_i915_gem_context_param { }; enum drm_i915_oa_format { - I915_OA_FORMAT_A13 = 1, - I915_OA_FORMAT_A29, - I915_OA_FORMAT_A13_B8_C8, - I915_OA_FORMAT_B4_C8, - I915_OA_FORMAT_A45_B8_C8, - I915_OA_FORMAT_B4_C8_A16, - I915_OA_FORMAT_C4_B8, + I915_OA_FORMAT_A13 = 1, /* HSW only */ + I915_OA_FORMAT_A29, /* HSW only */ + I915_OA_FORMAT_A13_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8, /* HSW only */ + I915_OA_FORMAT_A45_B8_C8, /* HSW only */ + I915_OA_FORMAT_B4_C8_A16, /* HSW only */ + I915_OA_FORMAT_C4_B8, /* HSW+ */ + + /* Gen8+ */ + I915_OA_FORMAT_A12, + I915_OA_FORMAT_A12_B8_C8, + I915_OA_FORMAT_A32u40_A4u32_B8_C8, I915_OA_FORMAT_MAX /* non-ABI */ }; -- cgit v1.2.3 From 1a71cf2fa646799d4397a49b223549d8617fece0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 16 Jun 2017 15:05:23 +0100 Subject: drm/i915: Allow execbuffer to use the first object as the batch Currently, the last object in the execlist is the always the batch. However, when building the batch buffer we often know the batch object first and if we can use the first slot in the execlist we can emit relocation instructions relative to it immediately and avoid a separate pass to adjust the relocations to point to the last execlist slot. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen --- include/uapi/drm/i915_drm.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/uapi/drm') diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 15bc9f78ba4d..7ccbd6a2bbe0 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -418,7 +418,6 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 -/* Query the mask of slices available for this system */ #define I915_PARAM_SLICE_MASK 46 /* Assuming it's uniform for each slice, this queries the mask of subslices @@ -426,6 +425,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_SUBSLICE_MASK 47 +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + typedef struct drm_i915_getparam { __s32 param; /* @@ -912,7 +917,17 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_OUT (1<<17) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) +/* + * Traditionally the execbuf ioctl has only considered the final element in + * the execobject[] to be the executable batch. Often though, the client + * will known the batch object prior to construction and being able to place + * it into the execobject[] array first can simplify the relocation tracking. + * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the + * execobject[] as the * batch instead (the default is to use the last + * element). + */ +#define I915_EXEC_BATCH_FIRST (1<<18) +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit v1.2.3