summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2022-11-04 17:20:12 +1000
committer Dave Airlie <airlied@redhat.com> 2022-11-04 17:33:34 +1000
commit 60ba8c5bd94e17ab4b024f5cecf8b48e2cf36412 (patch)
tree 7e03a3b457f942c7eb3b865f535bcbe55bb72d11
parent 441f0ec0ae1ef7350fa546e03c12cc93082e11c6 (diff)
parent 8f956e9a2c9bdb22ac50c8b7656e2ea29c2e656c (diff)
download linux-60ba8c5bd94e17ab4b024f5cecf8b48e2cf36412.tar.gz
linux-60ba8c5bd94e17ab4b024f5cecf8b48e2cf36412.tar.bz2
linux-60ba8c5bd94e17ab4b024f5cecf8b48e2cf36412.zip
Merge tag 'drm-intel-gt-next-2022-11-03' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Driver Changes:

- Fix for #7306: [Arc A380] white flickering when using arc as a secondary gpu (Matt A)
- Add Wa_18017747507 for DG2 (Wayne)
- Avoid spurious WARN on DG1 due to incorrect cache_dirty flag (Niranjana, Matt A)
- Corrections to CS timestamp support for Gen5 and earlier (Ville)
- Fix a build error used with clang compiler on hwmon (GG)
- Improvements to LMEM handling with RPM (Anshuman, Matt A)
- Cleanups in dmabuf code (Mike)
- Selftest improvements (Matt A)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y2N11wu175p6qeEN@jlahtine-mobl.ger.corp.intel.com
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon75
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/gpu/drm/i915/Kconfig.profile26
-rw-r--r--drivers/gpu/drm/i915/Makefile14
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_fb_pin.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_lpe_audio.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c51
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c5
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_internal.c19
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_mman.c21
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c12
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pages.c12
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_pm.c35
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_shmem.c6
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_stolen.c261
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.c115
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm.h18
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_userptr.c5
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c157
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c118
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c79
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c1
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c55
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h12
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c92
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h9
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine.h6
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c113
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c18
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c141
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c38
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c305
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c196
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h181
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.c15
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c469
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_types.h23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c43
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c143
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_migrate.c1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_mocs.c12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_reset.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.c265
-rw-r--r--drivers/gpu/drm/i915/gt/intel_rps.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c577
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds_types.h9
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c22
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_execlists.c50
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c36
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_hangcheck.c51
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_migrate.c1
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c12
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c190
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_workarounds.c2
-rw-r--r--drivers/gpu/drm/i915/gt/sysfs_engines.c25
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h9
-rw-r--r--drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h9
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h16
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c71
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c119
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c61
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c12
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h43
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c6
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c103
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h4
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c332
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c262
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.h31
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c34
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h1
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c24
-rw-r--r--drivers/gpu/drm/i915/gvt/cfg_space.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c4
-rw-r--r--drivers/gpu/drm/i915/gvt/mmio_context.c14
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c8
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h30
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c49
-rw-r--r--drivers/gpu/drm/i915/i915_getparam.c3
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c12
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.c732
-rw-r--r--drivers/gpu/drm/i915/i915_hwmon.h20
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c9
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c582
-rw-r--r--drivers/gpu/drm/i915/i915_perf.h2
-rw-r--r--drivers/gpu/drm/i915/i915_perf_oa_regs.h6
-rw-r--r--drivers/gpu/drm/i915/i915_perf_types.h47
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h22
-rw-r--r--drivers/gpu/drm/i915/i915_reg_defs.h27
-rw-r--r--drivers/gpu/drm/i915/i915_request.c24
-rw-r--r--drivers/gpu/drm/i915/i915_request.h5
-rw-r--r--drivers/gpu/drm/i915/i915_scatterlist.h34
-rw-r--r--drivers/gpu/drm/i915/i915_selftest.h2
-rw-r--r--drivers/gpu/drm/i915/i915_trace.h15
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c9
-rw-r--r--drivers/gpu/drm/i915/intel_device_info.h3
-rw-r--r--drivers/gpu/drm/i915/intel_gvt_mmio_table.c2
-rw-r--r--drivers/gpu/drm/i915/intel_mchbar_regs.h21
-rw-r--r--drivers/gpu/drm/i915/intel_pci_config.h28
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c205
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.c5
-rw-r--r--drivers/gpu/drm/i915/intel_runtime_pm.h22
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c280
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.h2
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.c32
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp.h32
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.c69
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_huc.h13
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_irq.h8
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.c8
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_session.h11
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.c139
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee.h5
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h23
-rw-r--r--drivers/gpu/drm/i915/pxp/intel_pxp_types.h6
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_gem_gtt.c10
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_perf.c16
-rw-r--r--drivers/gpu/drm/i915/selftests/i915_request.c252
-rw-r--r--drivers/gpu/drm/i915/selftests/intel_uncore.c4
-rw-r--r--drivers/gpu/drm/i915/selftests/mock_gem_device.c1
-rw-r--r--drivers/misc/mei/bus.c146
-rw-r--r--drivers/misc/mei/client.c55
-rw-r--r--drivers/misc/mei/hbm.c13
-rw-r--r--drivers/misc/mei/hw-me.c7
-rw-r--r--drivers/misc/mei/hw.h89
-rw-r--r--drivers/misc/mei/interrupt.c47
-rw-r--r--drivers/misc/mei/mei_dev.h8
-rw-r--r--drivers/misc/mei/pxp/mei_pxp.c38
-rw-r--r--include/drm/i915_pxp_tee_interface.h5
-rw-r--r--include/linux/mei_cl_bus.h6
-rw-r--r--include/uapi/drm/i915_drm.h62
150 files changed, 6697 insertions, 2061 deletions
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
new file mode 100644
index 000000000000..2d6a472eef88
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
@@ -0,0 +1,75 @@
+What: /sys/devices/.../hwmon/hwmon<i>/in0_input
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Current Voltage in millivolt.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/devices/.../hwmon/hwmon<i>/power1_max
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Card reactive sustained (PL1/Tau) power limit in microwatts.
+
+ The power controller will throttle the operating frequency
+ if the power averaged over a window (typically seconds)
+ exceeds this limit.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/devices/.../hwmon/hwmon<i>/power1_rated_max
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Card default power limit (default TDP setting).
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/devices/.../hwmon/hwmon<i>/power1_max_interval
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Sustained power limit interval (Tau in PL1/Tau) in
+ milliseconds over which sustained power is averaged.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/devices/.../hwmon/hwmon<i>/power1_crit
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Card reactive critical (I1) power limit in microwatts.
+
+ Card reactive critical (I1) power limit in microwatts is exposed
+ for client products. The power controller will throttle the
+ operating frequency if the power averaged over a window exceeds
+ this limit.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/devices/.../hwmon/hwmon<i>/curr1_crit
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RW. Card reactive critical (I1) power limit in milliamperes.
+
+ Card reactive critical (I1) power limit in milliamperes is
+ exposed for server products. The power controller will throttle
+ the operating frequency if the power averaged over a window
+ exceeds this limit.
+
+ Only supported for particular Intel i915 graphics platforms.
+
+What: /sys/devices/.../hwmon/hwmon<i>/energy1_input
+Date: February 2023
+KernelVersion: 6.2
+Contact: intel-gfx@lists.freedesktop.org
+Description: RO. Energy input of device or gt in microjoules.
+
+ For i915 device level hwmon devices (name "i915") this
+ reflects energy input for the entire device. For gt level
+ hwmon devices (name "i915_gtN") this reflects energy input
+ for the gt.
+
+ Only supported for particular Intel i915 graphics platforms.
diff --git a/MAINTAINERS b/MAINTAINERS
index ab07cf28844e..30e3df70daec 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10224,6 +10224,7 @@ Q: http://patchwork.freedesktop.org/project/intel-gfx/
B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
C: irc://irc.oftc.net/intel-gfx
T: git git://anongit.freedesktop.org/drm-intel
+F: Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
F: Documentation/gpu/i915.rst
F: drivers/gpu/drm/i915/
F: include/drm/i915*
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 39328567c200..7cc38d25ee5c 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -57,10 +57,28 @@ config DRM_I915_PREEMPT_TIMEOUT
default 640 # milliseconds
help
How long to wait (in milliseconds) for a preemption event to occur
- when submitting a new context via execlists. If the current context
- does not hit an arbitration point and yield to HW before the timer
- expires, the HW will be reset to allow the more important context
- to execute.
+ when submitting a new context. If the current context does not hit
+ an arbitration point and yield to HW before the timer expires, the
+ HW will be reset to allow the more important context to execute.
+
+ This is adjustable via
+ /sys/class/drm/card?/engine/*/preempt_timeout_ms
+
+ May be 0 to disable the timeout.
+
+ The compiled in default may get overridden at driver probe time on
+ certain platforms and certain engines which will be reflected in the
+ sysfs control.
+
+config DRM_I915_PREEMPT_TIMEOUT_COMPUTE
+ int "Preempt timeout for compute engines (ms, jiffy granularity)"
+ default 7500 # milliseconds
+ help
+ How long to wait (in milliseconds) for a preemption event to occur
+ when submitting a new context to a compute capable engine. If the
+ current context does not hit an arbitration point and yield to HW
+ before the timer expires, the HW will be reset to allow the more
+ important context to execute.
This is adjustable via
/sys/class/drm/card?/engine/*/preempt_timeout_ms
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cea00aaca04b..51704b54317c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -209,6 +209,9 @@ i915-y += gt/uc/intel_uc.o \
# graphics system controller (GSC) support
i915-y += gt/intel_gsc.o
+# graphics hardware monitoring (HWMON) support
+i915-$(CONFIG_HWMON) += i915_hwmon.o
+
# modesetting core code
i915-y += \
display/hsw_ips.o \
@@ -310,15 +313,18 @@ i915-y += \
i915-y += i915_perf.o
-# Protected execution platform (PXP) support
-i915-$(CONFIG_DRM_I915_PXP) += \
+# Protected execution platform (PXP) support. Base support is required for HuC
+i915-y += \
pxp/intel_pxp.o \
+ pxp/intel_pxp_tee.o \
+ pxp/intel_pxp_huc.o
+
+i915-$(CONFIG_DRM_I915_PXP) += \
pxp/intel_pxp_cmd.o \
pxp/intel_pxp_debugfs.o \
pxp/intel_pxp_irq.o \
pxp/intel_pxp_pm.o \
- pxp/intel_pxp_session.o \
- pxp/intel_pxp_tee.o
+ pxp/intel_pxp_session.o
# Post-mortem debug and GPU hang state capture
i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index ac587647e1f5..ad1a37b515fb 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -5,6 +5,7 @@
#include "gem/i915_gem_domain.h"
#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
#include "gt/gen8_ppgtt.h"
#include "i915_drv.h"
diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c
index 1dddd6abd77b..6900acbb1381 100644
--- a/drivers/gpu/drm/i915/display/intel_fb_pin.c
+++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c
@@ -167,7 +167,6 @@ retry:
ret = i915_gem_object_attach_phys(obj, alignment);
else if (!ret && HAS_LMEM(dev_priv))
ret = i915_gem_object_migrate(obj, &ww, INTEL_REGION_LMEM_0);
- /* TODO: Do we need to sync when migration becomes async? */
if (!ret)
ret = i915_gem_object_pin_pages(obj);
if (ret)
diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
index 6a7ac60e4f76..22ca8754ea96 100644
--- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
@@ -100,9 +100,9 @@ lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
rsc[0].flags = IORESOURCE_IRQ;
rsc[0].name = "hdmi-lpe-audio-irq";
- rsc[1].start = pci_resource_start(pdev, GTTMMADR_BAR) +
+ rsc[1].start = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
I915_HDMI_LPE_AUDIO_BASE;
- rsc[1].end = pci_resource_start(pdev, GTTMMADR_BAR) +
+ rsc[1].end = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
I915_HDMI_LPE_AUDIO_BASE + I915_HDMI_LPE_AUDIO_SIZE - 1;
rsc[1].flags = IORESOURCE_MEM;
rsc[1].name = "hdmi-lpe-audio-mmio";
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1e29b1e6d186..01402f3c58f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1452,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
int err;
/* serialises with execbuf */
- set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+ intel_context_close(ce);
if (!intel_context_pin_if_active(ce))
continue;
@@ -2298,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
}
args->ctx_id = id;
- drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id);
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 07eee1c09aaf..ec6f7ae47783 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
return to_intel_bo(buf->priv);
}
-static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
+static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach,
enum dma_data_direction dir)
{
- struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
- struct sg_table *st;
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
+ struct sg_table *sgt;
struct scatterlist *src, *dst;
int ret, i;
- /* Copy sg so that we make an independent mapping */
- st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
- if (st == NULL) {
+ /*
+ * Make a copy of the object's sgt, so that we can make an independent
+ * mapping
+ */
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
ret = -ENOMEM;
goto err;
}
- ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
+ ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL);
if (ret)
goto err_free;
- src = obj->mm.pages->sgl;
- dst = st->sgl;
- for (i = 0; i < obj->mm.pages->nents; i++) {
+ dst = sgt->sgl;
+ for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) {
sg_set_page(dst, sg_page(src), src->length, 0);
dst = sg_next(dst);
- src = sg_next(src);
}
- ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC);
if (ret)
goto err_free_sg;
- return st;
+ return sgt;
err_free_sg:
- sg_free_table(st);
+ sg_free_table(sgt);
err_free:
- kfree(st);
+ kfree(sgt);
err:
return ERR_PTR(ret);
}
@@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *i915 = to_i915(obj->base.dev);
- struct sg_table *pages;
+ struct sg_table *sgt;
unsigned int sg_page_sizes;
assert_object_held(obj);
- pages = dma_buf_map_attachment(obj->base.import_attach,
- DMA_BIDIRECTIONAL);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
+ sgt = dma_buf_map_attachment(obj->base.import_attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
/*
* DG1 is special here since it still snoops transactions even with
@@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
(!HAS_LLC(i915) && !IS_DG1(i915)))
wbinvd_on_all_cpus();
- sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
- __i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+ sg_page_sizes = i915_sg_dma_sizes(sgt->sgl);
+ __i915_gem_object_set_pages(obj, sgt, sg_page_sizes);
return 0;
}
static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
+ struct sg_table *sgt)
{
- dma_buf_unmap_attachment(obj->base.import_attach, pages,
+ dma_buf_unmap_attachment(obj->base.import_attach, sgt,
DMA_BIDIRECTIONAL);
}
@@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
get_dma_buf(dma_buf);
obj = i915_gem_object_alloc();
- if (obj == NULL) {
+ if (!obj) {
ret = -ENOMEM;
goto fail_detach;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 845023c14eb3..1160723c9d2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2954,11 +2954,6 @@ await_fence_array(struct i915_execbuffer *eb,
int err;
for (n = 0; n < eb->num_fences; n++) {
- struct drm_syncobj *syncobj;
- unsigned int flags;
-
- syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-
if (!eb->fences[n].dma_fence)
continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..629acb403a2c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
#include <linux/scatterlist.h>
#include <linux/slab.h>
-#include <linux/swiotlb.h>
#include "i915_drv.h"
#include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
struct scatterlist *sg;
unsigned int sg_page_sizes;
unsigned int npages;
- int max_order;
+ int max_order = MAX_ORDER;
+ unsigned int max_segment;
gfp_t gfp;
- max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
- if (is_swiotlb_active(obj->base.dev->dev)) {
- unsigned int max_segment;
-
- max_segment = swiotlb_max_segment();
- if (max_segment) {
- max_segment = max_t(unsigned int, max_segment,
- PAGE_SIZE) >> PAGE_SHIFT;
- max_order = min(max_order, ilog2(max_segment));
- }
- }
-#endif
+ max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+ max_order = min(max_order, get_order(max_segment));
gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
if (IS_I965GM(i915) || IS_I965G(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 73d9eda1d6b7..e63329bc8065 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -413,7 +413,7 @@ retry:
vma->mmo = mmo;
if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
- intel_wakeref_auto(&to_gt(i915)->userfault_wakeref,
+ intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
if (write) {
@@ -557,11 +557,13 @@ void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *
drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
- if (obj->userfault_count) {
- /* rpm wakeref provide exclusive access */
- list_del(&obj->userfault_link);
- obj->userfault_count = 0;
- }
+ /*
+ * We have exclusive access here via runtime suspend. All other callers
+ * must first grab the rpm wakeref.
+ */
+ GEM_BUG_ON(!obj->userfault_count);
+ list_del(&obj->userfault_link);
+ obj->userfault_count = 0;
}
void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
@@ -587,13 +589,6 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
spin_lock(&obj->mmo.lock);
}
spin_unlock(&obj->mmo.lock);
-
- if (obj->userfault_count) {
- mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
- list_del(&obj->userfault_link);
- mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
- obj->userfault_count = 0;
- }
}
static struct i915_mmap_offset *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 6b8710ba8ded..733696057761 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -458,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
io_mapping_unmap(src_map);
}
+static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_iomem(obj));
+
+ if (IS_DGFX(to_i915(obj->base.dev)))
+ return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource);
+
+ return true;
+}
+
/**
* i915_gem_object_read_from_page - read data from the page of a GEM object
* @obj: GEM object to read from
@@ -480,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
if (i915_gem_object_has_struct_page(obj))
i915_gem_object_read_from_page_kmap(obj, offset, dst, size);
- else if (i915_gem_object_has_iomem(obj))
+ else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj))
i915_gem_object_read_from_page_iomap(obj, offset, dst, size);
else
return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 1723af9b0f6a..6b9ecff42bb5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
enum i915_map_type type);
+enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ bool always_coherent);
+
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
unsigned long offset,
unsigned long size);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 4df50b049cea..16f845663ff2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
return ret;
}
+enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ bool always_coherent)
+{
+ if (i915_gem_object_is_lmem(obj))
+ return I915_MAP_WC;
+ if (HAS_LLC(i915) || always_coherent)
+ return I915_MAP_WB;
+ else
+ return I915_MAP_WC;
+}
+
void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
unsigned long offset,
unsigned long size)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3428f735e786..0d812f4d787d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -22,9 +22,12 @@
void i915_gem_suspend(struct drm_i915_private *i915)
{
+ struct intel_gt *gt;
+ unsigned int i;
+
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
- intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, 0);
+ intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
flush_workqueue(i915->wq);
/*
@@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- intel_gt_suspend_prepare(to_gt(i915));
+ for_each_gt(gt, i915, i)
+ intel_gt_suspend_prepare(gt);
i915_gem_drain_freed_objects(i915);
}
@@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
&i915->mm.purge_list,
NULL
}, **phase;
+ struct intel_gt *gt;
unsigned long flags;
+ unsigned int i;
bool flush = false;
/*
@@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
* machine in an unusable condition.
*/
- intel_gt_suspend_late(to_gt(i915));
+ for_each_gt(gt, i915, i)
+ intel_gt_suspend_late(gt);
spin_lock_irqsave(&i915->mm.obj_lock, flags);
for (phase = phases; *phase; phase++) {
@@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
void i915_gem_resume(struct drm_i915_private *i915)
{
- int ret;
+ struct intel_gt *gt;
+ int ret, i, j;
GEM_TRACE("%s\n", dev_name(i915->drm.dev));
@@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915)
* guarantee that the context image is complete. So let's just reset
* it and start again.
*/
- intel_gt_resume(to_gt(i915));
+ for_each_gt(gt, i915, i)
+ if (intel_gt_resume(gt))
+ goto err_wedged;
ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
GEM_WARN_ON(ret);
+
+ return;
+
+err_wedged:
+ for_each_gt(gt, i915, j) {
+ if (!intel_gt_is_wedged(gt)) {
+ dev_err(i915->drm.dev,
+ "Failed to re-initialize GPU[%u], declaring it wedged!\n",
+ j);
+ intel_gt_set_wedged(gt);
+ }
+
+ if (j == i)
+ break;
+ }
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index f42ca1179f37..2f7804492cd5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -194,7 +194,7 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
struct intel_memory_region *mem = obj->mm.region;
struct address_space *mapping = obj->base.filp->f_mapping;
const unsigned long page_count = obj->base.size / PAGE_SIZE;
- unsigned int max_segment = i915_sg_segment_size();
+ unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
struct sg_table *st;
struct sgt_iter sgt_iter;
struct page *page;
@@ -369,14 +369,14 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
__start_cpu_write(obj);
/*
- * On non-LLC platforms, force the flush-on-acquire if this is ever
+ * On non-LLC igfx platforms, force the flush-on-acquire if this is ever
* swapped-in. Our async flush path is not trust worthy enough yet(and
* happens in the wrong order), and with some tricks it's conceivable
* for userspace to change the cache-level to I915_CACHE_NONE after the
* pages are swapped-in, and since execbuf binds the object before doing
* the async flush, we have a race window.
*/
- if (!HAS_LLC(i915))
+ if (!HAS_LLC(i915) && !IS_DGFX(i915))
obj->cache_dirty = true;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index acc561c0f0aa..0c70711818ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
mutex_unlock(&i915->mm.stolen_lock);
}
-static int i915_adjust_stolen(struct drm_i915_private *i915,
- struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm)
+{
+ return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start;
+}
+
+static int adjust_stolen(struct drm_i915_private *i915,
+ struct resource *dsm)
{
struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
struct intel_uncore *uncore = ggtt->vm.gt->uncore;
- struct resource *r;
- if (dsm->start == 0 || dsm->end <= dsm->start)
+ if (!valid_stolen_size(i915, dsm))
return -EINVAL;
/*
+ * Make sure we don't clobber the GTT if it's within stolen memory
+ *
* TODO: We have yet too encounter the case where the GTT wasn't at the
* end of stolen. With that assumption we could simplify this.
*/
-
- /* Make sure we don't clobber the GTT if it's within stolen memory */
if (GRAPHICS_VER(i915) <= 4 &&
!IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
struct resource stolen[2] = {*dsm, *dsm};
@@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
}
}
+ if (!valid_stolen_size(i915, dsm))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int request_smem_stolen(struct drm_i915_private *i915,
+ struct resource *dsm)
+{
+ struct resource *r;
+
/*
- * With stolen lmem, we don't need to check if the address range
- * overlaps with the non-stolen system memory range, since lmem is local
- * to the gpu.
+ * With stolen lmem, we don't need to request system memory for the
+ * address range since it's local to the gpu.
+ *
+ * Starting MTL, in IGFX devices the stolen memory is exposed via
+ * LMEMBAR and shall be considered similar to stolen lmem.
*/
- if (HAS_LMEM(i915))
+ if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915))
return 0;
/*
@@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
- *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
case GEN8_STOLEN_RESERVED_1M:
*size = 1024 * 1024;
@@ -390,41 +405,30 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
*size = 8 * 1024 * 1024;
MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
}
+
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915))
+ /* the base is initialized to stolen top so subtract size to get base */
+ *base -= *size;
+ else
+ *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
}
-static int i915_gem_init_stolen(struct intel_memory_region *mem)
+/*
+ * Initialize i915->dsm_reserved to contain the reserved space within the Data
+ * Stolen Memory. This is a range on the top of DSM that is reserved, not to
+ * be used by driver, so must be excluded from the region passed to the
+ * allocator later. The spec also refers to this range as WOPCM.
+ *
+ * Our expectation is that the reserved space is at the top of the stolen
+ * region, as it has been the case for every platform, and *never* at the
+ * bottom, so the calculation here can be simplified.
+ */
+static int init_reserved_stolen(struct drm_i915_private *i915)
{
- struct drm_i915_private *i915 = mem->i915;
struct intel_uncore *uncore = &i915->uncore;
resource_size_t reserved_base, stolen_top;
- resource_size_t reserved_total, reserved_size;
-
- mutex_init(&i915->mm.stolen_lock);
-
- if (intel_vgpu_active(i915)) {
- drm_notice(&i915->drm,
- "%s, disabling use of stolen memory\n",
- "iGVT-g active");
- return 0;
- }
-
- if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
- drm_notice(&i915->drm,
- "%s, disabling use of stolen memory\n",
- "DMAR active");
- return 0;
- }
-
- if (resource_size(&mem->region) == 0)
- return 0;
-
- i915->dsm = mem->region;
-
- if (i915_adjust_stolen(i915, &i915->dsm))
- return 0;
-
- GEM_BUG_ON(i915->dsm.start == 0);
- GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
+ resource_size_t reserved_size;
+ int ret = 0;
stolen_top = i915->dsm.end + 1;
reserved_base = stolen_top;
@@ -455,17 +459,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
&reserved_base, &reserved_size);
}
- /*
- * Our expectation is that the reserved space is at the top of the
- * stolen region and *never* at the bottom. If we see !reserved_base,
- * it likely means we failed to read the registers correctly.
- */
+ /* No reserved stolen */
+ if (reserved_base == stolen_top)
+ goto bail_out;
+
if (!reserved_base) {
drm_err(&i915->drm,
"inconsistent reservation %pa + %pa; ignoring\n",
&reserved_base, &reserved_size);
- reserved_base = stolen_top;
- reserved_size = 0;
+ ret = -EINVAL;
+ goto bail_out;
}
i915->dsm_reserved =
@@ -475,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
drm_err(&i915->drm,
"Stolen reserved area %pR outside stolen memory %pR\n",
&i915->dsm_reserved, &i915->dsm);
- return 0;
+ ret = -EINVAL;
+ goto bail_out;
}
+ return 0;
+
+bail_out:
+ i915->dsm_reserved =
+ (struct resource)DEFINE_RES_MEM(reserved_base, 0);
+
+ return ret;
+}
+
+static int i915_gem_init_stolen(struct intel_memory_region *mem)
+{
+ struct drm_i915_private *i915 = mem->i915;
+
+ mutex_init(&i915->mm.stolen_lock);
+
+ if (intel_vgpu_active(i915)) {
+ drm_notice(&i915->drm,
+ "%s, disabling use of stolen memory\n",
+ "iGVT-g active");
+ return -ENOSPC;
+ }
+
+ if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
+ drm_notice(&i915->drm,
+ "%s, disabling use of stolen memory\n",
+ "DMAR active");
+ return -ENOSPC;
+ }
+
+ if (adjust_stolen(i915, &mem->region))
+ return -ENOSPC;
+
+ if (request_smem_stolen(i915, &mem->region))
+ return -ENOSPC;
+
+ i915->dsm = mem->region;
+
+ if (init_reserved_stolen(i915))
+ return -ENOSPC;
+
/* Exclude the reserved region from driver use */
- mem->region.end = reserved_base - 1;
+ mem->region.end = i915->dsm_reserved.start - 1;
mem->io_size = min(mem->io_size, resource_size(&mem->region));
- /* It is possible for the reserved area to end before the end of stolen
- * memory, so just consider the start. */
- reserved_total = stolen_top - reserved_base;
-
- i915->stolen_usable_size =
- resource_size(&i915->dsm) - reserved_total;
+ i915->stolen_usable_size = resource_size(&mem->region);
drm_dbg(&i915->drm,
"Memory reserved for graphics device: %lluK, usable: %lluK\n",
@@ -495,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
(u64)i915->stolen_usable_size >> 10);
if (i915->stolen_usable_size == 0)
- return 0;
+ return -ENOSPC;
/* Basic memrange allocator for stolen space. */
drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
@@ -733,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915,
static int init_stolen_smem(struct intel_memory_region *mem)
{
+ int err;
+
/*
* Initialise stolen early so that we may reserve preallocated
* objects for the BIOS to KMS transition.
*/
- return i915_gem_init_stolen(mem);
+ err = i915_gem_init_stolen(mem);
+ if (err)
+ drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+
+ return 0;
}
static int release_stolen_smem(struct intel_memory_region *mem)
@@ -754,26 +799,25 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
static int init_stolen_lmem(struct intel_memory_region *mem)
{
+ struct drm_i915_private *i915 = mem->i915;
int err;
if (GEM_WARN_ON(resource_size(&mem->region) == 0))
- return -ENODEV;
+ return 0;
- /*
- * TODO: For stolen lmem we mostly just care about populating the dsm
- * related bits and setting up the drm_mm allocator for the range.
- * Perhaps split up i915_gem_init_stolen() for this.
- */
err = i915_gem_init_stolen(mem);
- if (err)
- return err;
+ if (err) {
+ drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+ return 0;
+ }
- if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
- mem->io_start,
- mem->io_size)) {
- err = -EIO;
+ if (mem->io_size &&
+ !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
goto err_cleanup;
- }
+
+ drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
+ &mem->io_start);
+ drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
return 0;
@@ -796,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
.init_object = _i915_gem_object_stolen_init,
};
+/*
+ * Decode the stolen memory size selected in the GGC register.
+ *
+ * Returns the decoded GMS value as a plain count (the caller scales it by
+ * SZ_1M), or -EIO when the GGMS field does not have all of its bits set or
+ * the GMS encoding is not one of the recognised ranges.
+ */
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+	const u16 ggc = intel_uncore_read16(uncore, GGC);
+	u16 gms;
+
+	/* GGMS is expected to be fixed at 0x3 (8MB), i.e. every mask bit set */
+	if ((ggc & GGMS_MASK) != GGMS_MASK)
+		return -EIO;
+
+	gms = REG_FIELD_GET(GMS_MASK, ggc);
+
+	/* 0x00..0x04: steps of 32 */
+	if (gms <= 0x04)
+		return gms * 32;
+
+	/* 0xf0..0xfe: steps of 4, with 0xf0 meaning 4 */
+	if (gms >= 0xf0 && gms <= 0xfe)
+		return (gms - 0xf0 + 1) * 4;
+
+	/* anything else is not a valid GMS encoding */
+	MISSING_CASE(gms);
+	return -EIO;
+}
+
struct intel_memory_region *
i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
u16 instance)
@@ -806,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
struct intel_memory_region *mem;
resource_size_t io_start, io_size;
resource_size_t min_page_size;
+ int ret;
if (WARN_ON_ONCE(instance))
return ERR_PTR(-ENODEV);
@@ -813,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
return ERR_PTR(-ENXIO);
- /* Use DSM base address instead for stolen memory */
- dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
- if (IS_DG1(uncore->i915)) {
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) {
lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
- if (WARN_ON(lmem_size < dsm_base))
- return ERR_PTR(-ENODEV);
} else {
resource_size_t lmem_range;
@@ -827,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
lmem_size *= SZ_1G;
}
- dsm_size = lmem_size - dsm_base;
- if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+ /*
+ * MTL dsm size is in GGC register.
+ * Also MTL uses offset to DSMBASE in ptes, so i915
+ * uses dsm_base = 0 to setup stolen region.
+ */
+ ret = mtl_get_gms_size(uncore);
+ if (ret < 0) {
+ drm_err(&i915->drm, "invalid MTL GGC register setting\n");
+ return ERR_PTR(ret);
+ }
+
+ dsm_base = 0;
+ dsm_size = (resource_size_t)(ret * SZ_1M);
+
+ GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M);
+ GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size);
+ } else {
+ /* Use DSM base address instead for stolen memory */
+ dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
+ if (WARN_ON(lmem_size < dsm_base))
+ return ERR_PTR(-ENODEV);
+ dsm_size = lmem_size - dsm_base;
+ }
+
+ io_size = dsm_size;
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+ io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M;
+ } else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
io_start = 0;
io_size = 0;
} else {
io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
- io_size = dsm_size;
}
min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@@ -847,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
if (IS_ERR(mem))
return mem;
- /*
- * TODO: consider creating common helper to just print all the
- * interesting stuff from intel_memory_region, which we can use for all
- * our probed regions.
- */
-
- drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
- &mem->io_start);
- drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
-
intel_memory_region_set_name(mem, "stolen-local");
mem->private = true;
@@ -881,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
intel_memory_region_set_name(mem, "stolen-system");
mem->private = true;
+
return mem;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 7a1e92c11946..25129af70f70 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
- const unsigned int max_segment = i915_sg_segment_size();
+ const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
struct file *filp = i915_tt->filp;
struct sgt_iter sgt_iter;
@@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
struct i915_ttm_tt *i915_tt;
int ret;
- if (!obj)
+ if (i915_ttm_is_ghost_object(bo))
return NULL;
i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
@@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- if (!obj)
+ if (i915_ttm_is_ghost_object(bo))
return false;
/*
@@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
{
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
- intel_wakeref_t wakeref = 0;
-
- if (bo->resource && likely(obj)) {
- /* ttm_bo_release() already has dma_resv_lock */
- if (i915_ttm_cpu_maps_iomem(bo->resource))
- wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+ if (bo->resource && !i915_ttm_is_ghost_object(bo)) {
__i915_gem_object_pages_fini(obj);
-
- if (wakeref)
- intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
-
i915_ttm_free_cached_io_rsgt(obj);
}
}
@@ -538,7 +529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
ret = sg_alloc_table_from_pages_segment(st,
ttm->pages, ttm->num_pages,
0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
- i915_sg_segment_size(), GFP_KERNEL);
+ i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
if (ret) {
st->sgl = NULL;
return ERR_PTR(ret);
@@ -624,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
int ret;
- if (!obj)
+ if (i915_ttm_is_ghost_object(bo))
return;
ret = i915_ttm_move_notify(bo);
@@ -657,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource
struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
bool unknown_state;
- if (!obj)
+ if (i915_ttm_is_ghost_object(mem->bo))
return -EINVAL;
if (!kref_get_unless_zero(&obj->base.refcount))
@@ -690,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long base;
unsigned int ofs;
- GEM_BUG_ON(!obj);
+ GEM_BUG_ON(i915_ttm_is_ghost_object(bo));
GEM_WARN_ON(bo->ttm);
base = obj->mm.region->iomap.base - obj->mm.region->region.start;
@@ -699,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
}
+/*
+ * access_memory callback: copy @len bytes between @buf and the object's
+ * backing store, starting @offset bytes into the object. The copy proceeds
+ * page by page, each page being reached through a short-lived ioremap_wc()
+ * mapping; @write selects the direction (non-zero copies from @buf to the
+ * object).
+ *
+ * Returns @len on success, or -EIO if the resource is not mappable or a
+ * page could not be mapped.
+ */
+static int i915_ttm_access_memory(struct ttm_buffer_object *bo,
+				  unsigned long offset, void *buf,
+				  int len, int write)
+{
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	struct intel_memory_region *mr = obj->mm.region;
+	resource_size_t iomap = mr->iomap.base - mr->region.start;
+	unsigned long page = offset >> PAGE_SHIFT;
+	unsigned long remaining = len;
+
+	/*
+	 * TODO: For now just let it fail if the resource is non-mappable,
+	 * otherwise we need to perform the memcpy from the gpu here, without
+	 * interfering with the object (like moving the entire thing).
+	 */
+	if (!i915_ttm_resource_mappable(bo->resource))
+		return -EIO;
+
+	/* From here on @offset is the offset inside the current page */
+	offset -= page << PAGE_SHIFT;
+
+	for (;;) {
+		unsigned long chunk = min(remaining, PAGE_SIZE - offset);
+		void __iomem *ptr;
+		dma_addr_t daddr;
+
+		daddr = i915_gem_object_get_dma_address(obj, page);
+		ptr = ioremap_wc(iomap + daddr + offset, chunk);
+		if (!ptr)
+			return -EIO;
+
+		if (!write)
+			memcpy_fromio(buf, ptr, chunk);
+		else
+			memcpy_toio(ptr, buf, chunk);
+		iounmap(ptr);
+
+		remaining -= chunk;
+		if (!remaining)
+			break;
+
+		/* Every page after the first is copied from its start */
+		page++;
+		buf += chunk;
+		offset = 0;
+	}
+
+	return len;
+}
+
/*
* All callbacks need to take care not to downcast a struct ttm_buffer_object
* without checking its subclass, since it might be a TTM ghost object.
@@ -715,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = {
.delete_mem_notify = i915_ttm_delete_mem_notify,
.io_mem_reserve = i915_ttm_io_mem_reserve,
.io_mem_pfn = i915_ttm_io_mem_pfn,
+ .access_memory = i915_ttm_access_memory,
};
/**
@@ -990,13 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
struct vm_area_struct *area = vmf->vma;
struct ttm_buffer_object *bo = area->vm_private_data;
struct drm_device *dev = bo->base.dev;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
intel_wakeref_t wakeref = 0;
vm_fault_t ret;
int idx;
- obj = i915_ttm_to_gem(bo);
- if (!obj)
+ if (i915_ttm_is_ghost_object(bo))
return VM_FAULT_SIGBUS;
/* Sanity check that we allow writing into this object */
@@ -1035,7 +1070,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
}
if (err) {
- drm_dbg(dev, "Unable to make resource CPU accessible\n");
+ drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n",
+ ERR_PTR(err));
dma_resv_unlock(bo->base.resv);
ret = VM_FAULT_SIGBUS;
goto out_rpm;
@@ -1053,16 +1089,19 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
goto out_rpm;
- /* ttm_bo_vm_reserve() already has dma_resv_lock */
+ /*
+ * ttm_bo_vm_reserve() already has dma_resv_lock.
+ * userfault_count is protected by dma_resv lock and rpm wakeref.
+ */
if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
obj->userfault_count = 1;
- mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
- list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
- mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
+ spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+ list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list);
+ spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
}
if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
- intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
+ intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref,
msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
i915_ttm_adjust_lru(obj);
@@ -1094,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma)
struct drm_i915_gem_object *obj =
i915_ttm_to_gem(vma->vm_private_data);
- GEM_BUG_ON(!obj);
+ GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
i915_gem_object_get(obj);
}
@@ -1103,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma)
struct drm_i915_gem_object *obj =
i915_ttm_to_gem(vma->vm_private_data);
- GEM_BUG_ON(!obj);
+ GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
i915_gem_object_put(obj);
}
@@ -1124,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ intel_wakeref_t wakeref = 0;
+
+ assert_object_held_shared(obj);
+
+ if (i915_ttm_cpu_maps_iomem(bo->resource)) {
+ wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+
+ /* userfault_count is protected by obj lock and rpm wakeref. */
+ if (obj->userfault_count) {
+ spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+ list_del(&obj->userfault_link);
+ spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+ obj->userfault_count = 0;
+ }
+ }
+
ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
+
+ if (wakeref)
+ intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
}
static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index e4842b4296fc..2a94a99ef76b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -28,18 +28,26 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj)
void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
/**
+ * i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object.
+ * @bo: Pointer to the ttm buffer object
+ *
+ * The test compares @bo->destroy against i915_ttm_bo_destroy; any bo with a
+ * different destroy callback is treated as a ghost object.
+ *
+ * Return: True if the ttm bo is not an i915 object but a ghost ttm object,
+ * False otherwise.
+ */
+static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
+{
+ return bo->destroy != i915_ttm_bo_destroy;
+}
+
+/**
* i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
* struct drm_i915_gem_object.
*
- * Return: Pointer to the embedding struct ttm_buffer_object, or NULL
- * if the object was not an i915 ttm object.
+ * No check of the bo's destroy callback is performed here any more, so the
+ * conversion is a plain container_of().
+ *
+ * Return: Pointer to the embedding struct drm_i915_gem_object.
*/
static inline struct drm_i915_gem_object *
i915_ttm_to_gem(struct ttm_buffer_object *bo)
{
- if (bo->destroy != i915_ttm_bo_destroy)
- return NULL;
-
return container_of(bo, struct drm_i915_gem_object, __do_not_access);
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 9a7e50534b84..f59f812dc6d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
bool clear;
int ret;
- if (GEM_WARN_ON(!obj)) {
+ if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
ttm_bo_move_null(bo, dst_mem);
return 0;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index d4398948f016..1b1a22716722 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
{
const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
- unsigned int max_segment = i915_sg_segment_size();
+ unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
struct sg_table *st;
unsigned int sg_page_sizes;
struct page **pvec;
@@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
if (!i915_gem_object_is_readonly(obj))
gup_flags |= FOLL_WRITE;
- pinned = ret = 0;
+ pinned = 0;
while (pinned < num_pages) {
ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
num_pages - pinned, gup_flags,
@@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
pinned += ret;
}
- ret = 0;
ret = i915_gem_object_lock_interruptible(obj, NULL);
if (ret)
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index c570cf780079..0cb99e75b0bc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915,
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
size = obj->base.size;
- if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+ !HAS_64K_PAGES(i915))
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
n = 0;
@@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
* size and ensure the vma offset is at the start of the pt
* boundary, however to improve coverage we opt for testing both
* aligned and unaligned offsets.
+ *
+ * With PS64 this is no longer the case, but to ensure we
+ * sometimes get the compact layout for smaller objects, apply
+ * the round_up anyway.
*/
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
offset_low = round_down(offset_low,
@@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg)
{ SZ_2M + SZ_4K, SZ_64K | SZ_4K },
{ SZ_2M + SZ_4K, SZ_2M | SZ_4K },
{ SZ_2M + SZ_64K, SZ_2M | SZ_64K },
+ { SZ_2M + SZ_64K, SZ_64K },
};
int i, j;
int err;
@@ -1540,6 +1546,154 @@ out_put:
return err;
}
+/*
+ * Live selftest: pin a series of randomly sized objects, alternating between
+ * the LMEM_0 and SMEM regions, back to back at fixed offsets in a single
+ * address space, then exercise each of them with __igt_write_huge() on
+ * randomly picked engines. The intent is to mix 4K and 64K pages within the
+ * same page-table via the PS64 TLB hint.
+ *
+ * Returns 0 on success, or when the device lacks 64K page support or no
+ * engine can store a dword; a negative error code otherwise.
+ */
+static int igt_ppgtt_mixed(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	struct drm_i915_gem_object *obj, *on;
+	struct i915_gem_engines *engines;
+	struct i915_gem_engines_iter it;
+	struct i915_address_space *vm;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct file *file;
+	I915_RND_STATE(prng);
+	LIST_HEAD(objects);
+	struct intel_memory_region *mr;
+	struct i915_vma *vma;
+	unsigned int count;
+	u32 i, addr;
+	int *order;
+	int n, err;
+
+	/*
+	 * Sanity check mixing 4K and 64K pages within the same page-table via
+	 * the new PS64 TLB hint.
+	 */
+
+	if (!HAS_64K_PAGES(i915)) {
+		pr_info("device lacks PS64, skipping\n");
+		return 0;
+	}
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = hugepage_ctx(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+	vm = i915_gem_context_get_eb_vm(ctx);
+
+	/* Phase 1: create and pin the objects, even index LMEM, odd SMEM */
+	i = 0;
+	addr = 0;
+	do {
+		u32 sz;
+
+		sz = i915_prandom_u32_max_state(SZ_4M, &prng);
+		sz = max_t(u32, sz, SZ_4K);
+
+		mr = i915->mm.regions[INTEL_REGION_LMEM_0];
+		if (i & 1)
+			mr = i915->mm.regions[INTEL_REGION_SMEM];
+
+		obj = i915_gem_object_create_region(mr, sz, 0, 0);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			/*
+			 * Objects from earlier iterations are already on the
+			 * list; go through err_put so they are released too.
+			 */
+			goto err_put;
+		}
+
+		list_add_tail(&obj->st_link, &objects);
+
+		vma = i915_vma_instance(obj, vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto err_put;
+		}
+
+		addr = round_up(addr, mr->min_page_size);
+		err = i915_vma_pin(vma, 0, 0, addr | flags);
+		if (err)
+			goto err_put;
+
+		/* Local memory must not have been bound with 4K GTT pages */
+		if (mr->type == INTEL_MEMORY_LOCAL &&
+		    (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		addr += obj->base.size;
+		i++;
+	} while (addr <= SZ_16M);
+
+	/* Count all engines, and those able to store a dword */
+	n = 0;
+	count = 0;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		count++;
+		if (!intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		n++;
+	}
+	i915_gem_context_unlock_engines(ctx);
+	if (!n)
+		goto err_put;
+
+	order = i915_random_order(count * count, &prng);
+	if (!order) {
+		err = -ENOMEM;
+		goto err_put;
+	}
+
+	/* Phase 2: walk the objects in pin order, recomputing each address */
+	i = 0;
+	addr = 0;
+	engines = i915_gem_context_lock_engines(ctx);
+	list_for_each_entry(obj, &objects, st_link) {
+		u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
+
+		addr = round_up(addr, obj->mm.region->min_page_size);
+
+		ce = engines->engines[order[i] % engines->num_engines];
+		i = (i + 1) % (count * count);
+		if (!ce || !intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ce, obj, obj->base.size, addr,
+				       offset_in_page(rnd) / sizeof(u32), rnd + 1);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ce, obj, obj->base.size, addr,
+				       (PAGE_SIZE / sizeof(u32)) - 1,
+				       rnd + 2);
+		if (err)
+			break;
+
+		addr += obj->base.size;
+
+		cond_resched();
+	}
+
+	i915_gem_context_unlock_engines(ctx);
+	kfree(order);
+err_put:
+	list_for_each_entry_safe(obj, on, &objects, st_link) {
+		list_del(&obj->st_link);
+		i915_gem_object_put(obj);
+	}
+	i915_vm_put(vm);
+out:
+	fput(file);
+	return err;
+}
+
static int igt_tmpfs_fallback(void *arg)
{
struct drm_i915_private *i915 = arg;
@@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_smoke_huge),
SUBTEST(igt_ppgtt_sanity_check),
SUBTEST(igt_ppgtt_compact),
+ SUBTEST(igt_ppgtt_mixed),
};
if (!HAS_PPGTT(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
index c6ad67b90e8a..d8864444432b 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -179,97 +179,108 @@ out_file:
}
struct parallel_switch {
- struct task_struct *tsk;
+ struct kthread_worker *worker;
+ struct kthread_work work;
struct intel_context *ce[2];
+ int result;
};
-static int __live_parallel_switch1(void *data)
+static void __live_parallel_switch1(struct kthread_work *work)
{
- struct parallel_switch *arg = data;
+ struct parallel_switch *arg =
+ container_of(work, typeof(*arg), work);
IGT_TIMEOUT(end_time);
unsigned long count;
count = 0;
+ arg->result = 0;
do {
struct i915_request *rq = NULL;
- int err, n;
+ int n;
- err = 0;
- for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
+ for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
struct i915_request *prev = rq;
rq = i915_request_create(arg->ce[n]);
if (IS_ERR(rq)) {
i915_request_put(prev);
- return PTR_ERR(rq);
+ arg->result = PTR_ERR(rq);
+ break;
}
i915_request_get(rq);
if (prev) {
- err = i915_request_await_dma_fence(rq, &prev->fence);
+ arg->result =
+ i915_request_await_dma_fence(rq,
+ &prev->fence);
i915_request_put(prev);
}
i915_request_add(rq);
}
+
+ if (IS_ERR_OR_NULL(rq))
+ break;
+
if (i915_request_wait(rq, 0, HZ) < 0)
- err = -ETIME;
+ arg->result = -ETIME;
+
i915_request_put(rq);
- if (err)
- return err;
count++;
- } while (!__igt_timeout(end_time, NULL));
+ } while (!arg->result && !__igt_timeout(end_time, NULL));
- pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
- return 0;
+ pr_info("%s: %lu switches (sync) <%d>\n",
+ arg->ce[0]->engine->name, count, arg->result);
}
-static int __live_parallel_switchN(void *data)
+static void __live_parallel_switchN(struct kthread_work *work)
{
- struct parallel_switch *arg = data;
+ struct parallel_switch *arg =
+ container_of(work, typeof(*arg), work);
struct i915_request *rq = NULL;
IGT_TIMEOUT(end_time);
unsigned long count;
int n;
count = 0;
+ arg->result = 0;
do {
- for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
+ for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
struct i915_request *prev = rq;
- int err = 0;
rq = i915_request_create(arg->ce[n]);
if (IS_ERR(rq)) {
i915_request_put(prev);
- return PTR_ERR(rq);
+ arg->result = PTR_ERR(rq);
+ break;
}
i915_request_get(rq);
if (prev) {
- err = i915_request_await_dma_fence(rq, &prev->fence);
+ arg->result =
+ i915_request_await_dma_fence(rq,
+ &prev->fence);
i915_request_put(prev);
}
i915_request_add(rq);
- if (err) {
- i915_request_put(rq);
- return err;
- }
}
count++;
- } while (!__igt_timeout(end_time, NULL));
- i915_request_put(rq);
+ } while (!arg->result && !__igt_timeout(end_time, NULL));
- pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
- return 0;
+ if (!IS_ERR_OR_NULL(rq))
+ i915_request_put(rq);
+
+ pr_info("%s: %lu switches (many) <%d>\n",
+ arg->ce[0]->engine->name, count, arg->result);
}
static int live_parallel_switch(void *arg)
{
struct drm_i915_private *i915 = arg;
- static int (* const func[])(void *arg) = {
+ static void (* const func[])(struct kthread_work *) = {
__live_parallel_switch1,
__live_parallel_switchN,
NULL,
@@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg)
struct parallel_switch *data = NULL;
struct i915_gem_engines *engines;
struct i915_gem_engines_iter it;
- int (* const *fn)(void *arg);
+ void (* const *fn)(struct kthread_work *);
struct i915_gem_context *ctx;
struct intel_context *ce;
struct file *file;
@@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg)
}
}
+ for (n = 0; n < count; n++) {
+ struct kthread_worker *worker;
+
+ if (!data[n].ce[0])
+ continue;
+
+ worker = kthread_create_worker(0, "igt/parallel:%s",
+ data[n].ce[0]->engine->name);
+ if (IS_ERR(worker))
+ goto out;
+
+ data[n].worker = worker;
+ }
+
for (fn = func; !err && *fn; fn++) {
struct igt_live_test t;
- int n;
err = igt_live_test_begin(&t, i915, __func__, "");
if (err)
@@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg)
if (!data[n].ce[0])
continue;
- data[n].tsk = kthread_run(*fn, &data[n],
- "igt/parallel:%s",
- data[n].ce[0]->engine->name);
- if (IS_ERR(data[n].tsk)) {
- err = PTR_ERR(data[n].tsk);
- break;
- }
- get_task_struct(data[n].tsk);
+ data[n].result = 0;
+ kthread_init_work(&data[n].work, *fn);
+ kthread_queue_work(data[n].worker, &data[n].work);
}
- yield(); /* start all threads before we kthread_stop() */
-
for (n = 0; n < count; n++) {
- int status;
-
- if (IS_ERR_OR_NULL(data[n].tsk))
- continue;
-
- status = kthread_stop(data[n].tsk);
- if (status && !err)
- err = status;
-
- put_task_struct(data[n].tsk);
- data[n].tsk = NULL;
+ if (data[n].ce[0]) {
+ kthread_flush_work(&data[n].work);
+ if (data[n].result && !err)
+ err = data[n].result;
+ }
}
if (igt_live_test_end(&t))
@@ -399,6 +410,9 @@ out:
intel_context_unpin(data[n].ce[m]);
intel_context_put(data[n].ce[m]);
}
+
+ if (data[n].worker)
+ kthread_destroy_worker(data[n].worker);
}
kfree(data);
out_file:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index f2f3cfad807b..e57f9390076c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -6,8 +6,12 @@
#include "i915_drv.h"
#include "i915_selftest.h"
+#include "gem/i915_gem_context.h"
+#include "mock_context.h"
#include "mock_dmabuf.h"
+#include "igt_gem_utils.h"
+#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
static int igt_dmabuf_export(void *arg)
@@ -140,6 +144,75 @@ out_ret:
return err;
}
+/*
+ * verify_access - check that GPU writes through an imported object reach
+ * the object it was exported from.
+ * @i915: device used to create the mock file and the live context
+ * @native_obj: the exporting object; read back through a CPU WB mapping
+ * @import_obj: the imported object; bound into the context's VM and written
+ *              by the GPU
+ *
+ * The first engine of a fresh context that can store dwords is asked, via
+ * igt_gpu_fill_dw(), to write 0xdeadbeaf across @import_obj. Once the object
+ * is idle, one dword per PAGE_SIZE of @native_obj is compared against that
+ * value.
+ *
+ * Return: 0 on success, and also when the engine walk ends with no usable
+ * engine (ce is NULL), in which case nothing is verified; -EINVAL on a data
+ * mismatch; otherwise the error reported by the failing step.
+ */
+static int verify_access(struct drm_i915_private *i915,
+			 struct drm_i915_gem_object *native_obj,
+			 struct drm_i915_gem_object *import_obj)
+{
+	struct i915_gem_engines_iter it;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct i915_vma *vma;
+	struct file *file;
+	u32 *vaddr;
+	int err = 0, i;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_file;
+	}
+
+	/* Stop at the first engine able to emit the store-dword writes. */
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (intel_engine_can_store_dword(ce->engine))
+			break;
+	}
+	i915_gem_context_unlock_engines(ctx);
+	if (!ce)
+		goto out_file;
+
+	vma = i915_vma_instance(import_obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_file;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto out_file;
+
+	/* The pin is only needed while the fill request is being built. */
+	err = igt_gpu_fill_dw(ce, vma, 0,
+			      vma->size >> PAGE_SHIFT, 0xdeadbeaf);
+	i915_vma_unpin(vma);
+	if (err)
+		goto out_file;
+
+	err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out_file;
+
+	vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out_file;
+	}
+
+	/* Sample one dword per page of the exporting object. */
+	for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) {
+		if (vaddr[i] != 0xdeadbeaf) {
+			pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]);
+			err = -EINVAL;
+			break;
+		}
+	}
+
+	/* Balance the pin_map above on both the success and mismatch paths. */
+	i915_gem_object_unpin_map(native_obj);
+
+out_file:
+	fput(file);
+	return err;
+}
+
static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
struct intel_memory_region **regions,
unsigned int num_regions)
@@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
force_different_devices = true;
- obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
+ obj = __i915_gem_object_create_user(i915, SZ_8M,
regions, num_regions);
if (IS_ERR(obj)) {
pr_err("__i915_gem_object_create_user failed with err=%ld\n",
@@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
i915_gem_object_unlock(import_obj);
+ err = verify_access(i915, obj, import_obj);
+ if (err)
+ goto out_import;
+
/* Now try a fake an importer */
import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
if (IS_ERR(import_attach)) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index b73c91aa5450..1cae24349a96 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -8,6 +8,7 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_ttm.h"
#include "gem/i915_gem_ttm_move.h"
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index e49fa6fa6aee..e1c76e5bfa82 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -396,15 +396,17 @@ int gen8_emit_init_breadcrumb(struct i915_request *rq)
return 0;
}
-static int __gen125_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags,
- u32 arb)
+static int __xehp_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags,
+ u32 arb)
{
struct intel_context *ce = rq->context;
u32 wa_offset = lrc_indirect_bb(ce);
u32 *cs;
+ GEM_BUG_ON(!ce->wa_bb_page);
+
cs = intel_ring_begin(rq, 12);
if (IS_ERR(cs))
return PTR_ERR(cs);
@@ -435,18 +437,18 @@ static int __gen125_emit_bb_start(struct i915_request *rq,
return 0;
}
-int gen125_emit_bb_start_noarb(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+int xehp_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
- return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
+ return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_DISABLE);
}
-int gen125_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags)
+int xehp_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
{
- return __gen125_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
+ return __xehp_emit_bb_start(rq, offset, len, flags, MI_ARB_ENABLE);
}
int gen8_emit_bb_start_noarb(struct i915_request *rq,
@@ -583,6 +585,8 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
cs = gen8_emit_pipe_control(cs,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE,
@@ -600,15 +604,21 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
+ cs = gen8_emit_pipe_control(cs,
+ PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TLB_INVALIDATE |
+ PIPE_CONTROL_TILE_CACHE_FLUSH |
+ PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+ PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ PIPE_CONTROL_DC_FLUSH_ENABLE,
+ 0);
+
+ /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
cs = gen8_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE);
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_CS_STALL);
return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
@@ -715,6 +725,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
struct drm_i915_private *i915 = rq->engine->i915;
u32 flags = (PIPE_CONTROL_CS_STALL |
+ PIPE_CONTROL_TLB_INVALIDATE |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
@@ -731,11 +742,15 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
else if (rq->engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ cs = gen12_emit_pipe_control(cs, PIPE_CONTROL0_HDC_PIPELINE_FLUSH, flags, 0);
+
+ /*XXX: Look at gen8_emit_fini_breadcrumb_rcs */
cs = gen12_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- flags);
+ 0,
+ PIPE_CONTROL_FLUSH_ENABLE |
+ PIPE_CONTROL_CS_STALL);
return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index e4d24c811dd6..655e5c00ddc2 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -32,12 +32,12 @@ int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
-int gen125_emit_bb_start_noarb(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags);
-int gen125_emit_bb_start(struct i915_request *rq,
- u64 offset, u32 len,
- const unsigned int flags);
+int xehp_emit_bb_start_noarb(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
+int xehp_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags);
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 2128b7a72a25..4daaa6f55668 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -476,6 +476,7 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
const gen8_pte_t pte_encode = vm->pte_encode(0, cache_level, flags);
unsigned int rem = sg_dma_len(iter->sg);
u64 start = vma_res->start;
+ u64 end = start + vma_res->vma_size;
GEM_BUG_ON(!i915_vm_is_4lvl(vm));
@@ -489,9 +490,10 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
gen8_pte_t encode = pte_encode;
unsigned int page_size;
gen8_pte_t *vaddr;
- u16 index, max;
+ u16 index, max, nent, i;
max = I915_PDES;
+ nent = 1;
if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_2M &&
IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) &&
@@ -503,25 +505,37 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
vaddr = px_vaddr(pd);
} else {
- if (encode & GEN12_PPGTT_PTE_LM) {
- GEM_BUG_ON(__gen8_pte_index(start, 0) % 16);
- GEM_BUG_ON(rem < I915_GTT_PAGE_SIZE_64K);
- GEM_BUG_ON(!IS_ALIGNED(iter->dma,
- I915_GTT_PAGE_SIZE_64K));
-
- index = __gen8_pte_index(start, 0) / 16;
- page_size = I915_GTT_PAGE_SIZE_64K;
-
- max /= 16;
-
- vaddr = px_vaddr(pd);
- vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+ index = __gen8_pte_index(start, 0);
+ page_size = I915_GTT_PAGE_SIZE;
- pt->is_compact = true;
- } else {
- GEM_BUG_ON(pt->is_compact);
- index = __gen8_pte_index(start, 0);
- page_size = I915_GTT_PAGE_SIZE;
+ if (vma_res->bi.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ /*
+ * Device local-memory on these platforms should
+ * always use 64K pages or larger (including GTT
+ * alignment), therefore if we know the whole
+ * page-table needs to be filled we can always
+ * safely use the compact-layout. Otherwise fall
+ * back to the TLB hint with PS64. If this is
+ * system memory we only bother with PS64.
+ */
+ if ((encode & GEN12_PPGTT_PTE_LM) &&
+ end - start >= SZ_2M && !index) {
+ index = __gen8_pte_index(start, 0) / 16;
+ page_size = I915_GTT_PAGE_SIZE_64K;
+
+ max /= 16;
+
+ vaddr = px_vaddr(pd);
+ vaddr[__gen8_pte_index(start, 1)] |= GEN12_PDE_64K;
+
+ pt->is_compact = true;
+ } else if (IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) &&
+ rem >= I915_GTT_PAGE_SIZE_64K &&
+ !(index % 16)) {
+ encode |= GEN12_PTE_PS64;
+ page_size = I915_GTT_PAGE_SIZE_64K;
+ nent = 16;
+ }
}
vaddr = px_vaddr(pt);
@@ -529,7 +543,12 @@ xehpsdv_ppgtt_insert_huge(struct i915_address_space *vm,
do {
GEM_BUG_ON(rem < page_size);
- vaddr[index++] = encode | iter->dma;
+
+ for (i = 0; i < nent; i++) {
+ vaddr[index++] =
+ encode | (iter->dma + i *
+ I915_GTT_PAGE_SIZE);
+ }
start += page_size;
iter->dma += page_size;
@@ -745,6 +764,8 @@ static void __xehpsdv_ppgtt_insert_entry_lm(struct i915_address_space *vm,
GEM_BUG_ON(!IS_ALIGNED(addr, SZ_64K));
GEM_BUG_ON(!IS_ALIGNED(offset, SZ_64K));
+ /* XXX: we don't strictly need to use this layout */
+
if (!pt->is_compact) {
vaddr = px_vaddr(pd);
vaddr[gen8_pd_index(idx, 1)] |= GEN12_PDE_64K;
@@ -929,29 +950,18 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
*/
ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
- if (HAS_LMEM(gt->i915)) {
+ if (HAS_LMEM(gt->i915))
ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
-
- /*
- * On some platforms the hw has dropped support for 4K GTT pages
- * when dealing with LMEM, and due to the design of 64K GTT
- * pages in the hw, we can only mark the *entire* page-table as
- * operating in 64K GTT mode, since the enable bit is still on
- * the pde, and not the pte. And since we still need to allow
- * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
- * page-table with scratch pointing to LMEM, since that's
- * undefined from the hw pov. The simplest solution is to just
- * move the 64K scratch page to SMEM on such platforms and call
- * it a day, since that should work for all configurations.
- */
- if (HAS_64K_PAGES(gt->i915))
- ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
- else
- ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
- } else {
+ else
ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
- ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
- }
+
+ /*
+ * Using SMEM here instead of LMEM has the advantage of not reserving
+ * high performance memory for a "never" used filler page. It also
+ * removes the device access that would be required to initialise the
+ * scratch page, reducing pressure on an even scarcer resource.
+ */
+ ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
ppgtt->vm.pte_encode = gen8_pte_encode;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index be09fb2e883a..fb62b7b8cbcd 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -276,6 +276,14 @@ static inline bool intel_context_is_barrier(const struct intel_context *ce)
return test_bit(CONTEXT_BARRIER_BIT, &ce->flags);
}
+/*
+ * Mark @ce as closed and then invoke the backend's ->close() hook, if the
+ * context ops provide one.
+ */
+static inline void intel_context_close(struct intel_context *ce)
+{
+	set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+
+	/* The hook is optional; nothing more to do without one. */
+	if (!ce->ops->close)
+		return;
+
+	ce->ops->close(ce);
+}
+
static inline bool intel_context_is_closed(const struct intel_context *ce)
{
return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 04eacae1aca5..e36670f2e626 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -43,6 +43,8 @@ struct intel_context_ops {
void (*revoke)(struct intel_context *ce, struct i915_request *rq,
unsigned int preempt_timeout_ms);
+ void (*close)(struct intel_context *ce);
+
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce);
@@ -197,8 +199,6 @@ struct intel_context {
* context's submissions is complete.
*/
struct i915_sw_fence blocked;
- /** @number_committed_requests: number of committed requests */
- int number_committed_requests;
/** @requests: list of active requests on this context */
struct list_head requests;
/** @prio: the context's current guc priority */
@@ -208,6 +208,11 @@ struct intel_context {
* each priority bucket
*/
u32 prio_count[GUC_CLIENT_PRIORITY_NUM];
+ /**
+ * @sched_disable_delay_work: worker to disable scheduling on this
+ * context
+ */
+ struct delayed_work sched_disable_delay_work;
} guc_state;
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index 04e435bce79b..cbc8b857d5f7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -348,4 +348,10 @@ intel_engine_get_hung_context(struct intel_engine_cs *engine)
return engine->hung_ce;
}
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value);
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value);
+
#endif /* _INTEL_RINGBUFFER_H_ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1f7188129cd1..3b7d750ad054 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -486,6 +486,17 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
+ if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
+ __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
+ engine->class == RENDER_CLASS)
+ engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
+
+ /* features common between engines sharing EUs */
+ if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
+ engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
+ engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
+ }
+
engine->props.heartbeat_interval_ms =
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
engine->props.max_busywait_duration_ns =
@@ -497,20 +508,34 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
- /* Override to uninterruptible for OpenCL workloads. */
- if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS)
- engine->props.preempt_timeout_ms = 0;
-
- if ((engine->class == COMPUTE_CLASS && !RCS_MASK(engine->gt) &&
- __ffs(CCS_MASK(engine->gt)) == engine->instance) ||
- engine->class == RENDER_CLASS)
- engine->flags |= I915_ENGINE_FIRST_RENDER_COMPUTE;
-
- /* features common between engines sharing EUs */
- if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) {
- engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE;
- engine->flags |= I915_ENGINE_HAS_EU_PRIORITY;
- }
+ /*
+ * Mid-thread pre-emption is not available in Gen12. Unfortunately,
+ * some compute workloads run quite long threads. That means they get
+ * reset due to not pre-empting in a timely manner. So, bump the
+ * pre-emption timeout value to be much higher for compute engines.
+ */
+ if (GRAPHICS_VER(i915) == 12 && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
+ engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
+
+	/*
+	 * Cap properties according to any system limits.
+	 *
+	 * CLAMP_PROP(field) runs engine->props.field through the matching
+	 * intel_clamp_<field>() helper and, if the helper returned a different
+	 * value, logs a notice and stores the clamped value back. The clamped
+	 * value is a u64, hence the unsigned %llu conversion in the message.
+	 */
+#define CLAMP_PROP(field) \
+	do { \
+		u64 clamp = intel_clamp_##field(engine, engine->props.field); \
+		if (clamp != engine->props.field) { \
+			drm_notice(&engine->i915->drm, \
+				   "Warning, clamping %s to %llu to prevent overflow\n", \
+				   #field, clamp); \
+			engine->props.field = clamp; \
+		} \
+	} while (0)
+
+ CLAMP_PROP(heartbeat_interval_ms);
+ CLAMP_PROP(max_busywait_duration_ns);
+ CLAMP_PROP(preempt_timeout_ms);
+ CLAMP_PROP(stop_timeout_ms);
+ CLAMP_PROP(timeslice_duration_ms);
+
+#undef CLAMP_PROP
engine->defaults = engine->props; /* never to change again */
@@ -534,6 +559,55 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
return 0;
}
+/*
+ * Limit a heartbeat interval, in milliseconds, to
+ * jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT). @engine is not consulted.
+ */
+u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
+{
+	return min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+}
+
+/*
+ * Limit a busywait duration, in nanoseconds, to the nanosecond equivalent
+ * of two jiffies. @engine is not consulted.
+ */
+u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
+{
+	return min(value, jiffies_to_nsecs(2));
+}
+
+/*
+ * Limit a pre-emption timeout, in milliseconds, to what the scheduler
+ * timeout can express and, when GuC submission is wanted, to the GuC
+ * policy maximum as well.
+ */
+u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+	u64 limit = jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT);
+
+	/*
+	 * NB: The GuC API only supports 32bit values. However, the limit is
+	 * further reduced due to internal calculations which would otherwise
+	 * overflow.
+	 */
+	if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+		limit = min_t(u64, limit, guc_policy_max_preempt_timeout_ms());
+
+	return min_t(u64, value, limit);
+}
+
+/*
+ * Limit a stop timeout, in milliseconds, to
+ * jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT). @engine is not consulted.
+ */
+u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
+{
+	return min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
+}
+
+/*
+ * Limit a timeslice duration, in milliseconds, to what the scheduler
+ * timeout can express and, when GuC submission is wanted, to the GuC
+ * execution quantum maximum as well.
+ */
+u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
+{
+	u64 limit = jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT);
+
+	/*
+	 * NB: The GuC API only supports 32bit values. However, the limit is
+	 * further reduced due to internal calculations which would otherwise
+	 * overflow.
+	 */
+	if (intel_guc_submission_is_wanted(&engine->gt->uc.guc))
+		limit = min_t(u64, limit, guc_policy_max_exec_quantum_ms());
+
+	return min_t(u64, value, limit);
+}
+
static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
@@ -1274,8 +1348,13 @@ int intel_engines_init(struct intel_gt *gt)
return err;
err = setup(engine);
- if (err)
+ if (err) {
+ intel_engine_cleanup_common(engine);
return err;
+ }
+
+ /* The backend should now be responsible for cleanup */
+ GEM_BUG_ON(engine->release == NULL);
err = engine_init_common(engine);
if (err)
@@ -1554,11 +1633,11 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
for_each_ss_steering(iter, engine->gt, slice, subslice) {
instdone->sampler[slice][subslice] =
intel_gt_mcr_read(engine->gt,
- GEN7_SAMPLER_INSTDONE,
+ GEN8_SAMPLER_INSTDONE,
slice, subslice);
instdone->row[slice][subslice] =
intel_gt_mcr_read(engine->gt,
- GEN7_ROW_INSTDONE,
+ GEN8_ROW_INSTDONE,
slice, subslice);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index a3698f611f45..9a527e1f5be6 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -22,9 +22,37 @@
static bool next_heartbeat(struct intel_engine_cs *engine)
{
+ struct i915_request *rq;
long delay;
delay = READ_ONCE(engine->props.heartbeat_interval_ms);
+
+ rq = engine->heartbeat.systole;
+
+ /*
+ * FIXME: The final period extension is disabled if the period has been
+ * modified from the default. This is to prevent issues with certain
+ * selftests which override the value and expect specific behaviour.
+ * Once the selftests have been updated to either cope with variable
+ * heartbeat periods (or to override the pre-emption timeout as well,
+ * or just to add a selftest specific override of the extension), the
+ * generic override can be removed.
+ */
+ if (rq && rq->sched.attr.priority >= I915_PRIORITY_BARRIER &&
+ delay == engine->defaults.heartbeat_interval_ms) {
+ long longer;
+
+ /*
+ * The final try is at the highest priority possible. Up until now
+ * a pre-emption might not even have been attempted. So make sure
+ * this last attempt allows enough time for a pre-emption to occur.
+ */
+ longer = READ_ONCE(engine->props.preempt_timeout_ms) * 2;
+ longer = intel_clamp_heartbeat_interval_ms(engine, longer);
+ if (longer > delay)
+ delay = longer;
+ }
+
if (!delay)
return false;
@@ -288,6 +316,17 @@ int intel_engine_set_heartbeat(struct intel_engine_cs *engine,
if (!delay && !intel_engine_has_preempt_reset(engine))
return -ENODEV;
+ /* FIXME: Remove together with equally marked hack in next_heartbeat. */
+ if (delay != engine->defaults.heartbeat_interval_ms &&
+ delay < 2 * engine->props.preempt_timeout_ms) {
+ if (intel_engine_uses_guc(engine))
+ drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may downgrade individual engine resets to full GPU resets!\n",
+ engine->name);
+ else
+ drm_notice(&engine->i915->drm, "%s heartbeat interval adjusted to a non-default value which may cause engine resets to target innocent contexts!\n",
+ engine->name);
+ }
+
intel_engine_pm_get(engine);
err = mutex_lock_interruptible(&ce->timeline->mutex);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index fe1a0d5fd4b1..ee3efd06ee54 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -201,6 +201,7 @@
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ */
#define RING_PREDICATE_RESULT(base) _MMIO((base) + 0x3b8)
+#define MI_PREDICATE_RESULT_2_ENGINE(base) _MMIO((base) + 0x3bc)
#define RING_FORCE_TO_NONPRIV(base, i) _MMIO(((base) + 0x4D0) + (i) * 4)
#define RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
#define RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index c718e6dc40b5..0187bc72310d 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3471,9 +3471,9 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) {
if (intel_engine_has_preemption(engine))
- engine->emit_bb_start = gen125_emit_bb_start;
+ engine->emit_bb_start = xehp_emit_bb_start;
else
- engine->emit_bb_start = gen125_emit_bb_start_noarb;
+ engine->emit_bb_start = xehp_emit_bb_start_noarb;
} else {
if (intel_engine_has_preemption(engine))
engine->emit_bb_start = gen8_emit_bb_start;
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 2049a00417af..2518cebbf931 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -871,8 +871,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
u32 pte_flags;
int ret;
- GEM_WARN_ON(pci_resource_len(pdev, GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
- phys_addr = pci_resource_start(pdev, GTTMMADR_BAR) + gen6_gttadr_offset(i915);
+ GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
+ phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
/*
* On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
@@ -931,11 +931,11 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
unsigned int size;
u16 snb_gmch_ctl;
- if (!HAS_LMEM(i915)) {
- if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
+ if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+ if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
return -ENXIO;
- ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
+ ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
}
@@ -986,7 +986,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
- setup_private_pat(ggtt->vm.gt->uncore);
+ setup_private_pat(ggtt->vm.gt);
return ggtt_probe_common(ggtt, size);
}
@@ -1089,10 +1089,10 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
unsigned int size;
u16 snb_gmch_ctl;
- if (!i915_pci_resource_valid(pdev, GTT_APERTURE_BAR))
+ if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
return -ENXIO;
- ggtt->gmadr = pci_resource(pdev, GTT_APERTURE_BAR);
+ ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
ggtt->mappable_end = resource_size(&ggtt->gmadr);
/*
@@ -1308,7 +1308,7 @@ void i915_ggtt_resume(struct i915_ggtt *ggtt)
wbinvd_on_all_cpus();
if (GRAPHICS_VER(ggtt->vm.i915) >= 8)
- setup_private_pat(ggtt->vm.gt->uncore);
+ setup_private_pat(ggtt->vm.gt);
intel_ggtt_restore_fences(ggtt);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index d4e9702d3c8e..f50ea92910d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -187,6 +187,10 @@
#define MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
#define MI_BATCH_PREDICATE REG_BIT(15) /* HSW+ on RCS only*/
+#define MI_OPCODE(x) (((x) >> 23) & 0x3f)
+#define IS_MI_LRI_CMD(x) (MI_OPCODE(x) == MI_OPCODE(MI_INSTR(0x22, 0)))
+#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+
/*
* 3D instructions used by the kernel
*/
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c
index 7af6db3194dd..976fdf27e790 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.c
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.c
@@ -7,6 +7,7 @@
#include <linux/mei_aux.h>
#include "i915_drv.h"
#include "i915_reg.h"
+#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
@@ -142,8 +143,14 @@ static void gsc_destroy_one(struct drm_i915_private *i915,
struct intel_gsc_intf *intf = &gsc->intf[intf_id];
if (intf->adev) {
- auxiliary_device_delete(&intf->adev->aux_dev);
- auxiliary_device_uninit(&intf->adev->aux_dev);
+ struct auxiliary_device *aux_dev = &intf->adev->aux_dev;
+
+ if (intf_id == 0)
+ intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
+ aux_dev->dev.bus);
+
+ auxiliary_device_delete(aux_dev);
+ auxiliary_device_uninit(aux_dev);
intf->adev = NULL;
}
@@ -242,14 +249,24 @@ add_device:
goto fail;
}
+ intf->adev = adev; /* needed by the notifier */
+
+ if (intf_id == 0)
+ intel_huc_register_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
+ aux_dev->dev.bus);
+
ret = auxiliary_device_add(aux_dev);
if (ret < 0) {
drm_err(&i915->drm, "gsc aux add failed %d\n", ret);
+ if (intf_id == 0)
+ intel_huc_unregister_gsc_notifier(&gsc_to_gt(gsc)->uc.huc,
+ aux_dev->dev.bus);
+ intf->adev = NULL;
+
/* adev will be freed with the put_device() and .release sequence */
auxiliary_device_uninit(aux_dev);
goto fail;
}
- intf->adev = adev;
return;
fail:
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d0b03a928b9a..8e914c4066ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -40,8 +40,6 @@ void intel_gt_common_init_early(struct intel_gt *gt)
{
spin_lock_init(gt->irq_lock);
- INIT_LIST_HEAD(&gt->lmem_userfault_list);
- mutex_init(&gt->lmem_userfault_lock);
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
@@ -231,6 +229,16 @@ static void gen6_clear_engine_error_register(struct intel_engine_cs *engine)
GEN6_RING_FAULT_REG_POSTING_READ(engine);
}
+/*
+ * Pick the PERF_LIMIT_REASONS register for @gt: INVALID_MMIO_REG below
+ * graphics version 11, the MTL media register for a GT_MEDIA gt, and
+ * GT0_PERF_LIMIT_REASONS otherwise.
+ */
+i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
+{
+	/* GT0_PERF_LIMIT_REASONS is available only for Gen11+ */
+	if (GRAPHICS_VER(gt->i915) < 11)
+		return INVALID_MMIO_REG;
+
+	if (gt->type == GT_MEDIA)
+		return MTL_MEDIA_PERF_LIMIT_REASONS;
+
+	return GT0_PERF_LIMIT_REASONS;
+}
+
void
intel_gt_clear_error_registers(struct intel_gt *gt,
intel_engine_mask_t engine_mask)
@@ -260,7 +268,11 @@ intel_gt_clear_error_registers(struct intel_gt *gt,
I915_MASTER_ERROR_INTERRUPT);
}
- if (GRAPHICS_VER(i915) >= 12) {
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG,
+ RING_FAULT_VALID, 0);
+ intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+ } else if (GRAPHICS_VER(i915) >= 12) {
rmw_clear(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID);
intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG);
} else if (GRAPHICS_VER(i915) >= 8) {
@@ -298,6 +310,42 @@ static void gen6_check_faults(struct intel_gt *gt)
}
}
+/*
+ * Read XEHP_RING_FAULT_REG and, if it reports a valid fault, log the
+ * faulting address and fault details at debug level.
+ */
+static void xehp_check_faults(struct intel_gt *gt)
+{
+	u32 status, data0, data1;
+	u64 addr;
+
+	/*
+	 * Although the fault register now lives in an MCR register range,
+	 * the GAM registers are special and we only truly need to read
+	 * the "primary" GAM instance rather than handling each instance
+	 * individually. intel_gt_mcr_read_any() will automatically steer
+	 * toward the primary instance.
+	 */
+	status = intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG);
+	if (!(status & RING_FAULT_VALID))
+		return;
+
+	data0 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA0);
+	data1 = intel_gt_mcr_read_any(gt, XEHP_FAULT_TLB_DATA1);
+
+	/* DATA1 supplies the bits from 44 upwards, DATA0 the bits from 12. */
+	addr = ((u64)(data1 & FAULT_VA_HIGH_BITS) << 44) |
+	       ((u64)data0 << 12);
+
+	drm_dbg(&gt->i915->drm, "Unexpected fault\n"
+		"\tAddr: 0x%08x_%08x\n"
+		"\tAddress space: %s\n"
+		"\tEngine ID: %d\n"
+		"\tSource ID: %d\n"
+		"\tType: %d\n",
+		upper_32_bits(addr), lower_32_bits(addr),
+		data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT",
+		GEN8_RING_FAULT_ENGINE_ID(status),
+		RING_FAULT_SRCID(status),
+		RING_FAULT_FAULT_TYPE(status));
+}
+
static void gen8_check_faults(struct intel_gt *gt)
{
struct intel_uncore *uncore = gt->uncore;
@@ -344,7 +392,9 @@ void intel_gt_check_and_clear_faults(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
/* From GEN8 onwards we only have one 'All Engine Fault Register' */
- if (GRAPHICS_VER(i915) >= 8)
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ xehp_check_faults(gt);
+ else if (GRAPHICS_VER(i915) >= 8)
gen8_check_faults(gt);
else if (GRAPHICS_VER(i915) >= 6)
gen6_check_faults(gt);
@@ -807,7 +857,6 @@ static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
}
intel_uncore_init_early(gt->uncore, gt);
- intel_wakeref_auto_init(&gt->userfault_wakeref, gt->uncore->rpm);
ret = intel_uncore_setup_mmio(gt->uncore, phys_addr);
if (ret)
@@ -828,7 +877,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
unsigned int i;
int ret;
- mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
+ mmio_bar = intel_mmio_bar(GRAPHICS_VER(i915));
phys_addr = pci_resource_start(pdev, mmio_bar);
/*
@@ -939,7 +988,10 @@ void intel_gt_info_print(const struct intel_gt_info *info,
}
struct reg_and_bit {
- i915_reg_t reg;
+ union {
+ i915_reg_t reg;
+ i915_mcr_reg_t mcr_reg;
+ };
u32 bit;
};
@@ -965,6 +1017,32 @@ get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
return rb;
}
+/*
+ * HW architecture suggests a typical invalidation time of 40us,
+ * with pessimistic cases up to 100us and a recommendation to
+ * cap at 1ms. We go a bit higher just in case.
+ */
+#define TLB_INVAL_TIMEOUT_US 100
+#define TLB_INVAL_TIMEOUT_MS 4
+
+/*
+ * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
+ * but are now considered MCR registers. Since they exist within a GAM range,
+ * the primary instance of the register rolls up the status from each unit.
+ */
+static int wait_for_invalidate(struct intel_gt *gt, struct reg_and_bit rb)
+{
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+ return intel_gt_mcr_wait_for_reg_fw(gt, rb.mcr_reg, rb.bit, 0,
+ TLB_INVAL_TIMEOUT_US,
+ TLB_INVAL_TIMEOUT_MS);
+ else
+ return __intel_wait_for_register_fw(gt->uncore, rb.reg, rb.bit, 0,
+ TLB_INVAL_TIMEOUT_US,
+ TLB_INVAL_TIMEOUT_MS,
+ NULL);
+}
+
static void mmio_invalidate_full(struct intel_gt *gt)
{
static const i915_reg_t gen8_regs[] = {
@@ -980,6 +1058,13 @@ static void mmio_invalidate_full(struct intel_gt *gt)
[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
[COMPUTE_CLASS] = GEN12_COMPCTX_TLB_INV_CR,
};
+ static const i915_mcr_reg_t xehp_regs[] = {
+ [RENDER_CLASS] = XEHP_GFX_TLB_INV_CR,
+ [VIDEO_DECODE_CLASS] = XEHP_VD_TLB_INV_CR,
+ [VIDEO_ENHANCEMENT_CLASS] = XEHP_VE_TLB_INV_CR,
+ [COPY_ENGINE_CLASS] = XEHP_BLT_TLB_INV_CR,
+ [COMPUTE_CLASS] = XEHP_COMPCTX_TLB_INV_CR,
+ };
struct drm_i915_private *i915 = gt->i915;
struct intel_uncore *uncore = gt->uncore;
struct intel_engine_cs *engine;
@@ -988,7 +1073,10 @@ static void mmio_invalidate_full(struct intel_gt *gt)
const i915_reg_t *regs;
unsigned int num = 0;
- if (GRAPHICS_VER(i915) == 12) {
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ regs = NULL;
+ num = ARRAY_SIZE(xehp_regs);
+ } else if (GRAPHICS_VER(i915) == 12) {
regs = gen12_regs;
num = ARRAY_SIZE(gen12_regs);
} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
@@ -1013,11 +1101,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
if (!intel_engine_pm_is_awake(engine))
continue;
- rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (!i915_mmio_reg_offset(rb.reg))
- continue;
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ intel_gt_mcr_multicast_write_fw(gt,
+ xehp_regs[engine->class],
+ BIT(engine->instance));
+ } else {
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
awake |= engine->mask;
}
@@ -1037,22 +1131,17 @@ static void mmio_invalidate_full(struct intel_gt *gt)
for_each_engine_masked(engine, gt, awake, tmp) {
struct reg_and_bit rb;
- /*
- * HW architecture suggest typical invalidation time at 40us,
- * with pessimistic cases up to 100us and a recommendation to
- * cap at 1ms. We go a bit higher just in case.
- */
- const unsigned int timeout_us = 100;
- const unsigned int timeout_ms = 4;
-
- rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
- if (__intel_wait_for_register_fw(uncore,
- rb.reg, rb.bit, 0,
- timeout_us, timeout_ms,
- NULL))
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ rb.mcr_reg = xehp_regs[engine->class];
+ rb.bit = BIT(engine->instance);
+ } else {
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ }
+
+ if (wait_for_invalidate(gt, rb))
drm_err_ratelimited(&gt->i915->drm,
"%s TLB invalidation did not complete in %ums!\n",
- engine->name, timeout_ms);
+ engine->name, TLB_INVAL_TIMEOUT_MS);
}
/*
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 2ee582e287c8..e0365d556248 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -60,6 +60,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915);
int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout);
void intel_gt_check_and_clear_faults(struct intel_gt *gt);
+i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt);
void intel_gt_clear_error_registers(struct intel_gt *gt,
intel_engine_mask_t engine_mask);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
index 3f656d3dba9a..2a6a4ca7fdad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c
@@ -107,7 +107,7 @@ static u32 gen9_read_clock_frequency(struct intel_uncore *uncore)
return freq;
}
-static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
+static u32 gen6_read_clock_frequency(struct intel_uncore *uncore)
{
/*
* PRMs say:
@@ -119,7 +119,27 @@ static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
return 12500000;
}
-static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
+static u32 gen5_read_clock_frequency(struct intel_uncore *uncore)
+{
+ /*
+ * 63:32 increments every 1000 ns
+ * 31:0 mbz
+ */
+ return 1000000000 / 1000;
+}
+
+static u32 g4x_read_clock_frequency(struct intel_uncore *uncore)
+{
+ /*
+ * 63:20 increments every 1/4 ns
+ * 19:0 mbz
+ *
+ * -> 63:32 increments every 1024 ns
+ */
+ return 1000000000 / 1024;
+}
+
+static u32 gen4_read_clock_frequency(struct intel_uncore *uncore)
{
/*
* PRMs say:
@@ -127,8 +147,10 @@ static u32 gen2_read_clock_frequency(struct intel_uncore *uncore)
* "The value in this register increments once every 16
* hclks." (through the “Clocking Configuration”
* (“CLKCFG”) MCHBAR register)
+ *
+ * Testing on actual hardware has shown there is no /16.
*/
- return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16;
+ return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000;
}
static u32 read_clock_frequency(struct intel_uncore *uncore)
@@ -137,10 +159,16 @@ static u32 read_clock_frequency(struct intel_uncore *uncore)
return gen11_read_clock_frequency(uncore);
else if (GRAPHICS_VER(uncore->i915) >= 9)
return gen9_read_clock_frequency(uncore);
- else if (GRAPHICS_VER(uncore->i915) >= 5)
+ else if (GRAPHICS_VER(uncore->i915) >= 6)
+ return gen6_read_clock_frequency(uncore);
+ else if (GRAPHICS_VER(uncore->i915) == 5)
return gen5_read_clock_frequency(uncore);
+ else if (IS_G4X(uncore->i915))
+ return g4x_read_clock_frequency(uncore);
+ else if (GRAPHICS_VER(uncore->i915) == 4)
+ return gen4_read_clock_frequency(uncore);
else
- return gen2_read_clock_frequency(uncore);
+ return 0;
}
void intel_gt_init_clock_frequency(struct intel_gt *gt)
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index e79405a45312..830edffe88cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -40,6 +40,9 @@ static const char * const intel_steering_types[] = {
"L3BANK",
"MSLICE",
"LNCF",
+ "GAM",
+ "DSS",
+ "OADDRM",
"INSTANCE 0",
};
@@ -48,14 +51,23 @@ static const struct intel_mmio_range icl_l3bank_steering_table[] = {
{},
};
+/*
+ * Although the bspec lists more "MSLICE" ranges than shown here, some of those
+ * are of a "GAM" subclass that has special rules. Thus we use a separate
+ * GAM table farther down for those.
+ */
static const struct intel_mmio_range xehpsdv_mslice_steering_table[] = {
- { 0x004000, 0x004AFF },
- { 0x00C800, 0x00CFFF },
{ 0x00DD00, 0x00DDFF },
{ 0x00E900, 0x00FFFF }, /* 0xEA00 - OxEFFF is unused */
{},
};
+static const struct intel_mmio_range xehpsdv_gam_steering_table[] = {
+ { 0x004000, 0x004AFF },
+ { 0x00C800, 0x00CFFF },
+ {},
+};
+
static const struct intel_mmio_range xehpsdv_lncf_steering_table[] = {
{ 0x00B000, 0x00B0FF },
{ 0x00D800, 0x00D8FF },
@@ -89,9 +101,47 @@ static const struct intel_mmio_range pvc_instance0_steering_table[] = {
{},
};
+static const struct intel_mmio_range xelpg_instance0_steering_table[] = {
+ { 0x000B00, 0x000BFF }, /* SQIDI */
+ { 0x001000, 0x001FFF }, /* SQIDI */
+ { 0x004000, 0x0048FF }, /* GAM */
+ { 0x008700, 0x0087FF }, /* SQIDI */
+ { 0x00B000, 0x00B0FF }, /* NODE */
+ { 0x00C800, 0x00CFFF }, /* GAM */
+ { 0x00D880, 0x00D8FF }, /* NODE */
+ { 0x00DD00, 0x00DDFF }, /* OAAL2 */
+ {},
+};
+
+static const struct intel_mmio_range xelpg_l3bank_steering_table[] = {
+ { 0x00B100, 0x00B3FF },
+ {},
+};
+
+/* DSS steering is used for SLICE ranges as well */
+static const struct intel_mmio_range xelpg_dss_steering_table[] = {
+ { 0x005200, 0x0052FF }, /* SLICE */
+ { 0x005500, 0x007FFF }, /* SLICE */
+ { 0x008140, 0x00815F }, /* SLICE (0x8140-0x814F), DSS (0x8150-0x815F) */
+ { 0x0094D0, 0x00955F }, /* SLICE (0x94D0-0x951F), DSS (0x9520-0x955F) */
+ { 0x009680, 0x0096FF }, /* DSS */
+ { 0x00D800, 0x00D87F }, /* SLICE */
+ { 0x00DC00, 0x00DCFF }, /* SLICE */
+ { 0x00DE80, 0x00E8FF }, /* DSS (0xE000-0xE0FF reserved) */
+ {},
+};
+
+static const struct intel_mmio_range xelpmp_oaddrm_steering_table[] = {
+ { 0x393200, 0x39323F },
+ { 0x393400, 0x3934FF },
+ {},
+};
+
void intel_gt_mcr_init(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
+ unsigned long fuse;
+ int i;
/*
* An mslice is unavailable only if both the meml3 for the slice is
@@ -109,14 +159,36 @@ void intel_gt_mcr_init(struct intel_gt *gt)
drm_warn(&i915->drm, "mslice mask all zero!\n");
}
- if (IS_PONTEVECCHIO(i915)) {
+ if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) {
+ gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table;
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+ fuse = REG_FIELD_GET(GT_L3_EXC_MASK,
+ intel_uncore_read(gt->uncore, XEHP_FUSE4));
+
+ /*
+ * Despite the register field being named "exclude mask" the
+ * bits actually represent enabled banks (two banks per bit).
+ */
+ for_each_set_bit(i, &fuse, 3)
+ gt->info.l3bank_mask |= 0x3 << 2 * i;
+
+ gt->steering_table[INSTANCE0] = xelpg_instance0_steering_table;
+ gt->steering_table[L3BANK] = xelpg_l3bank_steering_table;
+ gt->steering_table[DSS] = xelpg_dss_steering_table;
+ } else if (IS_PONTEVECCHIO(i915)) {
gt->steering_table[INSTANCE0] = pvc_instance0_steering_table;
} else if (IS_DG2(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = dg2_lncf_steering_table;
+ /*
+ * No need to hook up the GAM table since it has a dedicated
+ * steering control register on DG2 and can use implicit
+ * steering.
+ */
} else if (IS_XEHPSDV(i915)) {
gt->steering_table[MSLICE] = xehpsdv_mslice_steering_table;
gt->steering_table[LNCF] = xehpsdv_lncf_steering_table;
+ gt->steering_table[GAM] = xehpsdv_gam_steering_table;
} else if (GRAPHICS_VER(i915) >= 11 &&
GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) {
gt->steering_table[L3BANK] = icl_l3bank_steering_table;
@@ -135,6 +207,19 @@ void intel_gt_mcr_init(struct intel_gt *gt)
}
/*
+ * Although the rest of the driver should use MCR-specific functions to
+ * read/write MCR registers, we still use the regular intel_uncore_* functions
+ * internally to implement those, so we need a way for the functions in this
+ * file to "cast" an i915_mcr_reg_t into an i915_reg_t.
+ */
+static i915_reg_t mcr_reg_cast(const i915_mcr_reg_t mcr)
+{
+ i915_reg_t r = { .reg = mcr.reg };
+
+ return r;
+}
+
+/*
* rw_with_mcr_steering_fw - Access a register with specific MCR steering
* @uncore: pointer to struct intel_uncore
* @reg: register being accessed
@@ -148,14 +233,26 @@ void intel_gt_mcr_init(struct intel_gt *gt)
* Caller needs to make sure the relevant forcewake wells are up.
*/
static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
- i915_reg_t reg, u8 rw_flag,
+ i915_mcr_reg_t reg, u8 rw_flag,
int group, int instance, u32 value)
{
u32 mcr_mask, mcr_ss, mcr, old_mcr, val = 0;
lockdep_assert_held(&uncore->lock);
- if (GRAPHICS_VER(uncore->i915) >= 11) {
+ if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70)) {
+ /*
+ * Always leave the hardware in multicast mode when doing reads
+ * (see comment about Wa_22013088509 below) and only change it
+ * to unicast mode when doing writes of a specific instance.
+ *
+ * No need to save old steering reg value.
+ */
+ intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR,
+ REG_FIELD_PREP(MTL_MCR_GROUPID, group) |
+ REG_FIELD_PREP(MTL_MCR_INSTANCEID, instance) |
+ (rw_flag == FW_REG_READ ? GEN11_MCR_MULTICAST : 0));
+ } else if (GRAPHICS_VER(uncore->i915) >= 11) {
mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
mcr_ss = GEN11_MCR_SLICE(group) | GEN11_MCR_SUBSLICE(instance);
@@ -173,39 +270,53 @@ static u32 rw_with_mcr_steering_fw(struct intel_uncore *uncore,
*/
if (rw_flag == FW_REG_WRITE)
mcr_mask |= GEN11_MCR_MULTICAST;
+
+ mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+ old_mcr = mcr;
+
+ mcr &= ~mcr_mask;
+ mcr |= mcr_ss;
+ intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
} else {
mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
mcr_ss = GEN8_MCR_SLICE(group) | GEN8_MCR_SUBSLICE(instance);
- }
- old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+ mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);
+ old_mcr = mcr;
- mcr &= ~mcr_mask;
- mcr |= mcr_ss;
- intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+ mcr &= ~mcr_mask;
+ mcr |= mcr_ss;
+ intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+ }
if (rw_flag == FW_REG_READ)
- val = intel_uncore_read_fw(uncore, reg);
+ val = intel_uncore_read_fw(uncore, mcr_reg_cast(reg));
else
- intel_uncore_write_fw(uncore, reg, value);
-
- mcr &= ~mcr_mask;
- mcr |= old_mcr & mcr_mask;
+ intel_uncore_write_fw(uncore, mcr_reg_cast(reg), value);
- intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);
+ /*
+ * For pre-MTL platforms, we need to restore the old value of the
+ * steering control register to ensure that implicit steering continues
+ * to behave as expected. For MTL and beyond, we need only reinstate
+ * the 'multicast' bit (and only if we did a write that cleared it).
+ */
+ if (GRAPHICS_VER_FULL(uncore->i915) >= IP_VER(12, 70) && rw_flag == FW_REG_WRITE)
+ intel_uncore_write_fw(uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
+ else if (GRAPHICS_VER_FULL(uncore->i915) < IP_VER(12, 70))
+ intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, old_mcr);
return val;
}
static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
- i915_reg_t reg, u8 rw_flag,
+ i915_mcr_reg_t reg, u8 rw_flag,
int group, int instance,
u32 value)
{
enum forcewake_domains fw_domains;
u32 val;
- fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
+ fw_domains = intel_uncore_forcewake_for_reg(uncore, mcr_reg_cast(reg),
rw_flag);
fw_domains |= intel_uncore_forcewake_for_reg(uncore,
GEN8_MCR_SELECTOR,
@@ -233,7 +344,7 @@ static u32 rw_with_mcr_steering(struct intel_uncore *uncore,
* group/instance.
*/
u32 intel_gt_mcr_read(struct intel_gt *gt,
- i915_reg_t reg,
+ i915_mcr_reg_t reg,
int group, int instance)
{
return rw_with_mcr_steering(gt->uncore, reg, FW_REG_READ, group, instance, 0);
@@ -250,7 +361,7 @@ u32 intel_gt_mcr_read(struct intel_gt *gt,
* Write an MCR register in unicast mode after steering toward a specific
* group/instance.
*/
-void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
+void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value,
int group, int instance)
{
rw_with_mcr_steering(gt->uncore, reg, FW_REG_WRITE, group, instance, value);
@@ -265,9 +376,16 @@ void intel_gt_mcr_unicast_write(struct intel_gt *gt, i915_reg_t reg, u32 value,
* Write an MCR register in multicast mode to update all instances.
*/
void intel_gt_mcr_multicast_write(struct intel_gt *gt,
- i915_reg_t reg, u32 value)
+ i915_mcr_reg_t reg, u32 value)
{
- intel_uncore_write(gt->uncore, reg, value);
+ /*
+ * Ensure we have multicast behavior, just in case some non-i915 agent
+ * left the hardware in unicast mode.
+ */
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
+
+ intel_uncore_write(gt->uncore, mcr_reg_cast(reg), value);
}
/**
@@ -281,9 +399,44 @@ void intel_gt_mcr_multicast_write(struct intel_gt *gt,
* domains; use intel_gt_mcr_multicast_write() in cases where forcewake should
* be obtained automatically.
*/
-void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 value)
+void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_mcr_reg_t reg, u32 value)
+{
+ /*
+ * Ensure we have multicast behavior, just in case some non-i915 agent
+ * left the hardware in unicast mode.
+ */
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70))
+ intel_uncore_write_fw(gt->uncore, MTL_MCR_SELECTOR, GEN11_MCR_MULTICAST);
+
+ intel_uncore_write_fw(gt->uncore, mcr_reg_cast(reg), value);
+}
+
+/**
+ * intel_gt_mcr_multicast_rmw - Performs a multicast RMW operation
+ * @gt: GT structure
+ * @reg: the MCR register to read and write
+ * @clear: bits to clear during RMW
+ * @set: bits to set during RMW
+ *
+ * Performs a read-modify-write on an MCR register in a multicast manner.
+ * This operation only makes sense on MCR registers where all instances are
+ * expected to have the same value. The read will target any non-terminated
+ * instance and the write will be applied to all instances.
+ *
+ * The caller does not need to hold forcewake: the read and write helpers used
+ * here, intel_gt_mcr_read_any() and intel_gt_mcr_multicast_write(), are the
+ * non-_fw variants, which obtain the necessary forcewake domains themselves.
+ *
+ * Returns the old (unmodified) value read.
+ */
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
+ u32 clear, u32 set)
{
- intel_uncore_write_fw(gt->uncore, reg, value);
+ u32 val = intel_gt_mcr_read_any(gt, reg);
+
+ intel_gt_mcr_multicast_write(gt, reg, (val & ~clear) | set);
+
+ return val;
}
/*
@@ -301,7 +454,7 @@ void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt, i915_reg_t reg, u32 va
* for @type steering too.
*/
static bool reg_needs_read_steering(struct intel_gt *gt,
- i915_reg_t reg,
+ i915_mcr_reg_t reg,
enum intel_steering_type type)
{
const u32 offset = i915_mmio_reg_offset(reg);
@@ -332,6 +485,8 @@ static void get_nonterminated_steering(struct intel_gt *gt,
enum intel_steering_type type,
u8 *group, u8 *instance)
{
+ u32 dss;
+
switch (type) {
case L3BANK:
*group = 0; /* unused */
@@ -351,6 +506,15 @@ static void get_nonterminated_steering(struct intel_gt *gt,
*group = __ffs(gt->info.mslice_mask) << 1;
*instance = 0; /* unused */
break;
+ case GAM:
+ *group = IS_DG2(gt->i915) ? 1 : 0;
+ *instance = 0;
+ break;
+ case DSS:
+ dss = intel_sseu_find_first_xehp_dss(&gt->info.sseu, 0, 0);
+ *group = dss / GEN_DSS_PER_GSLICE;
+ *instance = dss % GEN_DSS_PER_GSLICE;
+ break;
case INSTANCE0:
/*
* There are a lot of MCR types for which instance (0, 0)
@@ -359,6 +523,13 @@ static void get_nonterminated_steering(struct intel_gt *gt,
*group = 0;
*instance = 0;
break;
+ case OADDRM:
+ if ((VDBOX_MASK(gt) | VEBOX_MASK(gt) | gt->info.sfc_mask) & BIT(0))
+ *group = 0;
+ else
+ *group = 1;
+ *instance = 0;
+ break;
default:
MISSING_CASE(type);
*group = 0;
@@ -380,7 +551,7 @@ static void get_nonterminated_steering(struct intel_gt *gt,
* steering.
*/
void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
- i915_reg_t reg,
+ i915_mcr_reg_t reg,
u8 *group, u8 *instance)
{
int type;
@@ -409,7 +580,7 @@ void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
*
* Returns the value from a non-terminated instance of @reg.
*/
-u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg)
{
int type;
u8 group, instance;
@@ -423,7 +594,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
}
}
- return intel_uncore_read_fw(gt->uncore, reg);
+ return intel_uncore_read_fw(gt->uncore, mcr_reg_cast(reg));
}
/**
@@ -436,7 +607,7 @@ u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg)
*
* Returns the value from a non-terminated instance of @reg.
*/
-u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg)
{
int type;
u8 group, instance;
@@ -450,7 +621,7 @@ u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg)
}
}
- return intel_uncore_read(gt->uncore, reg);
+ return intel_uncore_read(gt->uncore, mcr_reg_cast(reg));
}
static void report_steering_type(struct drm_printer *p,
@@ -483,11 +654,20 @@ static void report_steering_type(struct drm_printer *p,
void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
bool dump_table)
{
- drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
- gt->default_steering.groupid,
- gt->default_steering.instanceid);
-
- if (IS_PONTEVECCHIO(gt->i915)) {
+ /*
+ * Starting with MTL we no longer have default steering;
+ * all ranges are explicitly steered.
+ */
+ if (GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))
+ drm_printf(p, "Default steering: group=0x%x, instance=0x%x\n",
+ gt->default_steering.groupid,
+ gt->default_steering.instanceid);
+
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) {
+ for (int i = 0; i < NUM_STEERING_TYPES; i++)
+ if (gt->steering_table[i])
+ report_steering_type(p, gt, i, dump_table);
+ } else if (IS_PONTEVECCHIO(gt->i915)) {
report_steering_type(p, gt, INSTANCE0, dump_table);
} else if (HAS_MSLICE_STEERING(gt->i915)) {
report_steering_type(p, gt, MSLICE, dump_table);
@@ -520,3 +700,58 @@ void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
return;
}
}
+
+/**
+ * intel_gt_mcr_wait_for_reg_fw - wait until MCR register matches expected state
+ * @gt: GT structure
+ * @reg: the register to read
+ * @mask: mask to apply to register value
+ * @value: value to wait for
+ * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait
+ * @slow_timeout_ms: slow timeout in millisecond
+ *
+ * This routine waits until the target register @reg contains the expected
+ * @value after applying the @mask, i.e. it waits until ::
+ *
+ * (intel_gt_mcr_read_any_fw(gt, reg) & mask) == value
+ *
+ * Otherwise, the wait will timeout after @slow_timeout_ms milliseconds.
+ * For atomic context @slow_timeout_ms must be zero and @fast_timeout_us
+ * must not be larger than 20,000 microseconds.
+ *
+ * This function is basically an MCR-friendly version of
+ * __intel_wait_for_register_fw(). Generally this function will only be used
+ * on GAM registers which are a bit special --- although they're MCR registers,
+ * reads (e.g., waiting for status updates) are always directed to the primary
+ * instance.
+ *
+ * Note that this routine assumes the caller holds forcewake asserted, it is
+ * not suitable for very long waits.
+ *
+ * Return: 0 if the register matches the desired condition, or -ETIMEDOUT.
+ */
+int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
+ i915_mcr_reg_t reg,
+ u32 mask,
+ u32 value,
+ unsigned int fast_timeout_us,
+ unsigned int slow_timeout_ms)
+{
+ u32 reg_value = 0;
+#define done (((reg_value = intel_gt_mcr_read_any_fw(gt, reg)) & mask) == value)
+ int ret;
+
+ /* Catch any overuse of this function */
+ might_sleep_if(slow_timeout_ms);
+ GEM_BUG_ON(fast_timeout_us > 20000);
+ GEM_BUG_ON(!fast_timeout_us && !slow_timeout_ms);
+
+ ret = -ETIMEDOUT;
+ if (fast_timeout_us && fast_timeout_us <= 20000)
+ ret = _wait_for_atomic(done, fast_timeout_us, 0);
+ if (ret && slow_timeout_ms)
+ ret = wait_for(done, slow_timeout_ms);
+
+ return ret;
+#undef done
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
index 77a8b11c287d..3fb0502bff22 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h
@@ -11,21 +11,24 @@
void intel_gt_mcr_init(struct intel_gt *gt);
u32 intel_gt_mcr_read(struct intel_gt *gt,
- i915_reg_t reg,
+ i915_mcr_reg_t reg,
int group, int instance);
-u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_reg_t reg);
-u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_reg_t reg);
+u32 intel_gt_mcr_read_any_fw(struct intel_gt *gt, i915_mcr_reg_t reg);
+u32 intel_gt_mcr_read_any(struct intel_gt *gt, i915_mcr_reg_t reg);
void intel_gt_mcr_unicast_write(struct intel_gt *gt,
- i915_reg_t reg, u32 value,
+ i915_mcr_reg_t reg, u32 value,
int group, int instance);
void intel_gt_mcr_multicast_write(struct intel_gt *gt,
- i915_reg_t reg, u32 value);
+ i915_mcr_reg_t reg, u32 value);
void intel_gt_mcr_multicast_write_fw(struct intel_gt *gt,
- i915_reg_t reg, u32 value);
+ i915_mcr_reg_t reg, u32 value);
+
+u32 intel_gt_mcr_multicast_rmw(struct intel_gt *gt, i915_mcr_reg_t reg,
+ u32 clear, u32 set);
void intel_gt_mcr_get_nonterminated_steering(struct intel_gt *gt,
- i915_reg_t reg,
+ i915_mcr_reg_t reg,
u8 *group, u8 *instance);
void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
@@ -34,6 +37,13 @@ void intel_gt_mcr_report_steering(struct drm_printer *p, struct intel_gt *gt,
void intel_gt_mcr_get_ss_steering(struct intel_gt *gt, unsigned int dss,
unsigned int *group, unsigned int *instance);
+int intel_gt_mcr_wait_for_reg_fw(struct intel_gt *gt,
+ i915_mcr_reg_t reg,
+ u32 mask,
+ u32 value,
+ unsigned int fast_timeout_us,
+ unsigned int slow_timeout_ms);
+
/*
* Helper for for_each_ss_steering loop. On pre-Xe_HP platforms, subslice
* presence is determined by using the group/instance as direct lookups in the
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index 108b9e76c32e..40d0a3be42ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -344,162 +344,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, struct drm_printer *p)
drm_printf(p, "efficient (RPe) frequency: %d MHz\n",
intel_gpu_freq(rps, rps->efficient_freq));
} else if (GRAPHICS_VER(i915) >= 6) {
- u32 rp_state_limits;
- u32 gt_perf_status;
- struct intel_rps_freq_caps caps;
- u32 rpmodectl, rpinclimit, rpdeclimit;
- u32 rpstat, cagf, reqf;
- u32 rpcurupei, rpcurup, rpprevup;
- u32 rpcurdownei, rpcurdown, rpprevdown;
- u32 rpupei, rpupt, rpdownei, rpdownt;
- u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
-
- rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
- gen6_rps_get_freq_caps(rps, &caps);
- if (IS_GEN9_LP(i915))
- gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
- else
- gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
-
- /* RPSTAT1 is in the GT power well */
- intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
-
- reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
- if (GRAPHICS_VER(i915) >= 9) {
- reqf >>= 23;
- } else {
- reqf &= ~GEN6_TURBO_DISABLE;
- if (IS_HASWELL(i915) || IS_BROADWELL(i915))
- reqf >>= 24;
- else
- reqf >>= 25;
- }
- reqf = intel_gpu_freq(rps, reqf);
-
- rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
- rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
- rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
-
- rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
- rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
- rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
- rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
- rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
- rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
- rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
-
- rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
- rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
-
- rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
- rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
-
- cagf = intel_rps_read_actual_frequency(rps);
-
- intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
-
- if (GRAPHICS_VER(i915) >= 11) {
- pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
- pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
- /*
- * The equivalent to the PM ISR & IIR cannot be read
- * without affecting the current state of the system
- */
- pm_isr = 0;
- pm_iir = 0;
- } else if (GRAPHICS_VER(i915) >= 8) {
- pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
- pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
- pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
- pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
- } else {
- pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
- pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
- pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
- pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
- }
- pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
-
- drm_printf(p, "Video Turbo Mode: %s\n",
- str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
- drm_printf(p, "HW control enabled: %s\n",
- str_yes_no(rpmodectl & GEN6_RP_ENABLE));
- drm_printf(p, "SW control enabled: %s\n",
- str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
-
- drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
- pm_ier, pm_imr, pm_mask);
- if (GRAPHICS_VER(i915) <= 10)
- drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
- pm_isr, pm_iir);
- drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
- rps->pm_intrmsk_mbz);
- drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
- drm_printf(p, "Render p-state ratio: %d\n",
- (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
- drm_printf(p, "Render p-state VID: %d\n",
- gt_perf_status & 0xff);
- drm_printf(p, "Render p-state limit: %d\n",
- rp_state_limits & 0xff);
- drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
- drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
- drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
- drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
- drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
- drm_printf(p, "CAGF: %dMHz\n", cagf);
- drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
- rpcurupei,
- intel_gt_pm_interval_to_ns(gt, rpcurupei));
- drm_printf(p, "RP CUR UP: %d (%lldns)\n",
- rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
- drm_printf(p, "RP PREV UP: %d (%lldns)\n",
- rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
- drm_printf(p, "Up threshold: %d%%\n",
- rps->power.up_threshold);
- drm_printf(p, "RP UP EI: %d (%lldns)\n",
- rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
- drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
- rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
-
- drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
- rpcurdownei,
- intel_gt_pm_interval_to_ns(gt, rpcurdownei));
- drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
- rpcurdown,
- intel_gt_pm_interval_to_ns(gt, rpcurdown));
- drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
- rpprevdown,
- intel_gt_pm_interval_to_ns(gt, rpprevdown));
- drm_printf(p, "Down threshold: %d%%\n",
- rps->power.down_threshold);
- drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
- rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
- drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
- rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
-
- drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
- intel_gpu_freq(rps, caps.min_freq));
- drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
- intel_gpu_freq(rps, caps.rp1_freq));
- drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
- intel_gpu_freq(rps, caps.rp0_freq));
- drm_printf(p, "Max overclocked frequency: %dMHz\n",
- intel_gpu_freq(rps, rps->max_freq));
-
- drm_printf(p, "Current freq: %d MHz\n",
- intel_gpu_freq(rps, rps->cur_freq));
- drm_printf(p, "Actual freq: %d MHz\n", cagf);
- drm_printf(p, "Idle freq: %d MHz\n",
- intel_gpu_freq(rps, rps->idle_freq));
- drm_printf(p, "Min freq: %d MHz\n",
- intel_gpu_freq(rps, rps->min_freq));
- drm_printf(p, "Boost freq: %d MHz\n",
- intel_gpu_freq(rps, rps->boost_freq));
- drm_printf(p, "Max freq: %d MHz\n",
- intel_gpu_freq(rps, rps->max_freq));
- drm_printf(p,
- "efficient (RPe) frequency: %d MHz\n",
- intel_gpu_freq(rps, rps->efficient_freq));
+ gen6_rps_frequency_dump(rps, p);
} else {
drm_puts(p, "no P-state info available\n");
}
@@ -655,6 +500,44 @@ static bool rps_eval(void *data)
DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
+static int perf_limit_reasons_get(void *data, u64 *val)
+{
+ struct intel_gt *gt = data;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ *val = intel_uncore_read(gt->uncore, intel_gt_perf_limit_reasons_reg(gt));
+
+ return 0;
+}
+
+static int perf_limit_reasons_clear(void *data, u64 val)
+{
+ struct intel_gt *gt = data;
+ intel_wakeref_t wakeref;
+
+ /*
+ * Clear the upper 16 "log" bits, the lower 16 "status" bits are
+ * read-only. The upper 16 "log" bits are identical to the lower 16
+ * "status" bits except that the "log" bits remain set until cleared.
+ */
+ with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+ intel_uncore_rmw(gt->uncore, intel_gt_perf_limit_reasons_reg(gt),
+ GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
+
+ return 0;
+}
+
+static bool perf_limit_reasons_eval(void *data)
+{
+ struct intel_gt *gt = data;
+
+ return i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt));
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
+ perf_limit_reasons_clear, "%llu\n");
+
void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
@@ -664,6 +547,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
{ "forcewake_user", &forcewake_user_fops, NULL},
{ "llc", &llc_fops, llc_eval },
{ "rps_boost", &rps_boost_fops, rps_eval },
+ { "perf_limit_reasons", &perf_limit_reasons_fops, perf_limit_reasons_eval },
};
intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 5a3a25838fff..70177d3f2e94 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -8,6 +8,19 @@
#include "i915_reg_defs.h"
+#define MCR_REG(offset) ((const i915_mcr_reg_t){ .reg = (offset) })
+
+/*
+ * The perf control registers are technically multicast registers, but the
+ * driver never needs to read/write them directly; we only use them to build
+ * lists of registers (where they're mixed in with other non-MCR registers)
+ * and then operate on the offset directly. For now we'll just define them
+ * as non-multicast so we can place them on the same list, but we may want
+ * to try to come up with a better way to handle heterogeneous lists of
+ * registers in the future.
+ */
+#define PERF_REG(offset) _MMIO(offset)
+
/* RPM unit config (Gen8+) */
#define RPM_CONFIG0 _MMIO(0xd00)
#define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3
@@ -39,12 +52,17 @@
#define FORCEWAKE_ACK_RENDER_GEN9 _MMIO(0xd84)
#define FORCEWAKE_ACK_MEDIA_GEN9 _MMIO(0xd88)
+#define FORCEWAKE_ACK_GSC _MMIO(0xdf8)
+#define FORCEWAKE_ACK_GT_MTL _MMIO(0xdfc)
+
#define GMD_ID_GRAPHICS _MMIO(0xd8c)
#define GMD_ID_MEDIA _MMIO(MTL_MEDIA_GSI_BASE + 0xd8c)
#define MCFG_MCR_SELECTOR _MMIO(0xfd0)
+#define MTL_MCR_SELECTOR _MMIO(0xfd4)
#define SF_MCR_SELECTOR _MMIO(0xfd8)
#define GEN8_MCR_SELECTOR _MMIO(0xfdc)
+#define GAM_MCR_SELECTOR _MMIO(0xfe0)
#define GEN8_MCR_SLICE(slice) (((slice) & 3) << 26)
#define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3)
#define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24)
@@ -54,6 +72,8 @@
#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf)
#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24)
#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7)
+#define MTL_MCR_GROUPID REG_GENMASK(11, 8)
+#define MTL_MCR_INSTANCEID REG_GENMASK(3, 0)
#define IPEIR_I965 _MMIO(0x2064)
#define IPEHR_I965 _MMIO(0x2068)
@@ -329,11 +349,12 @@
#define GEN7_TLB_RD_ADDR _MMIO(0x4700)
#define GEN12_PAT_INDEX(index) _MMIO(0x4800 + (index) * 4)
+#define XEHP_PAT_INDEX(index) MCR_REG(0x4800 + (index) * 4)
-#define XEHP_TILE0_ADDR_RANGE _MMIO(0x4900)
+#define XEHP_TILE0_ADDR_RANGE MCR_REG(0x4900)
#define XEHP_TILE_LMEM_RANGE_SHIFT 8
-#define XEHP_FLAT_CCS_BASE_ADDR _MMIO(0x4910)
+#define XEHP_FLAT_CCS_BASE_ADDR MCR_REG(0x4910)
#define XEHP_CCS_BASE_SHIFT 8
#define GAMTARBMODE _MMIO(0x4a08)
@@ -383,17 +404,18 @@
#define CHICKEN_RASTER_2 _MMIO(0x6208)
#define TBIMR_FAST_CLIP REG_BIT(5)
-#define VFLSKPD _MMIO(0x62a8)
+#define VFLSKPD MCR_REG(0x62a8)
#define DIS_OVER_FETCH_CACHE REG_BIT(1)
#define DIS_MULT_MISS_RD_SQUASH REG_BIT(0)
-#define FF_MODE2 _MMIO(0x6604)
+#define GEN12_FF_MODE2 _MMIO(0x6604)
+#define XEHP_FF_MODE2 MCR_REG(0x6604)
#define FF_MODE2_GS_TIMER_MASK REG_GENMASK(31, 24)
#define FF_MODE2_GS_TIMER_224 REG_FIELD_PREP(FF_MODE2_GS_TIMER_MASK, 224)
#define FF_MODE2_TDS_TIMER_MASK REG_GENMASK(23, 16)
#define FF_MODE2_TDS_TIMER_128 REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
-#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c)
+#define XEHPG_INSTDONE_GEOM_SVG MCR_REG(0x666c)
#define CACHE_MODE_0_GEN7 _MMIO(0x7000) /* IVB+ */
#define RC_OP_FLUSH_ENABLE (1 << 0)
@@ -421,6 +443,7 @@
#define HIZ_CHICKEN _MMIO(0x7018)
#define CHV_HZ_8X8_MODE_IN_1X REG_BIT(15)
#define DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE REG_BIT(14)
+#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
#define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE REG_BIT(3)
#define GEN8_L3CNTLREG _MMIO(0x7034)
@@ -442,23 +465,16 @@
#define GEN8_HDC_CHICKEN1 _MMIO(0x7304)
#define GEN11_COMMON_SLICE_CHICKEN3 _MMIO(0x7304)
+#define XEHP_COMMON_SLICE_CHICKEN3 MCR_REG(0x7304)
#define DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN REG_BIT(12)
#define XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE REG_BIT(12)
#define GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC REG_BIT(11)
#define GEN12_DISABLE_CPS_AWARE_COLOR_PIPE REG_BIT(9)
-/* GEN9 chicken */
-#define SLICE_ECO_CHICKEN0 _MMIO(0x7308)
-#define PIXEL_MASK_CAMMING_DISABLE (1 << 14)
-
-#define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308)
-#define DISABLE_PIXEL_MASK_CAMMING (1 << 14)
-
#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
-#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
-
-#define SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c)
+#define XEHP_SLICE_COMMON_ECO_CHICKEN1 MCR_REG(0x731c)
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
+#define GEN11_STATE_CACHE_REDIRECT_TO_CS (1 << 11)
#define GEN9_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + (slice) * 0x4)
#define GEN10_SLICE_PGCTL_ACK(slice) _MMIO(0x804c + ((slice) / 3) * 0x34 + \
@@ -485,9 +501,12 @@
#define VF_PREEMPTION _MMIO(0x83a4)
#define PREEMPTION_VERTEX_COUNT REG_GENMASK(15, 0)
+#define VFG_PREEMPTION_CHICKEN _MMIO(0x83b4)
+#define POLYGON_TRIFAN_LINELOOP_DISABLE REG_BIT(4)
+
#define GEN8_RC6_CTX_INFO _MMIO(0x8504)
-#define GEN12_SQCM _MMIO(0x8724)
+#define XEHP_SQCM MCR_REG(0x8724)
#define EN_32B_ACCESS REG_BIT(30)
#define HSW_IDICR _MMIO(0x9008)
@@ -519,6 +538,8 @@
#define GEN6_MBCTL_BOOT_FETCH_MECH (1 << 0)
/* Fuse readout registers for GT */
+#define XEHP_FUSE4 _MMIO(0x9114)
+#define GT_L3_EXC_MASK REG_GENMASK(6, 4)
#define GEN10_MIRROR_FUSE3 _MMIO(0x9118)
#define GEN10_L3BANK_PAIR_COUNT 4
#define GEN10_L3BANK_MASK 0x0F
@@ -647,6 +668,9 @@
#define GEN7_MISCCPCTL _MMIO(0x9424)
#define GEN7_DOP_CLOCK_GATE_ENABLE (1 << 0)
+
+#define GEN8_MISCCPCTL MCR_REG(0x9424)
+#define GEN8_DOP_CLOCK_GATE_ENABLE REG_BIT(0)
#define GEN12_DOP_CLOCK_GATE_RENDER_ENABLE REG_BIT(1)
#define GEN8_DOP_CLOCK_GATE_CFCLK_ENABLE (1 << 2)
#define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1 << 4)
@@ -700,7 +724,8 @@
#define GAMTLBVEBOX0_CLKGATE_DIS REG_BIT(16)
#define LTCDD_CLKGATE_DIS REG_BIT(10)
-#define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
+#define GEN11_SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4)
+#define XEHP_SLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x94d4)
#define SARBUNIT_CLKGATE_DIS (1 << 5)
#define RCCUNIT_CLKGATE_DIS (1 << 7)
#define MSCUNIT_CLKGATE_DIS (1 << 10)
@@ -708,27 +733,27 @@
#define L3_CLKGATE_DIS REG_BIT(16)
#define L3_CR2X_CLKGATE_DIS REG_BIT(17)
-#define SCCGCTL94DC _MMIO(0x94dc)
+#define SCCGCTL94DC MCR_REG(0x94dc)
#define CG3DDISURB REG_BIT(14)
#define UNSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x94e4)
#define VSUNIT_CLKGATE_DIS_TGL REG_BIT(19)
#define PSDUNIT_CLKGATE_DIS REG_BIT(5)
-#define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524)
+#define GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE MCR_REG(0x9524)
#define DSS_ROUTER_CLKGATE_DIS REG_BIT(28)
#define GWUNIT_CLKGATE_DIS REG_BIT(16)
-#define SUBSLICE_UNIT_LEVEL_CLKGATE2 _MMIO(0x9528)
+#define SUBSLICE_UNIT_LEVEL_CLKGATE2 MCR_REG(0x9528)
#define CPSSUNIT_CLKGATE_DIS REG_BIT(9)
-#define SSMCGCTL9530 _MMIO(0x9530)
+#define SSMCGCTL9530 MCR_REG(0x9530)
#define RTFUNIT_CLKGATE_DIS REG_BIT(18)
-#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550)
+#define GEN10_DFR_RATIO_EN_AND_CHICKEN MCR_REG(0x9550)
#define DFR_DISABLE (1 << 9)
-#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560)
+#define INF_UNIT_LEVEL_CLKGATE MCR_REG(0x9560)
#define CGPSF_CLKGATE_DIS (1 << 3)
#define MICRO_BP0_0 _MMIO(0x9800)
@@ -901,6 +926,8 @@
#define FORCEWAKE_MEDIA_VDBOX_GEN11(n) _MMIO(0xa540 + (n) * 4)
#define FORCEWAKE_MEDIA_VEBOX_GEN11(n) _MMIO(0xa560 + (n) * 4)
+#define FORCEWAKE_REQ_GSC _MMIO(0xa618)
+
#define CHV_POWER_SS0_SIG1 _MMIO(0xa720)
#define CHV_POWER_SS0_SIG2 _MMIO(0xa724)
#define CHV_POWER_SS1_SIG1 _MMIO(0xa728)
@@ -938,7 +965,8 @@
/* MOCS (Memory Object Control State) registers */
#define GEN9_LNCFCMOCS(i) _MMIO(0xb020 + (i) * 4) /* L3 Cache Control */
-#define GEN9_LNCFCMOCS_REG_COUNT 32
+#define XEHP_LNCFCMOCS(i) MCR_REG(0xb020 + (i) * 4)
+#define LNCFCMOCS_REG_COUNT 32
#define GEN7_L3CNTLREG3 _MMIO(0xb024)
@@ -954,15 +982,10 @@
#define GEN7_L3LOG(slice, i) _MMIO(0xb070 + (slice) * 0x200 + (i) * 4)
#define GEN7_L3LOG_SIZE 0x80
-#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0)
-#define PMFLUSHDONE_LNICRSDROP (1 << 20)
-#define PMFLUSH_GAPL3UNBLOCK (1 << 21)
-#define PMFLUSHDONE_LNEBLK (1 << 22)
-
-#define XEHP_L3NODEARBCFG _MMIO(0xb0b4)
+#define XEHP_L3NODEARBCFG MCR_REG(0xb0b4)
#define XEHP_LNESPARE REG_BIT(19)
-#define GEN8_L3SQCREG1 _MMIO(0xb100)
+#define GEN8_L3SQCREG1 MCR_REG(0xb100)
/*
* Note that on CHV the following has an off-by-one error wrt. to BSpec.
* Using the formula in BSpec leads to a hang, while the formula here works
@@ -973,31 +996,28 @@
#define L3_HIGH_PRIO_CREDITS(x) (((x) >> 1) << 14)
#define L3_PRIO_CREDITS_MASK ((0x1f << 19) | (0x1f << 14))
-#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xb114)
-#define GEN11_I2M_WRITE_DISABLE (1 << 28)
-
-#define GEN8_L3SQCREG4 _MMIO(0xb118)
+#define GEN8_L3SQCREG4 MCR_REG(0xb118)
#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6)
#define GEN8_LQSC_RO_PERF_DIS (1 << 27)
#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21)
#define GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE REG_BIT(22)
-#define GEN9_SCRATCH1 _MMIO(0xb11c)
+#define GEN9_SCRATCH1 MCR_REG(0xb11c)
#define EVICTION_PERF_FIX_ENABLE REG_BIT(8)
-#define BDW_SCRATCH1 _MMIO(0xb11c)
+#define BDW_SCRATCH1 MCR_REG(0xb11c)
#define GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE (1 << 2)
-#define GEN11_SCRATCH2 _MMIO(0xb140)
+#define GEN11_SCRATCH2 MCR_REG(0xb140)
#define GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE (1 << 19)
-#define GEN11_L3SQCREG5 _MMIO(0xb158)
+#define XEHP_L3SQCREG5 MCR_REG(0xb158)
#define L3_PWM_TIMER_INIT_VAL_MASK REG_GENMASK(9, 0)
-#define MLTICTXCTL _MMIO(0xb170)
+#define MLTICTXCTL MCR_REG(0xb170)
#define TDONRENDER REG_BIT(2)
-#define XEHP_L3SCQREG7 _MMIO(0xb188)
+#define XEHP_L3SCQREG7 MCR_REG(0xb188)
#define BLEND_FILL_CACHING_OPT_DIS REG_BIT(3)
#define XEHPC_L3SCRUB _MMIO(0xb18c)
@@ -1005,7 +1025,7 @@
#define SCRUB_RATE_PER_BANK_MASK REG_GENMASK(2, 0)
#define SCRUB_RATE_4B_PER_CLK REG_FIELD_PREP(SCRUB_RATE_PER_BANK_MASK, 0x6)
-#define L3SQCREG1_CCS0 _MMIO(0xb200)
+#define L3SQCREG1_CCS0 MCR_REG(0xb200)
#define FLUSHALLNONCOH REG_BIT(5)
#define GEN11_GLBLINVL _MMIO(0xb404)
@@ -1030,11 +1050,14 @@
#define GEN9_BLT_MOCS(i) _MMIO(__GEN9_BCS0_MOCS0 + (i) * 4)
#define GEN12_FAULT_TLB_DATA0 _MMIO(0xceb8)
+#define XEHP_FAULT_TLB_DATA0 MCR_REG(0xceb8)
#define GEN12_FAULT_TLB_DATA1 _MMIO(0xcebc)
+#define XEHP_FAULT_TLB_DATA1 MCR_REG(0xcebc)
#define FAULT_VA_HIGH_BITS (0xf << 0)
#define FAULT_GTT_SEL (1 << 4)
#define GEN12_RING_FAULT_REG _MMIO(0xcec4)
+#define XEHP_RING_FAULT_REG MCR_REG(0xcec4)
#define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7)
#define RING_FAULT_GTTSEL_MASK (1 << 11)
#define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff)
@@ -1042,16 +1065,21 @@
#define RING_FAULT_VALID (1 << 0)
#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8)
+#define XEHP_GFX_TLB_INV_CR MCR_REG(0xced8)
#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc)
+#define XEHP_VD_TLB_INV_CR MCR_REG(0xcedc)
#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0)
+#define XEHP_VE_TLB_INV_CR MCR_REG(0xcee0)
#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4)
+#define XEHP_BLT_TLB_INV_CR MCR_REG(0xcee4)
#define GEN12_COMPCTX_TLB_INV_CR _MMIO(0xcf04)
+#define XEHP_COMPCTX_TLB_INV_CR MCR_REG(0xcf04)
-#define GEN12_MERT_MOD_CTRL _MMIO(0xcf28)
-#define RENDER_MOD_CTRL _MMIO(0xcf2c)
-#define COMP_MOD_CTRL _MMIO(0xcf30)
-#define VDBX_MOD_CTRL _MMIO(0xcf34)
-#define VEBX_MOD_CTRL _MMIO(0xcf38)
+#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
+#define RENDER_MOD_CTRL MCR_REG(0xcf2c)
+#define COMP_MOD_CTRL MCR_REG(0xcf30)
+#define VDBX_MOD_CTRL MCR_REG(0xcf34)
+#define VEBX_MOD_CTRL MCR_REG(0xcf38)
#define FORCE_MISS_FTLB REG_BIT(3)
#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
@@ -1066,48 +1094,52 @@
#define GEN12_GAM_DONE _MMIO(0xcf68)
#define GEN7_HALF_SLICE_CHICKEN1 _MMIO(0xe100) /* IVB GT1 + VLV */
+#define GEN8_HALF_SLICE_CHICKEN1 MCR_REG(0xe100)
#define GEN7_MAX_PS_THREAD_DEP (8 << 12)
#define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1 << 10)
#define GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE (1 << 4)
#define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1 << 3)
#define GEN7_SAMPLER_INSTDONE _MMIO(0xe160)
+#define GEN8_SAMPLER_INSTDONE MCR_REG(0xe160)
#define GEN7_ROW_INSTDONE _MMIO(0xe164)
+#define GEN8_ROW_INSTDONE MCR_REG(0xe164)
-#define HALF_SLICE_CHICKEN2 _MMIO(0xe180)
+#define HALF_SLICE_CHICKEN2 MCR_REG(0xe180)
#define GEN8_ST_PO_DISABLE (1 << 13)
-#define HALF_SLICE_CHICKEN3 _MMIO(0xe184)
+#define HSW_HALF_SLICE_CHICKEN3 _MMIO(0xe184)
+#define GEN8_HALF_SLICE_CHICKEN3 MCR_REG(0xe184)
#define HSW_SAMPLE_C_PERFORMANCE (1 << 9)
#define GEN8_CENTROID_PIXEL_OPT_DIS (1 << 8)
#define GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC (1 << 5)
#define GEN8_SAMPLER_POWER_BYPASS_DIS (1 << 1)
-#define GEN9_HALF_SLICE_CHICKEN5 _MMIO(0xe188)
+#define GEN9_HALF_SLICE_CHICKEN5 MCR_REG(0xe188)
#define GEN9_DG_MIRROR_FIX_ENABLE (1 << 5)
#define GEN9_CCS_TLB_PREFETCH_ENABLE (1 << 3)
-#define GEN10_SAMPLER_MODE _MMIO(0xe18c)
+#define GEN10_SAMPLER_MODE MCR_REG(0xe18c)
#define ENABLE_SMALLPL REG_BIT(15)
#define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9)
#define GEN11_SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5)
-#define GEN9_HALF_SLICE_CHICKEN7 _MMIO(0xe194)
+#define GEN9_HALF_SLICE_CHICKEN7 MCR_REG(0xe194)
#define DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA REG_BIT(15)
#define GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR REG_BIT(8)
#define GEN9_ENABLE_YV12_BUGFIX REG_BIT(4)
#define GEN9_ENABLE_GPGPU_PREEMPTION REG_BIT(2)
-#define GEN10_CACHE_MODE_SS _MMIO(0xe420)
+#define GEN10_CACHE_MODE_SS MCR_REG(0xe420)
#define ENABLE_EU_COUNT_FOR_TDL_FLUSH REG_BIT(10)
#define DISABLE_ECC REG_BIT(5)
#define FLOAT_BLEND_OPTIMIZATION_ENABLE REG_BIT(4)
#define ENABLE_PREFETCH_INTO_IC REG_BIT(3)
-#define EU_PERF_CNTL0 _MMIO(0xe458)
-#define EU_PERF_CNTL4 _MMIO(0xe45c)
+#define EU_PERF_CNTL0 PERF_REG(0xe458)
+#define EU_PERF_CNTL4 PERF_REG(0xe45c)
-#define GEN9_ROW_CHICKEN4 _MMIO(0xe48c)
+#define GEN9_ROW_CHICKEN4 MCR_REG(0xe48c)
#define GEN12_DISABLE_GRF_CLEAR REG_BIT(13)
#define XEHP_DIS_BBL_SYSPIPE REG_BIT(11)
#define GEN12_DISABLE_TDL_PUSH REG_BIT(9)
@@ -1119,7 +1151,7 @@
#define HSW_ROW_CHICKEN3 _MMIO(0xe49c)
#define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6)
-#define GEN8_ROW_CHICKEN _MMIO(0xe4f0)
+#define GEN8_ROW_CHICKEN MCR_REG(0xe4f0)
#define FLOW_CONTROL_ENABLE REG_BIT(15)
#define UGM_BACKUP_MODE REG_BIT(13)
#define MDQ_ARBITRATION_MODE REG_BIT(12)
@@ -1130,42 +1162,43 @@
#define DISABLE_EARLY_EOT REG_BIT(1)
#define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4)
+
+#define GEN8_ROW_CHICKEN2 MCR_REG(0xe4f4)
#define GEN12_DISABLE_READ_SUPPRESSION REG_BIT(15)
#define GEN12_DISABLE_EARLY_READ REG_BIT(14)
#define GEN12_ENABLE_LARGE_GRF_MODE REG_BIT(12)
#define GEN12_PUSH_CONST_DEREF_HOLD_DIS REG_BIT(8)
+#define GEN12_DISABLE_DOP_GATING REG_BIT(0)
-#define RT_CTRL _MMIO(0xe530)
+#define RT_CTRL MCR_REG(0xe530)
#define DIS_NULL_QUERY REG_BIT(10)
#define STACKID_CTRL REG_GENMASK(6, 5)
#define STACKID_CTRL_512 REG_FIELD_PREP(STACKID_CTRL, 0x2)
-#define EU_PERF_CNTL1 _MMIO(0xe558)
-#define EU_PERF_CNTL5 _MMIO(0xe55c)
+#define EU_PERF_CNTL1 PERF_REG(0xe558)
+#define EU_PERF_CNTL5 PERF_REG(0xe55c)
-#define GEN12_HDC_CHICKEN0 _MMIO(0xe5f0)
+#define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0)
#define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11)
-#define ICL_HDC_MODE _MMIO(0xe5f4)
+#define ICL_HDC_MODE MCR_REG(0xe5f4)
-#define EU_PERF_CNTL2 _MMIO(0xe658)
-#define EU_PERF_CNTL6 _MMIO(0xe65c)
-#define EU_PERF_CNTL3 _MMIO(0xe758)
+#define EU_PERF_CNTL2 PERF_REG(0xe658)
+#define EU_PERF_CNTL6 PERF_REG(0xe65c)
+#define EU_PERF_CNTL3 PERF_REG(0xe758)
-#define LSC_CHICKEN_BIT_0 _MMIO(0xe7c8)
+#define LSC_CHICKEN_BIT_0 MCR_REG(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
#define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15)
-#define LSC_CHICKEN_BIT_0_UDW _MMIO(0xe7c8 + 4)
+#define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4)
#define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32)
#define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32)
#define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32)
#define MAXREQS_PER_BANK REG_GENMASK(39 - 32, 37 - 32)
#define DISABLE_128B_EVICTION_COMMAND_UDW REG_BIT(36 - 32)
-#define SARB_CHICKEN1 _MMIO(0xe90c)
+#define SARB_CHICKEN1 MCR_REG(0xe90c)
#define COMP_CKN_IN REG_GENMASK(30, 29)
-#define GEN7_HALF_SLICE_CHICKEN1_GT2 _MMIO(0xf100)
-
#define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4)
#define DOP_CLOCK_GATING_DISABLE (1 << 0)
#define PUSH_CONSTANT_DEREF_DISABLE (1 << 8)
@@ -1513,6 +1546,9 @@
#define VLV_RENDER_C0_COUNT _MMIO(0x138118)
#define VLV_MEDIA_C0_COUNT _MMIO(0x13811c)
+#define GEN12_RPSTAT1 _MMIO(0x1381b4)
+#define GEN12_VOLTAGE_MASK REG_GENMASK(10, 0)
+
#define GEN11_GT_INTR_DW(x) _MMIO(0x190018 + ((x) * 4))
#define GEN11_CSME (31)
#define GEN11_GUNIT (28)
@@ -1583,6 +1619,11 @@
#define GEN12_SFC_DONE(n) _MMIO(0x1cc000 + (n) * 0x1000)
+#define GT0_PACKAGE_ENERGY_STATUS _MMIO(0x250004)
+#define GT0_PACKAGE_RAPL_LIMIT _MMIO(0x250008)
+#define GT0_PACKAGE_POWER_SKU_UNIT _MMIO(0x250068)
+#define GT0_PLATFORM_ENERGY_STATUS _MMIO(0x25006c)
+
/*
* Standalone Media's non-engine GT registers are located at their regular GT
* offsets plus 0x380000. This extra offset is stored inside the intel_uncore
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
index d651ccd0ab20..9486dd3bed99 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c
@@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj)
return !strncmp(kobj->name, "gt", 2);
}
-struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
const char *name)
{
- struct kobject *kobj = &dev->kobj;
-
/*
* We are interested at knowing from where the interface
* has been called, whether it's called from gt/ or from
@@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
* "struct drm_i915_private *" type.
*/
if (!is_object_gt(kobj)) {
+ struct device *dev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
return to_gt(i915);
@@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt)
return &gt->i915->drm.primary->kdev->kobj;
}
-static ssize_t id_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t id_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buf)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
return sysfs_emit(buf, "%u\n", gt->info.id);
}
-static DEVICE_ATTR_RO(id);
+static struct kobj_attribute attr_id = __ATTR_RO(id);
static struct attribute *id_attrs[] = {
- &dev_attr_id.attr,
+ &attr_id.attr,
NULL,
};
ATTRIBUTE_GROUPS(id);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
index 6232923a420d..18bab835be02 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h
@@ -18,11 +18,6 @@ bool is_object_gt(struct kobject *kobj);
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
-struct kobject *
-intel_gt_create_kobj(struct intel_gt *gt,
- struct kobject *dir,
- const char *name);
-
static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
{
return container_of(kobj, struct intel_gt, sysfs_gt);
@@ -30,7 +25,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj)
void intel_gt_sysfs_register(struct intel_gt *gt);
void intel_gt_sysfs_unregister(struct intel_gt *gt);
-struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
+struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj,
const char *name);
#endif /* SYSFS_GT_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
index 180dd6f3ef57..2b5f05b31187 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
@@ -24,14 +24,15 @@ enum intel_gt_sysfs_op {
};
static int
-sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
+sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr,
int (func)(struct intel_gt *gt, u32 val), u32 val)
{
struct intel_gt *gt;
int ret;
- if (!is_object_gt(&dev->kobj)) {
+ if (!is_object_gt(kobj)) {
int i;
+ struct device *dev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
for_each_gt(gt, i915, i) {
@@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
break;
}
} else {
- gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
ret = func(gt, val);
}
@@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr,
}
static u32
-sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
+sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr,
u32 (func)(struct intel_gt *gt),
enum intel_gt_sysfs_op op)
{
@@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1;
- if (!is_object_gt(&dev->kobj)) {
+ if (!is_object_gt(kobj)) {
int i;
+ struct device *dev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(dev);
for_each_gt(gt, i915, i) {
@@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
}
}
} else {
- gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ gt = intel_gt_sysfs_get_drvdata(kobj, attr->name);
ret = func(gt);
}
@@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr,
#define sysfs_gt_attribute_r_max_func(d, a, f) \
sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX)
+#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \
+ static ssize_t _name##_show_common(struct kobject *kobj, \
+ struct attribute *attr, char *buff) \
+ { \
+ u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \
+ __##_name##_show); \
+ \
+ return sysfs_emit(buff, "%u\n", val); \
+ } \
+ static ssize_t _name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, char *buff) \
+ { \
+ return _name ##_show_common(kobj, &attr->attr, buff); \
+ } \
+ static ssize_t _name##_dev_show(struct device *dev, \
+ struct device_attribute *attr, char *buff) \
+ { \
+ return _name##_show_common(&dev->kobj, &attr->attr, buff); \
+ }
+
+#define INTEL_GT_SYSFS_STORE(_name, _func) \
+ static ssize_t _name##_store_common(struct kobject *kobj, \
+ struct attribute *attr, \
+ const char *buff, size_t count) \
+ { \
+ int ret; \
+ u32 val; \
+ \
+ ret = kstrtou32(buff, 0, &val); \
+ if (ret) \
+ return ret; \
+ \
+ ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \
+ \
+ return ret ?: count; \
+ } \
+ static ssize_t _name##_store(struct kobject *kobj, \
+ struct kobj_attribute *attr, const char *buff, \
+ size_t count) \
+ { \
+ return _name##_store_common(kobj, &attr->attr, buff, count); \
+ } \
+ static ssize_t _name##_dev_store(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buff, size_t count) \
+ { \
+ return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \
+ }
+
+#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max)
+#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min)
+
+#define INTEL_GT_ATTR_RW(_name) \
+ static struct kobj_attribute attr_##_name = __ATTR_RW(_name)
+
+#define INTEL_GT_ATTR_RO(_name) \
+ static struct kobj_attribute attr_##_name = __ATTR_RO(_name)
+
+#define INTEL_GT_DUAL_ATTR_RW(_name) \
+ static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \
+ _name##_dev_show, \
+ _name##_dev_store); \
+ INTEL_GT_ATTR_RW(_name)
+
+#define INTEL_GT_DUAL_ATTR_RO(_name) \
+ static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \
+ _name##_dev_show, \
+ NULL); \
+ INTEL_GT_ATTR_RO(_name)
+
#ifdef CONFIG_PM
static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
{
@@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg)
return DIV_ROUND_CLOSEST_ULL(res, 1000);
}
-static ssize_t rc6_enable_show(struct device *dev,
- struct device_attribute *attr,
- char *buff)
+static u8 get_rc6_mask(struct intel_gt *gt)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
u8 mask = 0;
if (HAS_RC6(gt->i915))
@@ -118,37 +187,35 @@ static ssize_t rc6_enable_show(struct device *dev,
if (HAS_RC6pp(gt->i915))
mask |= BIT(2);
- return sysfs_emit(buff, "%x\n", mask);
+ return mask;
}
-static u32 __rc6_residency_ms_show(struct intel_gt *gt)
+static ssize_t rc6_enable_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
{
- return get_residency(gt, GEN6_GT_GFX_RC6);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
+
+ return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
}
-static ssize_t rc6_residency_ms_show(struct device *dev,
- struct device_attribute *attr,
- char *buff)
+static ssize_t rc6_enable_dev_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buff)
{
- u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
- __rc6_residency_ms_show);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name);
- return sysfs_emit(buff, "%u\n", rc6_residency);
+ return sysfs_emit(buff, "%x\n", get_rc6_mask(gt));
}
-static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
+static u32 __rc6_residency_ms_show(struct intel_gt *gt)
{
- return get_residency(gt, GEN6_GT_GFX_RC6p);
+ return get_residency(gt, GEN6_GT_GFX_RC6);
}
-static ssize_t rc6p_residency_ms_show(struct device *dev,
- struct device_attribute *attr,
- char *buff)
+static u32 __rc6p_residency_ms_show(struct intel_gt *gt)
{
- u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr,
- __rc6p_residency_ms_show);
-
- return sysfs_emit(buff, "%u\n", rc6p_residency);
+ return get_residency(gt, GEN6_GT_GFX_RC6p);
}
static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
@@ -156,67 +223,69 @@ static u32 __rc6pp_residency_ms_show(struct intel_gt *gt)
return get_residency(gt, GEN6_GT_GFX_RC6pp);
}
-static ssize_t rc6pp_residency_ms_show(struct device *dev,
- struct device_attribute *attr,
- char *buff)
-{
- u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr,
- __rc6pp_residency_ms_show);
-
- return sysfs_emit(buff, "%u\n", rc6pp_residency);
-}
-
static u32 __media_rc6_residency_ms_show(struct intel_gt *gt)
{
return get_residency(gt, VLV_GT_MEDIA_RC6);
}
-static ssize_t media_rc6_residency_ms_show(struct device *dev,
- struct device_attribute *attr,
- char *buff)
-{
- u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr,
- __media_rc6_residency_ms_show);
+INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms);
+INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms);
+INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms);
+INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms);
- return sysfs_emit(buff, "%u\n", rc6_residency);
-}
-
-static DEVICE_ATTR_RO(rc6_enable);
-static DEVICE_ATTR_RO(rc6_residency_ms);
-static DEVICE_ATTR_RO(rc6p_residency_ms);
-static DEVICE_ATTR_RO(rc6pp_residency_ms);
-static DEVICE_ATTR_RO(media_rc6_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(rc6_enable);
+INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms);
+INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms);
static struct attribute *rc6_attrs[] = {
+ &attr_rc6_enable.attr,
+ &attr_rc6_residency_ms.attr,
+ NULL
+};
+
+static struct attribute *rc6p_attrs[] = {
+ &attr_rc6p_residency_ms.attr,
+ &attr_rc6pp_residency_ms.attr,
+ NULL
+};
+
+static struct attribute *media_rc6_attrs[] = {
+ &attr_media_rc6_residency_ms.attr,
+ NULL
+};
+
+static struct attribute *rc6_dev_attrs[] = {
&dev_attr_rc6_enable.attr,
&dev_attr_rc6_residency_ms.attr,
NULL
};
-static struct attribute *rc6p_attrs[] = {
+static struct attribute *rc6p_dev_attrs[] = {
&dev_attr_rc6p_residency_ms.attr,
&dev_attr_rc6pp_residency_ms.attr,
NULL
};
-static struct attribute *media_rc6_attrs[] = {
+static struct attribute *media_rc6_dev_attrs[] = {
&dev_attr_media_rc6_residency_ms.attr,
NULL
};
static const struct attribute_group rc6_attr_group[] = {
{ .attrs = rc6_attrs, },
- { .name = power_group_name, .attrs = rc6_attrs, },
+ { .name = power_group_name, .attrs = rc6_dev_attrs, },
};
static const struct attribute_group rc6p_attr_group[] = {
{ .attrs = rc6p_attrs, },
- { .name = power_group_name, .attrs = rc6p_attrs, },
+ { .name = power_group_name, .attrs = rc6p_dev_attrs, },
};
static const struct attribute_group media_rc6_attr_group[] = {
{ .attrs = media_rc6_attrs, },
- { .name = power_group_name, .attrs = media_rc6_attrs, },
+ { .name = power_group_name, .attrs = media_rc6_dev_attrs, },
};
static int __intel_gt_sysfs_create_group(struct kobject *kobj,
@@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt)
return intel_rps_read_actual_frequency(&gt->rps);
}
-static ssize_t act_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __act_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", actual_freq);
-}
-
static u32 __cur_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_requested_frequency(&gt->rps);
}
-static ssize_t cur_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __cur_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", cur_freq);
-}
-
static u32 __boost_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_boost_frequency(&gt->rps);
}
-static ssize_t boost_freq_mhz_show(struct device *dev,
- struct device_attribute *attr,
- char *buff)
-{
- u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __boost_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", boost_freq);
-}
-
static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val)
{
return intel_rps_set_boost_frequency(&gt->rps, val);
}
-static ssize_t boost_freq_mhz_store(struct device *dev,
- struct device_attribute *attr,
- const char *buff, size_t count)
-{
- ssize_t ret;
- u32 val;
-
- ret = kstrtou32(buff, 0, &val);
- if (ret)
- return ret;
-
- return sysfs_gt_attribute_w_func(dev, attr,
- __boost_freq_mhz_store, val) ?: count;
-}
-
-static u32 __rp0_freq_mhz_show(struct intel_gt *gt)
+static u32 __RP0_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_rp0_frequency(&gt->rps);
}
-static ssize_t RP0_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __rp0_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", rp0_freq);
-}
-
-static u32 __rp1_freq_mhz_show(struct intel_gt *gt)
-{
- return intel_rps_get_rp1_frequency(&gt->rps);
-}
-
-static ssize_t RP1_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __rp1_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", rp1_freq);
-}
-
-static u32 __rpn_freq_mhz_show(struct intel_gt *gt)
+static u32 __RPn_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_rpn_frequency(&gt->rps);
}
-static ssize_t RPn_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
+static u32 __RP1_freq_mhz_show(struct intel_gt *gt)
{
- u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __rpn_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", rpn_freq);
+ return intel_rps_get_rp1_frequency(&gt->rps);
}
static u32 __max_freq_mhz_show(struct intel_gt *gt)
@@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt)
return intel_rps_get_max_frequency(&gt->rps);
}
-static ssize_t max_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __max_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", max_freq);
-}
-
static int __set_max_freq(struct intel_gt *gt, u32 val)
{
return intel_rps_set_max_frequency(&gt->rps, val);
}
-static ssize_t max_freq_mhz_store(struct device *dev,
- struct device_attribute *attr,
- const char *buff, size_t count)
-{
- int ret;
- u32 val;
-
- ret = kstrtou32(buff, 0, &val);
- if (ret)
- return ret;
-
- ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val);
-
- return ret ?: count;
-}
-
static u32 __min_freq_mhz_show(struct intel_gt *gt)
{
return intel_rps_get_min_frequency(&gt->rps);
}
-static ssize_t min_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr,
- __min_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", min_freq);
-}
-
static int __set_min_freq(struct intel_gt *gt, u32 val)
{
return intel_rps_set_min_frequency(&gt->rps, val);
}
-static ssize_t min_freq_mhz_store(struct device *dev,
- struct device_attribute *attr,
- const char *buff, size_t count)
-{
- int ret;
- u32 val;
-
- ret = kstrtou32(buff, 0, &val);
- if (ret)
- return ret;
-
- ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val);
-
- return ret ?: count;
-}
-
static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
{
struct intel_rps *rps = &gt->rps;
@@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt)
return intel_gpu_freq(rps, rps->efficient_freq);
}
-static ssize_t vlv_rpe_freq_mhz_show(struct device *dev,
- struct device_attribute *attr, char *buff)
-{
- u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr,
- __vlv_rpe_freq_mhz_show);
-
- return sysfs_emit(buff, "%u\n", rpe_freq);
-}
-
-#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \
- static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \
- static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store)
-
-#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
- INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL)
-#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
- INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store)
+INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz);
+INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz);
+INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store);
+INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq);
+INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq);
+
+#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \
+ static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \
+ _show_dev, _store_dev); \
+ static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \
+ _show, _store)
+
+#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \
+ INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \
+ _name##_dev_show, NULL)
+#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \
+ INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \
+ _name##_dev_show, _name##_dev_store)
/* The below macros generate static structures */
INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz);
@@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz);
INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz);
-
-static DEVICE_ATTR_RO(vlv_rpe_freq_mhz);
-
-#define GEN6_ATTR(s) { \
- &dev_attr_##s##_act_freq_mhz.attr, \
- &dev_attr_##s##_cur_freq_mhz.attr, \
- &dev_attr_##s##_boost_freq_mhz.attr, \
- &dev_attr_##s##_max_freq_mhz.attr, \
- &dev_attr_##s##_min_freq_mhz.attr, \
- &dev_attr_##s##_RP0_freq_mhz.attr, \
- &dev_attr_##s##_RP1_freq_mhz.attr, \
- &dev_attr_##s##_RPn_freq_mhz.attr, \
+INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz);
+
+#define GEN6_ATTR(p, s) { \
+ &p##attr_##s##_act_freq_mhz.attr, \
+ &p##attr_##s##_cur_freq_mhz.attr, \
+ &p##attr_##s##_boost_freq_mhz.attr, \
+ &p##attr_##s##_max_freq_mhz.attr, \
+ &p##attr_##s##_min_freq_mhz.attr, \
+ &p##attr_##s##_RP0_freq_mhz.attr, \
+ &p##attr_##s##_RP1_freq_mhz.attr, \
+ &p##attr_##s##_RPn_freq_mhz.attr, \
NULL, \
}
-#define GEN6_RPS_ATTR GEN6_ATTR(rps)
-#define GEN6_GT_ATTR GEN6_ATTR(gt)
+#define GEN6_RPS_ATTR GEN6_ATTR(, rps)
+#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt)
static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR;
static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR;
-static ssize_t punit_req_freq_mhz_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t punit_req_freq_mhz_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buff)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
u32 preq = intel_rps_read_punit_req_frequency(&gt->rps);
return sysfs_emit(buff, "%u\n", preq);
@@ -508,20 +464,20 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev,
struct intel_gt_bool_throttle_attr {
struct attribute attr;
- ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr,
char *buf);
- i915_reg_t reg32;
+ i915_reg_t (*reg32)(struct intel_gt *gt);
u32 mask;
};
-static ssize_t throttle_reason_bool_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t throttle_reason_bool_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buff)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_gt_bool_throttle_attr *t_attr =
(struct intel_gt_bool_throttle_attr *) attr;
- bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32, t_attr->mask);
+ bool val = rps_read_mask_mmio(&gt->rps, t_attr->reg32(gt), t_attr->mask);
return sysfs_emit(buff, "%u\n", val);
}
@@ -530,11 +486,11 @@ static ssize_t throttle_reason_bool_show(struct device *dev,
struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \
.attr = { .name = __stringify(sysfs_func__), .mode = 0444 }, \
.show = throttle_reason_bool_show, \
- .reg32 = GT0_PERF_LIMIT_REASONS, \
+ .reg32 = intel_gt_perf_limit_reasons_reg, \
.mask = mask__, \
}
-static DEVICE_ATTR_RO(punit_req_freq_mhz);
+INTEL_GT_ATTR_RO(punit_req_freq_mhz);
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK);
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK);
static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK);
@@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = {
#define U8_8_VAL_MASK 0xffff
#define U8_8_SCALE_TO_VALUE "0.00390625"
-static ssize_t freq_factor_scale_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t freq_factor_scale_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buff)
{
return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE);
@@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode)
return !mode ? mode : 256 / mode;
}
-static ssize_t media_freq_factor_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t media_freq_factor_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buff)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
intel_wakeref_t wakeref;
u32 mode;
@@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev,
return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode));
}
-static ssize_t media_freq_factor_store(struct device *dev,
- struct device_attribute *attr,
+static ssize_t media_freq_factor_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
const char *buff, size_t count)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
u32 factor, mode;
int err;
@@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev,
return err ?: count;
}
-static ssize_t media_RP0_freq_mhz_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buff)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
u32 val;
int err;
@@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev,
return sysfs_emit(buff, "%u\n", val);
}
-static ssize_t media_RPn_freq_mhz_show(struct device *dev,
- struct device_attribute *attr,
+static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
char *buff)
{
- struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
+ struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name);
u32 val;
int err;
@@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev,
return sysfs_emit(buff, "%u\n", val);
}
-static DEVICE_ATTR_RW(media_freq_factor);
-static struct device_attribute dev_attr_media_freq_factor_scale =
+INTEL_GT_ATTR_RW(media_freq_factor);
+static struct kobj_attribute attr_media_freq_factor_scale =
__ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL);
-static DEVICE_ATTR_RO(media_RP0_freq_mhz);
-static DEVICE_ATTR_RO(media_RPn_freq_mhz);
+INTEL_GT_ATTR_RO(media_RP0_freq_mhz);
+INTEL_GT_ATTR_RO(media_RPn_freq_mhz);
static const struct attribute *media_perf_power_attrs[] = {
- &dev_attr_media_freq_factor.attr,
- &dev_attr_media_freq_factor_scale.attr,
- &dev_attr_media_RP0_freq_mhz.attr,
- &dev_attr_media_RPn_freq_mhz.attr,
+ &attr_media_freq_factor.attr,
+ &attr_media_freq_factor_scale.attr,
+ &attr_media_RP0_freq_mhz.attr,
+ &attr_media_RPn_freq_mhz.attr,
NULL
};
@@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = {
NULL
};
-static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj,
- const struct attribute * const *attrs)
+static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj)
{
+ const struct attribute * const *attrs;
+ struct attribute *vlv_attr;
int ret;
if (GRAPHICS_VER(gt->i915) < 6)
return 0;
+ if (is_object_gt(kobj)) {
+ attrs = gen6_rps_attrs;
+ vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr;
+ } else {
+ attrs = gen6_gt_attrs;
+ vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr;
+ }
+
ret = sysfs_create_files(kobj, attrs);
if (ret)
return ret;
if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915))
- ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr);
+ ret = sysfs_create_file(kobj, vlv_attr);
return ret;
}
@@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
intel_sysfs_rc6_init(gt, kobj);
- ret = is_object_gt(kobj) ?
- intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) :
- intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs);
+ ret = intel_sysfs_rps_init(gt, kobj);
if (ret)
drm_warn(&gt->i915->drm,
"failed to create gt%u RPS sysfs files (%pe)",
@@ -790,13 +753,13 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj)
if (!is_object_gt(kobj))
return;
- ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr);
+ ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr);
if (ret)
drm_warn(&gt->i915->drm,
"failed to create gt%u punit_req_freq_mhz sysfs (%pe)",
gt->info.id, ERR_PTR(ret));
- if (GRAPHICS_VER(gt->i915) >= 11) {
+ if (i915_mmio_reg_valid(intel_gt_perf_limit_reasons_reg(gt))) {
ret = sysfs_create_files(kobj, throttle_reason_attrs);
if (ret)
drm_warn(&gt->i915->drm,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index f19c2de77ff6..a0cc73b401ef 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -20,6 +20,7 @@
#include "intel_gsc.h"
#include "i915_vma.h"
+#include "i915_perf_types.h"
#include "intel_engine_types.h"
#include "intel_gt_buffer_pool_types.h"
#include "intel_hwconfig.h"
@@ -59,6 +60,9 @@ enum intel_steering_type {
L3BANK,
MSLICE,
LNCF,
+ GAM,
+ DSS,
+ OADDRM,
/*
* On some platforms there are multiple types of MCR registers that
@@ -141,20 +145,6 @@ struct intel_gt {
struct intel_wakeref wakeref;
atomic_t user_wakeref;
- /**
- * Protects access to lmem usefault list.
- * It is required, if we are outside of the runtime suspend path,
- * access to @lmem_userfault_list requires always first grabbing the
- * runtime pm, to ensure we can't race against runtime suspend.
- * Once we have that we also need to grab @lmem_userfault_lock,
- * at which point we have exclusive access.
- * The runtime suspend path is special since it doesn't really hold any locks,
- * but instead has exclusive access by virtue of all other accesses requiring
- * holding the runtime pm wakeref.
- */
- struct mutex lmem_userfault_lock;
- struct list_head lmem_userfault_list;
-
struct list_head closed_vma;
spinlock_t closed_lock; /* guards the list of closed_vma */
@@ -170,9 +160,6 @@ struct intel_gt {
*/
intel_wakeref_t awake;
- /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */
- struct intel_wakeref_auto userfault_wakeref;
-
u32 clock_frequency;
u32 clock_period_ns;
@@ -286,6 +273,8 @@ struct intel_gt {
/* sysfs defaults per gt */
struct gt_defaults defaults;
struct kobject *sysfs_defaults;
+
+ struct i915_perf_gt perf;
};
struct intel_gt_definition {
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 2eaeba14319e..e82a9d763e57 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -15,6 +15,7 @@
#include "i915_trace.h"
#include "i915_utils.h"
#include "intel_gt.h"
+#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_gtt.h"
@@ -269,11 +270,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass)
memset64(vm->min_alignment, I915_GTT_MIN_ALIGNMENT,
ARRAY_SIZE(vm->min_alignment));
- if (HAS_64K_PAGES(vm->i915) && NEEDS_COMPACT_PT(vm->i915) &&
- subclass == VM_CLASS_PPGTT) {
- vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_2M;
- vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_2M;
- } else if (HAS_64K_PAGES(vm->i915)) {
+ if (HAS_64K_PAGES(vm->i915)) {
vm->min_alignment[INTEL_MEMORY_LOCAL] = I915_GTT_PAGE_SIZE_64K;
vm->min_alignment[INTEL_MEMORY_STOLEN_LOCAL] = I915_GTT_PAGE_SIZE_64K;
}
@@ -343,7 +340,8 @@ int setup_scratch_page(struct i915_address_space *vm)
*/
size = I915_GTT_PAGE_SIZE_4K;
if (i915_vm_is_4lvl(vm) &&
- HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K))
+ HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K) &&
+ !HAS_64K_PAGES(vm->i915))
size = I915_GTT_PAGE_SIZE_64K;
do {
@@ -385,18 +383,6 @@ skip:
if (size == I915_GTT_PAGE_SIZE_4K)
return -ENOMEM;
- /*
- * If we need 64K minimum GTT pages for device local-memory,
- * like on XEHPSDV, then we need to fail the allocation here,
- * otherwise we can't safely support the insertion of
- * local-memory pages for this vm, since the HW expects the
- * correct physical alignment and size when the page-table is
- * operating in 64K GTT mode, which includes any scratch PTEs,
- * since userspace can still touch them.
- */
- if (HAS_64K_PAGES(vm->i915))
- return -ENOMEM;
-
size = I915_GTT_PAGE_SIZE_4K;
} while (1);
}
@@ -493,6 +479,18 @@ static void tgl_setup_private_ppat(struct intel_uncore *uncore)
intel_uncore_write(uncore, GEN12_PAT_INDEX(7), GEN8_PPAT_WB);
}
+/*
+ * Program XEHP_PAT_INDEX(0..7) with intel_gt_mcr_multicast_write():
+ * entries 0-3 get WB, WC, WT and UC, entries 4-7 all get WB.
+ */
+static void xehp_setup_private_ppat(struct intel_gt *gt)
+{
+	const u32 pat[] = {
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WC,
+		GEN8_PPAT_WT,
+		GEN8_PPAT_UC,
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WB,
+		GEN8_PPAT_WB,
+	};
+	unsigned int idx;
+
+	/* Written in ascending index order */
+	for (idx = 0; idx < ARRAY_SIZE(pat); idx++)
+		intel_gt_mcr_multicast_write(gt, XEHP_PAT_INDEX(idx), pat[idx]);
+}
+
static void icl_setup_private_ppat(struct intel_uncore *uncore)
{
intel_uncore_write(uncore,
@@ -585,13 +583,16 @@ static void chv_setup_private_ppat(struct intel_uncore *uncore)
intel_uncore_write(uncore, GEN8_PRIVATE_PAT_HI, upper_32_bits(pat));
}
-void setup_private_pat(struct intel_uncore *uncore)
+void setup_private_pat(struct intel_gt *gt)
{
- struct drm_i915_private *i915 = uncore->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct drm_i915_private *i915 = gt->i915;
GEM_BUG_ON(GRAPHICS_VER(i915) < 8);
- if (GRAPHICS_VER(i915) >= 12)
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ xehp_setup_private_ppat(gt);
+ else if (GRAPHICS_VER(i915) >= 12)
tgl_setup_private_ppat(uncore);
else if (GRAPHICS_VER(i915) >= 11)
icl_setup_private_ppat(uncore);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index c0ca53cba9f0..4d75ba4bb41d 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -93,6 +93,7 @@ typedef u64 gen8_pte_t;
#define GEN12_GGTT_PTE_LM BIT_ULL(1)
#define GEN12_PDE_64K BIT(6)
+#define GEN12_PTE_PS64 BIT(8)
/*
* Cacheability Control is a 4-bit value. The low three bits are stored in bits
@@ -667,7 +668,7 @@ void ppgtt_unbind_vma(struct i915_address_space *vm,
void gtt_write_workarounds(struct intel_gt *gt);
-void setup_private_pat(struct intel_uncore *uncore);
+void setup_private_pat(struct intel_gt *gt);
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash,
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 3955292483a6..7771a19008c6 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -20,6 +20,30 @@
#include "intel_ring.h"
#include "shmem_utils.h"
+/*
+ * The per-platform tables are u8-encoded in @data. Decode @data and set the
+ * addresses' offset and commands in @regs. The following encoding is used
+ * for each byte. There are 2 steps: decoding commands and decoding addresses.
+ *
+ * Commands:
+ * [7]: create NOPs - the number of NOPs is set in the lower bits
+ * [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
+ * MI_LRI_FORCE_POSTED
+ * [5:0]: Number of NOPs or registers to set values to in case of
+ * MI_LOAD_REGISTER_IMM
+ *
+ * Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
+ * number of registers. They are set by using the REG/REG16 macros: the former
+ * is used for offsets smaller than 0x200 while the latter is for values bigger
+ * than that. Those macros already set all the bits documented below correctly:
+ *
+ * [7]: When a register offset needs more than 7 bits, use additional bytes, to
+ * follow, for the lower bits
+ * [6:0]: Register offset, without considering the engine base.
+ *
+ * This function only tweaks the commands and register offsets. Values are not
+ * filled out.
+ */
static void set_offsets(u32 *regs,
const u8 *data,
const struct intel_engine_cs *engine,
@@ -264,6 +288,39 @@ static const u8 dg2_xcs_offsets[] = {
END
};
+static const u8 mtl_xcs_offsets[] = { /* returned by reg_offsets() for IP_VER(12, 70)+ when I915_ENGINE_HAS_RCS_REG_STATE is not set */
+ NOP(1),
+ LRI(13, POSTED), /* 13 register offsets follow */
+ REG16(0x244), /* REG16: offset does not fit the single-byte REG form (see set_offsets() notes) */
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ NOP(4),
+
+ NOP(1),
+ LRI(9, POSTED), /* 9 register offsets follow, all in REG16 form */
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ END
+};
+
static const u8 gen8_rcs_offsets[] = {
NOP(1),
LRI(14, POSTED),
@@ -606,6 +663,49 @@ static const u8 dg2_rcs_offsets[] = {
END
};
+static const u8 mtl_rcs_offsets[] = { /* returned by reg_offsets() for IP_VER(12, 70)+ when I915_ENGINE_HAS_RCS_REG_STATE is set */
+ NOP(1),
+ LRI(15, POSTED), /* 15 register offsets follow */
+ REG16(0x244), /* REG16: offset does not fit the single-byte REG form (see set_offsets() notes) */
+ REG(0x034),
+ REG(0x030),
+ REG(0x038),
+ REG(0x03c),
+ REG(0x168),
+ REG(0x140),
+ REG(0x110),
+ REG(0x1c0),
+ REG(0x1c4),
+ REG(0x1c8),
+ REG(0x180),
+ REG16(0x2b4),
+ REG(0x120),
+ REG(0x124),
+
+ NOP(1),
+ LRI(9, POSTED), /* 9 register offsets follow, all in REG16 form */
+ REG16(0x3a8),
+ REG16(0x28c),
+ REG16(0x288),
+ REG16(0x284),
+ REG16(0x280),
+ REG16(0x27c),
+ REG16(0x278),
+ REG16(0x274),
+ REG16(0x270),
+
+ NOP(2),
+ LRI(2, POSTED), /* 2 register offsets follow */
+ REG16(0x5a8),
+ REG16(0x5ac),
+
+ NOP(6),
+ LRI(1, 0), /* single register; flags argument is 0 rather than POSTED */
+ REG(0x0c8),
+
+ END
+};
+
#undef END
#undef REG16
#undef REG
@@ -624,7 +724,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
!intel_engine_has_relative_mmio(engine));
if (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE) {
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
+ return mtl_rcs_offsets;
+ else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_rcs_offsets;
else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
return xehp_rcs_offsets;
@@ -637,7 +739,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
else
return gen8_rcs_offsets;
} else {
- if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 70))
+ return mtl_xcs_offsets;
+ else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
return dg2_xcs_offsets;
else if (GRAPHICS_VER(engine->i915) >= 12)
return gen12_xcs_offsets;
@@ -745,19 +849,18 @@ static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine)
static u32
lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
{
- switch (GRAPHICS_VER(engine->i915)) {
- default:
- MISSING_CASE(GRAPHICS_VER(engine->i915));
- fallthrough;
- case 12:
+ if (GRAPHICS_VER(engine->i915) >= 12)
return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 11:
+ else if (GRAPHICS_VER(engine->i915) >= 11)
return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 9:
+ else if (GRAPHICS_VER(engine->i915) >= 9)
return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- case 8:
+ else if (GRAPHICS_VER(engine->i915) >= 8)
return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
- }
+
+ GEM_BUG_ON(GRAPHICS_VER(engine->i915) < 8);
+
+ return 0;
}
static void
@@ -1012,7 +1115,7 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
context_size += I915_GTT_PAGE_SIZE; /* for redzone */
- if (GRAPHICS_VER(engine->i915) == 12) {
+ if (GRAPHICS_VER(engine->i915) >= 12) {
ce->wa_bb_page = context_size / PAGE_SIZE;
context_size += PAGE_SIZE;
}
@@ -1718,24 +1821,16 @@ void lrc_init_wa_ctx(struct intel_engine_cs *engine)
unsigned int i;
int err;
- if (!(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
+ if (GRAPHICS_VER(engine->i915) >= 11 ||
+ !(engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
return;
- switch (GRAPHICS_VER(engine->i915)) {
- case 12:
- case 11:
- return;
- case 9:
+ if (GRAPHICS_VER(engine->i915) == 9) {
wa_bb_fn[0] = gen9_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
- break;
- case 8:
+ } else if (GRAPHICS_VER(engine->i915) == 8) {
wa_bb_fn[0] = gen8_init_indirectctx_bb;
wa_bb_fn[1] = NULL;
- break;
- default:
- MISSING_CASE(GRAPHICS_VER(engine->i915));
- return;
}
err = lrc_create_wa_ctx(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index a390f0813c8b..7111bae759f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -110,6 +110,8 @@ enum {
#define XEHP_SW_CTX_ID_WIDTH 16
#define XEHP_SW_COUNTER_SHIFT 58
#define XEHP_SW_COUNTER_WIDTH 6
+#define GEN12_GUC_SW_CTX_ID_SHIFT 39
+#define GEN12_GUC_SW_CTX_ID_WIDTH 16
static inline void lrc_runtime_start(struct intel_context *ce)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c
index aaaf1906026c..b405a04135ca 100644
--- a/drivers/gpu/drm/i915/gt/intel_migrate.c
+++ b/drivers/gpu/drm/i915/gt/intel_migrate.c
@@ -10,6 +10,7 @@
#include "intel_gtt.h"
#include "intel_migrate.h"
#include "intel_ring.h"
+#include "gem/i915_gem_lmem.h"
struct insert_pte_data {
u64 offset;
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 152244d7f62a..49fdd509527a 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -7,6 +7,7 @@
#include "intel_engine.h"
#include "intel_gt.h"
+#include "intel_gt_mcr.h"
#include "intel_gt_regs.h"
#include "intel_mocs.h"
#include "intel_ring.h"
@@ -609,14 +610,17 @@ static u32 l3cc_combine(u16 low, u16 high)
0; \
i++)
-static void init_l3cc_table(struct intel_uncore *uncore,
+static void init_l3cc_table(struct intel_gt *gt,
const struct drm_i915_mocs_table *table)
{
unsigned int i;
u32 l3cc;
for_each_l3cc(l3cc, table, i)
- intel_uncore_write_fw(uncore, GEN9_LNCFCMOCS(i), l3cc);
+ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
+ intel_gt_mcr_multicast_write_fw(gt, XEHP_LNCFCMOCS(i), l3cc);
+ else
+ intel_uncore_write_fw(gt->uncore, GEN9_LNCFCMOCS(i), l3cc);
}
void intel_mocs_init_engine(struct intel_engine_cs *engine)
@@ -636,7 +640,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine)
init_mocs_table(engine, &table);
if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS)
- init_l3cc_table(engine->uncore, &table);
+ init_l3cc_table(engine->gt, &table);
}
static u32 global_mocs_offset(void)
@@ -672,7 +676,7 @@ void intel_mocs_init(struct intel_gt *gt)
* memory transactions including guc transactions
*/
if (flags & HAS_RENDER_L3CC)
- init_l3cc_table(gt->uncore, &table);
+ init_l3cc_table(gt, &table);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index b36674356986..3159df6cdd49 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -1278,7 +1278,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
/* Use a watchdog to ensure that our reset completes */
- intel_wedge_on_timeout(&w, gt, 5 * HZ) {
+ intel_wedge_on_timeout(&w, gt, 60 * HZ) {
intel_display_prepare_reset(gt->i915);
intel_gt_reset(gt, engine_mask, reason);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c
index 6b86250c31ab..6c34a83c24b3 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -625,9 +625,7 @@ static void gen5_rps_disable(struct intel_rps *rps)
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
/* Ack interrupts, disable EFC interrupt */
- intel_uncore_write(uncore, MEMINTREN,
- intel_uncore_read(uncore, MEMINTREN) &
- ~MEMINT_EVAL_CHG_EN);
+ intel_uncore_rmw(uncore, MEMINTREN, MEMINT_EVAL_CHG_EN, 0);
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
/* Go back to the starting frequency */
@@ -1016,9 +1014,15 @@ void intel_rps_boost(struct i915_request *rq)
if (rps_uses_slpc(rps)) {
slpc = rps_to_slpc(rps);
+ if (slpc->min_freq_softlimit >= slpc->boost_freq)
+ return;
+
/* Return if old value is non zero */
- if (!atomic_fetch_inc(&slpc->num_waiters))
+ if (!atomic_fetch_inc(&slpc->num_waiters)) {
+ GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
+ rq->fence.context, rq->fence.seqno);
schedule_work(&slpc->boost_work);
+ }
return;
}
@@ -1085,15 +1089,25 @@ static u32 intel_rps_read_state_cap(struct intel_rps *rps)
return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
}
-/**
- * gen6_rps_get_freq_caps - Get freq caps exposed by HW
- * @rps: the intel_rps structure
- * @caps: returned freq caps
- *
- * Returned "caps" frequencies should be converted to MHz using
- * intel_gpu_freq()
- */
-void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+/* Fill @caps from the MTL capability and RPe frequency registers of this GT */
+static void
+mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+	struct intel_uncore *uncore = rps_to_uncore(rps);
+	u32 state_cap, rpe_freq;
+
+	/* A GT_MEDIA type GT reads a different register pair */
+	if (rps_to_gt(rps)->type == GT_MEDIA) {
+		state_cap = intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP);
+		rpe_freq = intel_uncore_read(uncore, MTL_MPE_FREQUENCY);
+	} else {
+		state_cap = intel_uncore_read(uncore, MTL_RP_STATE_CAP);
+		rpe_freq = intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
+	}
+
+	/* MTL values are in units of 16.67 MHz */
+	caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, state_cap);
+	caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, state_cap);
+	caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe_freq);
+}
+
+static void
+__gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
u32 rp_state_cap;
@@ -1128,6 +1142,24 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c
}
}
+/**
+ * gen6_rps_get_freq_caps - Get freq caps exposed by HW
+ * @rps: the intel_rps structure
+ * @caps: returned freq caps
+ *
+ * Returned "caps" frequencies should be converted to MHz using
+ * intel_gpu_freq()
+ *
+ * Meteor Lake is handled by mtl_get_freq_caps(); every other platform goes
+ * through __gen6_rps_get_freq_caps().
+ */
+void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
+{
+	struct drm_i915_private *i915 = rps_to_i915(rps);
+
+	/* Both helpers return void, so call them instead of "return"-ing them */
+	if (IS_METEORLAKE(i915))
+		mtl_get_freq_caps(rps, caps);
+	else
+		__gen6_rps_get_freq_caps(rps, caps);
+}
+
static void gen6_rps_init(struct intel_rps *rps)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
@@ -2191,6 +2223,213 @@ u32 intel_rps_get_rpn_frequency(struct intel_rps *rps)
return intel_gpu_freq(rps, rps->min_freq);
}
+static void rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps_freq_caps caps;
+ u32 rp_state_limits;
+ u32 gt_perf_status;
+ u32 rpmodectl, rpinclimit, rpdeclimit;
+ u32 rpstat, cagf, reqf;
+ u32 rpcurupei, rpcurup, rpprevup;
+ u32 rpcurdownei, rpcurdown, rpprevdown;
+ u32 rpupei, rpupt, rpdownei, rpdownt;
+ u32 pm_ier, pm_imr, pm_isr, pm_iir, pm_mask;
+
+ rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS);
+ gen6_rps_get_freq_caps(rps, &caps);
+ if (IS_GEN9_LP(i915))
+ gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS);
+ else
+ gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS);
+
+ /* RPSTAT1 is in the GT power well */
+ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+
+ reqf = intel_uncore_read(uncore, GEN6_RPNSWREQ);
+ if (GRAPHICS_VER(i915) >= 9) {
+ reqf >>= 23;
+ } else {
+ reqf &= ~GEN6_TURBO_DISABLE;
+ if (IS_HASWELL(i915) || IS_BROADWELL(i915))
+ reqf >>= 24;
+ else
+ reqf >>= 25;
+ }
+ reqf = intel_gpu_freq(rps, reqf);
+
+ rpmodectl = intel_uncore_read(uncore, GEN6_RP_CONTROL);
+ rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+ rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
+ rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & GEN6_CURICONT_MASK;
+ rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & GEN6_CURBSYTAVG_MASK;
+ rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & GEN6_CURBSYTAVG_MASK;
+ rpcurdownei = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN_EI) & GEN6_CURIAVG_MASK;
+ rpcurdown = intel_uncore_read(uncore, GEN6_RP_CUR_DOWN) & GEN6_CURBSYTAVG_MASK;
+ rpprevdown = intel_uncore_read(uncore, GEN6_RP_PREV_DOWN) & GEN6_CURBSYTAVG_MASK;
+
+ rpupei = intel_uncore_read(uncore, GEN6_RP_UP_EI);
+ rpupt = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
+
+ rpdownei = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
+ rpdownt = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
+
+ cagf = intel_rps_read_actual_frequency(rps);
+
+ intel_uncore_forcewake_put(uncore, FORCEWAKE_ALL);
+
+ if (GRAPHICS_VER(i915) >= 11) {
+ pm_ier = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_ENABLE);
+ pm_imr = intel_uncore_read(uncore, GEN11_GPM_WGBOXPERF_INTR_MASK);
+ /*
+ * The equivalent to the PM ISR & IIR cannot be read
+ * without affecting the current state of the system
+ */
+ pm_isr = 0;
+ pm_iir = 0;
+ } else if (GRAPHICS_VER(i915) >= 8) {
+ pm_ier = intel_uncore_read(uncore, GEN8_GT_IER(2));
+ pm_imr = intel_uncore_read(uncore, GEN8_GT_IMR(2));
+ pm_isr = intel_uncore_read(uncore, GEN8_GT_ISR(2));
+ pm_iir = intel_uncore_read(uncore, GEN8_GT_IIR(2));
+ } else {
+ pm_ier = intel_uncore_read(uncore, GEN6_PMIER);
+ pm_imr = intel_uncore_read(uncore, GEN6_PMIMR);
+ pm_isr = intel_uncore_read(uncore, GEN6_PMISR);
+ pm_iir = intel_uncore_read(uncore, GEN6_PMIIR);
+ }
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ drm_printf(p, "Video Turbo Mode: %s\n",
+ str_yes_no(rpmodectl & GEN6_RP_MEDIA_TURBO));
+ drm_printf(p, "HW control enabled: %s\n",
+ str_yes_no(rpmodectl & GEN6_RP_ENABLE));
+ drm_printf(p, "SW control enabled: %s\n",
+ str_yes_no((rpmodectl & GEN6_RP_MEDIA_MODE_MASK) == GEN6_RP_MEDIA_SW_MODE));
+
+ drm_printf(p, "PM IER=0x%08x IMR=0x%08x, MASK=0x%08x\n",
+ pm_ier, pm_imr, pm_mask);
+ if (GRAPHICS_VER(i915) <= 10)
+ drm_printf(p, "PM ISR=0x%08x IIR=0x%08x\n",
+ pm_isr, pm_iir);
+ drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ drm_printf(p, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
+ drm_printf(p, "Render p-state ratio: %d\n",
+ (gt_perf_status & (GRAPHICS_VER(i915) >= 9 ? 0x1ff00 : 0xff00)) >> 8);
+ drm_printf(p, "Render p-state VID: %d\n",
+ gt_perf_status & 0xff);
+ drm_printf(p, "Render p-state limit: %d\n",
+ rp_state_limits & 0xff);
+ drm_printf(p, "RPSTAT1: 0x%08x\n", rpstat);
+ drm_printf(p, "RPMODECTL: 0x%08x\n", rpmodectl);
+ drm_printf(p, "RPINCLIMIT: 0x%08x\n", rpinclimit);
+ drm_printf(p, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
+ drm_printf(p, "RPNSWREQ: %dMHz\n", reqf);
+ drm_printf(p, "CAGF: %dMHz\n", cagf);
+ drm_printf(p, "RP CUR UP EI: %d (%lldns)\n",
+ rpcurupei,
+ intel_gt_pm_interval_to_ns(gt, rpcurupei));
+ drm_printf(p, "RP CUR UP: %d (%lldns)\n",
+ rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
+ drm_printf(p, "RP PREV UP: %d (%lldns)\n",
+ rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
+ drm_printf(p, "Up threshold: %d%%\n",
+ rps->power.up_threshold);
+ drm_printf(p, "RP UP EI: %d (%lldns)\n",
+ rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
+ drm_printf(p, "RP UP THRESHOLD: %d (%lldns)\n",
+ rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
+
+ drm_printf(p, "RP CUR DOWN EI: %d (%lldns)\n",
+ rpcurdownei,
+ intel_gt_pm_interval_to_ns(gt, rpcurdownei));
+ drm_printf(p, "RP CUR DOWN: %d (%lldns)\n",
+ rpcurdown,
+ intel_gt_pm_interval_to_ns(gt, rpcurdown));
+ drm_printf(p, "RP PREV DOWN: %d (%lldns)\n",
+ rpprevdown,
+ intel_gt_pm_interval_to_ns(gt, rpprevdown));
+ drm_printf(p, "Down threshold: %d%%\n",
+ rps->power.down_threshold);
+ drm_printf(p, "RP DOWN EI: %d (%lldns)\n",
+ rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
+ drm_printf(p, "RP DOWN THRESHOLD: %d (%lldns)\n",
+ rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
+
+ drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.min_freq));
+ drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp1_freq));
+ drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp0_freq));
+ drm_printf(p, "Max overclocked frequency: %dMHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+
+ drm_printf(p, "Current freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->cur_freq));
+ drm_printf(p, "Actual freq: %d MHz\n", cagf);
+ drm_printf(p, "Idle freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->idle_freq));
+ drm_printf(p, "Min freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->min_freq));
+ drm_printf(p, "Boost freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->boost_freq));
+ drm_printf(p, "Max freq: %d MHz\n",
+ intel_gpu_freq(rps, rps->max_freq));
+ drm_printf(p,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, rps->efficient_freq));
+}
+
+static void slpc_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+ struct intel_gt *gt = rps_to_gt(rps);
+ struct intel_uncore *uncore = gt->uncore;
+ struct intel_rps_freq_caps caps;
+ u32 pm_mask;
+
+ gen6_rps_get_freq_caps(rps, &caps);
+ pm_mask = intel_uncore_read(uncore, GEN6_PMINTRMSK);
+
+ drm_printf(p, "PM MASK=0x%08x\n", pm_mask);
+ drm_printf(p, "pm_intrmsk_mbz: 0x%08x\n",
+ rps->pm_intrmsk_mbz);
+ drm_printf(p, "RPSTAT1: 0x%08x\n", intel_uncore_read(uncore, GEN6_RPSTAT1));
+ drm_printf(p, "RPNSWREQ: %dMHz\n", intel_rps_get_requested_frequency(rps));
+ drm_printf(p, "Lowest (RPN) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.min_freq));
+ drm_printf(p, "Nominal (RP1) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp1_freq));
+ drm_printf(p, "Max non-overclocked (RP0) frequency: %dMHz\n",
+ intel_gpu_freq(rps, caps.rp0_freq));
+ drm_printf(p, "Current freq: %d MHz\n",
+ intel_rps_get_requested_frequency(rps));
+ drm_printf(p, "Actual freq: %d MHz\n",
+ intel_rps_read_actual_frequency(rps));
+ drm_printf(p, "Min freq: %d MHz\n",
+ intel_rps_get_min_frequency(rps));
+ drm_printf(p, "Boost freq: %d MHz\n",
+ intel_rps_get_boost_frequency(rps));
+ drm_printf(p, "Max freq: %d MHz\n",
+ intel_rps_get_max_frequency(rps));
+ drm_printf(p,
+ "efficient (RPe) frequency: %d MHz\n",
+ intel_gpu_freq(rps, caps.rp1_freq));
+}
+
+void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p)
+{
+ if (rps_uses_slpc(rps))
+ return slpc_frequency_dump(rps, p);
+ else
+ return rps_frequency_dump(rps, p);
+}
+
static int set_max_freq(struct intel_rps *rps, u32 val)
{
struct drm_i915_private *i915 = rps_to_i915(rps);
diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h
index 4509dfdc52e0..110300dfd438 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.h
+++ b/drivers/gpu/drm/i915/gt/intel_rps.h
@@ -10,6 +10,7 @@
#include "i915_reg_defs.h"
struct i915_request;
+struct drm_printer;
void intel_rps_init_early(struct intel_rps *rps);
void intel_rps_init(struct intel_rps *rps);
@@ -54,6 +55,8 @@ void intel_rps_lower_unslice(struct intel_rps *rps);
u32 intel_rps_read_throttle_reason(struct intel_rps *rps);
bool rps_read_mask_mmio(struct intel_rps *rps, i915_reg_t reg32, u32 mask);
+void gen6_rps_frequency_dump(struct intel_rps *rps, struct drm_printer *p);
+
void gen5_rps_irq_handler(struct intel_rps *rps);
void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
void gen11_rps_irq_handler(struct intel_rps *rps, u32 pm_iir);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 66f21c735d54..6c6198a257ac 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -677,8 +677,8 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
* If i915/perf is active, we want a stable powergating configuration
* on the system. Use the configuration pinned by i915/perf.
*/
- if (i915->perf.exclusive_stream)
- req_sseu = &i915->perf.sseu;
+ if (gt->perf.exclusive_stream)
+ req_sseu = &gt->perf.sseu;
slices = hweight8(req_sseu->slice_mask);
subslices = hweight8(req_sseu->subslice_mask);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index a821e3d405db..3cdf5c24dbc5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -166,6 +166,21 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
_wa_add(wal, &wa);
}
+static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
+ u32 clear, u32 set, u32 read_mask, bool masked_reg)
+{
+ struct i915_wa wa = {
+ .mcr_reg = reg,
+ .clr = clear,
+ .set = set,
+ .read = read_mask,
+ .masked_reg = masked_reg,
+ .is_mcr = 1,
+ };
+
+ _wa_add(wal, &wa);
+}
+
static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
@@ -173,6 +188,12 @@ wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
}
static void
+wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
+{
+ wa_mcr_add(wal, reg, clear, set, clear, false);
+}
+
+static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
wa_write_clr_set(wal, reg, ~0, set);
@@ -185,11 +206,23 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
+wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
+{
+ wa_mcr_write_clr_set(wal, reg, set, set);
+}
+
+static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
wa_write_clr_set(wal, reg, clr, 0);
}
+static void
+wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
+{
+ wa_mcr_write_clr_set(wal, reg, clr, 0);
+}
+
/*
* WA operations on "masked register". A masked register has the upper 16 bits
* documented as "masked" in b-spec. Its purpose is to allow writing to just a
@@ -208,18 +241,37 @@ wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
}
static void
+wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
+{
+ wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
+}
+
+static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}
static void
+wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
+{
+ wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
+}
+
+static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
u32 mask, u32 val)
{
wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}
+static void
+wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
+ u32 mask, u32 val)
+{
+ wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
+}
+
static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
@@ -241,8 +293,8 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);
/* WaDisablePartialInstShootdown:bdw,chv */
- wa_masked_en(wal, GEN8_ROW_CHICKEN,
- PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
/* Use Force Non-Coherent whenever executing a 3D context. This is a
* workaround for a possible hang in the unlikely event a TLB
@@ -288,18 +340,18 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
gen8_ctx_workarounds_init(engine, wal);
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
- wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
/* WaDisableDopClockGating:bdw
*
* Also see the related UCGTCL1 write in bdw_init_clock_gating()
* to disable EUTC clock gating.
*/
- wa_masked_en(wal, GEN7_ROW_CHICKEN2,
- DOP_CLOCK_GATING_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+ DOP_CLOCK_GATING_DISABLE);
- wa_masked_en(wal, HALF_SLICE_CHICKEN3,
- GEN8_SAMPLER_POWER_BYPASS_DIS);
+ wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+ GEN8_SAMPLER_POWER_BYPASS_DIS);
wa_masked_en(wal, HDC_CHICKEN0,
/* WaForceContextSaveRestoreNonCoherent:bdw */
@@ -314,7 +366,7 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
gen8_ctx_workarounds_init(engine, wal);
/* WaDisableThreadStallDopClockGating:chv */
- wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
/* Improve HiZ throughput on CHV. */
wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
@@ -333,21 +385,21 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
*/
wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN9_PBE_COMPRESSED_HASH_SELECTION);
- wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
+ wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
}
/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
- wa_masked_en(wal, GEN8_ROW_CHICKEN,
- FLOW_CONTROL_ENABLE |
- PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+ FLOW_CONTROL_ENABLE |
+ PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
- wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- GEN9_ENABLE_YV12_BUGFIX |
- GEN9_ENABLE_GPGPU_PREEMPTION);
+ wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+ GEN9_ENABLE_YV12_BUGFIX |
+ GEN9_ENABLE_GPGPU_PREEMPTION);
/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
@@ -356,8 +408,8 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
- wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
- GEN9_CCS_TLB_PREFETCH_ENABLE);
+ wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
+ GEN9_CCS_TLB_PREFETCH_ENABLE);
/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
wa_masked_en(wal, HDC_CHICKEN0,
@@ -386,11 +438,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
IS_KABYLAKE(i915) ||
IS_COFFEELAKE(i915) ||
IS_COMETLAKE(i915))
- wa_masked_en(wal, HALF_SLICE_CHICKEN3,
- GEN8_SAMPLER_POWER_BYPASS_DIS);
+ wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
+ GEN8_SAMPLER_POWER_BYPASS_DIS);
/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
- wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
+ wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
/*
* Supporting preemption with fine-granularity requires changes in the
@@ -469,8 +521,8 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal);
/* WaDisableThreadStallDopClockGating:bxt */
- wa_masked_en(wal, GEN8_ROW_CHICKEN,
- STALL_DOP_GATING_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+ STALL_DOP_GATING_DISABLE);
/* WaToEnableHwFixForPushConstHWBug:bxt */
wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
@@ -490,8 +542,8 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* WaDisableSbeCacheDispatchPortSharing:kbl */
- wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
- GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+ GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}
static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -514,8 +566,8 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* WaDisableSbeCacheDispatchPortSharing:cfl */
- wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
- GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
+ wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+ GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -534,13 +586,13 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
* (the register is whitelisted in hardware now, so UMDs can opt in
* for coherency if they have a good reason).
*/
- wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
+ wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
/* WaEnableFloatBlendOptimization:icl */
- wa_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
- 0 /* write-only, so skip validation */,
- true);
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
+ 0 /* write-only, so skip validation */,
+ true);
/* WaDisableGPGPUMidThreadPreemption:icl */
wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
@@ -548,8 +600,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
/* allow headerless messages for preemptible GPGPU context */
- wa_masked_en(wal, GEN10_SAMPLER_MODE,
- GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
+ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+ GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
/* Wa_1604278689:icl,ehl */
wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
@@ -558,7 +610,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
0xFFFFFFFF);
/* Wa_1406306137:icl,ehl */
- wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}
/*
@@ -569,13 +621,13 @@ static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
wa_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
- wa_write_clr_set(wal, GEN11_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
- REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
- wa_add(wal,
- FF_MODE2,
- FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128,
- 0, false);
+ wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
+ REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
+ wa_mcr_add(wal,
+ XEHP_FF_MODE2,
+ FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_TDS_TIMER_128,
+ 0, false);
}
/*
@@ -599,7 +651,7 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
* verification is ignored.
*/
wa_add(wal,
- FF_MODE2,
+ GEN12_FF_MODE2,
FF_MODE2_TDS_TIMER_MASK,
FF_MODE2_TDS_TIMER_128,
0, false);
@@ -608,6 +660,8 @@ static void gen12_ctx_gt_tuning_init(struct intel_engine_cs *engine,
static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
+ struct drm_i915_private *i915 = engine->i915;
+
gen12_ctx_gt_tuning_init(engine, wal);
/*
@@ -637,10 +691,14 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
* to Wa_1608008084.
*/
wa_add(wal,
- FF_MODE2,
+ GEN12_FF_MODE2,
FF_MODE2_GS_TIMER_MASK,
FF_MODE2_GS_TIMER_224,
0, false);
+
+ if (!IS_DG1(i915))
+ /* Wa_1806527549 */
+ wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);
}
static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -664,27 +722,27 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_16011186671:dg2_g11 */
if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
- wa_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
- wa_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
+ wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH);
+ wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE);
}
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
/* Wa_14010469329:dg2_g10 */
- wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
- XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
+ wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+ XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE);
/*
* Wa_22010465075:dg2_g10
* Wa_22010613112:dg2_g10
* Wa_14010698770:dg2_g10
*/
- wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
- GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
+ wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3,
+ GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
}
/* Wa_16013271637:dg2 */
- wa_masked_en(wal, SLICE_COMMON_ECO_CHICKEN1,
- MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
+ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
+ MSC_MSAA_REODER_BUF_BYPASS_DISABLE);
/* Wa_14014947963:dg2 */
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) ||
@@ -1076,18 +1134,23 @@ static void __set_mcr_steering(struct i915_wa_list *wal,
wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
}
-static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
- unsigned int slice, unsigned int subslice)
+static void debug_dump_steering(struct intel_gt *gt)
{
struct drm_printer p = drm_debug_printer("MCR Steering:");
+ if (drm_debug_enabled(DRM_UT_DRIVER))
+ intel_gt_mcr_report_steering(&p, gt, false);
+}
+
+static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
+ unsigned int slice, unsigned int subslice)
+{
__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);
gt->default_steering.groupid = slice;
gt->default_steering.instanceid = subslice;
- if (drm_debug_enabled(DRM_UT_DRIVER))
- intel_gt_mcr_report_steering(&p, gt, false);
+ debug_dump_steering(gt);
}
static void
@@ -1181,6 +1244,9 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
gt->steering_table[MSLICE] = NULL;
}
+ if (IS_XEHPSDV(gt->i915) && slice_mask & BIT(0))
+ gt->steering_table[GAM] = NULL;
+
slice = __ffs(slice_mask);
subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
GEN_DSS_PER_GSLICE;
@@ -1198,6 +1264,13 @@ xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
*/
__set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
__set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);
+
+ /*
+ * On DG2, GAM registers have a dedicated steering control register
+ * and must always be programmed to a hardcoded groupid of "1."
+ */
+ if (IS_DG2(gt->i915))
+ __set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
}
static void
@@ -1254,22 +1327,22 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
PSDUNIT_CLKGATE_DIS);
/* Wa_1406680159:icl,ehl */
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE,
- GWUNIT_CLKGATE_DIS);
+ wa_mcr_write_or(wal,
+ GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+ GWUNIT_CLKGATE_DIS);
/* Wa_1607087056:icl,ehl,jsl */
if (IS_ICELAKE(i915) ||
IS_JSL_EHL_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
+ GEN11_SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/*
* This is not a documented workaround, but rather an optimization
* to reduce sampler power.
*/
- wa_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+ wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}
/*
@@ -1303,7 +1376,7 @@ gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
wa_14011060649(gt, wal);
/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
- wa_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
+ wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}
static void
@@ -1315,14 +1388,14 @@ tgl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_1409420604:tgl */
if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE2,
- CPSSUNIT_CLKGATE_DIS);
+ wa_mcr_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
/* Wa_1607087056:tgl also know as BUG:1409180338 */
if (IS_TGL_UY_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
+ GEN11_SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/* Wa_1408615072:tgl[a0] */
@@ -1341,14 +1414,14 @@ dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
/* Wa_1607087056:dg1 */
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
wa_write_or(wal,
- SLICE_UNIT_LEVEL_CLKGATE,
+ GEN11_SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/* Wa_1409420604:dg1 */
if (IS_DG1(i915))
- wa_write_or(wal,
- SUBSLICE_UNIT_LEVEL_CLKGATE2,
- CPSSUNIT_CLKGATE_DIS);
+ wa_mcr_write_or(wal,
+ SUBSLICE_UNIT_LEVEL_CLKGATE2,
+ CPSSUNIT_CLKGATE_DIS);
/* Wa_1408615072:dg1 */
/* Empirical testing shows this register is unaffected by engine reset. */
@@ -1365,7 +1438,7 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
xehp_init_mcr(gt, wal);
/* Wa_1409757795:xehpsdv */
- wa_write_or(wal, SCCGCTL94DC, CG3DDISURB);
+ wa_mcr_write_or(wal, SCCGCTL94DC, CG3DDISURB);
/* Wa_16011155590:xehpsdv */
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A0, STEP_B0))
@@ -1445,8 +1518,8 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
CG3DDISCFEG_CLKGATE_DIS);
/* Wa_14011006942:dg2 */
- wa_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE,
- DSS_ROUTER_CLKGATE_DIS);
+ wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
+ DSS_ROUTER_CLKGATE_DIS);
}
if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) {
@@ -1457,7 +1530,7 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS);
/* Wa_14011371254:dg2_g10 */
- wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
+ wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS);
/* Wa_14011431319:dg2_g10 */
wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS |
@@ -1493,21 +1566,21 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
GAMEDIA_CLKGATE_DIS);
/* Wa_14011028019:dg2_g10 */
- wa_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
+ wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS);
}
/* Wa_14014830051:dg2 */
- wa_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
+ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);
/*
* The following are not actually "workarounds" but rather
* recommended tuning settings documented in the bspec's
* performance guide section.
*/
- wa_write_or(wal, GEN12_SQCM, EN_32B_ACCESS);
+ wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
/* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+ wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
}
static void
@@ -1516,7 +1589,27 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
pvc_init_mcr(gt, wal);
/* Wa_14015795083 */
- wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+ wa_mcr_write_clr(wal, GEN8_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
+}
+
+static void
+xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+ /* FIXME: Actual workarounds will be added in future patch(es) */
+
+ /*
+ * Unlike older platforms, we no longer setup implicit steering here;
+ * all MCR accesses are explicitly steered.
+ */
+ debug_dump_steering(gt);
+}
+
+static void
+xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+ /* FIXME: Actual workarounds will be added in future patch(es) */
+
+ debug_dump_steering(gt);
}
static void
@@ -1524,7 +1617,18 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
struct drm_i915_private *i915 = gt->i915;
- if (IS_PONTEVECCHIO(i915))
+ if (gt->type == GT_MEDIA) {
+ if (MEDIA_VER(i915) >= 13)
+ xelpmp_gt_workarounds_init(gt, wal);
+ else
+ MISSING_CASE(MEDIA_VER(i915));
+
+ return;
+ }
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ xelpg_gt_workarounds_init(gt, wal);
+ else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);
else if (IS_DG2(i915))
dg2_gt_workarounds_init(gt, wal);
@@ -1628,14 +1732,25 @@ wa_list_apply(struct intel_gt *gt, const struct i915_wa_list *wal)
u32 val, old = 0;
/* open-coded rmw due to steering */
- old = wa->clr ? intel_gt_mcr_read_any_fw(gt, wa->reg) : 0;
+ if (wa->clr)
+ old = wa->is_mcr ?
+ intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+ intel_uncore_read_fw(uncore, wa->reg);
val = (old & ~wa->clr) | wa->set;
- if (val != old || !wa->clr)
- intel_uncore_write_fw(uncore, wa->reg, val);
+ if (val != old || !wa->clr) {
+ if (wa->is_mcr)
+ intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
+ else
+ intel_uncore_write_fw(uncore, wa->reg, val);
+ }
+
+ if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+ u32 val = wa->is_mcr ?
+ intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+ intel_uncore_read_fw(uncore, wa->reg);
- if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
- wa_verify(wa, intel_gt_mcr_read_any_fw(gt, wa->reg),
- wal->name, "application");
+ wa_verify(wa, val, wal->name, "application");
+ }
}
intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1664,8 +1779,9 @@ static bool wa_list_verify(struct intel_gt *gt,
intel_uncore_forcewake_get__locked(uncore, fw);
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- ok &= wa_verify(wa,
- intel_gt_mcr_read_any_fw(gt, wa->reg),
+ ok &= wa_verify(wa, wa->is_mcr ?
+ intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
+ intel_uncore_read_fw(uncore, wa->reg),
wal->name, from);
intel_uncore_forcewake_put__locked(uncore, fw);
@@ -1712,11 +1828,35 @@ whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
}
static void
+whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
+{
+ struct i915_wa wa = {
+ .mcr_reg = reg,
+ .is_mcr = 1,
+ };
+
+ if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
+ return;
+
+ if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
+ return;
+
+ wa.mcr_reg.reg |= flags;
+ _wa_add(wal, &wa);
+}
+
+static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}
+static void
+whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
+{
+ whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
+}
+
static void gen9_whitelist_build(struct i915_wa_list *w)
{
/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
@@ -1742,7 +1882,7 @@ static void skl_whitelist_build(struct intel_engine_cs *engine)
gen9_whitelist_build(w);
/* WaDisableLSQCROPERFforOCL:skl */
- whitelist_reg(w, GEN8_L3SQCREG4);
+ whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}
static void bxt_whitelist_build(struct intel_engine_cs *engine)
@@ -1763,7 +1903,7 @@ static void kbl_whitelist_build(struct intel_engine_cs *engine)
gen9_whitelist_build(w);
/* WaDisableLSQCROPERFforOCL:kbl */
- whitelist_reg(w, GEN8_L3SQCREG4);
+ whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}
static void glk_whitelist_build(struct intel_engine_cs *engine)
@@ -1828,10 +1968,10 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
switch (engine->class) {
case RENDER_CLASS:
/* WaAllowUMDToModifyHalfSliceChicken7:icl */
- whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);
+ whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);
/* WaAllowUMDToModifySamplerMode:icl */
- whitelist_reg(w, GEN10_SAMPLER_MODE);
+ whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);
/* WaEnableStateCacheRedirectToCS:icl */
whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
@@ -2107,24 +2247,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14013392000:dg2_g11 */
- wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
-
- /* Wa_16011620976:dg2_g11 */
- wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE);
}
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
/* Wa_1509727124:dg2 */
- wa_masked_en(wal, GEN10_SAMPLER_MODE,
- SC_DISABLE_POWER_OPTIMIZATION_EBB);
+ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
+ SC_DISABLE_POWER_OPTIMIZATION_EBB);
}
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14012419201:dg2 */
- wa_masked_en(wal, GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4,
+ GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX);
}
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
@@ -2133,13 +2270,13 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_22012826095:dg2
* Wa_22013059131:dg2
*/
- wa_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
- MAXREQS_PER_BANK,
- REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
+ wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
+ MAXREQS_PER_BANK,
+ REG_FIELD_PREP(MAXREQS_PER_BANK, 2));
/* Wa_22013059131:dg2 */
- wa_write_or(wal, LSC_CHICKEN_BIT_0,
- FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
+ FORCE_1_SUB_MESSAGE_PER_FRAGMENT);
}
/* Wa_1308578152:dg2_g10 when first gslice is fused off */
@@ -2152,19 +2289,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) ||
IS_DG2_G11(i915) || IS_DG2_G12(i915)) {
/* Wa_22013037850:dg2 */
- wa_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
- DISABLE_128B_EVICTION_COMMAND_UDW);
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
+ DISABLE_128B_EVICTION_COMMAND_UDW);
/* Wa_22012856258:dg2 */
- wa_masked_en(wal, GEN7_ROW_CHICKEN2,
- GEN12_DISABLE_READ_SUPPRESSION);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+ GEN12_DISABLE_READ_SUPPRESSION);
/*
* Wa_22010960976:dg2
* Wa_14013347512:dg2
*/
- wa_masked_dis(wal, GEN12_HDC_CHICKEN0,
- LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
+ wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
+ LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
}
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
@@ -2172,8 +2309,8 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_1608949956:dg2_g10
* Wa_14010198302:dg2_g10
*/
- wa_masked_en(wal, GEN8_ROW_CHICKEN,
- MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
+ MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE);
/*
* Wa_14010918519:dg2_g10
@@ -2181,31 +2318,31 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping,
* so ignoring verification.
*/
- wa_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
- FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
- 0, false);
+ wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0,
+ FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE,
+ 0, false);
}
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
/* Wa_22010430635:dg2 */
- wa_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
+ wa_mcr_masked_en(wal,
+ GEN9_ROW_CHICKEN4,
+ GEN12_DISABLE_GRF_CLEAR);
/* Wa_14010648519:dg2 */
- wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+ wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}
/* Wa_14013202645:dg2 */
if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) ||
IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0))
- wa_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
+ wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY);
/* Wa_22012532006:dg2 */
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) ||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0))
- wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
- DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
+ wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
+ DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA);
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) {
/* Wa_14010680813:dg2_g10 */
@@ -2216,17 +2353,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0) ||
IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) {
/* Wa_14012362059:dg2 */
- wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
}
if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
IS_DG2_G10(i915)) {
/* Wa_22014600077:dg2 */
- wa_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
- 0 /* Wa_14012342262 :write-only reg, so skip
- verification */,
- true);
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
+ 0 /* Wa_14012342262 write-only reg, so skip verification */,
+ true);
}
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2253,7 +2389,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
- wa_masked_en(wal, GEN7_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
/*
* Wa_1407928979:tgl A*
@@ -2282,14 +2418,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/* Wa_1409804808:tgl,rkl,dg1[a0],adl-s,adl-p */
- wa_masked_en(wal, GEN7_ROW_CHICKEN2,
- GEN12_PUSH_CONST_DEREF_HOLD_DIS);
+ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
+ GEN12_PUSH_CONST_DEREF_HOLD_DIS);
/*
* Wa_1409085225:tgl
* Wa_14010229206:tgl,rkl,dg1[a0],adl-s,adl-p
*/
- wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
}
if (IS_DG1_GRAPHICS_STEP(i915, STEP_A0, STEP_B0) ||
@@ -2313,9 +2449,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) ||
IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) {
/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
- wa_masked_en(wal,
- GEN10_SAMPLER_MODE,
- ENABLE_SMALLPL);
+ wa_mcr_masked_en(wal,
+ GEN10_SAMPLER_MODE,
+ ENABLE_SMALLPL);
}
if (GRAPHICS_VER(i915) == 11) {
@@ -2349,9 +2485,9 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_1405733216:icl
* Formerly known as WaDisableCleanEvicts
*/
- wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN11_LQSC_CLEAN_EVICT_DISABLE);
+ wa_mcr_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN11_LQSC_CLEAN_EVICT_DISABLE);
/* Wa_1606682166:icl */
wa_write_or(wal,
@@ -2359,10 +2495,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_DISABLE_SAMPLER_PREFETCH);
/* Wa_1409178092:icl */
- wa_write_clr_set(wal,
- GEN11_SCRATCH2,
- GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
- 0);
+ wa_mcr_write_clr_set(wal,
+ GEN11_SCRATCH2,
+ GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
+ 0);
/* WaEnable32PlaneMode:icl */
wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
@@ -2389,12 +2525,64 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
FF_DOP_CLOCK_GATE_DISABLE);
}
- if (IS_GRAPHICS_VER(i915, 9, 12)) {
- /* FtrPerCtxtPreemptionGranularityControl:skl,bxt,kbl,cfl,cnl,icl,tgl */
+ /*
+ * Intel platforms that support fine-grained preemption (i.e., gen9 and
+ * beyond) allow the kernel-mode driver to choose between two different
+ * options for controlling preemption granularity and behavior.
+ *
+ * Option 1 (hardware default):
+ * Preemption settings are controlled in a global manner via
+ * kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity
+ * and settings chosen by the kernel-mode driver will apply to all
+ * userspace clients.
+ *
+ * Option 2:
+ * Preemption settings are controlled on a per-context basis via
+ * register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on
+ * context switch and is writable by userspace (e.g., via
+ * MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
+ * which allows different userspace drivers/clients to select
+ * different settings, or to change those settings on the fly in
+ * response to runtime needs. This option was known by name
+ * "FtrPerCtxtPreemptionGranularityControl" at one time, although
+ * that name is somewhat misleading as other non-granularity
+ * preemption settings are also impacted by this decision.
+ *
+ * On Linux, our policy has always been to let userspace drivers
+ * control preemption granularity/settings (Option 2). This was
+ * originally mandatory on gen9 to prevent ABI breakage (old gen9
+ * userspace developed before object-level preemption was enabled would
+ * not behave well if i915 were to go with Option 1 and enable that
+ * preemption in a global manner). On gen9 each context would have
+ * object-level preemption disabled by default (see
+ * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
+ * userspace drivers could opt-in to object-level preemption as they
+ * saw fit. For post-gen9 platforms, we continue to utilize Option 2;
+ * even though it is no longer necessary for ABI compatibility when
+ * enabling a new platform, it does ensure that userspace will be able
+ * to implement any workarounds that show up requiring temporary
+ * adjustments to preemption behavior at runtime.
+ *
+ * Notes/Workarounds:
+ * - Wa_14015141709: On DG2 and early steppings of MTL,
+ * CS_CHICKEN1[0] does not disable object-level preemption as
+ * it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
+ * using Option 1). Effectively this means userspace is unable
+ * to disable object-level preemption on these platforms/steppings
+ * despite the setting here.
+ *
+ * - Wa_16013994831: May require that userspace program
+ * CS_CHICKEN1[10] when certain runtime conditions are true.
+ * Userspace requires Option 2 to be in effect for their update of
+ * CS_CHICKEN1[10] to be effective.
+ *
+ * Other workarounds may appear in the future that will also require
+ * Option 2 behavior to allow proper userspace implementation.
+ */
+ if (GRAPHICS_VER(i915) >= 9)
wa_masked_en(wal,
GEN7_FF_SLICE_CS_CHICKEN1,
GEN9_FFSC_PERCTX_PREEMPT_CTRL);
- }
if (IS_SKYLAKE(i915) ||
IS_KABYLAKE(i915) ||
@@ -2420,36 +2608,36 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
- wa_write_or(wal,
- BDW_SCRATCH1,
- GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
+ wa_mcr_write_or(wal,
+ BDW_SCRATCH1,
+ GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
if (IS_GEN9_LP(i915))
- wa_write_clr_set(wal,
- GEN8_L3SQCREG1,
- L3_PRIO_CREDITS_MASK,
- L3_GENERAL_PRIO_CREDITS(62) |
- L3_HIGH_PRIO_CREDITS(2));
+ wa_mcr_write_clr_set(wal,
+ GEN8_L3SQCREG1,
+ L3_PRIO_CREDITS_MASK,
+ L3_GENERAL_PRIO_CREDITS(62) |
+ L3_HIGH_PRIO_CREDITS(2));
/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
- wa_write_or(wal,
- GEN8_L3SQCREG4,
- GEN8_LQSC_FLUSH_COHERENT_LINES);
+ wa_mcr_write_or(wal,
+ GEN8_L3SQCREG4,
+ GEN8_LQSC_FLUSH_COHERENT_LINES);
/* Disable atomics in L3 to prevent unrecoverable hangs */
wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
- wa_write_clr_set(wal, GEN8_L3SQCREG4,
- GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
- wa_write_clr_set(wal, GEN9_SCRATCH1,
- EVICTION_PERF_FIX_ENABLE, 0);
+ wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
+ GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
+ wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
+ EVICTION_PERF_FIX_ENABLE, 0);
}
if (IS_HASWELL(i915)) {
/* WaSampleCChickenBitEnable:hsw */
wa_masked_en(wal,
- HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+ HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
wa_masked_dis(wal,
CACHE_MODE_0_GEN7,
@@ -2657,7 +2845,7 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
if (IS_PVC_CT_STEP(engine->i915, STEP_A0, STEP_C0)) {
/* Wa_14014999345:pvc */
- wa_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
+ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_ECC);
}
}
@@ -2683,8 +2871,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
}
if (IS_DG2(i915)) {
- wa_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
- wa_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
+ wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
+ wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
/*
* This is also listed as Wa_22012654132 for certain DG2
@@ -2695,10 +2883,10 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
* back for verification on DG2 (due to Wa_14012342262), so
* we need to explicitly skip the readback.
*/
- wa_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
- 0 /* write-only, so skip validation */,
- true);
+ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
+ _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
+ 0 /* write-only, so skip validation */,
+ true);
}
/*
@@ -2707,8 +2895,8 @@ add_render_compute_tuning_settings(struct drm_i915_private *i915,
* platforms.
*/
if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
- wa_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
- THREAD_EX_ARB_MODE_RR_AFTER_DEP);
+ wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
+ THREAD_EX_ARB_MODE_RR_AFTER_DEP);
}
/*
@@ -2734,30 +2922,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
if (IS_XEHPSDV(i915)) {
/* Wa_1409954639 */
- wa_masked_en(wal,
- GEN8_ROW_CHICKEN,
- SYSTOLIC_DOP_CLOCK_GATING_DIS);
+ wa_mcr_masked_en(wal,
+ GEN8_ROW_CHICKEN,
+ SYSTOLIC_DOP_CLOCK_GATING_DIS);
/* Wa_1607196519 */
- wa_masked_en(wal,
- GEN9_ROW_CHICKEN4,
- GEN12_DISABLE_GRF_CLEAR);
+ wa_mcr_masked_en(wal,
+ GEN9_ROW_CHICKEN4,
+ GEN12_DISABLE_GRF_CLEAR);
/* Wa_14010670810:xehpsdv */
- wa_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
+ wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
/* Wa_14010449647:xehpsdv */
- wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
- GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+ wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
/* Wa_18011725039:xehpsdv */
if (IS_XEHPSDV_GRAPHICS_STEP(i915, STEP_A1, STEP_B0)) {
- wa_masked_dis(wal, MLTICTXCTL, TDONRENDER);
- wa_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
+ wa_mcr_masked_dis(wal, MLTICTXCTL, TDONRENDER);
+ wa_mcr_write_or(wal, L3SQCREG1_CCS0, FLUSHALLNONCOH);
}
/* Wa_14012362059:xehpsdv */
- wa_write_or(wal, GEN12_MERT_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_14014368820:xehpsdv */
wa_write_or(wal, GEN12_GAMCNTRL_CTRL, INVALIDATION_BROADCAST_MODE_DIS |
@@ -2766,19 +2954,30 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) {
/* Wa_14015227452:dg2,pvc */
- wa_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
+ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);
/* Wa_22014226127:dg2,pvc */
- wa_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
/* Wa_16015675438:dg2,pvc */
wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE);
/* Wa_18018781329:dg2,pvc */
- wa_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
- wa_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, VDBX_MOD_CTRL, FORCE_MISS_FTLB);
+ wa_mcr_write_or(wal, VEBX_MOD_CTRL, FORCE_MISS_FTLB);
+ }
+
+ if (IS_DG2(i915)) {
+ /*
+ * Wa_16011620976:dg2_g11
+ * Wa_22015475538:dg2
+ */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+ /* Wa_18017747507:dg2 */
+ wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
}
}
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
index 8a4b6de4e754..7c8b01d00043 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds_types.h
@@ -11,11 +11,16 @@
#include "i915_reg_defs.h"
struct i915_wa {
- i915_reg_t reg;
+ union {
+ i915_reg_t reg;
+ i915_mcr_reg_t mcr_reg;
+ };
u32 clr;
u32 set;
u32 read;
- bool masked_reg;
+
+ u32 masked_reg:1;
+ u32 is_mcr:1;
};
struct i915_wa_list {
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 1b75f478d1b8..881b64f3e7b9 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -39,6 +39,16 @@ static int perf_end(struct intel_gt *gt)
return igt_flush_test(gt->i915);
}
+/*
+ * Select the CS timestamp register to sample for @engine: the
+ * RING_TIMESTAMP_UDW variant on graphics version 5 and on G4X,
+ * plain RING_TIMESTAMP on everything else.
+ */
+static i915_reg_t timestamp_reg(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	const bool use_udw = GRAPHICS_VER(i915) == 5 || IS_G4X(i915);
+
+	if (use_udw)
+		return RING_TIMESTAMP_UDW(engine->mmio_base);
+
+	return RING_TIMESTAMP(engine->mmio_base);
+}
+
static int write_timestamp(struct i915_request *rq, int slot)
{
struct intel_timeline *tl =
@@ -55,7 +65,7 @@ static int write_timestamp(struct i915_request *rq, int slot)
if (GRAPHICS_VER(rq->engine->i915) >= 8)
cmd++;
*cs++ = cmd;
- *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
+ *cs++ = i915_mmio_reg_offset(timestamp_reg(rq->engine));
*cs++ = tl->hwsp_offset + slot * sizeof(u32);
*cs++ = 0;
@@ -125,7 +135,7 @@ static int perf_mi_bb_start(void *arg)
enum intel_engine_id id;
int err = 0;
- if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
perf_begin(gt);
@@ -135,6 +145,9 @@ static int perf_mi_bb_start(void *arg)
u32 cycles[COUNT];
int i;
+ if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
+ continue;
+
intel_engine_pm_get(engine);
batch = create_empty_batch(ce);
@@ -249,7 +262,7 @@ static int perf_mi_noop(void *arg)
enum intel_engine_id id;
int err = 0;
- if (GRAPHICS_VER(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
+ if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
perf_begin(gt);
@@ -259,6 +272,9 @@ static int perf_mi_noop(void *arg)
u32 cycles[COUNT];
int i;
+ if (GRAPHICS_VER(engine->i915) < 7 && engine->id != RCS0)
+ continue;
+
intel_engine_pm_get(engine);
base = create_empty_batch(ce);
diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c
index 1e08b2473b99..2c7c053a8808 100644
--- a/drivers/gpu/drm/i915/gt/selftest_execlists.c
+++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c
@@ -85,8 +85,6 @@ static int wait_for_reset(struct intel_engine_cs *engine,
break;
} while (time_before(jiffies, timeout));
- flush_scheduled_work();
-
if (rq->fence.error != -EIO) {
pr_err("%s: hanging request %llx:%lld not reset\n",
engine->name,
@@ -3475,12 +3473,14 @@ static int random_priority(struct rnd_state *rnd)
struct preempt_smoke {
struct intel_gt *gt;
+ struct kthread_work work;
struct i915_gem_context **contexts;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *batch;
unsigned int ncontext;
struct rnd_state prng;
unsigned long count;
+ int result;
};
static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke)
@@ -3540,34 +3540,31 @@ unpin:
return err;
}
-static int smoke_crescendo_thread(void *arg)
+static void smoke_crescendo_work(struct kthread_work *work)
{
- struct preempt_smoke *smoke = arg;
+ struct preempt_smoke *smoke = container_of(work, typeof(*smoke), work);
IGT_TIMEOUT(end_time);
unsigned long count;
count = 0;
do {
struct i915_gem_context *ctx = smoke_context(smoke);
- int err;
- err = smoke_submit(smoke,
- ctx, count % I915_PRIORITY_MAX,
- smoke->batch);
- if (err)
- return err;
+ smoke->result = smoke_submit(smoke, ctx,
+ count % I915_PRIORITY_MAX,
+ smoke->batch);
count++;
- } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL));
+ } while (!smoke->result && count < smoke->ncontext &&
+ !__igt_timeout(end_time, NULL));
smoke->count = count;
- return 0;
}
static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
#define BATCH BIT(0)
{
- struct task_struct *tsk[I915_NUM_ENGINES] = {};
+ struct kthread_worker *worker[I915_NUM_ENGINES] = {};
struct preempt_smoke *arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
@@ -3578,6 +3575,8 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
if (!arg)
return -ENOMEM;
+ memset(arg, 0, I915_NUM_ENGINES * sizeof(*arg));
+
for_each_engine(engine, smoke->gt, id) {
arg[id] = *smoke;
arg[id].engine = engine;
@@ -3585,31 +3584,28 @@ static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags)
arg[id].batch = NULL;
arg[id].count = 0;
- tsk[id] = kthread_run(smoke_crescendo_thread, arg,
- "igt/smoke:%d", id);
- if (IS_ERR(tsk[id])) {
- err = PTR_ERR(tsk[id]);
+ worker[id] = kthread_create_worker(0, "igt/smoke:%d", id);
+ if (IS_ERR(worker[id])) {
+ err = PTR_ERR(worker[id]);
break;
}
- get_task_struct(tsk[id]);
- }
- yield(); /* start all threads before we kthread_stop() */
+ kthread_init_work(&arg[id].work, smoke_crescendo_work);
+ kthread_queue_work(worker[id], &arg[id].work);
+ }
count = 0;
for_each_engine(engine, smoke->gt, id) {
- int status;
-
- if (IS_ERR_OR_NULL(tsk[id]))
+ if (IS_ERR_OR_NULL(worker[id]))
continue;
- status = kthread_stop(tsk[id]);
- if (status && !err)
- err = status;
+ kthread_flush_work(&arg[id].work);
+ if (arg[id].result && !err)
+ err = arg[id].result;
count += arg[id].count;
- put_task_struct(tsk[id]);
+ kthread_destroy_worker(worker[id]);
}
pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n",
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index be94f863bdef..b46425aeb2f0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -36,6 +36,19 @@ static int cmp_u32(const void *A, const void *B)
return 0;
}
+/*
+ * Read the CS timestamp of @engine through ENGINE_READ_FW().
+ *
+ * A throw-away read of RING_TIMESTAMP is always issued first; the value
+ * returned comes from RING_TIMESTAMP_UDW on graphics version 5 and G4X,
+ * and from RING_TIMESTAMP otherwise.
+ */
+static u32 read_timestamp(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	bool use_udw;
+
+	/* On i965 the first read tends to give a stale value */
+	ENGINE_READ_FW(engine, RING_TIMESTAMP);
+
+	use_udw = GRAPHICS_VER(i915) == 5 || IS_G4X(i915);
+	if (use_udw)
+		return ENGINE_READ_FW(engine, RING_TIMESTAMP_UDW);
+
+	return ENGINE_READ_FW(engine, RING_TIMESTAMP);
+}
+
static void measure_clocks(struct intel_engine_cs *engine,
u32 *out_cycles, ktime_t *out_dt)
{
@@ -45,13 +58,13 @@ static void measure_clocks(struct intel_engine_cs *engine,
for (i = 0; i < 5; i++) {
local_irq_disable();
- cycles[i] = -ENGINE_READ_FW(engine, RING_TIMESTAMP);
+ cycles[i] = -read_timestamp(engine);
dt[i] = ktime_get();
udelay(1000);
dt[i] = ktime_sub(ktime_get(), dt[i]);
- cycles[i] += ENGINE_READ_FW(engine, RING_TIMESTAMP);
+ cycles[i] += read_timestamp(engine);
local_irq_enable();
}
@@ -78,25 +91,6 @@ static int live_gt_clocks(void *arg)
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- if (GRAPHICS_VER(gt->i915) == 5)
- /*
- * XXX CS_TIMESTAMP low dword is dysfunctional?
- *
- * Ville's experiments indicate the high dword still works,
- * but at a correspondingly reduced frequency.
- */
- return 0;
-
- if (GRAPHICS_VER(gt->i915) == 4)
- /*
- * XXX CS_TIMESTAMP appears gibberish
- *
- * Ville's experiments indicate that it mostly appears 'stuck'
- * in that we see the register report the same cycle count
- * for a couple of reads.
- */
- return 0;
-
intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
index 7f3bb1d34dfb..71263058a7b0 100644
--- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c
@@ -866,10 +866,13 @@ static int igt_reset_active_engine(void *arg)
}
struct active_engine {
- struct task_struct *task;
+ struct kthread_worker *worker;
+ struct kthread_work work;
struct intel_engine_cs *engine;
unsigned long resets;
unsigned int flags;
+ bool stop;
+ int result;
};
#define TEST_ACTIVE BIT(0)
@@ -900,10 +903,10 @@ static int active_request_put(struct i915_request *rq)
return err;
}
-static int active_engine(void *data)
+static void active_engine(struct kthread_work *work)
{
I915_RND_STATE(prng);
- struct active_engine *arg = data;
+ struct active_engine *arg = container_of(work, typeof(*arg), work);
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq[8] = {};
struct intel_context *ce[ARRAY_SIZE(rq)];
@@ -913,16 +916,17 @@ static int active_engine(void *data)
for (count = 0; count < ARRAY_SIZE(ce); count++) {
ce[count] = intel_context_create(engine);
if (IS_ERR(ce[count])) {
- err = PTR_ERR(ce[count]);
- pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err);
+ arg->result = PTR_ERR(ce[count]);
+ pr_err("[%s] Create context #%ld failed: %d!\n",
+ engine->name, count, arg->result);
while (--count)
intel_context_put(ce[count]);
- return err;
+ return;
}
}
count = 0;
- while (!kthread_should_stop()) {
+ while (!READ_ONCE(arg->stop)) {
unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1);
struct i915_request *old = rq[idx];
struct i915_request *new;
@@ -967,7 +971,7 @@ static int active_engine(void *data)
intel_context_put(ce[count]);
}
- return err;
+ arg->result = err;
}
static int __igt_reset_engines(struct intel_gt *gt,
@@ -1022,7 +1026,7 @@ static int __igt_reset_engines(struct intel_gt *gt,
memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES);
for_each_engine(other, gt, tmp) {
- struct task_struct *tsk;
+ struct kthread_worker *worker;
threads[tmp].resets =
i915_reset_engine_count(global, other);
@@ -1036,19 +1040,21 @@ static int __igt_reset_engines(struct intel_gt *gt,
threads[tmp].engine = other;
threads[tmp].flags = flags;
- tsk = kthread_run(active_engine, &threads[tmp],
- "igt/%s", other->name);
- if (IS_ERR(tsk)) {
- err = PTR_ERR(tsk);
- pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err);
+ worker = kthread_create_worker(0, "igt/%s",
+ other->name);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
+ pr_err("[%s] Worker create failed: %d!\n",
+ engine->name, err);
goto unwind;
}
- threads[tmp].task = tsk;
- get_task_struct(tsk);
- }
+ threads[tmp].worker = worker;
- yield(); /* start all threads before we begin */
+ kthread_init_work(&threads[tmp].work, active_engine);
+ kthread_queue_work(threads[tmp].worker,
+ &threads[tmp].work);
+ }
st_engine_heartbeat_disable_no_pm(engine);
GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id,
@@ -1197,17 +1203,20 @@ unwind:
for_each_engine(other, gt, tmp) {
int ret;
- if (!threads[tmp].task)
+ if (!threads[tmp].worker)
continue;
- ret = kthread_stop(threads[tmp].task);
+ WRITE_ONCE(threads[tmp].stop, true);
+ kthread_flush_work(&threads[tmp].work);
+ ret = READ_ONCE(threads[tmp].result);
if (ret) {
pr_err("kthread for other engine %s failed, err=%d\n",
other->name, ret);
if (!err)
err = ret;
}
- put_task_struct(threads[tmp].task);
+
+ kthread_destroy_worker(threads[tmp].worker);
/* GuC based resets are not logged per engine */
if (!using_guc) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c
index 2b0c87999949..0dc5309c90a4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_migrate.c
+++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c
@@ -6,6 +6,7 @@
#include <linux/sort.h>
#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
#include "selftests/i915_random.h"
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index cfb4708dd62e..99a372486fb7 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -1107,21 +1107,27 @@ static u64 __measure_power(int duration_ms)
return div64_u64(1000 * 1000 * dE, dt);
}
-static u64 measure_power_at(struct intel_rps *rps, int *freq)
+static u64 measure_power(struct intel_rps *rps, int *freq)
{
u64 x[5];
int i;
- *freq = rps_set_check(rps, *freq);
for (i = 0; i < 5; i++)
x[i] = __measure_power(5);
- *freq = (*freq + read_cagf(rps)) / 2;
+
+ *freq = (*freq + intel_rps_read_actual_frequency(rps)) / 2;
/* A simple triangle filter for better result stability */
sort(x, 5, sizeof(*x), cmp_u64, NULL);
return div_u64(x[1] + 2 * x[2] + x[3], 4);
}
+/*
+ * Request the frequency in *@freq via rps_set_check(), store whatever it
+ * reports back into *@freq, then return the measure_power() reading taken
+ * with that value (measure_power() updates *@freq again).
+ */
+static u64 measure_power_at(struct intel_rps *rps, int *freq)
+{
+	const int granted = rps_set_check(rps, *freq);
+
+	*freq = granted;
+
+	return measure_power(rps, freq);
+}
+
int live_rps_power(void *arg)
{
struct intel_gt *gt = arg;
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index f8a1d27df272..82ec95a299f6 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -11,7 +11,8 @@
enum test_type {
VARY_MIN,
VARY_MAX,
- MAX_GRANTED
+ MAX_GRANTED,
+ SLPC_POWER,
};
static int slpc_set_min_freq(struct intel_guc_slpc *slpc, u32 freq)
@@ -41,6 +42,39 @@ static int slpc_set_max_freq(struct intel_guc_slpc *slpc, u32 freq)
return ret;
}
+/*
+ * Program @freq as both the SLPC max and min frequency of @gt, max first.
+ *
+ * Returns 0 on success, or the error reported by the setter that failed
+ * (in which case the remaining setter is not called).
+ */
+static int slpc_set_freq(struct intel_gt *gt, u32 freq)
+{
+	struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+	int err;
+
+	err = slpc_set_max_freq(slpc, freq);
+	if (err) {
+		/* printk messages must be newline-terminated */
+		pr_err("Unable to update max freq\n");
+		return err;
+	}
+
+	err = slpc_set_min_freq(slpc, freq);
+	if (err) {
+		pr_err("Unable to update min freq\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Pin the SLPC range of @gt to *@freq with slpc_set_freq() and measure
+ * power there.
+ *
+ * On success *@freq is first replaced by the actual frequency read back
+ * from the RPS and then passed to measure_power(), whose result is stored
+ * in *@power.
+ *
+ * Returns 0 on success or the negative error from slpc_set_freq(); the
+ * return type is int (not u64) because only an errno is ever returned.
+ */
+static int measure_power_at_freq(struct intel_gt *gt, int *freq, u64 *power)
+{
+	int err;
+
+	err = slpc_set_freq(gt, *freq);
+	if (err)
+		return err;
+
+	*freq = intel_rps_read_actual_frequency(&gt->rps);
+	*power = measure_power(&gt->rps, freq);
+
+	return 0;
+}
+
static int vary_max_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
u32 *max_act_freq)
{
@@ -113,6 +147,58 @@ static int vary_min_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps,
return err;
}
+/*
+ * Compare measured power at the SLPC min frequency against power at RP0.
+ *
+ * Returns 0 when RAPL is unsupported (nothing is measured) or when the
+ * check passes, the error from measure_power_at_freq() if a measurement
+ * could not be set up, and -EINVAL when the min-frequency power exceeds
+ * the max-frequency power by more than 10%.
+ *
+ * Once a measurement has been attempted the min/max limits are put back
+ * to slpc->min_freq / slpc->rp0_freq on every exit path, including the
+ * error paths.
+ */
+static int slpc_power(struct intel_gt *gt, struct intel_engine_cs *engine)
+{
+	struct intel_guc_slpc *slpc = &gt->uc.guc.slpc;
+	struct {
+		u64 power;
+		int freq;
+	} min, max;
+	int err;
+
+	/*
+	 * Our fundamental assumption is that running at lower frequency
+	 * actually saves power. Let's see if our RAPL measurement supports
+	 * that theory.
+	 */
+	if (!librapl_supported(gt->i915))
+		return 0;
+
+	min.freq = slpc->min_freq;
+	err = measure_power_at_freq(gt, &min.freq, &min.power);
+	if (err)
+		goto restore;
+
+	max.freq = slpc->rp0_freq;
+	err = measure_power_at_freq(gt, &max.freq, &max.power);
+	if (err)
+		goto restore;
+
+	/* freq fields are int, so print them with %d */
+	pr_info("%s: min:%llumW @ %dMHz, max:%llumW @ %dMHz\n",
+		engine->name,
+		min.power, min.freq,
+		max.power, max.freq);
+
+	/* Informational only: the two runs ended up within 10% of each other */
+	if (10 * min.freq >= 9 * max.freq) {
+		pr_notice("Could not control frequency, ran at [%dMHz, %dMHz]\n",
+			  min.freq, max.freq);
+	}
+
+	if (11 * min.power > 10 * max.power) {
+		pr_err("%s: did not conserve power when setting lower frequency!\n",
+		       engine->name);
+		err = -EINVAL;
+	}
+
+restore:
+	/* Restore min/max frequencies */
+	slpc_set_max_freq(slpc, slpc->rp0_freq);
+	slpc_set_min_freq(slpc, slpc->min_freq);
+
+	return err;
+}
+
static int max_granted_freq(struct intel_guc_slpc *slpc, struct intel_rps *rps, u32 *max_act_freq)
{
struct intel_gt *gt = rps_to_gt(rps);
@@ -153,6 +239,11 @@ static int run_test(struct intel_gt *gt, int test_type)
if (!intel_uc_uses_guc_slpc(&gt->uc))
return 0;
+ if (slpc->min_freq == slpc->rp0_freq) {
+ pr_err("Min/Max are fused to the same value\n");
+ return -EINVAL;
+ }
+
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
@@ -167,17 +258,14 @@ static int run_test(struct intel_gt *gt, int test_type)
}
/*
- * FIXME: With efficient frequency enabled, GuC can request
- * frequencies higher than the SLPC max. While this is fixed
- * in GuC, we level set these tests with RPn as min.
+ * Set min frequency to RPn so that we can test the whole
+ * range of RPn-RP0. This also turns off efficient freq
+ * usage and makes results more predictable.
*/
err = slpc_set_min_freq(slpc, slpc->min_freq);
- if (err)
+ if (err) {
+ pr_err("Unable to update min freq!");
return err;
-
- if (slpc->min_freq == slpc->rp0_freq) {
- pr_err("Min/Max are fused to the same value\n");
- return -EINVAL;
}
intel_gt_pm_wait_for_idle(gt);
@@ -233,17 +321,23 @@ static int run_test(struct intel_gt *gt, int test_type)
err = max_granted_freq(slpc, rps, &max_act_freq);
break;
+
+ case SLPC_POWER:
+ err = slpc_power(gt, engine);
+ break;
}
- pr_info("Max actual frequency for %s was %d\n",
- engine->name, max_act_freq);
+ if (test_type != SLPC_POWER) {
+ pr_info("Max actual frequency for %s was %d\n",
+ engine->name, max_act_freq);
- /* Actual frequency should rise above min */
- if (max_act_freq <= slpc_min_freq) {
- pr_err("Actual freq did not rise above min\n");
- pr_err("Perf Limit Reasons: 0x%x\n",
- intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
- err = -EINVAL;
+ /* Actual frequency should rise above min */
+ if (max_act_freq <= slpc->min_freq) {
+ pr_err("Actual freq did not rise above min\n");
+ pr_err("Perf Limit Reasons: 0x%x\n",
+ intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS));
+ err = -EINVAL;
+ }
}
igt_spinner_end(&spin);
@@ -270,26 +364,66 @@ static int run_test(struct intel_gt *gt, int test_type)
static int live_slpc_vary_min(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct intel_gt *gt = to_gt(i915);
+ struct intel_gt *gt;
+ unsigned int i;
+ int ret = 0; /* nothing failed if no GT is visited */
+
+ for_each_gt(gt, i915, i) {
+ ret = run_test(gt, VARY_MIN);
+ if (ret)
+ return ret;
+ }
- return run_test(gt, VARY_MIN);
+ return ret;
}
static int live_slpc_vary_max(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct intel_gt *gt = to_gt(i915);
+ struct intel_gt *gt;
+ unsigned int i;
+ int ret = 0; /* nothing failed if no GT is visited */
+
+ for_each_gt(gt, i915, i) {
+ ret = run_test(gt, VARY_MAX);
+ if (ret)
+ return ret;
+ }
- return run_test(gt, VARY_MAX);
+ return ret;
}
/* check if pcode can grant RP0 */
static int live_slpc_max_granted(void *arg)
{
struct drm_i915_private *i915 = arg;
- struct intel_gt *gt = to_gt(i915);
+ struct intel_gt *gt;
+ unsigned int i;
+ int ret = 0; /* nothing failed if no GT is visited */
- return run_test(gt, MAX_GRANTED);
+ for_each_gt(gt, i915, i) {
+ ret = run_test(gt, MAX_GRANTED);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int live_slpc_power(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_gt *gt;
+ unsigned int i;
+ int ret = 0; /* nothing failed if no GT is visited */
+
+ for_each_gt(gt, i915, i) {
+ ret = run_test(gt, SLPC_POWER);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
}
int intel_slpc_live_selftests(struct drm_i915_private *i915)
@@ -298,10 +432,16 @@ int intel_slpc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_slpc_vary_max),
SUBTEST(live_slpc_vary_min),
SUBTEST(live_slpc_max_granted),
+ SUBTEST(live_slpc_power),
};
- if (intel_gt_is_wedged(to_gt(i915)))
- return 0;
+ struct intel_gt *gt;
+ unsigned int i;
+
+ for_each_gt(gt, i915, i) {
+ if (intel_gt_is_wedged(gt))
+ return 0;
+ }
return i915_live_subtests(tests, i915);
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
index 67a9aab801dd..21b1edc052f8 100644
--- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c
@@ -991,7 +991,7 @@ static bool pardon_reg(struct drm_i915_private *i915, i915_reg_t reg)
/* Alas, we must pardon some whitelists. Mistakes already made */
static const struct regmask pardon[] = {
{ GEN9_CTX_PREEMPT_REG, 9 },
- { GEN8_L3SQCREG4, 9 },
+ { _MMIO(0xb118), 9 }, /* GEN8_L3SQCREG4 */
};
return find_reg(i915, reg, pardon, ARRAY_SIZE(pardon));
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 967031056202..f2d9858d827c 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -144,7 +144,7 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- unsigned long long duration;
+ unsigned long long duration, clamped;
int err;
/*
@@ -168,7 +168,8 @@ max_spin_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
- if (duration > jiffies_to_nsecs(2))
+ clamped = intel_clamp_max_busywait_duration_ns(engine, duration);
+ if (duration != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.max_busywait_duration_ns, duration);
@@ -203,7 +204,7 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- unsigned long long duration;
+ unsigned long long duration, clamped;
int err;
/*
@@ -218,7 +219,8 @@ timeslice_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
- if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ clamped = intel_clamp_timeslice_duration_ms(engine, duration);
+ if (duration != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.timeslice_duration_ms, duration);
@@ -256,7 +258,7 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- unsigned long long duration;
+ unsigned long long duration, clamped;
int err;
/*
@@ -272,7 +274,8 @@ stop_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
- if (duration > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ clamped = intel_clamp_stop_timeout_ms(engine, duration);
+ if (duration != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.stop_timeout_ms, duration);
@@ -306,7 +309,7 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- unsigned long long timeout;
+ unsigned long long timeout, clamped;
int err;
/*
@@ -322,7 +325,8 @@ preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
- if (timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ clamped = intel_clamp_preempt_timeout_ms(engine, timeout);
+ if (timeout != clamped)
return -EINVAL;
WRITE_ONCE(engine->props.preempt_timeout_ms, timeout);
@@ -362,7 +366,7 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct intel_engine_cs *engine = kobj_to_engine(kobj);
- unsigned long long delay;
+ unsigned long long delay, clamped;
int err;
/*
@@ -379,7 +383,8 @@ heartbeat_store(struct kobject *kobj, struct kobj_attribute *attr,
if (err)
return err;
- if (delay >= jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ clamped = intel_clamp_heartbeat_interval_ms(engine, delay);
+ if (delay != clamped)
return -EINVAL;
err = intel_engine_set_heartbeat(engine, delay);
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 29ef8afc8c2e..f359bef046e0 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
@@ -117,6 +117,7 @@ enum intel_guc_action {
INTEL_GUC_ACTION_ENTER_S_STATE = 0x501,
INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+ INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
INTEL_GUC_ACTION_SCHED_CONTEXT = 0x1000,
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
index 4c840a2639dc..811add10c30d 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
@@ -128,6 +128,15 @@ enum slpc_media_ratio_mode {
SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
};
+enum slpc_gucrc_mode {
+ SLPC_GUCRC_MODE_HW = 0,
+ SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
+ SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
+ SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
+
+ SLPC_GUCRC_MODE_MAX,
+};
+
enum slpc_event_id {
SLPC_EVENT_RESET = 0,
SLPC_EVENT_SHUTDOWN = 1,
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
index 4a59478c3b5c..58012edd4eb0 100644
--- a/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_klvs_abi.h
@@ -82,9 +82,16 @@
#define GUC_KLV_SELF_CFG_G2H_CTB_SIZE_LEN 1u
/*
+ * Global scheduling policy update keys.
+ */
+enum {
+ GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD = 0x1001,
+};
+
+/*
* Per context scheduling policy update keys.
*/
-enum {
+enum {
GUC_CONTEXT_POLICIES_KLV_ID_EXECUTION_QUANTUM = 0x2001,
GUC_CONTEXT_POLICIES_KLV_ID_PREEMPTION_TIMEOUT = 0x2002,
GUC_CONTEXT_POLICIES_KLV_ID_SCHEDULING_PRIORITY = 0x2003,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index bac06e3d6f2c..27b09ba1d295 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -441,6 +441,7 @@ err_log:
err_fw:
intel_uc_fw_fini(&guc->fw);
out:
+ intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
i915_probe_error(gt->i915, "failed with %d\n", ret);
return ret;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 804133df1ac9..357873ef692b 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -113,6 +113,10 @@ struct intel_guc {
*/
struct list_head guc_id_list;
/**
+ * @guc_ids_in_use: Number of single-lrc guc_ids in use
+ */
+ unsigned int guc_ids_in_use;
+ /**
* @destroyed_contexts: list of contexts waiting to be destroyed
* (deregistered with the GuC)
*/
@@ -132,6 +136,16 @@ struct intel_guc {
* @reset_fail_mask: mask of engines that failed to reset
*/
intel_engine_mask_t reset_fail_mask;
+ /**
+ * @sched_disable_delay_ms: schedule disable delay, in ms, for
+ * contexts
+ */
+ unsigned int sched_disable_delay_ms;
+ /**
+ * @sched_disable_gucid_threshold: threshold of min remaining available
+ * guc_ids before we start bypassing the schedule disable delay
+ */
+ unsigned int sched_disable_gucid_threshold;
} submission_state;
/**
@@ -466,4 +480,6 @@ void intel_guc_write_barrier(struct intel_guc *guc);
void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p);
+int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc);
+
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 74cbe8eaf531..a419d60166c8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -5,6 +5,7 @@
#include <linux/bsearch.h>
+#include "gem/i915_gem_lmem.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_mcr.h"
@@ -277,24 +278,16 @@ __mmio_reg_add(struct temp_regset *regset, struct guc_mmio_reg *reg)
return slot;
}
-#define GUC_REGSET_STEERING(group, instance) ( \
- FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
- FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
- GUC_REGSET_NEEDS_STEERING \
-)
-
static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
struct temp_regset *regset,
- i915_reg_t reg, u32 flags)
+ u32 offset, u32 flags)
{
u32 count = regset->storage_used - (regset->registers - regset->storage);
- u32 offset = i915_mmio_reg_offset(reg);
struct guc_mmio_reg entry = {
.offset = offset,
.flags = flags,
};
struct guc_mmio_reg *slot;
- u8 group, inst;
/*
* The mmio list is built using separate lists within the driver.
@@ -306,17 +299,6 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
sizeof(entry), guc_mmio_reg_cmp))
return 0;
- /*
- * The GuC doesn't have a default steering, so we need to explicitly
- * steer all registers that need steering. However, we do not keep track
- * of all the steering ranges, only of those that have a chance of using
- * a non-default steering from the i915 pov. Instead of adding such
- * tracking, it is easier to just program the default steering for all
- * regs that don't need a non-default one.
- */
- intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
- entry.flags |= GUC_REGSET_STEERING(group, inst);
-
slot = __mmio_reg_add(regset, &entry);
if (IS_ERR(slot))
return PTR_ERR(slot);
@@ -335,6 +317,38 @@ static long __must_check guc_mmio_reg_add(struct intel_gt *gt,
#define GUC_MMIO_REG_ADD(gt, regset, reg, masked) \
guc_mmio_reg_add(gt, \
regset, \
+ i915_mmio_reg_offset(reg), \
+ (masked) ? GUC_REGSET_MASKED : 0)
+
+#define GUC_REGSET_STEERING(group, instance) ( \
+ FIELD_PREP(GUC_REGSET_STEERING_GROUP, (group)) | \
+ FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, (instance)) | \
+ GUC_REGSET_NEEDS_STEERING \
+)
+
+static long __must_check guc_mcr_reg_add(struct intel_gt *gt,
+ struct temp_regset *regset,
+ i915_mcr_reg_t reg, u32 flags)
+{
+ u8 group, inst;
+
+ /*
+ * The GuC doesn't have a default steering, so we need to explicitly
+ * steer all registers that need steering. However, we do not keep track
+ * of all the steering ranges, only of those that have a chance of using
+ * a non-default steering from the i915 pov. Instead of adding such
+ * tracking, it is easier to just program the default steering for all
+ * regs that don't need a non-default one.
+ */
+ intel_gt_mcr_get_nonterminated_steering(gt, reg, &group, &inst);
+ flags |= GUC_REGSET_STEERING(group, inst);
+
+ return guc_mmio_reg_add(gt, regset, i915_mmio_reg_offset(reg), flags);
+}
+
+#define GUC_MCR_REG_ADD(gt, regset, reg, masked) \
+ guc_mcr_reg_add(gt, \
+ regset, \
(reg), \
(masked) ? GUC_REGSET_MASKED : 0)
@@ -372,8 +386,21 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
false);
/* add in local MOCS registers */
- for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
- ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
+ for (i = 0; i < LNCFCMOCS_REG_COUNT; i++)
+ if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
+ ret |= GUC_MCR_REG_ADD(gt, regset, XEHP_LNCFCMOCS(i), false);
+ else
+ ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
+
+ if (GRAPHICS_VER(engine->i915) >= 12) {
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
+ ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
+ }
return ret ? -1 : 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index 8f1165146013..4e6dca707d94 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -169,6 +169,8 @@ static struct __guc_mmio_reg_descr_group default_lists[] = {
MAKE_REGLIST(default_global_regs, PF, GLOBAL, 0),
MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+ MAKE_REGLIST(default_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
+ MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
@@ -182,6 +184,8 @@ static const struct __guc_mmio_reg_descr_group xe_lpd_lists[] = {
MAKE_REGLIST(xe_lpd_global_regs, PF, GLOBAL, 0),
MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_RENDER_CLASS),
MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_RENDER_CLASS),
+ MAKE_REGLIST(xe_lpd_rc_class_regs, PF, ENGINE_CLASS, GUC_COMPUTE_CLASS),
+ MAKE_REGLIST(xe_lpd_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_COMPUTE_CLASS),
MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_VIDEO_CLASS),
MAKE_REGLIST(xe_lpd_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_VIDEO_CLASS),
MAKE_REGLIST(xe_lpd_vec_class_regs, PF, ENGINE_CLASS, GUC_VIDEOENHANCE_CLASS),
@@ -240,19 +244,19 @@ static void guc_capture_free_extlists(struct __guc_mmio_reg_descr_group *reglist
struct __ext_steer_reg {
const char *name;
- i915_reg_t reg;
+ i915_mcr_reg_t reg;
};
static const struct __ext_steer_reg xe_extregs[] = {
- {"GEN7_SAMPLER_INSTDONE", GEN7_SAMPLER_INSTDONE},
- {"GEN7_ROW_INSTDONE", GEN7_ROW_INSTDONE}
+ {"GEN8_SAMPLER_INSTDONE", GEN8_SAMPLER_INSTDONE},
+ {"GEN8_ROW_INSTDONE", GEN8_ROW_INSTDONE}
};
static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext,
const struct __ext_steer_reg *extlist,
int slice_id, int subslice_id)
{
- ext->reg = extlist->reg;
+ ext->reg = _MMIO(i915_mmio_reg_offset(extlist->reg));
ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id);
ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id);
ext->regname = extlist->name;
@@ -419,6 +423,44 @@ guc_capture_get_device_reglist(struct intel_guc *guc)
return default_lists;
}
+static const char *
+__stringify_type(u32 type)
+{
+ switch (type) {
+ case GUC_CAPTURE_LIST_TYPE_GLOBAL:
+ return "Global";
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS:
+ return "Class";
+ case GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE:
+ return "Instance";
+ default:
+ break;
+ }
+
+ return "unknown";
+}
+
+static const char *
+__stringify_engclass(u32 class)
+{
+ switch (class) {
+ case GUC_RENDER_CLASS:
+ return "Render";
+ case GUC_VIDEO_CLASS:
+ return "Video";
+ case GUC_VIDEOENHANCE_CLASS:
+ return "VideoEnhance";
+ case GUC_BLITTER_CLASS:
+ return "Blitter";
+ case GUC_COMPUTE_CLASS:
+ return "Compute";
+ default:
+ break;
+ }
+
+ return "unknown";
+}
+
static int
guc_capture_list_init(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
struct guc_mmio_reg *ptr, u16 num_entries)
@@ -482,32 +524,55 @@ guc_cap_list_num_regs(struct intel_guc_state_capture *gc, u32 owner, u32 type, u
return num_regs;
}
-int
-intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
- size_t *size)
+static int
+guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ size_t *size, bool is_purpose_est)
{
struct intel_guc_state_capture *gc = guc->capture;
+ struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
struct __guc_capture_ads_cache *cache = &gc->ads_cache[owner][type][classid];
int num_regs;
- if (!gc->reglists)
+ if (!gc->reglists) {
+ drm_warn(&i915->drm, "GuC-capture: No reglist on this device\n");
return -ENODEV;
+ }
if (cache->is_valid) {
*size = cache->size;
return cache->status;
}
+ if (!is_purpose_est && owner == GUC_CAPTURE_LIST_INDEX_PF &&
+ !guc_capture_get_one_list(gc->reglists, owner, type, classid)) {
+ if (type == GUC_CAPTURE_LIST_TYPE_GLOBAL)
+ drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist Global!\n");
+ else
+ drm_warn(&i915->drm, "Missing GuC-Err-Cap reglist %s(%u):%s(%u)!\n",
+ __stringify_type(type), type,
+ __stringify_engclass(classid), classid);
+ return -ENODATA;
+ }
+
num_regs = guc_cap_list_num_regs(gc, owner, type, classid);
+ /* intentional empty lists can exist depending on hw config */
if (!num_regs)
return -ENODATA;
- *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
- (num_regs * sizeof(struct guc_mmio_reg)));
+ if (size)
+ *size = PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) +
+ (num_regs * sizeof(struct guc_mmio_reg)));
return 0;
}
+int
+intel_guc_capture_getlistsize(struct intel_guc *guc, u32 owner, u32 type, u32 classid,
+ size_t *size)
+{
+ return guc_capture_getlistsize(guc, owner, type, classid, size, false);
+}
+
static void guc_capture_create_prealloc_nodes(struct intel_guc *guc);
int
@@ -606,7 +671,7 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
struct intel_gt *gt = guc_to_gt(guc);
struct intel_engine_cs *engine;
enum intel_engine_id id;
- int worst_min_size = 0, num_regs = 0;
+ int worst_min_size = 0;
size_t tmp = 0;
if (!guc->capture)
@@ -627,21 +692,19 @@ guc_capture_output_min_size_est(struct intel_guc *guc)
worst_min_size += sizeof(struct guc_state_capture_group_header_t) +
(3 * sizeof(struct guc_state_capture_header_t));
- if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp))
- num_regs += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_GLOBAL, 0, &tmp, true))
+ worst_min_size += tmp;
- if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
- engine->class, &tmp)) {
- num_regs += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS,
+ engine->class, &tmp, true)) {
+ worst_min_size += tmp;
}
- if (!intel_guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
- engine->class, &tmp)) {
- num_regs += tmp;
+ if (!guc_capture_getlistsize(guc, 0, GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE,
+ engine->class, &tmp, true)) {
+ worst_min_size += tmp;
}
}
- worst_min_size += (num_regs * sizeof(struct guc_mmio_reg));
-
return worst_min_size;
}
@@ -658,15 +721,23 @@ static void check_guc_capture_size(struct intel_guc *guc)
int spare_size = min_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER;
u32 buffer_size = intel_guc_log_section_size_capture(&guc->log);
+ /*
+ * NOTE: min_size is much smaller than the capture region allocation (DG2: <80K vs 1MB).
+ * Additionally, it is based on space needed to fit all engines getting reset at once
+ * within the same G2H handler task slot. This is very unlikely. However, if GuC really
+ * does run out of space for whatever reason, we will see a separate warning message
+ * when processing the G2H event capture-notification, search for:
+ * INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE.
+ */
if (min_size < 0)
drm_warn(&i915->drm, "Failed to calculate GuC error state capture buffer minimum size: %d!\n",
min_size);
else if (min_size > buffer_size)
- drm_warn(&i915->drm, "GuC error state capture buffer is too small: %d < %d\n",
+ drm_warn(&i915->drm, "GuC error state capture buffer maybe small: %d < %d\n",
buffer_size, min_size);
else if (spare_size > buffer_size)
- drm_notice(&i915->drm, "GuC error state capture buffer maybe too small: %d < %d (min = %d)\n",
- buffer_size, spare_size, min_size);
+ drm_dbg(&i915->drm, "GuC error state capture buffer lacks spare size: %d < %d (min = %d)\n",
+ buffer_size, spare_size, min_size);
}
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
index 25f09a420561..7269eb0bbedf 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c
@@ -71,12 +71,73 @@ static bool intel_eval_slpc_support(void *data)
return intel_guc_slpc_is_used(guc);
}
+static int guc_sched_disable_delay_ms_get(void *data, u64 *val)
+{
+ struct intel_guc *guc = data;
+
+ if (!intel_guc_submission_is_used(guc))
+ return -ENODEV;
+
+ *val = (u64)guc->submission_state.sched_disable_delay_ms;
+
+ return 0;
+}
+
+static int guc_sched_disable_delay_ms_set(void *data, u64 val)
+{
+ struct intel_guc *guc = data;
+
+ if (!intel_guc_submission_is_used(guc))
+ return -ENODEV;
+
+ /* clamp to a practical limit, 1 minute is reasonable for a longest delay */
+ guc->submission_state.sched_disable_delay_ms = min_t(u64, val, 60000);
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_delay_ms_fops,
+ guc_sched_disable_delay_ms_get,
+ guc_sched_disable_delay_ms_set, "%llu\n"); /* value is u64: unsigned format */
+
+static int guc_sched_disable_gucid_threshold_get(void *data, u64 *val)
+{
+ struct intel_guc *guc = data;
+
+ if (!intel_guc_submission_is_used(guc))
+ return -ENODEV;
+
+ *val = guc->submission_state.sched_disable_gucid_threshold;
+ return 0;
+}
+
+static int guc_sched_disable_gucid_threshold_set(void *data, u64 val)
+{
+ struct intel_guc *guc = data;
+
+ if (!intel_guc_submission_is_used(guc))
+ return -ENODEV;
+
+ if (val > intel_guc_sched_disable_gucid_threshold_max(guc))
+ guc->submission_state.sched_disable_gucid_threshold =
+ intel_guc_sched_disable_gucid_threshold_max(guc);
+ else
+ guc->submission_state.sched_disable_gucid_threshold = val;
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(guc_sched_disable_gucid_threshold_fops,
+ guc_sched_disable_gucid_threshold_get,
+ guc_sched_disable_gucid_threshold_set, "%llu\n"); /* value is u64: unsigned format */
+
void intel_guc_debugfs_register(struct intel_guc *guc, struct dentry *root)
{
static const struct intel_gt_debugfs_file files[] = {
{ "guc_info", &guc_info_fops, NULL },
{ "guc_registered_contexts", &guc_registered_contexts_fops, NULL },
{ "guc_slpc_info", &guc_slpc_info_fops, &intel_eval_slpc_support},
+ { "guc_sched_disable_delay_ms", &guc_sched_disable_delay_ms_fops, NULL },
+ { "guc_sched_disable_gucid_threshold", &guc_sched_disable_gucid_threshold_fops,
+ NULL },
};
if (!intel_guc_is_supported(guc))
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index a0372735cddb..5b86b2e286e0 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -10,12 +10,15 @@
*/
#include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
#include "intel_guc_fw.h"
#include "i915_drv.h"
-static void guc_prepare_xfer(struct intel_uncore *uncore)
+static void guc_prepare_xfer(struct intel_gt *gt)
{
+ struct intel_uncore *uncore = gt->uncore;
+
u32 shim_flags = GUC_ENABLE_READ_CACHE_LOGIC |
GUC_ENABLE_READ_CACHE_FOR_SRAM_DATA |
GUC_ENABLE_READ_CACHE_FOR_WOPCM_DATA |
@@ -35,8 +38,9 @@ static void guc_prepare_xfer(struct intel_uncore *uncore)
if (GRAPHICS_VER(uncore->i915) == 9) {
/* DOP Clock Gating Enable for GuC clocks */
- intel_uncore_rmw(uncore, GEN7_MISCCPCTL,
- 0, GEN8_DOP_CLOCK_GATE_GUC_ENABLE);
+ intel_gt_mcr_multicast_write(gt, GEN8_MISCCPCTL,
+ GEN8_DOP_CLOCK_GATE_GUC_ENABLE |
+ intel_gt_mcr_read_any(gt, GEN8_MISCCPCTL));
/* allows for 5us (in 10ns units) before GT can go to RC6 */
intel_uncore_write(uncore, GUC_ARAT_C6DIS, 0x1FF);
@@ -168,7 +172,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
struct intel_uncore *uncore = gt->uncore;
int ret;
- guc_prepare_xfer(uncore);
+ guc_prepare_xfer(gt);
/*
* Note that GuC needs the CSS header plus uKernel code to be copied
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index 323b055e5db9..968ebd79dce7 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -290,6 +290,25 @@ struct guc_update_context_policy {
struct guc_klv_generic_dw_t klv[GUC_CONTEXT_POLICIES_KLV_NUM_IDS];
} __packed;
+/* Format of the UPDATE_SCHEDULING_POLICIES H2G data packet */
+struct guc_update_scheduling_policy_header {
+ u32 action;
+} __packed;
+
+/*
+ * Can't dynamically allocate memory for the scheduling policy KLV because
+ * it will be sent from within the reset path. Need a fixed size lump on
+ * the stack instead :(.
+ *
+ * Currently, there is only one KLV defined, which has 1 word of KL + 2 words of V.
+ */
+#define MAX_SCHEDULING_POLICY_SIZE 3
+
+struct guc_update_scheduling_policy {
+ struct guc_update_scheduling_policy_header header;
+ u32 data[MAX_SCHEDULING_POLICY_SIZE];
+} __packed;
+
#define GUC_POWER_UNSPECIFIED 0
#define GUC_POWER_D0 1
#define GUC_POWER_D1 2
@@ -298,6 +317,9 @@ struct guc_update_context_policy {
/* Scheduling policy settings */
+#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION 100 /* in ms */
+#define GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO 50 /* in percent */
+
#define GLOBAL_POLICY_MAX_NUM_WI 15
/* Don't reset an engine upon preemption failure */
@@ -305,6 +327,27 @@ struct guc_update_context_policy {
#define GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US 500000
+/*
+ * GuC converts the timeout to clock ticks internally. Different platforms have
+ * different GuC clocks. Thus, the maximum value before overflow is platform
+ * dependent. Current worst case scenario is about 110s. So, the spec says to
+ * limit to 100s to be safe.
+ */
+#define GUC_POLICY_MAX_EXEC_QUANTUM_US (100 * 1000 * 1000UL)
+#define GUC_POLICY_MAX_PREEMPT_TIMEOUT_US (100 * 1000 * 1000UL)
+
+static inline u32 guc_policy_max_exec_quantum_ms(void)
+{
+ BUILD_BUG_ON(GUC_POLICY_MAX_EXEC_QUANTUM_US >= UINT_MAX);
+ return GUC_POLICY_MAX_EXEC_QUANTUM_US / 1000;
+}
+
+static inline u32 guc_policy_max_preempt_timeout_ms(void)
+{
+ BUILD_BUG_ON(GUC_POLICY_MAX_PREEMPT_TIMEOUT_US >= UINT_MAX);
+ return GUC_POLICY_MAX_PREEMPT_TIMEOUT_US / 1000;
+}
+
struct guc_policies {
u32 submission_queue_depth[GUC_MAX_ENGINE_CLASSES];
/* In micro seconds. How much time to allow before DPC processing is
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 55d3ef93e86f..68331c538b0a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -16,15 +16,15 @@
#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_16M
-#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M
+#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
#elif defined(CONFIG_DRM_I915_DEBUG_GEM)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_1M
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_2M
-#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_4M
+#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
#else
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_8K
#define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZE SZ_64K
-#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_2M
+#define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE SZ_1M
#endif
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index fdd895f73f9f..63464933cbce 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -137,6 +137,17 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value)
return ret > 0 ? -EPROTO : ret;
}
+static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id)
+{
+ u32 request[] = {
+ GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST,
+ SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1),
+ id,
+ };
+
+ return intel_guc_send(guc, request, ARRAY_SIZE(request));
+}
+
static bool slpc_is_running(struct intel_guc_slpc *slpc)
{
return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING;
@@ -190,6 +201,15 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value)
return ret;
}
+static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id)
+{
+ struct intel_guc *guc = slpc_to_guc(slpc);
+
+ GEM_BUG_ON(id >= SLPC_MAX_PARAM);
+
+ return guc_action_slpc_unset_param(guc, id);
+}
+
static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq)
{
struct drm_i915_private *i915 = slpc_to_i915(slpc);
@@ -263,6 +283,7 @@ int intel_guc_slpc_init(struct intel_guc_slpc *slpc)
slpc->max_freq_softlimit = 0;
slpc->min_freq_softlimit = 0;
+ slpc->min_is_rpmax = false;
slpc->boost_freq = 0;
atomic_set(&slpc->num_waiters, 0);
@@ -588,6 +609,39 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return 0;
}
+/*
+ * Report whether the min frequency read back through
+ * intel_guc_slpc_get_min_freq() equals SLPC_MAX_FREQ_MHZ.
+ * A failed query is logged and treated as "not at RPmax".
+ */
+static bool is_slpc_min_freq_rpmax(struct intel_guc_slpc *slpc)
+{
+	int min_freq;
+	int err;
+
+	err = intel_guc_slpc_get_min_freq(slpc, &min_freq);
+	if (err) {
+		drm_err(&slpc_to_i915(slpc)->drm,
+			"Failed to get min freq: (%d)\n",
+			err);
+		return false;
+	}
+
+	return min_freq == SLPC_MAX_FREQ_MHZ;
+}
+
+/*
+ * For server parts, SLPC min will be at RPMax.
+ * Use min softlimit to clamp it to RP0 instead.
+ */
+static void update_server_min_softlimit(struct intel_guc_slpc *slpc)
+{
+	struct intel_gt *gt;
+
+	/* A min softlimit is already set: nothing to do, and no query is made */
+	if (slpc->min_freq_softlimit)
+		return;
+
+	if (!is_slpc_min_freq_rpmax(slpc))
+		return;
+
+	slpc->min_is_rpmax = true;
+	slpc->min_freq_softlimit = slpc->rp0_freq;
+
+	/* keep the GT default min frequency in sync with the new softlimit */
+	gt = slpc_to_gt(slpc);
+	gt->defaults.min_freq = slpc->min_freq_softlimit;
+}
+
static int slpc_use_fused_rp0(struct intel_guc_slpc *slpc)
{
/* Force SLPC to used platform rp0 */
@@ -610,6 +664,52 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc)
slpc->boost_freq = slpc->rp0_freq;
}
+/**
+ * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
+ * @slpc: pointer to intel_guc_slpc.
+ * @mode: new value of the mode.
+ *
+ * This function will override the GUCRC mode. Values of @mode at or above
+ * SLPC_GUCRC_MODE_MAX are rejected with -EINVAL before a runtime PM wakeref
+ * is taken.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
+{
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+	intel_wakeref_t wakeref;
+	/*
+	 * Initialised as in intel_guc_slpc_unset_gucrc_mode(): the only
+	 * assignment is inside the with_intel_runtime_pm() body.
+	 */
+	int ret = 0;
+
+	if (mode >= SLPC_GUCRC_MODE_MAX)
+		return -EINVAL;
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+		ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
+		if (ret)
+			drm_err(&i915->drm,
+				"Override gucrc mode %d failed %d\n",
+				mode, ret);
+	}
+
+	return ret;
+}
+
+/**
+ * intel_guc_slpc_unset_gucrc_mode() - unset the GUCRC mode override
+ * @slpc: pointer to intel_guc_slpc.
+ *
+ * Unsets SLPC_PARAM_PWRGATE_RC_MODE while holding a runtime PM wakeref.
+ *
+ * Return: 0 on success, non-zero error code on failure.
+ */
+int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
+{
+	int ret = 0;
+	intel_wakeref_t wakeref;
+	struct drm_i915_private *i915 = slpc_to_i915(slpc);
+
+	with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+		ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);
+		if (!ret)
+			continue;
+
+		drm_err(&i915->drm,
+			"Unsetting gucrc mode failed %d\n",
+			ret);
+	}
+
+	return ret;
+}
+
/*
* intel_guc_slpc_enable() - Start SLPC
* @slpc: pointer to intel_guc_slpc.
@@ -647,6 +747,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
slpc_get_rp_values(slpc);
+ /* Handle the case where min=max=RPmax */
+ update_server_min_softlimit(slpc);
+
/* Set SLPC max limit to RP0 */
ret = slpc_use_fused_rp0(slpc);
if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
index 82a98f78f96c..17ed515f6a85 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
@@ -9,6 +9,8 @@
#include "intel_guc_submission.h"
#include "intel_guc_slpc_types.h"
+#define SLPC_MAX_FREQ_MHZ 4250
+
struct intel_gt;
struct drm_printer;
@@ -42,5 +44,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val);
void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
+int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
+int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
index 73d208123528..a6ef53b04e04 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc_types.h
@@ -19,6 +19,9 @@ struct intel_guc_slpc {
bool supported;
bool selected;
+ /* Indicates this is a server part */
+ bool min_is_rpmax;
+
/* platform frequency limits */
u32 min_freq;
u32 rp0_freq;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 1db59eeb34db..4ccb29f9ac55 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -6,6 +6,7 @@
#include <linux/circ_buf.h>
#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_lmem.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h"
@@ -65,7 +66,13 @@
* corresponding G2H returns indicating the scheduling disable operation has
* completed it is safe to unpin the context. While a disable is in flight it
* isn't safe to resubmit the context so a fence is used to stall all future
- * requests of that context until the G2H is returned.
+ * requests of that context until the G2H is returned. Because this interaction
+ * with the GuC takes a non-zero amount of time we delay the disabling of
+ * scheduling after the pin count goes to zero by a configurable period of time
+ * (see SCHED_DISABLE_DELAY_MS). The thought is this gives the user a window of
+ * time to resubmit something on the context before doing this costly operation.
+ * This delay is only done if the context isn't closed and the guc_id usage is
+ * less than a threshold (see NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD).
*
* Context deregistration:
* Before a context can be destroyed or if we steal its guc_id we must
@@ -163,7 +170,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
#define SCHED_STATE_PENDING_ENABLE BIT(5)
#define SCHED_STATE_REGISTERED BIT(6)
#define SCHED_STATE_POLICY_REQUIRED BIT(7)
-#define SCHED_STATE_BLOCKED_SHIFT 8
+#define SCHED_STATE_CLOSED BIT(8)
+#define SCHED_STATE_BLOCKED_SHIFT 9
#define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT)
#define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT)
@@ -173,12 +181,20 @@ static inline void init_sched_state(struct intel_context *ce)
ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK;
}
+/*
+ * Bits that are allowed to be set when sched_state is checked for being in
+ * its initial state:
+ *  - the blocked counter field;
+ *  - SCHED_STATE_REGISTERED, which kernel contexts can have after suspend;
+ *  - SCHED_STATE_CLOSED, because a context close can race with the
+ *    submission path and set it immediately before we try to register.
+ */
+#define SCHED_STATE_VALID_INIT \
+	(SCHED_STATE_REGISTERED | \
+	 SCHED_STATE_CLOSED | \
+	 SCHED_STATE_BLOCKED_MASK)
+
__maybe_unused
static bool sched_state_is_init(struct intel_context *ce)
{
- /* Kernel contexts can have SCHED_STATE_REGISTERED after suspend. */
- return !(ce->guc_state.sched_state &
- ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED));
+ return !(ce->guc_state.sched_state & ~SCHED_STATE_VALID_INIT);
}
static inline bool
@@ -319,6 +335,17 @@ static inline void clr_context_policy_required(struct intel_context *ce)
ce->guc_state.sched_state &= ~SCHED_STATE_POLICY_REQUIRED;
}
+/* Test whether SCHED_STATE_CLOSED is set in the context's sched_state. */
+static inline bool context_close_done(struct intel_context *ce)
+{
+	return !!(ce->guc_state.sched_state & SCHED_STATE_CLOSED);
+}
+
+/* Set SCHED_STATE_CLOSED; the caller must hold ce->guc_state.lock. */
+static inline void set_context_close_done(struct intel_context *ce)
+{
+	lockdep_assert_held(&ce->guc_state.lock);
+	ce->guc_state.sched_state |= SCHED_STATE_CLOSED;
+}
+
static inline u32 context_blocked(struct intel_context *ce)
{
return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >>
@@ -343,25 +370,6 @@ static inline void decr_context_blocked(struct intel_context *ce)
ce->guc_state.sched_state -= SCHED_STATE_BLOCKED;
}
-static inline bool context_has_committed_requests(struct intel_context *ce)
-{
- return !!ce->guc_state.number_committed_requests;
-}
-
-static inline void incr_context_committed_requests(struct intel_context *ce)
-{
- lockdep_assert_held(&ce->guc_state.lock);
- ++ce->guc_state.number_committed_requests;
- GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
-}
-
-static inline void decr_context_committed_requests(struct intel_context *ce)
-{
- lockdep_assert_held(&ce->guc_state.lock);
- --ce->guc_state.number_committed_requests;
- GEM_BUG_ON(ce->guc_state.number_committed_requests < 0);
-}
-
static struct intel_context *
request_to_scheduling_context(struct i915_request *rq)
{
@@ -1067,6 +1075,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
xa_unlock(&guc->context_lookup);
+ if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
+ (cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))) {
+ /* successful cancel so jump straight to close it */
+ intel_context_sched_disable_unpin(ce);
+ }
+
spin_lock(&ce->guc_state.lock);
/*
@@ -1994,6 +2008,9 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
if (unlikely(ret < 0))
return ret;
+ if (!intel_context_is_parent(ce))
+ ++guc->submission_state.guc_ids_in_use;
+
ce->guc_id.id = ret;
return 0;
}
@@ -2003,14 +2020,16 @@ static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce)
GEM_BUG_ON(intel_context_is_child(ce));
if (!context_guc_id_invalid(ce)) {
- if (intel_context_is_parent(ce))
+ if (intel_context_is_parent(ce)) {
bitmap_release_region(guc->submission_state.guc_ids_bitmap,
ce->guc_id.id,
order_base_2(ce->parallel.number_children
+ 1));
- else
+ } else {
+ --guc->submission_state.guc_ids_in_use;
ida_simple_remove(&guc->submission_state.guc_ids,
ce->guc_id.id);
+ }
clr_ctx_id_mapping(guc, ce->guc_id.id);
set_context_guc_id_invalid(ce);
}
@@ -2429,6 +2448,10 @@ static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
int ret;
/* NB: For both of these, zero means disabled. */
+ GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
+ execution_quantum));
+ GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
+ preemption_timeout));
execution_quantum = engine->props.timeslice_duration_ms * 1000;
preemption_timeout = engine->props.preempt_timeout_ms * 1000;
@@ -2462,6 +2485,10 @@ static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
/* NB: For both of these, zero means disabled. */
+ GEM_BUG_ON(overflows_type(engine->props.timeslice_duration_ms * 1000,
+ desc->execution_quantum));
+ GEM_BUG_ON(overflows_type(engine->props.preempt_timeout_ms * 1000,
+ desc->preemption_timeout));
desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
}
@@ -2998,41 +3025,104 @@ guc_context_revoke(struct intel_context *ce, struct i915_request *rq,
}
}
-static void guc_context_sched_disable(struct intel_context *ce)
+static void do_sched_disable(struct intel_guc *guc, struct intel_context *ce,
+ unsigned long flags)
+ __releases(ce->guc_state.lock)
{
- struct intel_guc *guc = ce_to_guc(ce);
- unsigned long flags;
struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm;
intel_wakeref_t wakeref;
u16 guc_id;
+ lockdep_assert_held(&ce->guc_state.lock);
+ guc_id = prep_context_pending_disable(ce);
+
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
+ with_intel_runtime_pm(runtime_pm, wakeref)
+ __guc_context_sched_disable(guc, ce, guc_id);
+}
+
+static bool bypass_sched_disable(struct intel_guc *guc,
+ struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
GEM_BUG_ON(intel_context_is_child(ce));
+ if (submission_disabled(guc) || context_guc_id_invalid(ce) ||
+ !ctx_id_mapped(guc, ce->guc_id.id)) {
+ clr_context_enabled(ce);
+ return true;
+ }
+
+ return !context_enabled(ce);
+}
+
+static void __delay_sched_disable(struct work_struct *wrk)
+{
+ struct intel_context *ce =
+ container_of(wrk, typeof(*ce), guc_state.sched_disable_delay_work.work);
+ struct intel_guc *guc = ce_to_guc(ce);
+ unsigned long flags;
+
spin_lock_irqsave(&ce->guc_state.lock, flags);
+ if (bypass_sched_disable(guc, ce)) {
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ intel_context_sched_disable_unpin(ce);
+ } else {
+ do_sched_disable(guc, ce, flags);
+ }
+}
+
+static bool guc_id_pressure(struct intel_guc *guc, struct intel_context *ce)
+{
/*
- * We have to check if the context has been disabled by another thread,
- * check if submssion has been disabled to seal a race with reset and
- * finally check if any more requests have been committed to the
- * context ensursing that a request doesn't slip through the
- * 'context_pending_disable' fence.
+ * parent contexts are perma-pinned, if we are unpinning do schedule
+ * disable immediately.
*/
- if (unlikely(!context_enabled(ce) || submission_disabled(guc) ||
- context_has_committed_requests(ce))) {
- clr_context_enabled(ce);
+ if (intel_context_is_parent(ce))
+ return true;
+
+ /*
+ * If we are beyond the threshold for avail guc_ids, do schedule disable immediately.
+ */
+ return guc->submission_state.guc_ids_in_use >
+ guc->submission_state.sched_disable_gucid_threshold;
+}
+
+static void guc_context_sched_disable(struct intel_context *ce)
+{
+ struct intel_guc *guc = ce_to_guc(ce);
+ u64 delay = guc->submission_state.sched_disable_delay_ms;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+
+ if (bypass_sched_disable(guc, ce)) {
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- goto unpin;
+ intel_context_sched_disable_unpin(ce);
+ } else if (!intel_context_is_closed(ce) && !guc_id_pressure(guc, ce) &&
+ delay) {
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+ mod_delayed_work(system_unbound_wq,
+ &ce->guc_state.sched_disable_delay_work,
+ msecs_to_jiffies(delay));
+ } else {
+ do_sched_disable(guc, ce, flags);
}
- guc_id = prep_context_pending_disable(ce);
+}
- spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+static void guc_context_close(struct intel_context *ce)
+{
+ unsigned long flags;
- with_intel_runtime_pm(runtime_pm, wakeref)
- __guc_context_sched_disable(guc, ce, guc_id);
+ if (test_bit(CONTEXT_GUC_INIT, &ce->flags) &&
+ cancel_delayed_work(&ce->guc_state.sched_disable_delay_work))
+ __delay_sched_disable(&ce->guc_state.sched_disable_delay_work.work);
- return;
-unpin:
- intel_context_sched_disable_unpin(ce);
+ spin_lock_irqsave(&ce->guc_state.lock, flags);
+ set_context_close_done(ce);
+ spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
static inline void guc_lrc_desc_unpin(struct intel_context *ce)
@@ -3071,7 +3161,6 @@ static void __guc_context_destroy(struct intel_context *ce)
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] ||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] ||
ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]);
- GEM_BUG_ON(ce->guc_state.number_committed_requests);
lrc_fini(ce);
intel_context_fini(ce);
@@ -3340,8 +3429,6 @@ static void remove_from_context(struct i915_request *rq)
guc_prio_fini(rq, ce);
- decr_context_committed_requests(ce);
-
spin_unlock_irq(&ce->guc_state.lock);
atomic_dec(&ce->guc_id.ref);
@@ -3351,6 +3438,8 @@ static void remove_from_context(struct i915_request *rq)
static const struct intel_context_ops guc_context_ops = {
.alloc = guc_context_alloc,
+ .close = guc_context_close,
+
.pre_pin = guc_context_pre_pin,
.pin = guc_context_pin,
.unpin = guc_context_unpin,
@@ -3433,6 +3522,10 @@ static void guc_context_init(struct intel_context *ce)
rcu_read_unlock();
ce->guc_state.prio = map_i915_prio_to_guc_prio(prio);
+
+ INIT_DELAYED_WORK(&ce->guc_state.sched_disable_delay_work,
+ __delay_sched_disable);
+
set_bit(CONTEXT_GUC_INIT, &ce->flags);
}
@@ -3471,6 +3564,26 @@ static int guc_request_alloc(struct i915_request *rq)
guc_context_init(ce);
/*
+ * If the context gets closed while the execbuf is ongoing, the context
+ * close code will race with the below code to cancel the delayed work.
+ * If the context close wins the race and cancels the work, it will
+ * immediately call the sched disable (see guc_context_close), so there
+ * is a chance we can get past this check while the sched_disable code
+ * is being executed. To make sure that code completes before we check
+ * the status further down, we wait for the close process to complete.
+ * Else, this code path could send a request down thinking that the
+ * context is still in a schedule-enable mode while the GuC ends up
+ * dropping the request completely because the disable did go from the
+ * context_close path right to GuC just prior. In the event the CT is
+ * full, we could potentially need to wait up to 1.5 seconds.
+ */
+ if (cancel_delayed_work_sync(&ce->guc_state.sched_disable_delay_work))
+ intel_context_sched_disable_unpin(ce);
+ else if (intel_context_is_closed(ce))
+ if (wait_for(context_close_done(ce), 1500))
+ drm_warn(&guc_to_gt(guc)->i915->drm,
+ "timed out waiting on context sched close before realloc\n");
+ /*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
* guc_id and creating horrible race conditions. This is especially bad
@@ -3524,7 +3637,6 @@ out:
list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences);
}
- incr_context_committed_requests(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
return 0;
@@ -3600,6 +3712,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
static const struct intel_context_ops virtual_guc_context_ops = {
.alloc = guc_virtual_context_alloc,
+ .close = guc_context_close,
+
.pre_pin = guc_virtual_context_pre_pin,
.pin = guc_virtual_context_pin,
.unpin = guc_virtual_context_unpin,
@@ -3689,6 +3803,8 @@ static void guc_child_context_destroy(struct kref *kref)
static const struct intel_context_ops virtual_parent_context_ops = {
.alloc = guc_virtual_context_alloc,
+ .close = guc_context_close,
+
.pre_pin = guc_context_pre_pin,
.pin = guc_parent_context_pin,
.unpin = guc_parent_context_unpin,
@@ -4093,7 +4209,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine)
engine->emit_bb_start = gen8_emit_bb_start;
if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50))
- engine->emit_bb_start = gen125_emit_bb_start;
+ engine->emit_bb_start = xehp_emit_bb_start;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
@@ -4177,6 +4293,98 @@ int intel_guc_submission_setup(struct intel_engine_cs *engine)
return 0;
}
+/* Staging area for a KLV based "update scheduling policies" H2G message. */
+struct scheduling_policy {
+	/* internal data */
+	u32 max_words;	/* capacity of h2g.data, in u32 words */
+	u32 num_words;	/* u32 words of h2g.data filled so far */
+	u32 count;	/* number of KLVs added so far */
+	/* API data */
+	struct guc_update_scheduling_policy h2g;
+};
+
+/*
+ * Length, in u32 words, of the message built so far: from the start of
+ * policy->h2g up to the end of the KLV words already written to h2g.data.
+ */
+static u32 __guc_scheduling_policy_action_size(struct scheduling_policy *policy)
+{
+	u32 *msg = (void *)&policy->h2g;
+	u32 *tail = policy->h2g.data + policy->num_words;
+
+	return tail - msg;
+}
+
+/*
+ * Reset @policy to an empty KLV message: no words used, no KLVs counted,
+ * capacity taken from the size of h2g.data, and the action code filled in.
+ * Returns @policy.
+ */
+static struct scheduling_policy *__guc_scheduling_policy_start_klv(struct scheduling_policy *policy)
+{
+	policy->count = 0;
+	policy->num_words = 0;
+	policy->max_words = ARRAY_SIZE(policy->h2g.data);
+	policy->h2g.header.action = INTEL_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+
+	return policy;
+}
+
+/*
+ * Append one KLV to @policy: a header word carrying key @action and length
+ * @len, followed by @len payload words copied from @data.
+ * The KLV must fit in the remaining space of h2g.data.
+ */
+static void __guc_scheduling_policy_add_klv(struct scheduling_policy *policy,
+					    u32 action, u32 *data, u32 len)
+{
+	u32 *klv = policy->h2g.data + policy->num_words;
+
+	GEM_BUG_ON((policy->num_words + 1 + len) > policy->max_words);
+
+	/* header word first ... */
+	klv[0] = FIELD_PREP(GUC_KLV_0_KEY, action) |
+		 FIELD_PREP(GUC_KLV_0_LEN, len);
+	/* ... then the payload right behind it */
+	memcpy(klv + 1, data, sizeof(u32) * len);
+
+	policy->num_words += 1 + len;
+	policy->count++;
+}
+
+/*
+ * Send the KLV message staged in @policy to the GuC.
+ *
+ * A non-negative reply from intel_guc_send() is compared against the number
+ * of KLVs that were added. A mismatch is logged; a reply larger than what was
+ * sent is additionally reported as -EPROTO.
+ *
+ * Return: 0 on success (including a short count), negative error code
+ * otherwise.
+ */
+static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
+						struct scheduling_policy *policy)
+{
+	int ret;
+
+	ret = intel_guc_send(guc, (u32 *)&policy->h2g,
+			     __guc_scheduling_policy_action_size(policy));
+	if (ret < 0)
+		return ret;
+
+	if (ret != policy->count) {
+		/* count is a u32; terminate the message so it is not merged with the next one */
+		drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %u KLVs!\n",
+			 ret, policy->count);
+		if (ret > policy->count)
+			return -EPROTO;
+	}
+
+	return 0;
+}
+
+/*
+ * Program the global render/compute yield policy into the GuC.
+ * Nothing is sent (and 0 is returned) for firmware older than 70.3.0.
+ *
+ * Return: 0 on success, negative error code if the update failed.
+ */
+static int guc_init_global_schedule_policy(struct intel_guc *guc)
+{
+	u32 yield[] = {
+		GLOBAL_SCHEDULE_POLICY_RC_YIELD_DURATION,
+		GLOBAL_SCHEDULE_POLICY_RC_YIELD_RATIO,
+	};
+	struct intel_gt *gt = guc_to_gt(guc);
+	struct scheduling_policy policy;
+	intel_wakeref_t wakeref;
+	int ret = 0;
+
+	if (GET_UC_VER(guc) < MAKE_UC_VER(70, 3, 0))
+		return 0;
+
+	__guc_scheduling_policy_start_klv(&policy);
+
+	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref) {
+		__guc_scheduling_policy_add_klv(&policy,
+						GUC_SCHEDULING_POLICIES_KLV_ID_RENDER_COMPUTE_YIELD,
+						yield, ARRAY_SIZE(yield));
+
+		ret = __guc_action_set_scheduling_policies(guc, &policy);
+		if (!ret)
+			continue;
+
+		i915_probe_error(gt->i915,
+				 "Failed to configure global scheduling policies: %pe!\n",
+				 ERR_PTR(ret));
+	}
+
+	return ret;
+}
+
void intel_guc_submission_enable(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
@@ -4189,6 +4397,7 @@ void intel_guc_submission_enable(struct intel_guc *guc)
guc_init_lrc_mapping(guc);
guc_init_engine_stats(guc);
+ guc_init_global_schedule_policy(guc);
}
void intel_guc_submission_disable(struct intel_guc *guc)
@@ -4219,6 +4428,26 @@ static bool __guc_submission_selected(struct intel_guc *guc)
return i915->params.enable_guc & ENABLE_GUC_SUBMISSION;
}
+/*
+ * Returns submission_state.num_guc_ids minus NUMBER_MULTI_LRC_GUC_ID(guc).
+ * The default sched-disable guc_id threshold is derived from this value.
+ */
+int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc)
+{
+ return guc->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc);
+}
+
+/*
+ * This default value of 33 millisecs (+1 millisec round up) ensures 30fps or higher
+ * workloads are able to enjoy the latency reduction when delaying the schedule-disable
+ * operation. This matches the 30fps game-render + encode (real world) workload this
+ * knob was tested against.
+ */
+#define SCHED_DISABLE_DELAY_MS 34
+
+/*
+ * A threshold of 75% is a reasonable starting point considering that real world apps
+ * generally don't get anywhere near this.
+ *
+ * The macro expands its own argument, so it does not depend on the caller
+ * having a variable that happens to be named "guc".
+ */
+#define NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(__guc) \
+	(((intel_guc_sched_disable_gucid_threshold_max(__guc)) * 3) / 4)
+
void intel_guc_submission_init_early(struct intel_guc *guc)
{
xa_init_flags(&guc->context_lookup, XA_FLAGS_LOCK_IRQ);
@@ -4235,7 +4464,10 @@ void intel_guc_submission_init_early(struct intel_guc *guc)
spin_lock_init(&guc->timestamp.lock);
INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
+ guc->submission_state.sched_disable_delay_ms = SCHED_DISABLE_DELAY_MS;
guc->submission_state.num_guc_ids = GUC_MAX_CONTEXT_ID;
+ guc->submission_state.sched_disable_gucid_threshold =
+ NUM_SCHED_DISABLE_GUCIDS_DEFAULT_THRESHOLD(guc);
guc->submission_supported = __guc_submission_supported(guc);
guc->submission_selected = __guc_submission_selected(guc);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index 3bb8838e325a..fbc8bae14f76 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -10,6 +10,9 @@
#include "intel_huc.h"
#include "i915_drv.h"
+#include <linux/device/bus.h>
+#include <linux/mei_aux.h>
+
/**
* DOC: HuC
*
@@ -42,6 +45,172 @@
* HuC-specific commands.
*/
+/*
+ * MEI-GSC load is an async process. The probing of the exposed aux device
+ * (see intel_gsc.c) usually happens a few seconds after i915 probe, depending
+ * on when the kernel schedules it. Unless something goes terribly wrong, we're
+ * guaranteed for this to happen during boot, so the big timeout is a safety net
+ * that we never expect to need.
+ * MEI-PXP + HuC load usually takes ~300ms, but if the GSC needs to be resumed
+ * and/or reset, this can take longer. Note that the kernel might schedule
+ * other work between the i915 init/resume and the MEI one, which can add to
+ * the delay.
+ */
+#define GSC_INIT_TIMEOUT_MS 10000
+#define PXP_INIT_TIMEOUT_MS 5000
+
+/*
+ * Notify callback for the delayed-load fence. Both arguments are ignored and
+ * NOTIFY_DONE is always returned.
+ */
+static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
+ enum i915_sw_fence_notify state)
+{
+ return NOTIFY_DONE;
+}
+
+/* Complete the delayed-load fence unless it is already done. */
+static void __delayed_huc_load_complete(struct intel_huc *huc)
+{
+	struct i915_sw_fence *fence = &huc->delayed_load.fence;
+
+	if (i915_sw_fence_done(fence))
+		return;
+
+	i915_sw_fence_complete(fence);
+}
+
+/*
+ * Cancel the delayed-load timer, then complete the delayed-load fence through
+ * __delayed_huc_load_complete().
+ */
+static void delayed_huc_load_complete(struct intel_huc *huc)
+{
+ hrtimer_cancel(&huc->delayed_load.timer);
+ __delayed_huc_load_complete(huc);
+}
+
+/*
+ * Record INTEL_HUC_DELAYED_LOAD_ERROR as the delayed-load status and complete
+ * the delayed-load fence. The timer is not touched here.
+ */
+static void __gsc_init_error(struct intel_huc *huc)
+{
+ huc->delayed_load.status = INTEL_HUC_DELAYED_LOAD_ERROR;
+ __delayed_huc_load_complete(huc);
+}
+
+/*
+ * Cancel the delayed-load timer and then flag the delayed load as failed via
+ * __gsc_init_error().
+ */
+static void gsc_init_error(struct intel_huc *huc)
+{
+ hrtimer_cancel(&huc->delayed_load.timer);
+ __gsc_init_error(huc);
+}
+
+/*
+ * Move the delayed load from the "waiting on GSC" stage to the "waiting on
+ * PXP" stage: stop the running timer, update the status and, if the fence is
+ * still pending, re-arm the timer with the PXP timeout.
+ */
+static void gsc_init_done(struct intel_huc *huc)
+{
+	hrtimer_cancel(&huc->delayed_load.timer);
+
+	/* MEI-GSC init is done, now we wait for MEI-PXP to bind */
+	huc->delayed_load.status = INTEL_HUC_WAITING_ON_PXP;
+
+	/* nobody is waiting any more, no need for a new timeout */
+	if (i915_sw_fence_done(&huc->delayed_load.fence))
+		return;
+
+	hrtimer_start(&huc->delayed_load.timer,
+		      ms_to_ktime(PXP_INIT_TIMEOUT_MS),
+		      HRTIMER_MODE_REL);
+}
+
+/*
+ * Delayed-load timeout handler. If the HuC is still not authenticated when
+ * the timer fires, log which stage timed out and flag the delayed load as
+ * failed. The timer is never restarted from here.
+ */
+static enum hrtimer_restart huc_delayed_load_timer_callback(struct hrtimer *hrtimer)
+{
+	struct intel_huc *huc = container_of(hrtimer, struct intel_huc, delayed_load.timer);
+
+	if (intel_huc_is_authenticated(huc))
+		return HRTIMER_NORESTART;
+
+	switch (huc->delayed_load.status) {
+	case INTEL_HUC_WAITING_ON_GSC:
+		drm_notice(&huc_to_gt(huc)->i915->drm,
+			   "timed out waiting for MEI GSC init to load HuC\n");
+		break;
+	case INTEL_HUC_WAITING_ON_PXP:
+		drm_notice(&huc_to_gt(huc)->i915->drm,
+			   "timed out waiting for MEI PXP init to load HuC\n");
+		break;
+	default:
+		MISSING_CASE(huc->delayed_load.status);
+		break;
+	}
+
+	__gsc_init_error(huc);
+
+	return HRTIMER_NORESTART;
+}
+
+/*
+ * Arm the delayed-load fence and start the timeout matching the stage the
+ * delayed load is currently in. Any status other than the two waiting stages
+ * is handled as an error. Must not be called once the HuC is authenticated.
+ */
+static void huc_delayed_load_start(struct intel_huc *huc)
+{
+	u64 timeout_ms;
+
+	GEM_BUG_ON(intel_huc_is_authenticated(huc));
+
+	/*
+	 * On resume we don't have to wait for MEI-GSC to be re-probed, but we
+	 * do need to wait for MEI-PXP to reset & re-bind
+	 */
+	switch (huc->delayed_load.status) {
+	case INTEL_HUC_WAITING_ON_GSC:
+		timeout_ms = GSC_INIT_TIMEOUT_MS;
+		break;
+	case INTEL_HUC_WAITING_ON_PXP:
+		timeout_ms = PXP_INIT_TIMEOUT_MS;
+		break;
+	default:
+		gsc_init_error(huc);
+		return;
+	}
+
+	/*
+	 * This fence is always complete unless we're waiting for the
+	 * GSC device to come up to load the HuC. We arm the fence here
+	 * and complete it when we confirm that the HuC is loaded from
+	 * the PXP bind callback.
+	 */
+	GEM_BUG_ON(!i915_sw_fence_done(&huc->delayed_load.fence));
+	i915_sw_fence_fini(&huc->delayed_load.fence);
+	i915_sw_fence_reinit(&huc->delayed_load.fence);
+	i915_sw_fence_await(&huc->delayed_load.fence);
+	i915_sw_fence_commit(&huc->delayed_load.fence);
+
+	hrtimer_start(&huc->delayed_load.timer, ms_to_ktime(timeout_ms),
+		      HRTIMER_MODE_REL);
+}
+
+/*
+ * Bus notifier callback. Events for devices other than the aux device of GSC
+ * interface 0 are ignored. A bound driver moves the delayed load on to the
+ * PXP stage; a failed bind or an unbind flags the delayed load as failed.
+ * Always returns NOTIFY_DONE.
+ */
+static int gsc_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct intel_huc *huc = container_of(nb, struct intel_huc, delayed_load.nb);
+	struct intel_gsc_intf *intf = &huc_to_gt(huc)->gsc.intf[0];
+	struct device *dev = data;
+
+	if (!intf->adev)
+		return NOTIFY_DONE;
+
+	if (dev != &intf->adev->aux_dev.dev)
+		return NOTIFY_DONE;
+
+	switch (action) {
+	case BUS_NOTIFY_BOUND_DRIVER: /* mei driver bound to aux device */
+		gsc_init_done(huc);
+		break;
+
+	case BUS_NOTIFY_DRIVER_NOT_BOUND: /* mei driver fails to be bound */
+	case BUS_NOTIFY_UNBIND_DRIVER: /* mei driver about to be unbound */
+		drm_info(&huc_to_gt(huc)->i915->drm,
+			 "mei driver not bound, disabling HuC load\n");
+		gsc_init_error(huc);
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+/*
+ * Register gsc_notifier() on @bus when the HuC is loaded by the GSC; does
+ * nothing otherwise. If registration fails the callback pointer is cleared
+ * again and the delayed load is flagged as failed.
+ */
+void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
+{
+	struct notifier_block *nb = &huc->delayed_load.nb;
+
+	if (!intel_huc_is_loaded_by_gsc(huc))
+		return;
+
+	nb->notifier_call = gsc_notifier;
+	if (!bus_register_notifier(bus, nb))
+		return;
+
+	drm_err(&huc_to_gt(huc)->i915->drm,
+		"failed to register GSC notifier\n");
+	nb->notifier_call = NULL;
+	gsc_init_error(huc);
+}
+
+/*
+ * Undo intel_huc_register_gsc_notifier(). A NULL callback pointer means no
+ * notifier is registered and nothing is done. Otherwise the delayed load is
+ * completed before the notifier is removed from @bus.
+ */
+void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus)
+{
+	struct notifier_block *nb = &huc->delayed_load.nb;
+
+	if (!nb->notifier_call)
+		return;
+
+	delayed_huc_load_complete(huc);
+
+	bus_unregister_notifier(bus, nb);
+	nb->notifier_call = NULL;
+}
+
void intel_huc_init_early(struct intel_huc *huc)
{
struct drm_i915_private *i915 = huc_to_gt(huc)->i915;
@@ -57,6 +226,17 @@ void intel_huc_init_early(struct intel_huc *huc)
huc->status.mask = HUC_FW_VERIFIED;
huc->status.value = HUC_FW_VERIFIED;
}
+
+ /*
+ * Initialize fence to be complete as this is expected to be complete
+ * unless there is a delayed HuC reload in progress.
+ */
+ i915_sw_fence_init(&huc->delayed_load.fence,
+ sw_fence_dummy_notify);
+ i915_sw_fence_commit(&huc->delayed_load.fence);
+
+ hrtimer_init(&huc->delayed_load.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ huc->delayed_load.timer.function = huc_delayed_load_timer_callback;
}
#define HUC_LOAD_MODE_STRING(x) (x ? "GSC" : "legacy")
@@ -113,6 +293,7 @@ int intel_huc_init(struct intel_huc *huc)
return 0;
out:
+ intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
drm_info(&i915->drm, "HuC init failed with %d\n", err);
return err;
}
@@ -122,9 +303,50 @@ void intel_huc_fini(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
+ delayed_huc_load_complete(huc);
+
+ i915_sw_fence_fini(&huc->delayed_load.fence);
intel_uc_fw_fini(&huc->fw);
}
+/*
+ * Suspend hook for the HuC: when the firmware is loadable, stop waiting on a
+ * delayed load that may still be in progress.
+ */
+void intel_huc_suspend(struct intel_huc *huc)
+{
+	/*
+	 * in the unlikely case that we're suspending before the GSC has
+	 * completed its loading sequence, just stop waiting. We'll restart
+	 * on resume.
+	 */
+	if (intel_uc_fw_is_loadable(&huc->fw))
+		delayed_huc_load_complete(huc);
+}
+
+/*
+ * Wait for the HuC status register to show the expected value, then update
+ * the firmware status to match the outcome. The delayed load is marked as
+ * complete whether or not the wait succeeded.
+ *
+ * Return: 0 if the HuC is authenticated, the error from the register wait
+ * otherwise.
+ */
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc)
+{
+	struct intel_gt *gt = huc_to_gt(huc);
+	int err;
+
+	err = __intel_wait_for_register(gt->uncore,
+					huc->status.reg,
+					huc->status.mask,
+					huc->status.value,
+					2, 50, NULL);
+
+	/* mark the load process as complete even if the wait failed */
+	delayed_huc_load_complete(huc);
+
+	if (!err) {
+		intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
+		drm_info(&gt->i915->drm, "HuC authenticated\n");
+		return 0;
+	}
+
+	drm_err(&gt->i915->drm, "HuC: Firmware not verified %d\n", err);
+	intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
+
+	return err;
+}
+
/**
* intel_huc_auth() - Authenticate HuC uCode
* @huc: intel_huc structure
@@ -161,27 +383,18 @@ int intel_huc_auth(struct intel_huc *huc)
}
/* Check authentication status, it should be done by now */
- ret = __intel_wait_for_register(gt->uncore,
- huc->status.reg,
- huc->status.mask,
- huc->status.value,
- 2, 50, NULL);
- if (ret) {
- DRM_ERROR("HuC: Firmware not verified %d\n", ret);
+ ret = intel_huc_wait_for_auth_complete(huc);
+ if (ret)
goto fail;
- }
- intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_RUNNING);
- drm_info(&gt->i915->drm, "HuC authenticated\n");
return 0;
fail:
i915_probe_error(gt->i915, "HuC: Authentication failed %d\n", ret);
- intel_uc_fw_change_status(&huc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL);
return ret;
}
-static bool huc_is_authenticated(struct intel_huc *huc)
+bool intel_huc_is_authenticated(struct intel_huc *huc)
{
struct intel_gt *gt = huc_to_gt(huc);
intel_wakeref_t wakeref;
@@ -200,13 +413,8 @@ static bool huc_is_authenticated(struct intel_huc *huc)
* This function reads status register to verify if HuC
* firmware was successfully loaded.
*
- * Returns:
- * * -ENODEV if HuC is not present on this platform,
- * * -EOPNOTSUPP if HuC firmware is disabled,
- * * -ENOPKG if HuC firmware was not installed,
- * * -ENOEXEC if HuC firmware is invalid or mismatched,
- * * 0 if HuC firmware is not running,
- * * 1 if HuC firmware is authenticated and running.
+ * The return values match what is expected for the I915_PARAM_HUC_STATUS
+ * getparam.
*/
int intel_huc_check_status(struct intel_huc *huc)
{
@@ -219,11 +427,21 @@ int intel_huc_check_status(struct intel_huc *huc)
return -ENOPKG;
case INTEL_UC_FIRMWARE_ERROR:
return -ENOEXEC;
+ case INTEL_UC_FIRMWARE_INIT_FAIL:
+ return -ENOMEM;
+ case INTEL_UC_FIRMWARE_LOAD_FAIL:
+ return -EIO;
default:
break;
}
- return huc_is_authenticated(huc);
+ return intel_huc_is_authenticated(huc);
+}
+
+static bool huc_has_delayed_load(struct intel_huc *huc)
+{
+ return intel_huc_is_loaded_by_gsc(huc) &&
+ (huc->delayed_load.status != INTEL_HUC_DELAYED_LOAD_ERROR);
}
void intel_huc_update_auth_status(struct intel_huc *huc)
@@ -231,9 +449,11 @@ void intel_huc_update_auth_status(struct intel_huc *huc)
if (!intel_uc_fw_is_loadable(&huc->fw))
return;
- if (huc_is_authenticated(huc))
+ if (intel_huc_is_authenticated(huc))
intel_uc_fw_change_status(&huc->fw,
INTEL_UC_FIRMWARE_RUNNING);
+ else if (huc_has_delayed_load(huc))
+ huc_delayed_load_start(huc);
}
/**
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.h b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
index d7e25b6e879e..52db03620c60 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.h
@@ -7,9 +7,21 @@
#define _INTEL_HUC_H_
#include "i915_reg_defs.h"
+#include "i915_sw_fence.h"
#include "intel_uc_fw.h"
#include "intel_huc_fw.h"
+#include <linux/notifier.h>
+#include <linux/hrtimer.h>
+
+struct bus_type;
+
+/*
+ * State stored in intel_huc.delayed_load.status for a HuC that is loaded via
+ * the GSC. huc_has_delayed_load() in intel_huc.c treats
+ * INTEL_HUC_DELAYED_LOAD_ERROR as "no delayed load will happen".
+ * NOTE(review): the two WAITING_* values presumably name the component the
+ * load is still blocked on - confirm against intel_huc.c.
+ */
+enum intel_huc_delayed_load_status {
+ INTEL_HUC_WAITING_ON_GSC = 0,
+ INTEL_HUC_WAITING_ON_PXP,
+ INTEL_HUC_DELAYED_LOAD_ERROR,
+};
+
struct intel_huc {
/* Generic uC firmware management */
struct intel_uc_fw fw;
@@ -20,14 +32,27 @@ struct intel_huc {
u32 mask;
u32 value;
} status;
+
+ struct {
+ struct i915_sw_fence fence;
+ struct hrtimer timer;
+ struct notifier_block nb;
+ enum intel_huc_delayed_load_status status;
+ } delayed_load;
};
void intel_huc_init_early(struct intel_huc *huc);
int intel_huc_init(struct intel_huc *huc);
void intel_huc_fini(struct intel_huc *huc);
+void intel_huc_suspend(struct intel_huc *huc);
int intel_huc_auth(struct intel_huc *huc);
+int intel_huc_wait_for_auth_complete(struct intel_huc *huc);
int intel_huc_check_status(struct intel_huc *huc);
void intel_huc_update_auth_status(struct intel_huc *huc);
+bool intel_huc_is_authenticated(struct intel_huc *huc);
+
+void intel_huc_register_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
+void intel_huc_unregister_gsc_notifier(struct intel_huc *huc, struct bus_type *bus);
static inline int intel_huc_sanitize(struct intel_huc *huc)
{
@@ -56,6 +81,12 @@ static inline bool intel_huc_is_loaded_by_gsc(const struct intel_huc *huc)
return huc->fw.loaded_via_gsc;
}
+/*
+ * True only when the HuC is in use, is loaded by the GSC and has not been
+ * authenticated yet. The checks run in that order and stop at the first one
+ * that fails.
+ */
+static inline bool intel_huc_wait_required(struct intel_huc *huc)
+{
+	if (!intel_huc_is_used(huc))
+		return false;
+
+	if (!intel_huc_is_loaded_by_gsc(huc))
+		return false;
+
+	return !intel_huc_is_authenticated(huc);
+}
+
void intel_huc_load_status(struct intel_huc *huc, struct drm_printer *p);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
index 9d6ab1e01639..4f246416db17 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.c
@@ -3,9 +3,43 @@
* Copyright © 2014-2019 Intel Corporation
*/
+#include "gt/intel_gsc.h"
#include "gt/intel_gt.h"
+#include "intel_huc.h"
#include "intel_huc_fw.h"
#include "i915_drv.h"
+#include "pxp/intel_pxp_huc.h"
+
+/**
+ * intel_huc_fw_load_and_auth_via_gsc() - load and authenticate HuC via GSC
+ * @huc: intel_huc structure
+ *
+ * Return: -ENODEV if this HuC is not loaded by the GSC, -ENOEXEC if the
+ * firmware is not loadable, 0 if the HuC is already authenticated, the error
+ * from intel_pxp_huc_load_and_auth() if that call fails, otherwise the result
+ * of intel_huc_wait_for_auth_complete().
+ */
+int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc)
+{
+	struct intel_uc_fw *fw = &huc->fw;
+	int err;
+
+	if (!intel_huc_is_loaded_by_gsc(huc))
+		return -ENODEV;
+
+	if (!intel_uc_fw_is_loadable(fw))
+		return -ENOEXEC;
+
+	/*
+	 * If we abort a suspend, HuC might still be loaded when the mei
+	 * component gets re-bound and this function called again. If so, just
+	 * mark the HuC as loaded.
+	 */
+	if (intel_huc_is_authenticated(huc)) {
+		intel_uc_fw_change_status(fw, INTEL_UC_FIRMWARE_RUNNING);
+		return 0;
+	}
+
+	/* Not authenticated, so the firmware should not be marked loaded. */
+	GEM_WARN_ON(intel_uc_fw_is_loaded(fw));
+
+	err = intel_pxp_huc_load_and_auth(&huc_to_gt(huc)->pxp);
+	if (err)
+		return err;
+
+	intel_uc_fw_change_status(fw, INTEL_UC_FIRMWARE_TRANSFERRED);
+
+	return intel_huc_wait_for_auth_complete(huc);
+}
/**
* intel_huc_fw_upload() - load HuC uCode to device via DMA transfer
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
index 12f264ee3e0b..db42e238b45f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc_fw.h
@@ -8,6 +8,7 @@
struct intel_huc;
+int intel_huc_fw_load_and_auth_via_gsc(struct intel_huc *huc);
int intel_huc_fw_upload(struct intel_huc *huc);
#endif
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index b91ad4aede1f..de2843dc1307 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -93,7 +93,8 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(BROXTON, 0, guc_mmp(bxt, 70, 1, 1)) \
fw_def(SKYLAKE, 0, guc_mmp(skl, 70, 1, 1))
-#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp) \
+#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_raw, huc_mmp, huc_gsc) \
+ fw_def(DG2, 0, huc_gsc(dg2)) \
fw_def(ALDERLAKE_P, 0, huc_raw(tgl)) \
fw_def(ALDERLAKE_P, 0, huc_mmp(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_raw(tgl)) \
@@ -141,6 +142,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
#define MAKE_HUC_FW_PATH_BLANK(prefix_) \
__MAKE_UC_FW_PATH_BLANK(prefix_, "_huc")
+#define MAKE_HUC_FW_PATH_GSC(prefix_) \
+ __MAKE_UC_FW_PATH_BLANK(prefix_, "_huc_gsc")
+
#define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \
__MAKE_UC_FW_PATH_MMP(prefix_, "_huc_", major_, minor_, patch_)
@@ -153,7 +157,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
MODULE_FIRMWARE(uc_);
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP)
-INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP)
+INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC)
/*
* The next expansion of the table macros (in __uc_fw_auto_select below) provides
@@ -168,6 +172,7 @@ struct __packed uc_fw_blob {
u8 major;
u8 minor;
u8 patch;
+ bool loaded_via_gsc;
};
#define UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
@@ -176,16 +181,16 @@ struct __packed uc_fw_blob {
.patch = patch_, \
.path = path_,
-#define UC_FW_BLOB_NEW(major_, minor_, patch_, path_) \
+#define UC_FW_BLOB_NEW(major_, minor_, patch_, gsc_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
- .legacy = false }
+ .legacy = false, .loaded_via_gsc = gsc_ }
#define UC_FW_BLOB_OLD(major_, minor_, patch_, path_) \
{ UC_FW_BLOB_BASE(major_, minor_, patch_, path_) \
.legacy = true }
#define GUC_FW_BLOB(prefix_, major_, minor_) \
- UC_FW_BLOB_NEW(major_, minor_, 0, \
+ UC_FW_BLOB_NEW(major_, minor_, 0, false, \
MAKE_GUC_FW_PATH_MAJOR(prefix_, major_, minor_))
#define GUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
@@ -193,12 +198,15 @@ struct __packed uc_fw_blob {
MAKE_GUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
#define HUC_FW_BLOB(prefix_) \
- UC_FW_BLOB_NEW(0, 0, 0, MAKE_HUC_FW_PATH_BLANK(prefix_))
+ UC_FW_BLOB_NEW(0, 0, 0, false, MAKE_HUC_FW_PATH_BLANK(prefix_))
#define HUC_FW_BLOB_MMP(prefix_, major_, minor_, patch_) \
UC_FW_BLOB_OLD(major_, minor_, patch_, \
MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_))
+#define HUC_FW_BLOB_GSC(prefix_) \
+ UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_))
+
struct __packed uc_fw_platform_requirement {
enum intel_platform p;
u8 rev; /* first platform rev using this FW */
@@ -224,7 +232,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB, GUC_FW_BLOB_MMP)
};
static const struct uc_fw_platform_requirement blobs_huc[] = {
- INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP)
+ INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC)
};
static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = {
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
@@ -272,6 +280,7 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw)
uc_fw->file_wanted.path = blob->path;
uc_fw->file_wanted.major_ver = blob->major;
uc_fw->file_wanted.minor_ver = blob->minor;
+ uc_fw->loaded_via_gsc = blob->loaded_via_gsc;
found = true;
break;
}
@@ -904,7 +913,6 @@ int intel_uc_fw_init(struct intel_uc_fw *uc_fw)
out_unpin:
i915_gem_object_unpin_pages(uc_fw->obj);
out:
- intel_uc_fw_change_status(uc_fw, INTEL_UC_FIRMWARE_INIT_FAIL);
return err;
}
diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index eef3bba8a41b..357c5b65e097 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -354,9 +354,9 @@ void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
- pci_resource_len(pdev, GTTMMADR_BAR);
+ pci_resource_len(pdev, GEN4_GTTMMADR_BAR);
vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
- pci_resource_len(pdev, GTT_APERTURE_BAR);
+ pci_resource_len(pdev, GEN4_GMADR_BAR);
memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4);
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index daac2050d77d..1cb388484bf0 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -734,7 +734,7 @@ static i915_reg_t force_nonpriv_white_list[] = {
_MMIO(0x770c),
_MMIO(0x83a8),
_MMIO(0xb110),
- GEN8_L3SQCREG4,//_MMIO(0xb118)
+ _MMIO(0xb118),
_MMIO(0xe100),
_MMIO(0xe18c),
_MMIO(0xe48c),
@@ -2257,7 +2257,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
- MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+ MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
/* display */
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 1c6e941c9666..200c1162daa3 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -106,15 +106,15 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
{RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
{RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
{RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
- {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */
- {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */
+ {RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */
+ {RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */
{RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
{RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
- {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */
- {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */
- {RCS0, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */
- {RCS0, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */
- {RCS0, GEN8_ROW_CHICKEN, 0xffff, true}, /* 0xe4f0 */
+ {RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */
+ {RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */
+ {RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */
+ {RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */
+ {RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */
{RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
{RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
{RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 298ed36f078a..c3d43f9b1e45 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -81,6 +81,7 @@
#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_getparam.h"
+#include "i915_hwmon.h"
#include "i915_ioc32.h"
#include "i915_ioctl.h"
#include "i915_irq.h"
@@ -764,6 +765,8 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
for_each_gt(gt, dev_priv, i)
intel_gt_driver_register(gt);
+ i915_hwmon_register(dev_priv);
+
intel_display_driver_register(dev_priv);
intel_power_domains_enable(dev_priv);
@@ -796,6 +799,8 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv)
for_each_gt(gt, dev_priv, i)
intel_gt_driver_unregister(gt);
+ i915_hwmon_unregister(dev_priv);
+
i915_perf_unregister(dev_priv);
i915_pmu_unregister(dev_priv);
@@ -1656,7 +1661,8 @@ static int intel_runtime_suspend(struct device *kdev)
intel_runtime_pm_enable_interrupts(dev_priv);
- intel_gt_runtime_resume(to_gt(dev_priv));
+ for_each_gt(gt, dev_priv, i)
+ intel_gt_runtime_resume(gt);
enable_rpm_wakeref_asserts(rpm);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 349ff7d65deb..05b3300cc4ed 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,7 +40,6 @@
#include "display/intel_display_core.h"
#include "gem/i915_gem_context_types.h"
-#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_shrinker.h"
#include "gem/i915_gem_stolen.h"
@@ -350,6 +349,8 @@ struct drm_i915_private {
struct i915_perf perf;
+ struct i915_hwmon *hwmon;
+
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct intel_gt gt0;
@@ -898,19 +899,17 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_RUNTIME_PM(dev_priv) (INTEL_INFO(dev_priv)->has_runtime_pm)
#define HAS_64BIT_RELOC(dev_priv) (INTEL_INFO(dev_priv)->has_64bit_reloc)
+#define HAS_OA_BPC_REPORTING(dev_priv) \
+ (INTEL_INFO(dev_priv)->has_oa_bpc_reporting)
+#define HAS_OA_SLICE_CONTRIB_LIMITS(dev_priv) \
+ (INTEL_INFO(dev_priv)->has_oa_slice_contrib_limits)
+
/*
* Set this flag, when platform requires 64K GTT page sizes or larger for
* device local memory access.
*/
#define HAS_64K_PAGES(dev_priv) (INTEL_INFO(dev_priv)->has_64k_pages)
-/*
- * Set this flag when platform doesn't allow both 64k pages and 4k pages in
- * the same PT. this flag means we need to support compact PT layout for the
- * ppGTT when using the 64K GTT pages.
- */
-#define NEEDS_COMPACT_PT(dev_priv) (INTEL_INFO(dev_priv)->needs_compact_pt)
-
#define HAS_IPC(dev_priv) (INTEL_INFO(dev_priv)->display.has_ipc)
#define HAS_REGION(i915, i) (RUNTIME_INFO(i915)->memory_regions & (i))
@@ -976,6 +975,9 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
#define HAS_ONE_EU_PER_FUSE_BIT(i915) (INTEL_INFO(i915)->has_one_eu_per_fuse_bit)
+#define HAS_LMEMBAR_SMEM_STOLEN(i915) (!HAS_LMEM(i915) && \
+ GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+
/* intel_device_info.c */
static inline struct intel_device_info *
mkwrite_device_info(struct drm_i915_private *dev_priv)
@@ -983,16 +985,4 @@ mkwrite_device_info(struct drm_i915_private *dev_priv)
return (struct intel_device_info *)INTEL_INFO(dev_priv);
}
-static inline enum i915_map_type
-i915_coherent_map_type(struct drm_i915_private *i915,
- struct drm_i915_gem_object *obj, bool always_coherent)
-{
- if (i915_gem_object_is_lmem(obj))
- return I915_MAP_WC;
- if (HAS_LLC(i915) || always_coherent)
- return I915_MAP_WB;
- else
- return I915_MAP_WC;
-}
-
#endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2bdddb61ebd7..299f94a9fb87 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -843,7 +843,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915)
__i915_gem_object_release_mmap_gtt(obj);
list_for_each_entry_safe(obj, on,
- &to_gt(i915)->lmem_userfault_list, userfault_link)
+ &i915->runtime_pm.lmem_userfault_list, userfault_link)
i915_gem_object_runtime_pm_release_mmap_offset(obj);
/*
@@ -1128,6 +1128,8 @@ void i915_gem_drain_workqueue(struct drm_i915_private *i915)
int i915_gem_init(struct drm_i915_private *dev_priv)
{
+ struct intel_gt *gt;
+ unsigned int i;
int ret;
/* We need to fallback to 4K pages if host doesn't support huge gtt. */
@@ -1158,9 +1160,11 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
*/
intel_init_clock_gating(dev_priv);
- ret = intel_gt_init(to_gt(dev_priv));
- if (ret)
- goto err_unlock;
+ for_each_gt(gt, dev_priv, i) {
+ ret = intel_gt_init(gt);
+ if (ret)
+ goto err_unlock;
+ }
return 0;
@@ -1173,8 +1177,13 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
err_unlock:
i915_gem_drain_workqueue(dev_priv);
- if (ret != -EIO)
- intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
+ if (ret != -EIO) {
+ for_each_gt(gt, dev_priv, i) {
+ intel_gt_driver_remove(gt);
+ intel_gt_driver_release(gt);
+ intel_uc_cleanup_firmwares(&gt->uc);
+ }
+ }
if (ret == -EIO) {
/*
@@ -1182,10 +1191,12 @@ err_unlock:
* as wedged. But we only want to do this when the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
- if (!intel_gt_is_wedged(to_gt(dev_priv))) {
- i915_probe_error(dev_priv,
- "Failed to initialize GPU, declaring it wedged!\n");
- intel_gt_set_wedged(to_gt(dev_priv));
+ for_each_gt(gt, dev_priv, i) {
+ if (!intel_gt_is_wedged(gt)) {
+ i915_probe_error(dev_priv,
+ "Failed to initialize GPU, declaring it wedged!\n");
+ intel_gt_set_wedged(gt);
+ }
}
/* Minimal basic recovery for KMS */
@@ -1213,23 +1224,27 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915)
void i915_gem_driver_remove(struct drm_i915_private *dev_priv)
{
- intel_wakeref_auto_fini(&to_gt(dev_priv)->userfault_wakeref);
+ struct intel_gt *gt;
+ unsigned int i;
i915_gem_suspend_late(dev_priv);
- intel_gt_driver_remove(to_gt(dev_priv));
+ for_each_gt(gt, dev_priv, i)
+ intel_gt_driver_remove(gt);
dev_priv->uabi_engines = RB_ROOT;
/* Flush any outstanding unpin_work. */
i915_gem_drain_workqueue(dev_priv);
-
- i915_gem_drain_freed_objects(dev_priv);
}
void i915_gem_driver_release(struct drm_i915_private *dev_priv)
{
- intel_gt_driver_release(to_gt(dev_priv));
+ struct intel_gt *gt;
+ unsigned int i;
- intel_uc_cleanup_firmwares(&to_gt(dev_priv)->uc);
+ for_each_gt(gt, dev_priv, i) {
+ intel_gt_driver_release(gt);
+ intel_uc_cleanup_firmwares(&gt->uc);
+ }
/* Flush any outstanding work, including i915_gem_context.release_work. */
i915_gem_drain_workqueue(dev_priv);
@@ -1259,7 +1274,7 @@ void i915_gem_init_early(struct drm_i915_private *dev_priv)
void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
- i915_gem_drain_freed_objects(dev_priv);
+ i915_gem_drain_workqueue(dev_priv);
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
drm_WARN_ON(&dev_priv->drm, dev_priv->mm.shrink_count);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 342c8ca6414e..3047e80e1163 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -175,6 +175,9 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_PERF_REVISION:
value = i915_perf_ioctl_version();
break;
+ case I915_PARAM_OA_TIMESTAMP_FREQUENCY:
+ value = i915_perf_oa_timestamp_frequency(i915);
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 9ea2fe34e7d3..f2d53edcd2ee 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1221,7 +1221,10 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
if (GRAPHICS_VER(i915) >= 6) {
ee->rc_psmi = ENGINE_READ(engine, RING_PSMI_CTL);
- if (GRAPHICS_VER(i915) >= 12)
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
+ ee->fault_reg = intel_gt_mcr_read_any(engine->gt,
+ XEHP_RING_FAULT_REG);
+ else if (GRAPHICS_VER(i915) >= 12)
ee->fault_reg = intel_uncore_read(engine->uncore,
GEN12_RING_FAULT_REG);
else if (GRAPHICS_VER(i915) >= 8)
@@ -1820,7 +1823,12 @@ static void gt_record_global_regs(struct intel_gt_coredump *gt)
if (GRAPHICS_VER(i915) == 7)
gt->err_int = intel_uncore_read(uncore, GEN7_ERR_INT);
- if (GRAPHICS_VER(i915) >= 12) {
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ gt->fault_data0 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
+ XEHP_FAULT_TLB_DATA0);
+ gt->fault_data1 = intel_gt_mcr_read_any((struct intel_gt *)gt->_gt,
+ XEHP_FAULT_TLB_DATA1);
+ } else if (GRAPHICS_VER(i915) >= 12) {
gt->fault_data0 = intel_uncore_read(uncore,
GEN12_FAULT_TLB_DATA0);
gt->fault_data1 = intel_uncore_read(uncore,
diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
new file mode 100644
index 000000000000..c588a17f97e9
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -0,0 +1,732 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/types.h>
+
+#include "i915_drv.h"
+#include "i915_hwmon.h"
+#include "i915_reg.h"
+#include "intel_mchbar_regs.h"
+#include "intel_pcode.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+
+/*
+ * SF_* - scale factors for particular quantities according to hwmon spec.
+ * - voltage - millivolts
+ * - power - microwatts
+ * - curr - milliamperes
+ * - energy - microjoules
+ * - time - milliseconds
+ */
+#define SF_VOLTAGE 1000
+#define SF_POWER 1000000
+#define SF_CURR 1000
+#define SF_ENERGY 1000000
+#define SF_TIME 1000
+
+/*
+ * Registers used by the hwmon code. Several of them are tested with
+ * i915_mmio_reg_valid() by the *_is_visible() helpers before the matching
+ * attribute is exposed.
+ */
+struct hwm_reg {
+ i915_reg_t gt_perf_status; /* read by hwm_in_read() for the voltage field */
+ i915_reg_t pkg_power_sku_unit; /* NOTE(review): presumably the source of the scl_shift_* values - confirm */
+ i915_reg_t pkg_power_sku; /* PKG_PKG_TDP field backs power rated_max */
+ i915_reg_t pkg_rapl_limit; /* PKG_PWR_LIM_1 and PKG_PWR_LIM_1_TIME fields */
+ i915_reg_t energy_status_all; /* energy counter used when gt_n < 0 */
+ i915_reg_t energy_status_tile; /* energy counter used when gt_n >= 0 */
+};
+
+/* Bookkeeping hwm_energy() uses to extend the 32-bit energy counter. */
+struct hwm_energy_info {
+ u32 reg_val_prev; /* raw register value seen by the previous hwm_energy() call */
+ long accum_energy; /* Accumulated energy for energy1_input */
+};
+
+/*
+ * Per-hwmon-device driver data; the attribute callbacks fetch it with
+ * dev_get_drvdata().
+ */
+struct hwm_drvdata {
+ struct i915_hwmon *hwmon; /* back pointer to the shared hwmon state */
+ struct intel_uncore *uncore; /* used for register access and runtime PM */
+ struct device *hwmon_dev;
+ struct hwm_energy_info ei; /* Energy info for energy1_input */
+ char name[12];
+ int gt_n; /* >= 0 selects energy_status_tile, negative selects energy_status_all */
+};
+
+/*
+ * Top-level hwmon state: one hwm_drvdata for the device, one per GT, and the
+ * registers and scaling shifts they share.
+ */
+struct i915_hwmon {
+ struct hwm_drvdata ddat;
+ struct hwm_drvdata ddat_gt[I915_MAX_GT];
+ struct mutex hwmon_lock; /* counter overflow logic and rmw */
+ struct hwm_reg rg;
+ int scl_shift_power; /* right shift applied when scaling power fields */
+ int scl_shift_energy; /* right shift applied when scaling the energy counter */
+ int scl_shift_time; /* right shift applied when scaling the time window */
+};
+
+/*
+ * Read-modify-write @reg while holding hwmon_lock and a runtime PM wakeref:
+ * the bits in @clear are cleared and the bits in @set are set.
+ */
+static void
+hwm_locked_with_pm_intel_uncore_rmw(struct hwm_drvdata *ddat,
+				    i915_reg_t reg, u32 clear, u32 set)
+{
+	struct mutex *lock = &ddat->hwmon->hwmon_lock;
+	intel_wakeref_t wf;
+
+	mutex_lock(lock);
+
+	with_intel_runtime_pm(ddat->uncore->rpm, wf) {
+		intel_uncore_rmw(ddat->uncore, reg, clear, set);
+	}
+
+	mutex_unlock(lock);
+}
+
+/*
+ * Read @rgadr under a runtime PM wakeref, extract the field selected by
+ * @field_msk and return (field * @scale_factor) >> @nshift.
+ *
+ * The result is a u64 because scaling a field taken from a 32-bit register
+ * can produce a value that no longer fits in 32 bits.
+ */
+static u64
+hwm_field_read_and_scale(struct hwm_drvdata *ddat, i915_reg_t rgadr,
+			 u32 field_msk, int nshift, u32 scale_factor)
+{
+	intel_wakeref_t wf;
+	u32 raw, field;
+
+	with_intel_runtime_pm(ddat->uncore->rpm, wf)
+		raw = intel_uncore_read(ddat->uncore, rgadr);
+
+	field = REG_FIELD_GET(field_msk, raw);
+
+	return mul_u64_u32_shr(field, scale_factor, nshift);
+}
+
+/*
+ * Convert @lval to hardware units as (@lval << @nshift) / @scale_factor and
+ * write the result into the PKG_PWR_LIM_1 field of @rgadr.
+ */
+static void
+hwm_field_scale_and_write(struct hwm_drvdata *ddat, i915_reg_t rgadr,
+			  int nshift, unsigned int scale_factor, long lval)
+{
+	/* Shift in 64 bits to avoid overflow, then round to nearest. */
+	u64 shifted = (u64)lval << nshift;
+	u32 hw_val = DIV_ROUND_CLOSEST_ULL(shifted, scale_factor);
+
+	hwm_locked_with_pm_intel_uncore_rmw(ddat, rgadr, PKG_PWR_LIM_1,
+					    REG_FIELD_PREP(PKG_PWR_LIM_1, hw_val));
+}
+
+/*
+ * hwm_energy - Obtain energy value
+ *
+ * The underlying energy hardware register is 32-bits and is subject to
+ * overflow. How long before overflow? For example, with an example
+ * scaling bit shift of 14 bits (see register *PACKAGE_POWER_SKU_UNIT) and
+ * a power draw of 1000 watts, the 32-bit counter will overflow in
+ * approximately 4.36 minutes.
+ *
+ * Examples:
+ * 1 watt: (2^32 >> 14) / 1 W / (60 * 60 * 24) secs/day -> 3 days
+ * 1000 watts: (2^32 >> 14) / 1000 W / 60 secs/min -> 4.36 minutes
+ *
+ * The function significantly increases overflow duration (from 4.36
+ * minutes) by accumulating the energy register into a 'long' as allowed by
+ * the hwmon API. Using x86_64 128 bit arithmetic (see mul_u64_u32_shr()),
+ * a 'long' of 63 bits, SF_ENERGY of 1e6 (~20 bits) and
+ * hwmon->scl_shift_energy of 14 bits we have 57 (63 - 20 + 14) bits before
+ * energy1_input overflows. This at 1000 W is an overflow duration of 278 years.
+ *
+ * @ddat: driver data; gt_n >= 0 selects the per-tile energy register,
+ *        otherwise the "all" register is used
+ * @energy: receives the accumulated energy scaled by SF_ENERGY and shifted
+ *          right by hwmon->scl_shift_energy
+ *
+ * The register read, the accumulator update and the scaling all happen under
+ * hwmon->hwmon_lock.
+ */
+static void
+hwm_energy(struct hwm_drvdata *ddat, long *energy)
+{
+	struct intel_uncore *uncore = ddat->uncore;
+	struct i915_hwmon *hwmon = ddat->hwmon;
+	struct hwm_energy_info *ei = &ddat->ei;
+	intel_wakeref_t wakeref;
+	i915_reg_t rgaddr;
+	u32 reg_val;
+
+	if (ddat->gt_n >= 0)
+		rgaddr = hwmon->rg.energy_status_tile;
+	else
+		rgaddr = hwmon->rg.energy_status_all;
+
+	mutex_lock(&hwmon->hwmon_lock);
+
+	with_intel_runtime_pm(uncore->rpm, wakeref)
+		reg_val = intel_uncore_read(uncore, rgaddr);
+
+	/*
+	 * Unsigned 32-bit subtraction gives the number of counts since the
+	 * previous sample modulo 2^32, which is exact across one counter
+	 * wrap. The former "UINT_MAX - prev + cur" form was one count short
+	 * on every wrap.
+	 */
+	ei->accum_energy += (u32)(reg_val - ei->reg_val_prev);
+	ei->reg_val_prev = reg_val;
+
+	*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
+				  hwmon->scl_shift_energy);
+	mutex_unlock(&hwmon->hwmon_lock);
+}
+
+/*
+ * sysfs show handler for power1_max_interval: decodes the
+ * PKG_PWR_LIM_1_TIME_X/Y fields of pkg_rapl_limit and prints the time window
+ * in hwmon interface units (milliseconds).
+ */
+static ssize_t
+hwm_power1_max_interval_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+	struct i915_hwmon *hwmon = ddat->hwmon;
+	intel_wakeref_t wakeref;
+	u32 r, x, y, x_w = 2; /* 2 bits */
+	u64 tau4, out;
+
+	with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
+		r = intel_uncore_read(ddat->uncore, hwmon->rg.pkg_rapl_limit);
+
+	x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
+	y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+	/*
+	 * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+	 *     = (4 | x) << (y - 2)
+	 * where (y - 2) accounts for the two fractional bits of the 1.x
+	 * fixed point value. However because y can be < 2, we compute
+	 *   tau4 = (4 | x) << y
+	 * but add 2 when doing the final right shift to account for units
+	 */
+	/* Widen before shifting: y is a 5-bit field, so a 32-bit shift can overflow. */
+	tau4 = (u64)((1 << x_w) | x) << y;
+	/* val in hwmon interface units (millisec) */
+	out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	return sysfs_emit(buf, "%llu\n", out);
+}
+
+/*
+ * sysfs store handler for power1_max_interval: parses a time window in
+ * hwmon interface units (milliseconds), converts it to the
+ * '1.x * power(2,y)' hardware encoding and writes the PKG_PWR_LIM_1_TIME
+ * field of pkg_rapl_limit.
+ *
+ * Returns @count on success, the kstrtoul() error for unparsable input, or
+ * -EINVAL when the value exceeds the maximum window or converts to zero
+ * hardware units.
+ */
+static ssize_t
+hwm_power1_max_interval_store(struct device *dev,
+			      struct device_attribute *attr,
+			      const char *buf, size_t count)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+	struct i915_hwmon *hwmon = ddat->hwmon;
+	u32 x, y, rxy, x_w = 2; /* 2 bits */
+	u64 tau4, r, max_win;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12
+	 * The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds
+	 */
+#define PKG_MAX_WIN_DEFAULT 0x12ull
+
+	/*
+	 * val must be < max in hwmon interface units. The steps below are
+	 * explained in hwm_power1_max_interval_show()
+	 */
+	r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
+	x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
+	y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
+	/* Shift in 64 bits, matching hwm_power1_max_interval_show() */
+	tau4 = (u64)((1 << x_w) | x) << y;
+	max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
+
+	if (val > max_win)
+		return -EINVAL;
+
+	/* val in hw units */
+	val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+	/* Convert to 1.x * power(2,y); zero has no such representation */
+	if (!val)
+		return -EINVAL;
+	y = ilog2(val);
+	/* x = (val - (1 << y)) >> (y - 2); */
+	x = (val - (1ul << y)) << x_w >> y;
+
+	rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+
+	hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
+					    PKG_PWR_LIM_1_TIME, rxy);
+	return count;
+}
+
+/* power1_max_interval: mode 0664, backed by the show/store handlers above. */
+static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
+ hwm_power1_max_interval_show,
+ hwm_power1_max_interval_store, 0);
+
+/* NULL-terminated list of the extra sysfs attributes defined in this file. */
+static struct attribute *hwm_attributes[] = {
+ &sensor_dev_attr_power1_max_interval.dev_attr.attr,
+ NULL
+};
+
+/*
+ * is_visible callback for hwm_attrgroup. power1_max_interval keeps its
+ * declared mode only when the pkg_rapl_limit register is valid; every other
+ * attribute is hidden (mode 0).
+ */
+static umode_t hwm_attributes_visible(struct kobject *kobj,
+				      struct attribute *attr, int index)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(kobj_to_dev(kobj));
+
+	if (attr != &sensor_dev_attr_power1_max_interval.dev_attr.attr)
+		return 0;
+
+	if (!i915_mmio_reg_valid(ddat->hwmon->rg.pkg_rapl_limit))
+		return 0;
+
+	return attr->mode;
+}
+
+/* Attribute group for hwm_attributes, filtered by hwm_attributes_visible(). */
+static const struct attribute_group hwm_attrgroup = {
+ .attrs = hwm_attributes,
+ .is_visible = hwm_attributes_visible,
+};
+
+/* NULL-terminated list of the extra attribute groups defined in this file. */
+static const struct attribute_group *hwm_groups[] = {
+ &hwm_attrgroup,
+ NULL
+};
+
+/*
+ * Channels handled by the hwm_*_is_visible()/read()/write() helpers: voltage
+ * input, power max/rated_max/crit, energy input and current crit.
+ */
+static const struct hwmon_channel_info *hwm_info[] = {
+ HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
+ HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT),
+ HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+ HWMON_CHANNEL_INFO(curr, HWMON_C_CRIT),
+ NULL
+};
+
+/*
+ * Energy input only.
+ * NOTE(review): presumably the channel set for the per-GT devices
+ * (ddat_gt[]) - confirm against the registration code.
+ */
+static const struct hwmon_channel_info *hwm_gt_info[] = {
+ HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT),
+ NULL
+};
+
+/*
+ * Read the I1 value from pcode into @uval; returns the snb_pcode_read_p()
+ * result. I1 is exposed as power_crit or as curr_crit depending on bit 31.
+ */
+static int hwm_pcode_read_i1(struct drm_i915_private *i915, u32 *uval)
+{
+	struct intel_uncore *uncore = &i915->uncore;
+
+	return snb_pcode_read_p(uncore, PCODE_POWER_SETUP,
+				POWER_SETUP_SUBCOMMAND_READ_I1, 0, uval);
+}
+
+/* Write @uval as the new I1 value; returns the snb_pcode_write_p() result. */
+static int hwm_pcode_write_i1(struct drm_i915_private *i915, u32 uval)
+{
+	struct intel_uncore *uncore = &i915->uncore;
+
+	return snb_pcode_write_p(uncore, PCODE_POWER_SETUP,
+				 POWER_SETUP_SUBCOMMAND_WRITE_I1, 0, uval);
+}
+
+/* Voltage input is read-only (0444) on DG1 and DG2 and hidden elsewhere. */
+static umode_t
+hwm_in_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+	struct drm_i915_private *i915 = ddat->uncore->i915;
+
+	if (attr != hwmon_in_input)
+		return 0;
+
+	if (IS_DG1(i915) || IS_DG2(i915))
+		return 0444;
+
+	return 0;
+}
+
+/*
+ * Read the voltage field of gt_perf_status into @val.
+ * Returns 0 on success or -EOPNOTSUPP for any attribute other than
+ * hwmon_in_input.
+ */
+static int
+hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+	struct intel_uncore *uncore = ddat->uncore;
+	intel_wakeref_t wf;
+	u32 raw;
+
+	if (attr != hwmon_in_input)
+		return -EOPNOTSUPP;
+
+	with_intel_runtime_pm(uncore->rpm, wf)
+		raw = intel_uncore_read(uncore, ddat->hwmon->rg.gt_perf_status);
+
+	/* HW register value in units of 2.5 millivolt */
+	*val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, raw) * 25, 10);
+
+	return 0;
+}
+
+/*
+ * Visibility of the power attributes:
+ * - max: 0664 when pkg_rapl_limit is valid
+ * - rated_max: 0444 when pkg_power_sku is valid
+ * - crit: 0644 when the pcode I1 read succeeds and POWER_SETUP_I1_WATTS is set
+ * Anything else is hidden.
+ */
+static umode_t
+hwm_power_is_visible(const struct hwm_drvdata *ddat, u32 attr, int chan)
+{
+	struct drm_i915_private *i915 = ddat->uncore->i915;
+	const struct hwm_reg *rg = &ddat->hwmon->rg;
+	u32 i1;
+
+	switch (attr) {
+	case hwmon_power_max:
+		if (i915_mmio_reg_valid(rg->pkg_rapl_limit))
+			return 0664;
+		return 0;
+	case hwmon_power_rated_max:
+		if (i915_mmio_reg_valid(rg->pkg_power_sku))
+			return 0444;
+		return 0;
+	case hwmon_power_crit:
+		if (hwm_pcode_read_i1(i915, &i1))
+			return 0;
+		return (i1 & POWER_SETUP_I1_WATTS) ? 0644 : 0;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Read a power attribute into @val, scaled with SF_POWER.
+ *
+ * Returns 0 on success, the pcode error for a failed I1 read, -ENODEV when
+ * I1 is not flagged as watts, or -EOPNOTSUPP for an unhandled attribute.
+ */
+static int
+hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
+{
+	struct i915_hwmon *hwmon = ddat->hwmon;
+	u32 i1;
+	int err;
+
+	switch (attr) {
+	case hwmon_power_max:
+		*val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_rapl_limit,
+						PKG_PWR_LIM_1,
+						hwmon->scl_shift_power, SF_POWER);
+		break;
+	case hwmon_power_rated_max:
+		*val = hwm_field_read_and_scale(ddat, hwmon->rg.pkg_power_sku,
+						PKG_PKG_TDP,
+						hwmon->scl_shift_power, SF_POWER);
+		break;
+	case hwmon_power_crit:
+		err = hwm_pcode_read_i1(ddat->uncore->i915, &i1);
+		if (err)
+			return err;
+		/* I1 is only a power limit when the watts flag is set */
+		if (!(i1 & POWER_SETUP_I1_WATTS))
+			return -ENODEV;
+		*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, i1),
+				       SF_POWER, POWER_SETUP_I1_SHIFT);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/*
+ * Write a power attribute given in SF_POWER units.
+ *
+ * power max goes to the PKG_PWR_LIM_1 field of pkg_rapl_limit and always
+ * returns 0; power crit is sent to pcode and returns the pcode result.
+ * Any other attribute returns -EOPNOTSUPP.
+ */
+static int
+hwm_power_write(struct hwm_drvdata *ddat, u32 attr, int chan, long val)
+{
+	struct i915_hwmon *hwmon = ddat->hwmon;
+	u32 i1;
+
+	if (attr == hwmon_power_max) {
+		hwm_field_scale_and_write(ddat, hwmon->rg.pkg_rapl_limit,
+					  hwmon->scl_shift_power, SF_POWER, val);
+		return 0;
+	}
+
+	if (attr == hwmon_power_crit) {
+		i1 = DIV_ROUND_CLOSEST_ULL(val << POWER_SETUP_I1_SHIFT, SF_POWER);
+		return hwm_pcode_write_i1(ddat->uncore->i915, i1);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Energy input is read-only (0444) when the energy register selected by
+ * gt_n (per-tile for gt_n >= 0, otherwise the "all" register) is valid.
+ */
+static umode_t
+hwm_energy_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+	const struct hwm_reg *rg = &ddat->hwmon->rg;
+	i915_reg_t rgaddr;
+
+	if (attr != hwmon_energy_input)
+		return 0;
+
+	rgaddr = ddat->gt_n >= 0 ? rg->energy_status_tile : rg->energy_status_all;
+
+	if (!i915_mmio_reg_valid(rgaddr))
+		return 0;
+
+	return 0444;
+}
+
+/*
+ * Read the accumulated energy into @val via hwm_energy(); attributes other
+ * than hwmon_energy_input return -EOPNOTSUPP.
+ */
+static int
+hwm_energy_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+	if (attr != hwmon_energy_input)
+		return -EOPNOTSUPP;
+
+	hwm_energy(ddat, val);
+
+	return 0;
+}
+
+/*
+ * curr crit is exposed (0644) only when the pcode I1 read succeeds and the
+ * POWER_SETUP_I1_WATTS flag is clear, i.e. the opposite flag state to
+ * power crit.
+ */
+static umode_t
+hwm_curr_is_visible(const struct hwm_drvdata *ddat, u32 attr)
+{
+	struct drm_i915_private *i915 = ddat->uncore->i915;
+	u32 i1;
+
+	if (attr != hwmon_curr_crit)
+		return 0;
+
+	if (hwm_pcode_read_i1(i915, &i1))
+		return 0;
+
+	return (i1 & POWER_SETUP_I1_WATTS) ? 0 : 0644;
+}
+
+/*
+ * Read a current attribute.
+ *
+ * For hwmon_curr_crit the I1 value is fetched through hwm_pcode_read_i1().
+ * If it is flagged with POWER_SETUP_I1_WATTS the read fails with -ENODEV,
+ * otherwise the data field is scaled with SF_CURR and POWER_SETUP_I1_SHIFT
+ * and stored in @val.
+ *
+ * Return: 0 on success, the pcode read error, -ENODEV, or -EOPNOTSUPP for
+ * any other attribute.
+ */
+static int
+hwm_curr_read(struct hwm_drvdata *ddat, u32 attr, long *val)
+{
+	u32 i1;
+	int err;
+
+	if (attr != hwmon_curr_crit)
+		return -EOPNOTSUPP;
+
+	err = hwm_pcode_read_i1(ddat->uncore->i915, &i1);
+	if (err)
+		return err;
+
+	if (i1 & POWER_SETUP_I1_WATTS)
+		return -ENODEV;
+
+	*val = mul_u64_u32_shr(REG_FIELD_GET(POWER_SETUP_I1_DATA_MASK, i1),
+			       SF_CURR, POWER_SETUP_I1_SHIFT);
+
+	return 0;
+}
+
+/*
+ * hwm_curr_write - handle a write to one of the hwmon current attributes
+ * @ddat: driver data of the hwmon node being written
+ * @attr: hwmon current attribute id
+ * @val: value handed in by the caller
+ *
+ * hwmon_curr_crit is converted using POWER_SETUP_I1_SHIFT and SF_CURR and
+ * sent through hwm_pcode_write_i1().
+ *
+ * Return: the result of hwm_pcode_write_i1(), or -EOPNOTSUPP for any other
+ * attribute.
+ */
+static int
+hwm_curr_write(struct hwm_drvdata *ddat, u32 attr, long val)
+{
+	u32 uval;
+
+	switch (attr) {
+	case hwmon_curr_crit:
+		/*
+		 * Widen before shifting: 'long' is only 32 bits wide on
+		 * 32-bit builds, so shifting it directly can overflow, and
+		 * overflowing a signed left shift is undefined behaviour.
+		 */
+		uval = DIV_ROUND_CLOSEST_ULL((u64)val << POWER_SETUP_I1_SHIFT,
+					     SF_CURR);
+		return hwm_pcode_write_i1(ddat->uncore->i915, uval);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/*
+ * .is_visible callback of hwm_ops: hand the query to the per-sensor-type
+ * helper. Sensor types without a helper here are hidden (mode 0).
+ */
+static umode_t
+hwm_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+	       u32 attr, int channel)
+{
+	struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
+
+	if (type == hwmon_in)
+		return hwm_in_is_visible(ddat, attr);
+	if (type == hwmon_power)
+		return hwm_power_is_visible(ddat, attr, channel);
+	if (type == hwmon_energy)
+		return hwm_energy_is_visible(ddat, attr);
+	if (type == hwmon_curr)
+		return hwm_curr_is_visible(ddat, attr);
+
+	return 0;
+}
+
+/*
+ * .read callback of hwm_ops: fetch the driver data attached to @dev and
+ * hand the read to the per-sensor-type helper. Sensor types without a
+ * helper here fail with -EOPNOTSUPP.
+ */
+static int
+hwm_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	 int channel, long *val)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+
+	if (type == hwmon_in)
+		return hwm_in_read(ddat, attr, val);
+	if (type == hwmon_power)
+		return hwm_power_read(ddat, attr, channel, val);
+	if (type == hwmon_energy)
+		return hwm_energy_read(ddat, attr, val);
+	if (type == hwmon_curr)
+		return hwm_curr_read(ddat, attr, val);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * .write callback of hwm_ops: only power and current attributes accept
+ * writes; every other sensor type fails with -EOPNOTSUPP.
+ */
+static int
+hwm_write(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	  int channel, long val)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+
+	if (type == hwmon_power)
+		return hwm_power_write(ddat, attr, channel, val);
+	if (type == hwmon_curr)
+		return hwm_curr_write(ddat, attr, val);
+
+	return -EOPNOTSUPP;
+}
+
+static const struct hwmon_ops hwm_ops = { /* callbacks referenced by hwm_chip_info */
+ .is_visible = hwm_is_visible, /* dispatches on in/power/energy/curr */
+ .read = hwm_read, /* dispatches on in/power/energy/curr */
+ .write = hwm_write, /* only power and curr accept writes */
+};
+
+static const struct hwmon_chip_info hwm_chip_info = { /* passed when registering the node named by hwmon->ddat in i915_hwmon_register() */
+ .ops = &hwm_ops,
+ .info = hwm_info,
+};
+
+/*
+ * .is_visible callback of hwm_gt_ops: only energy attributes are considered;
+ * every other sensor type is hidden (mode 0).
+ */
+static umode_t
+hwm_gt_is_visible(const void *drvdata, enum hwmon_sensor_types type,
+		  u32 attr, int channel)
+{
+	struct hwm_drvdata *ddat = (struct hwm_drvdata *)drvdata;
+
+	if (type != hwmon_energy)
+		return 0;
+
+	return hwm_energy_is_visible(ddat, attr);
+}
+
+/*
+ * .read callback of hwm_gt_ops: only energy attributes can be read; every
+ * other sensor type fails with -EOPNOTSUPP.
+ */
+static int
+hwm_gt_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
+	    int channel, long *val)
+{
+	struct hwm_drvdata *ddat = dev_get_drvdata(dev);
+
+	if (type != hwmon_energy)
+		return -EOPNOTSUPP;
+
+	return hwm_energy_read(ddat, attr, val);
+}
+
+static const struct hwmon_ops hwm_gt_ops = { /* callbacks referenced by hwm_gt_chip_info; no .write is provided */
+ .is_visible = hwm_gt_is_visible, /* energy only */
+ .read = hwm_gt_read, /* energy only */
+};
+
+static const struct hwmon_chip_info hwm_gt_chip_info = { /* passed when registering the per-gt nodes in i915_hwmon_register() */
+ .ops = &hwm_gt_ops,
+ .info = hwm_gt_info,
+};
+
+/*
+ * Collect everything the hwmon callbacks need before the hwmon devices are
+ * registered:
+ *  - select the platform specific registers (INVALID_MMIO_REG marks a
+ *    register that is not available),
+ *  - read the SKU unit register once and cache the power/energy/time shifts,
+ *  - take a first energy reading for every valid energy register.
+ */
+static void
+hwm_get_preregistration_info(struct drm_i915_private *i915)
+{
+	struct i915_hwmon *hwmon = i915->hwmon;
+	struct intel_uncore *uncore = &i915->uncore;
+	intel_wakeref_t wakeref;
+	struct intel_gt *gt;
+	u32 sku_unit = 0;
+	long first_reading;
+	int id;
+
+	/* Available for all Gen12+/dGfx */
+	hwmon->rg.gt_perf_status = GEN12_RPSTAT1;
+
+	if (IS_DG1(i915) || IS_DG2(i915)) {
+		hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
+		hwmon->rg.energy_status_all = PCU_PACKAGE_ENERGY_STATUS;
+		hwmon->rg.pkg_rapl_limit = PCU_PACKAGE_RAPL_LIMIT;
+		hwmon->rg.pkg_power_sku = PCU_PACKAGE_POWER_SKU;
+		hwmon->rg.pkg_power_sku_unit = PCU_PACKAGE_POWER_SKU_UNIT;
+	} else if (IS_XEHPSDV(i915)) {
+		hwmon->rg.energy_status_tile = GT0_PACKAGE_ENERGY_STATUS;
+		hwmon->rg.energy_status_all = GT0_PLATFORM_ENERGY_STATUS;
+		hwmon->rg.pkg_rapl_limit = GT0_PACKAGE_RAPL_LIMIT;
+		hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
+		hwmon->rg.pkg_power_sku_unit = GT0_PACKAGE_POWER_SKU_UNIT;
+	} else {
+		hwmon->rg.energy_status_tile = INVALID_MMIO_REG;
+		hwmon->rg.energy_status_all = INVALID_MMIO_REG;
+		hwmon->rg.pkg_rapl_limit = INVALID_MMIO_REG;
+		hwmon->rg.pkg_power_sku = INVALID_MMIO_REG;
+		hwmon->rg.pkg_power_sku_unit = INVALID_MMIO_REG;
+	}
+
+	/*
+	 * The contents of register hwmon->rg.pkg_power_sku_unit do not change,
+	 * so read it once and store the shift values.
+	 */
+	with_intel_runtime_pm(uncore->rpm, wakeref) {
+		if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku_unit))
+			sku_unit = intel_uncore_read(uncore,
+						     hwmon->rg.pkg_power_sku_unit);
+	}
+
+	/* sku_unit stays 0 when the register is not valid */
+	hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, sku_unit);
+	hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, sku_unit);
+	hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, sku_unit);
+
+	/*
+	 * Initialize 'struct hwm_energy_info', i.e. set fields to the
+	 * first value of the energy register read. The value returned
+	 * through first_reading is not used.
+	 */
+	if (i915_mmio_reg_valid(hwmon->rg.energy_status_all))
+		hwm_energy(&hwmon->ddat, &first_reading);
+
+	if (i915_mmio_reg_valid(hwmon->rg.energy_status_tile)) {
+		for_each_gt(gt, i915, id)
+			hwm_energy(&hwmon->ddat_gt[id], &first_reading);
+	}
+}
+
+/*
+ * i915_hwmon_register - create the hwmon devices for @i915
+ *
+ * Does nothing unless IS_DGFX() is true. Allocates the i915_hwmon state,
+ * fills in the device-level and per-gt driver data, gathers the register
+ * information and then registers one "i915" hwmon device plus one
+ * "i915_gt<N>" device for every gt that has a visible energy attribute.
+ *
+ * Failures are not reported to the caller: on allocation failure nothing is
+ * set up, and if the device-level registration fails i915->hwmon is reset to
+ * NULL. A failed per-gt registration only leaves that gt without a hwmon_dev.
+ */
+void i915_hwmon_register(struct drm_i915_private *i915)
+{
+	struct device *dev = i915->drm.dev;
+	struct hwm_drvdata *root, *tile;
+	struct i915_hwmon *hwmon;
+	struct device *hwmon_dev;
+	struct intel_gt *gt;
+	int id;
+
+	/* hwmon is available only for dGfx */
+	if (!IS_DGFX(i915))
+		return;
+
+	hwmon = devm_kzalloc(dev, sizeof(*hwmon), GFP_KERNEL);
+	if (!hwmon)
+		return;
+
+	i915->hwmon = hwmon;
+	mutex_init(&hwmon->hwmon_lock);
+
+	/* device-level node; gt_n == -1 marks it as not tied to a gt */
+	root = &hwmon->ddat;
+	root->hwmon = hwmon;
+	root->uncore = &i915->uncore;
+	snprintf(root->name, sizeof(root->name), "i915");
+	root->gt_n = -1;
+
+	for_each_gt(gt, i915, id) {
+		tile = &hwmon->ddat_gt[id];
+
+		tile->hwmon = hwmon;
+		tile->uncore = gt->uncore;
+		snprintf(tile->name, sizeof(tile->name), "i915_gt%u", id);
+		tile->gt_n = id;
+	}
+
+	hwm_get_preregistration_info(i915);
+
+	/* hwmon_dev points to device hwmon<i> */
+	hwmon_dev = devm_hwmon_device_register_with_info(dev, root->name,
+							 root,
+							 &hwm_chip_info,
+							 hwm_groups);
+	if (IS_ERR(hwmon_dev)) {
+		i915->hwmon = NULL;
+		return;
+	}
+
+	root->hwmon_dev = hwmon_dev;
+
+	for_each_gt(gt, i915, id) {
+		tile = &hwmon->ddat_gt[id];
+
+		/*
+		 * Create per-gt directories only if a per-gt attribute is
+		 * visible. Currently this is only energy
+		 */
+		if (hwm_gt_is_visible(tile, hwmon_energy, hwmon_energy_input, 0)) {
+			hwmon_dev = devm_hwmon_device_register_with_info(dev, tile->name,
+									 tile,
+									 &hwm_gt_chip_info,
+									 NULL);
+			if (IS_ERR(hwmon_dev))
+				continue;
+
+			tile->hwmon_dev = hwmon_dev;
+		}
+	}
+}
+
+/*
+ * i915_hwmon_unregister - drop the driver's reference to its hwmon state
+ *
+ * Only clears i915->hwmon; nothing is freed or unregistered here.
+ */
+void i915_hwmon_unregister(struct drm_i915_private *i915)
+{
+	i915->hwmon = NULL;
+}
diff --git a/drivers/gpu/drm/i915/i915_hwmon.h b/drivers/gpu/drm/i915/i915_hwmon.h
new file mode 100644
index 000000000000..7ca9cf2c34c9
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_hwmon.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_HWMON_H__
+#define __I915_HWMON_H__
+
+struct drm_i915_private;
+
+/*
+ * hwmon registration entry points. When CONFIG_HWMON is not reachable both
+ * calls compile to empty inline stubs, so callers need no #ifdef of their
+ * own.
+ */
+#if IS_REACHABLE(CONFIG_HWMON)
+void i915_hwmon_register(struct drm_i915_private *i915);
+void i915_hwmon_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_hwmon_register(struct drm_i915_private *i915) { }
+static inline void i915_hwmon_unregister(struct drm_i915_private *i915) { }
+#endif
+
+#endif /* __I915_HWMON_H__ */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 9486127a44f7..211913be40ce 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1023,6 +1023,8 @@ static const struct intel_device_info adl_p_info = {
.has_logical_ring_contexts = 1, \
.has_logical_ring_elsq = 1, \
.has_mslice_steering = 1, \
+ .has_oa_bpc_reporting = 1, \
+ .has_oa_slice_contrib_limits = 1, \
.has_rc6 = 1, \
.has_reset_engine = 1, \
.has_rps = 1, \
@@ -1042,7 +1044,6 @@ static const struct intel_device_info xehpsdv_info = {
PLATFORM(INTEL_XEHPSDV),
NO_DISPLAY,
.has_64k_pages = 1,
- .needs_compact_pt = 1,
.has_media_ratio_mode = 1,
.__runtime.platform_engine_mask =
BIT(RCS0) | BIT(BCS0) |
@@ -1064,7 +1065,6 @@ static const struct intel_device_info xehpsdv_info = {
.has_64k_pages = 1, \
.has_guc_deprivilege = 1, \
.has_heci_pxp = 1, \
- .needs_compact_pt = 1, \
.has_media_ratio_mode = 1, \
.display.has_cdclk_squash = 1, \
.__runtime.platform_engine_mask = \
@@ -1146,6 +1146,7 @@ static const struct intel_device_info mtl_info = {
.extra_gt_list = xelpmp_extra_gt,
.has_flat_ccs = 0,
.has_gmd_id = 1,
+ .has_mslice_steering = 0,
.has_snoop = 1,
.__runtime.memory_regions = REGION_SMEM | REGION_STOLEN_LMEM,
.__runtime.platform_engine_mask = BIT(RCS0) | BIT(BCS0) | BIT(CCS0),
@@ -1298,9 +1299,7 @@ bool i915_pci_resource_valid(struct pci_dev *pdev, int bar)
static bool intel_mmio_bar_valid(struct pci_dev *pdev, struct intel_device_info *intel_info)
{
- int gttmmaddr_bar = intel_info->__runtime.graphics.ip.ver == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
-
- return i915_pci_resource_valid(pdev, gttmmaddr_bar);
+ return i915_pci_resource_valid(pdev, intel_mmio_bar(intel_info->__runtime.graphics.ip.ver));
}
static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0defbb43ceea..0dd597a7a11f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -204,10 +204,12 @@
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_clock_utils.h"
+#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "gt/intel_lrc_reg.h"
#include "gt/intel_ring.h"
+#include "gt/uc/intel_guc_slpc.h"
#include "i915_drv.h"
#include "i915_file_private.h"
@@ -286,6 +288,7 @@ static u32 i915_perf_stream_paranoid = true;
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
#define OAREPORT_REASON_CLK_RATIO (1<<5)
+#define HAS_MI_SET_PREDICATE(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
*
@@ -320,6 +323,8 @@ static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_A12] = { 0, 64 },
[I915_OA_FORMAT_A12_B8_C8] = { 2, 128 },
[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+ [I915_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+ [I915_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256 },
};
#define SAMPLE_OA_REPORT (1<<0)
@@ -462,7 +467,7 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
unsigned long flags;
bool pollin;
u32 hw_tail;
@@ -599,7 +604,7 @@ static int append_oa_sample(struct i915_perf_stream *stream,
size_t *offset,
const u8 *report)
{
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
struct drm_i915_perf_record_header header;
header.type = DRM_I915_PERF_RECORD_SAMPLE;
@@ -649,14 +654,13 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct intel_uncore *uncore = stream->uncore;
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
- u32 taken;
int ret = 0;
if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -692,7 +696,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ OA_TAKEN(tail, head);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -774,7 +778,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* switches since it's not-uncommon for periodic samples to
* identify a switch before any 'context switch' report.
*/
- if (!stream->perf->exclusive_stream->ctx ||
+ if (!stream->ctx ||
stream->specific_ctx_id == ctx_id ||
stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
reason & OAREPORT_REASON_CTX_SWITCH) {
@@ -783,7 +787,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* While filtering for a single context we avoid
* leaking the IDs of other contexts.
*/
- if (stream->perf->exclusive_stream->ctx &&
+ if (stream->ctx &&
stream->specific_ctx_id != ctx_id) {
report32[2] = INVALID_CTX_ID;
}
@@ -943,14 +947,13 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct intel_uncore *uncore = stream->uncore;
- int report_size = stream->oa_buffer.format_size;
+ int report_size = stream->oa_buffer.format->size;
u8 *oa_buf_base = stream->oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
unsigned long flags;
u32 head, tail;
- u32 taken;
int ret = 0;
if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
@@ -984,7 +987,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ OA_TAKEN(tail, head);
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -1233,6 +1236,196 @@ retry:
return stream->pinned_ctx;
}
+/*
+ * Emit a 4 dword MI_STORE_REGISTER_MEM (with MI_SRM_LRM_GLOBAL_GTT set) into
+ * @rq that stores @reg at @ggtt_offset.
+ *
+ * Return: 0 on success, or the error from intel_ring_begin().
+ */
+static int
+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
+{
+	u32 cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	u32 *cs;
+
+	/* the command dword is one higher from graphics version 8 onwards */
+	if (GRAPHICS_VER(rq->engine->i915) >= 8)
+		cmd += 1;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	cs[0] = cmd;
+	cs[1] = i915_mmio_reg_offset(reg);
+	cs[2] = ggtt_offset;
+	cs[3] = 0;
+
+	intel_ring_advance(rq, cs + 4);
+
+	return 0;
+}
+
+/*
+ * Submit a request on @ce that stores @reg at @ggtt_offset and wait up to
+ * HZ / 2 for it to complete.
+ *
+ * The request is always added once it has been created, even if emitting
+ * the store failed; in that case the wait is skipped.
+ *
+ * Return: 0 on success, the request creation or emit error, or -ETIME if
+ * the wait fails.
+ */
+static int
+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
+{
+	struct i915_request *rq;
+	int ret;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	/* hold our own reference across add + wait */
+	i915_request_get(rq);
+
+	ret = __store_reg_to_mem(rq, reg, ggtt_offset);
+	i915_request_add(rq);
+	if (ret)
+		goto out_put;
+
+	if (i915_request_wait(rq, 0, HZ / 2) < 0)
+		ret = -ETIME;
+
+out_put:
+	i915_request_put(rq);
+
+	return ret;
+}
+
+/*
+ * Read RING_EXECLIST_STATUS_HI of @ce's engine through a request running on
+ * @ce and return the raw value in @ctx_id.
+ *
+ * A 4 byte scratch vma in the gt's GGTT receives the register value; it is
+ * released again on every path.
+ *
+ * Return: 0 on success, negative error code otherwise (@ctx_id is then left
+ * untouched).
+ */
+static int
+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
+{
+	struct i915_vma *scratch;
+	u32 *map;
+	int ret;
+
+	scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	ret = i915_vma_sync(scratch);
+	if (!ret)
+		ret = __read_reg(ce,
+				 RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
+				 i915_ggtt_offset(scratch));
+
+	if (!ret) {
+		map = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
+		if (IS_ERR(map)) {
+			ret = PTR_ERR(map);
+		} else {
+			*ctx_id = *map;
+			i915_gem_object_unpin_map(scratch->obj);
+		}
+	}
+
+	i915_vma_unpin_and_release(&scratch, 0);
+
+	return ret;
+}
+
+/*
+ * Determine the context id, and the mask used to match it, for the context
+ * pinned by @stream.
+ *
+ * For execlist mode of submission, pick an unused context id:
+ * 0 - (NUM_CONTEXT_TAG - 1) are used by other contexts,
+ * XXX_MAX_CONTEXT_HW_ID is used by the idle context.
+ *
+ * For GuC mode of submission, read the context id from the upper dword of
+ * the EXECLIST_STATUS register. The value is read only once and is expected
+ * to stay fixed for the entire OA use case. There are cases where the GuC
+ * KMD implementation may deregister a context to reuse its context id, but
+ * we prevent that from happening to the OA context by pinning it.
+ *
+ * Return: 0 on success, or the error from reading the id in GuC mode (the
+ * stream fields are not modified in that case).
+ */
+static int gen12_get_render_context_id(struct i915_perf_stream *stream)
+{
+	u32 mask, ctx_id;
+	int err;
+
+	if (intel_engine_uses_guc(stream->engine)) {
+		err = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
+		if (err)
+			return err;
+
+		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
+			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
+	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
+		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
+			(XEHP_SW_CTX_ID_SHIFT - 32);
+
+		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
+			(XEHP_SW_CTX_ID_SHIFT - 32);
+	} else {
+		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
+			(GEN11_SW_CTX_ID_SHIFT - 32);
+
+		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
+			(GEN11_SW_CTX_ID_SHIFT - 32);
+	}
+
+	stream->specific_ctx_id_mask = mask;
+	stream->specific_ctx_id = ctx_id & mask;
+
+	return 0;
+}
+
+/*
+ * Search one MI_LRI command in @state for register @reg.
+ *
+ * On entry *@offset is the index of the MI_LRI header dword. The dwords
+ * after the header are visited two at a time, stopping at the end of the
+ * command or at @end, whichever comes first.
+ *
+ * On return *@offset is the index of the matching dword when found,
+ * otherwise the index at which the scan stopped.
+ *
+ * Return: true if @reg was found.
+ */
+static bool oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
+{
+	u32 pos = *offset;
+	u32 limit = min(MI_LRI_LEN(state[pos]) + pos, end);
+
+	/* skip the header dword, then step over reg-value pairs */
+	for (pos++; pos < limit; pos += 2) {
+		if (state[pos] == reg) {
+			*offset = pos;
+			return true;
+		}
+	}
+
+	*offset = pos;
+	return false;
+}
+
+/*
+ * Find register @reg in the register state of context @ce.
+ *
+ * Walks ce->lrc_reg_state dword by dword, up to
+ * (context_size - PAGE_SIZE) / 4 dwords, and searches every MI_LRI command
+ * it encounters.
+ *
+ * Return: dword offset of the matching entry, or U32_MAX when @reg is not
+ * found or when @ce has no register state to search.
+ */
+static u32 oa_context_image_offset(struct intel_context *ce, u32 reg)
+{
+	u32 offset, len = (ce->engine->context_size - PAGE_SIZE) / 4;
+	u32 *state = ce->lrc_reg_state;
+
+	/*
+	 * Without a register state there is nothing to parse; report
+	 * "not found" instead of dereferencing a NULL pointer below.
+	 */
+	if (!state)
+		return U32_MAX;
+
+	for (offset = 0; offset < len; ) {
+		if (IS_MI_LRI_CMD(state[offset])) {
+			/*
+			 * We expect reg-value pairs in MI_LRI command, so
+			 * MI_LRI_LEN() should be even, if not, issue a warning.
+			 */
+			drm_WARN_ON(&ce->engine->i915->drm,
+				    MI_LRI_LEN(state[offset]) & 0x1);
+
+			/* on a miss, offset has been moved past this command */
+			if (oa_find_reg_in_lri(state, reg, &offset, len))
+				break;
+		} else {
+			offset++;
+		}
+	}
+
+	return offset < len ? offset : U32_MAX;
+}
+
+/*
+ * Look up, once, where GEN12_OACTXCONTROL lives in the context image and
+ * cache the result in perf->ctx_oactxctrl_offset.
+ *
+ * A cached value of 0 means "not looked up yet"; a failed lookup is cached
+ * as U32_MAX so it is not repeated.
+ *
+ * Return: 0 if a usable offset is cached, -ENODEV otherwise.
+ */
+static int set_oa_ctx_ctrl_offset(struct intel_context *ce)
+{
+	struct i915_perf *perf = &ce->engine->i915->perf;
+	u32 offset = perf->ctx_oactxctrl_offset;
+
+	/* Do this only once. Failure is stored as offset of U32_MAX */
+	if (!offset) {
+		i915_reg_t reg = GEN12_OACTXCONTROL(ce->engine->mmio_base);
+
+		offset = oa_context_image_offset(ce, i915_mmio_reg_offset(reg));
+		perf->ctx_oactxctrl_offset = offset;
+
+		drm_dbg(&ce->engine->i915->drm,
+			"%s oa ctx control at 0x%08x dword offset\n",
+			ce->engine->name, offset);
+	}
+
+	if (!offset || offset == U32_MAX)
+		return -ENODEV;
+
+	return 0;
+}
+
+/* Only engines of RENDER_CLASS are reported as supporting MI query. */
+static bool engine_supports_mi_query(struct intel_engine_cs *engine)
+{
+	const bool is_render = engine->class == RENDER_CLASS;
+
+	return is_render;
+}
+
/**
* oa_get_render_ctx_id - determine and hold ctx hw id
* @stream: An i915-perf stream opened for OA metrics
@@ -1246,11 +1439,27 @@ retry:
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
struct intel_context *ce;
+ int ret = 0;
ce = oa_pin_context(stream);
if (IS_ERR(ce))
return PTR_ERR(ce);
+ if (engine_supports_mi_query(stream->engine)) {
+ /*
+ * We are enabling perf query here. If we don't find the context
+ * offset here, just return an error.
+ */
+ ret = set_oa_ctx_ctrl_offset(ce);
+ if (ret) {
+ intel_context_unpin(ce);
+ drm_err(&stream->perf->i915->drm,
+ "Enabling perf query failed for %s\n",
+ stream->engine->name);
+ return ret;
+ }
+ }
+
switch (GRAPHICS_VER(ce->engine->i915)) {
case 7: {
/*
@@ -1292,24 +1501,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
case 11:
case 12:
- if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
- stream->specific_ctx_id_mask =
- ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
- (XEHP_SW_CTX_ID_SHIFT - 32);
- stream->specific_ctx_id =
- (XEHP_MAX_CONTEXT_HW_ID - 1) <<
- (XEHP_SW_CTX_ID_SHIFT - 32);
- } else {
- stream->specific_ctx_id_mask =
- ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- /*
- * Pick an unused context id
- * 0 - BITS_PER_LONG are used by other contexts
- * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
- */
- stream->specific_ctx_id =
- (GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
- }
+ ret = gen12_get_render_context_id(stream);
break;
default:
@@ -1323,7 +1515,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
stream->specific_ctx_id,
stream->specific_ctx_id_mask);
- return 0;
+ return ret;
}
/**
@@ -1375,8 +1567,9 @@ free_noa_wait(struct i915_perf_stream *stream)
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
struct i915_perf *perf = stream->perf;
+ struct intel_gt *gt = stream->engine->gt;
- if (WARN_ON(stream != perf->exclusive_stream))
+ if (WARN_ON(stream != gt->perf.exclusive_stream))
return;
/*
@@ -1385,11 +1578,20 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
*
* See i915_oa_init_reg_state() and lrc_configure_all_contexts()
*/
- WRITE_ONCE(perf->exclusive_stream, NULL);
+ WRITE_ONCE(gt->perf.exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
+ /*
+ * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
+ */
+ if (intel_uc_uses_guc_rc(&gt->uc) &&
+ (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)))
+ drm_WARN_ON(&gt->i915->drm,
+ intel_guc_slpc_unset_gucrc_mode(&gt->uc.guc.slpc));
+
intel_uncore_forcewake_put(stream->uncore, FORCEWAKE_ALL);
intel_engine_pm_put(stream->engine);
@@ -1563,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
struct drm_i915_private *i915 = stream->perf->i915;
+ struct intel_gt *gt = stream->engine->gt;
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
int ret;
@@ -1582,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream)
i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
/* PreHSW required 512K alignment, HSW requires 16M */
- vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
+ vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unref;
}
+
+ /*
+ * PreHSW required 512K alignment.
+ * HSW and onwards, align to requested size of OA buffer.
+ */
+ ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH);
+ if (ret) {
+ drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret);
+ goto err_unref;
+ }
+
stream->oa_buffer.vma = vma;
stream->oa_buffer.vaddr =
@@ -1636,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
static int alloc_noa_wait(struct i915_perf_stream *stream)
{
struct drm_i915_private *i915 = stream->perf->i915;
+ struct intel_gt *gt = stream->engine->gt;
struct drm_i915_gem_object *bo;
struct i915_vma *vma;
const u64 delay_ticks = 0xffffffffffffffff -
@@ -1654,6 +1869,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
DELTA_TARGET,
N_CS_GPR
};
+ i915_reg_t mi_predicate_result = HAS_MI_SET_PREDICATE(i915) ?
+ MI_PREDICATE_RESULT_2_ENGINE(base) :
+ MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
bo = i915_gem_object_create_internal(i915, 4096);
if (IS_ERR(bo)) {
@@ -1673,12 +1891,16 @@ retry:
* multiple OA config BOs will have a jump to this address and it
* needs to be fixed during the lifetime of the i915/perf stream.
*/
- vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH);
+ vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto out_ww;
}
+ ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH);
+ if (ret)
+ goto out_ww;
+
batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
if (IS_ERR(batch)) {
ret = PTR_ERR(batch);
@@ -1691,7 +1913,7 @@ retry:
stream, cs, true /* save */, CS_GPR(i),
INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
cs = save_restore_register(
- stream, cs, true /* save */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
+ stream, cs, true /* save */, mi_predicate_result,
INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
/* First timestamp snapshot location. */
@@ -1745,7 +1967,10 @@ retry:
*/
*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
- *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
+ *cs++ = i915_mmio_reg_offset(mi_predicate_result);
+
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE | 1;
/* Restart from the beginning if we had timestamps roll over. */
*cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1755,6 +1980,9 @@ retry:
*cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
*cs++ = 0;
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE;
+
/*
* Now add the diff between to previous timestamps and add it to :
* (((1 * << 64) - 1) - delay_ns)
@@ -1782,7 +2010,10 @@ retry:
*/
*cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
*cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
- *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
+ *cs++ = i915_mmio_reg_offset(mi_predicate_result);
+
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE | 1;
/* Predicate the jump. */
*cs++ = (GRAPHICS_VER(i915) < 8 ?
@@ -1792,13 +2023,16 @@ retry:
*cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
*cs++ = 0;
+ if (HAS_MI_SET_PREDICATE(i915))
+ *cs++ = MI_SET_PREDICATE;
+
/* Restore registers. */
for (i = 0; i < N_CS_GPR; i++)
cs = save_restore_register(
stream, cs, false /* restore */, CS_GPR(i),
INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
cs = save_restore_register(
- stream, cs, false /* restore */, MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
+ stream, cs, false /* restore */, mi_predicate_result,
INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
/* And return to the ring. */
@@ -2283,11 +2517,12 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
{
int err;
struct intel_context *ce = stream->pinned_ctx;
- u32 format = stream->oa_buffer.format;
+ u32 format = stream->oa_buffer.format->format;
+ u32 offset = stream->perf->ctx_oactxctrl_offset;
struct flex regs_context[] = {
{
GEN8_OACTXCONTROL,
- stream->perf->ctx_oactxctrl_offset + 1,
+ offset + 1,
active ? GEN8_OA_COUNTER_RESUME : 0,
},
};
@@ -2312,12 +2547,13 @@ static int gen12_configure_oar_context(struct i915_perf_stream *stream,
},
};
- /* Modify the context image of pinned context with regs_context*/
+ /* Modify the context image of pinned context with regs_context */
err = intel_context_lock_pinned(ce);
if (err)
return err;
- err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+ err = gen8_modify_context(ce, regs_context,
+ ARRAY_SIZE(regs_context));
intel_context_unlock_pinned(ce);
if (err)
return err;
@@ -2359,10 +2595,11 @@ oa_configure_all_contexts(struct i915_perf_stream *stream,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct intel_engine_cs *engine;
+ struct intel_gt *gt = stream->engine->gt;
struct i915_gem_context *ctx, *cn;
int err;
- lockdep_assert_held(&stream->perf->lock);
+ lockdep_assert_held(&gt->perf.lock);
/*
* The OA register config is setup through the context image. This image
@@ -2442,6 +2679,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
const struct i915_oa_config *oa_config,
struct i915_active *active)
{
+ u32 ctx_oactxctrl = stream->perf->ctx_oactxctrl_offset;
/* The MMIO offsets for Flex EU registers aren't contiguous */
const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
@@ -2452,7 +2690,7 @@ lrc_configure_all_contexts(struct i915_perf_stream *stream,
},
{
GEN8_OACTXCONTROL,
- stream->perf->ctx_oactxctrl_offset + 1,
+ ctx_oactxctrl + 1,
},
{ EU_PERF_CNTL0, ctx_flexeuN(0) },
{ EU_PERF_CNTL1, ctx_flexeuN(1) },
@@ -2540,12 +2778,26 @@ static int
gen12_enable_metric_set(struct i915_perf_stream *stream,
struct i915_active *active)
{
+ struct drm_i915_private *i915 = stream->perf->i915;
struct intel_uncore *uncore = stream->uncore;
struct i915_oa_config *oa_config = stream->oa_config;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
+ u32 sqcnt1;
int ret;
+ /*
+ * Wa_1508761755:xehpsdv, dg2
+ * EU NOA signals behave incorrectly if EU clock gating is enabled.
+ * Disable thread stall DOP gating and EU DOP gating.
+ */
+ if (IS_XEHPSDV(i915) || IS_DG2(i915)) {
+ intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
+ _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
+ intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_ENABLE(GEN12_DISABLE_DOP_GATING));
+ }
+
intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
/* Disable clk ratio reports, like previous Gens. */
_MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
@@ -2563,6 +2815,16 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
: 0);
/*
+ * Initialize Super Queue Internal Cnt Register
+ * Set PMON Enable in order to collect valid metrics.
+ * Enable bytes per clock reporting in OA for XEHPSDV onward.
+ */
+ sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+ (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+ intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
+
+ /*
* Update all contexts prior writing the mux configurations as we need
* to make sure all slices/subslices are ON before writing to NOA
* registers.
@@ -2611,6 +2873,19 @@ static void gen11_disable_metric_set(struct i915_perf_stream *stream)
static void gen12_disable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
+ struct drm_i915_private *i915 = stream->perf->i915;
+ u32 sqcnt1;
+
+ /*
+ * Wa_1508761755:xehpsdv, dg2
+ * Enable thread stall DOP gating and EU DOP gating.
+ */
+ if (IS_XEHPSDV(i915) || IS_DG2(i915)) {
+ intel_gt_mcr_multicast_write(uncore->gt, GEN8_ROW_CHICKEN,
+ _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
+ intel_uncore_write(uncore, GEN7_ROW_CHICKEN2,
+ _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING));
+ }
/* Reset all contexts' slices/subslices configurations. */
gen12_configure_all_contexts(stream, NULL, NULL);
@@ -2621,6 +2896,12 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
+
+ sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
+ (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
+
+ /* Reset PMON Enable to save power. */
+ intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
}
static void gen7_oa_enable(struct i915_perf_stream *stream)
@@ -2630,7 +2911,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
u32 ctx_id = stream->specific_ctx_id;
bool periodic = stream->periodic;
u32 period_exponent = stream->period_exponent;
- u32 report_format = stream->oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format->format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -2656,7 +2937,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
static void gen8_oa_enable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format->format;
/*
* Reset buf pointers so we don't forward reports from before now.
@@ -2682,7 +2963,7 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
static void gen12_oa_enable(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
- u32 report_format = stream->oa_buffer.format;
+ u32 report_format = stream->oa_buffer.format->format;
/*
* If we don't want OA reports from the OA buffer, then we don't even
@@ -2838,6 +3119,30 @@ get_sseu_config(struct intel_sseu *out_sseu,
return i915_gem_user_to_context_sseu(engine->gt, drm_sseu, out_sseu);
}
+/*
+ * Return the OA timestamp frequency for @i915.
+ *
+ * On most platforms this equals the CS timestamp frequency. On some
+ * platforms the OA unit ignores the CTC_SHIFT and the two timestamps differ;
+ * there the CS timestamp frequency is adjusted by the CTC shift parameter
+ * read from RPM_CONFIG0 before it is returned to the user.
+ */
+u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
+{
+	struct intel_gt *gt = to_gt(i915);
+	intel_wakeref_t wakeref;
+	u32 reg, shift;
+
+	if (!IS_DG2(i915))
+		return gt->clock_frequency;
+
+	/* Wa_18013179988:dg2 */
+	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
+		reg = intel_uncore_read(gt->uncore, RPM_CONFIG0);
+
+	shift = REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK,
+			      reg);
+
+	return gt->clock_frequency << (3 - shift);
+}
+
/**
* i915_oa_stream_init - validate combined props for OA stream and init
* @stream: An i915 perf stream
@@ -2862,7 +3167,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
{
struct drm_i915_private *i915 = stream->perf->i915;
struct i915_perf *perf = stream->perf;
- int format_size;
+ struct intel_gt *gt;
int ret;
if (!props->engine) {
@@ -2870,6 +3175,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
"OA engine not specified\n");
return -EINVAL;
}
+ gt = props->engine->gt;
/*
* If the sysfs metrics/ directory wasn't registered for some
@@ -2900,7 +3206,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
* counter reports and marshal to the appropriate client
* we currently only allow exclusive access
*/
- if (perf->exclusive_stream) {
+ if (gt->perf.exclusive_stream) {
drm_dbg(&stream->perf->i915->drm,
"OA unit already in use\n");
return -EBUSY;
@@ -2917,20 +3223,15 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
- format_size = perf->oa_formats[props->oa_format].size;
+ stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
+ if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
+ return -EINVAL;
stream->sample_flags = props->sample_flags;
- stream->sample_size += format_size;
-
- stream->oa_buffer.format_size = format_size;
- if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
- return -EINVAL;
+ stream->sample_size += stream->oa_buffer.format->size;
stream->hold_preemption = props->hold_preemption;
- stream->oa_buffer.format =
- perf->oa_formats[props->oa_format].format;
-
stream->periodic = props->oa_periodic;
if (stream->periodic)
stream->period_exponent = props->oa_period_exponent;
@@ -2974,14 +3275,31 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
intel_engine_pm_get(stream->engine);
intel_uncore_forcewake_get(stream->uncore, FORCEWAKE_ALL);
+ /*
+ * Wa_16011777198:dg2: GuC resets render as part of the Wa. This causes
+ * OA to lose the configuration state. Prevent this by overriding GUCRC
+ * mode.
+ */
+ if (intel_uc_uses_guc_rc(&gt->uc) &&
+ (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
+ IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0))) {
+ ret = intel_guc_slpc_override_gucrc_mode(&gt->uc.guc.slpc,
+ SLPC_GUCRC_MODE_GUCRC_NO_RC6);
+ if (ret) {
+ drm_dbg(&stream->perf->i915->drm,
+ "Unable to override gucrc mode\n");
+ goto err_config;
+ }
+ }
+
ret = alloc_oa_buffer(stream);
if (ret)
goto err_oa_buf_alloc;
stream->ops = &i915_oa_stream_ops;
- perf->sseu = props->sseu;
- WRITE_ONCE(perf->exclusive_stream, stream);
+ stream->engine->gt->perf.sseu = props->sseu;
+ WRITE_ONCE(gt->perf.exclusive_stream, stream);
ret = i915_perf_stream_enable_sync(stream);
if (ret) {
@@ -2999,11 +3317,12 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
stream->poll_check_timer.function = oa_poll_check_timer_cb;
init_waitqueue_head(&stream->poll_wq);
spin_lock_init(&stream->oa_buffer.ptr_lock);
+ mutex_init(&stream->lock);
return 0;
err_enable:
- WRITE_ONCE(perf->exclusive_stream, NULL);
+ WRITE_ONCE(gt->perf.exclusive_stream, NULL);
perf->ops.disable_metric_set(stream);
free_oa_buffer(stream);
@@ -3033,7 +3352,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
return;
/* perf.exclusive_stream serialised by lrc_configure_all_contexts() */
- stream = READ_ONCE(engine->i915->perf.exclusive_stream);
+ stream = READ_ONCE(engine->gt->perf.exclusive_stream);
if (stream && GRAPHICS_VER(stream->perf->i915) < 12)
gen8_update_reg_state_unlocked(ce, stream);
}
@@ -3062,7 +3381,6 @@ static ssize_t i915_perf_read(struct file *file,
loff_t *ppos)
{
struct i915_perf_stream *stream = file->private_data;
- struct i915_perf *perf = stream->perf;
size_t offset = 0;
int ret;
@@ -3086,14 +3404,14 @@ static ssize_t i915_perf_read(struct file *file,
if (ret)
return ret;
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = stream->ops->read(stream, buf, count, &offset);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
} while (!offset && !ret);
} else {
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = stream->ops->read(stream, buf, count, &offset);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
}
/* We allow the poll checking to sometimes report false positive EPOLLIN
@@ -3140,9 +3458,6 @@ static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
* &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that
* will be woken for new stream data.
*
- * Note: The &perf->lock mutex has been taken to serialize
- * with any non-file-operation driver hooks.
- *
* Returns: any poll events that are ready without sleeping
*/
static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
@@ -3181,12 +3496,11 @@ static __poll_t i915_perf_poll_locked(struct i915_perf_stream *stream,
static __poll_t i915_perf_poll(struct file *file, poll_table *wait)
{
struct i915_perf_stream *stream = file->private_data;
- struct i915_perf *perf = stream->perf;
__poll_t ret;
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = i915_perf_poll_locked(stream, file, wait);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
return ret;
}
@@ -3285,9 +3599,6 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
* @cmd: the ioctl request
* @arg: the ioctl data
*
- * Note: The &perf->lock mutex has been taken to serialize
- * with any non-file-operation driver hooks.
- *
* Returns: zero on success or a negative error code. Returns -EINVAL for
* an unknown ioctl request.
*/
@@ -3325,12 +3636,11 @@ static long i915_perf_ioctl(struct file *file,
unsigned long arg)
{
struct i915_perf_stream *stream = file->private_data;
- struct i915_perf *perf = stream->perf;
long ret;
- mutex_lock(&perf->lock);
+ mutex_lock(&stream->lock);
ret = i915_perf_ioctl_locked(stream, cmd, arg);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&stream->lock);
return ret;
}
@@ -3342,7 +3652,7 @@ static long i915_perf_ioctl(struct file *file,
* Frees all resources associated with the given i915 perf @stream, disabling
* any associated data capture in the process.
*
- * Note: The &perf->lock mutex has been taken to serialize
+ * Note: The &gt->perf.lock mutex has been taken to serialize
* with any non-file-operation driver hooks.
*/
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
@@ -3374,10 +3684,16 @@ static int i915_perf_release(struct inode *inode, struct file *file)
{
struct i915_perf_stream *stream = file->private_data;
struct i915_perf *perf = stream->perf;
+ struct intel_gt *gt = stream->engine->gt;
- mutex_lock(&perf->lock);
+ /*
+ * Within this call, we know that the fd is being closed and we have no
+ * other user of stream->lock. Use the perf lock to destroy the stream
+ * here.
+ */
+ mutex_lock(&gt->perf.lock);
i915_perf_destroy_locked(stream);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
/* Release the reference the perf stream kept on the driver. */
drm_dev_put(&perf->i915->drm);
@@ -3410,7 +3726,7 @@ static const struct file_operations fops = {
* See i915_perf_ioctl_open() for interface details.
*
* Implements further stream config validation and stream initialization on
- * behalf of i915_perf_open_ioctl() with the &perf->lock mutex
+ * behalf of i915_perf_open_ioctl() with the &gt->perf.lock mutex
* taken to serialize with any non-file-operation driver hooks.
*
* Note: at this point the @props have only been validated in isolation and
@@ -3565,8 +3881,10 @@ err:
static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
{
- return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
- 2ULL << exponent);
+ u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
+ u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
+
+ return div_u64(nom + den - 1, den);
}
static __always_inline bool
@@ -3794,7 +4112,7 @@ static int read_properties_unlocked(struct i915_perf *perf,
* mutex to avoid an awkward lockdep with mmap_lock.
*
* Most of the implementation details are handled by
- * i915_perf_open_ioctl_locked() after taking the &perf->lock
+ * i915_perf_open_ioctl_locked() after taking the &gt->perf.lock
* mutex for serializing with any non-file-operation driver hooks.
*
* Return: A newly opened i915 Perf stream file descriptor or negative
@@ -3805,6 +4123,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
{
struct i915_perf *perf = &to_i915(dev)->perf;
struct drm_i915_perf_open_param *param = data;
+ struct intel_gt *gt;
struct perf_open_properties props;
u32 known_open_flags;
int ret;
@@ -3831,9 +4150,11 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
if (ret)
return ret;
- mutex_lock(&perf->lock);
+ gt = props.engine->gt;
+
+ mutex_lock(&gt->perf.lock);
ret = i915_perf_open_ioctl_locked(perf, param, &props, file);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
return ret;
}
@@ -3849,6 +4170,7 @@ int i915_perf_open_ioctl(struct drm_device *dev, void *data,
void i915_perf_register(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
+ struct intel_gt *gt = to_gt(i915);
if (!perf->i915)
return;
@@ -3857,13 +4179,13 @@ void i915_perf_register(struct drm_i915_private *i915)
* i915_perf_open_ioctl(); considering that we register after
* being exposed to userspace.
*/
- mutex_lock(&perf->lock);
+ mutex_lock(&gt->perf.lock);
perf->metrics_kobj =
kobject_create_and_add("metrics",
&i915->drm.primary->kdev->kobj);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
}
/**
@@ -3939,6 +4261,11 @@ static const struct i915_range gen12_oa_b_counters[] = {
{}
};
+/*
+ * Extra B-counter register ranges accepted by xehp_is_valid_b_counter_addr().
+ * Like gen12_oa_b_counters, the table must end with an empty record:
+ * reg_in_range_table() relies on that sentinel to stop walking the array.
+ */
+static const struct i915_range xehp_oa_b_counters[] = {
+ { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
+ { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
+ {}
+};
+
static const struct i915_range gen7_oa_mux_regs[] = {
{ .start = 0x91b8, .end = 0x91cc }, /* OA_PERFCNT[1-2], OA_PERFMATRIX */
{ .start = 0x9800, .end = 0x9888 }, /* MICRO_BP0_0 - NOA_WRITE */
@@ -4013,6 +4340,12 @@ static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
return reg_in_range_table(addr, gen12_oa_b_counters);
}
+static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
+{
+ return reg_in_range_table(addr, xehp_oa_b_counters) ||
+ reg_in_range_table(addr, gen12_oa_b_counters);
+}
+
static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
{
return reg_in_range_table(addr, gen12_oa_mux_regs);
@@ -4411,11 +4744,47 @@ static void oa_init_supported_formats(struct i915_perf *perf)
oa_format_add(perf, I915_OA_FORMAT_C4_B8);
break;
+ case INTEL_DG2:
+ oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
+ oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
+ break;
+
default:
MISSING_CASE(platform);
}
}
+static void i915_perf_init_info(struct drm_i915_private *i915)
+{
+ struct i915_perf *perf = &i915->perf;
+
+ switch (GRAPHICS_VER(i915)) {
+ case 8:
+ perf->ctx_oactxctrl_offset = 0x120;
+ perf->ctx_flexeu0_offset = 0x2ce;
+ perf->gen8_valid_ctx_bit = BIT(25);
+ break;
+ case 9:
+ perf->ctx_oactxctrl_offset = 0x128;
+ perf->ctx_flexeu0_offset = 0x3de;
+ perf->gen8_valid_ctx_bit = BIT(16);
+ break;
+ case 11:
+ perf->ctx_oactxctrl_offset = 0x124;
+ perf->ctx_flexeu0_offset = 0x78e;
+ perf->gen8_valid_ctx_bit = BIT(16);
+ break;
+ case 12:
+ /*
+ * Calculate offset at runtime in oa_pin_context for gen12 and
+ * cache the value in perf->ctx_oactxctrl_offset.
+ */
+ break;
+ default:
+ MISSING_CASE(GRAPHICS_VER(i915));
+ }
+}
+
/**
* i915_perf_init - initialize i915-perf state on module bind
* @i915: i915 device instance
@@ -4429,12 +4798,6 @@ void i915_perf_init(struct drm_i915_private *i915)
{
struct i915_perf *perf = &i915->perf;
- /* XXX const struct i915_perf_ops! */
-
- /* i915_perf is not enabled for DG2 yet */
- if (IS_DG2(i915))
- return;
-
perf->oa_formats = oa_formats;
if (IS_HASWELL(i915)) {
perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr;
@@ -4454,6 +4817,7 @@ void i915_perf_init(struct drm_i915_private *i915)
* execlist mode by default.
*/
perf->ops.read = gen8_oa_read;
+ i915_perf_init_info(i915);
if (IS_GRAPHICS_VER(i915, 8, 9)) {
perf->ops.is_valid_b_counter_reg =
@@ -4473,18 +4837,6 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen8_disable_metric_set;
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
-
- if (GRAPHICS_VER(i915) == 8) {
- perf->ctx_oactxctrl_offset = 0x120;
- perf->ctx_flexeu0_offset = 0x2ce;
-
- perf->gen8_valid_ctx_bit = BIT(25);
- } else {
- perf->ctx_oactxctrl_offset = 0x128;
- perf->ctx_flexeu0_offset = 0x3de;
-
- perf->gen8_valid_ctx_bit = BIT(16);
- }
} else if (GRAPHICS_VER(i915) == 11) {
perf->ops.is_valid_b_counter_reg =
gen7_is_valid_b_counter_addr;
@@ -4498,13 +4850,10 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen8_enable_metric_set;
perf->ops.disable_metric_set = gen11_disable_metric_set;
perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read;
-
- perf->ctx_oactxctrl_offset = 0x124;
- perf->ctx_flexeu0_offset = 0x78e;
-
- perf->gen8_valid_ctx_bit = BIT(16);
} else if (GRAPHICS_VER(i915) == 12) {
perf->ops.is_valid_b_counter_reg =
+ HAS_OA_SLICE_CONTRIB_LIMITS(i915) ?
+ xehp_is_valid_b_counter_addr :
gen12_is_valid_b_counter_addr;
perf->ops.is_valid_mux_reg =
gen12_is_valid_mux_addr;
@@ -4516,14 +4865,15 @@ void i915_perf_init(struct drm_i915_private *i915)
perf->ops.enable_metric_set = gen12_enable_metric_set;
perf->ops.disable_metric_set = gen12_disable_metric_set;
perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read;
-
- perf->ctx_flexeu0_offset = 0;
- perf->ctx_oactxctrl_offset = 0x144;
}
}
if (perf->ops.enable_metric_set) {
- mutex_init(&perf->lock);
+ struct intel_gt *gt;
+ int i;
+
+ for_each_gt(gt, i915, i)
+ mutex_init(&gt->perf.lock);
/* Choose a representative limit */
oa_sample_rate_hard_limit = to_gt(i915)->clock_frequency / 2;
diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
index 1d1329e5af3a..f96e09a4af04 100644
--- a/drivers/gpu/drm/i915/i915_perf.h
+++ b/drivers/gpu/drm/i915/i915_perf.h
@@ -57,4 +57,6 @@ static inline void i915_oa_config_put(struct i915_oa_config *oa_config)
kref_put(&oa_config->ref, i915_oa_config_release);
}
+u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915);
+
#endif /* __I915_PERF_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
index f31c9f13a9fc..381d94101610 100644
--- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
+++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
@@ -97,7 +97,7 @@
#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1
#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0)
-#define GEN12_OACTXCONTROL _MMIO(0x2360)
+#define GEN12_OACTXCONTROL(base) _MMIO((base) + 0x360)
#define GEN12_OAR_OASTATUS _MMIO(0x2968)
/* Gen12 OAG unit */
@@ -134,4 +134,8 @@
#define GDT_CHICKEN_BITS _MMIO(0x9840)
#define GT_NOA_ENABLE 0x00000080
+#define GEN12_SQCNT1 _MMIO(0x8718)
+#define GEN12_SQCNT1_PMON_ENABLE REG_BIT(30)
+#define GEN12_SQCNT1_OABPC REG_BIT(29)
+
#endif /* __INTEL_PERF_OA_REGS__ */
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index 05cb9a335a97..e0c96b44eda8 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -146,6 +146,11 @@ struct i915_perf_stream {
*/
struct intel_engine_cs *engine;
+ /*
+ * Lock associated with operations on stream
+ */
+ struct mutex lock;
+
/**
* @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
* properties given when opening a stream, representing the contents
@@ -245,11 +250,10 @@ struct i915_perf_stream {
* @oa_buffer: State of the OA buffer.
*/
struct {
+ const struct i915_oa_format *format;
struct i915_vma *vma;
u8 *vaddr;
u32 last_ctx_id;
- int format;
- int format_size;
int size_exponent;
/**
@@ -380,6 +384,26 @@ struct i915_oa_ops {
u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
};
+struct i915_perf_gt {
+ /**
+ * @lock: Lock associated with anything below within this structure.
+ * Taken around stream open in i915_perf_open_ioctl() and around stream
+ * destruction in i915_perf_release().
+ */
+ struct mutex lock;
+
+ /**
+ * @sseu: sseu configuration selected to run while perf is active,
+ * applies to all contexts.
+ */
+ struct intel_sseu sseu;
+
+ /**
+ * @exclusive_stream: The stream currently using the OA unit. This is
+ * sometimes accessed outside a syscall associated to its file
+ * descriptor, so it is written with WRITE_ONCE() and read with
+ * READ_ONCE().
+ */
+ struct i915_perf_stream *exclusive_stream;
+};
+
struct i915_perf {
struct drm_i915_private *i915;
@@ -397,25 +421,6 @@ struct i915_perf {
*/
struct idr metrics_idr;
- /*
- * Lock associated with anything below within this structure
- * except exclusive_stream.
- */
- struct mutex lock;
-
- /*
- * The stream currently using the OA unit. If accessed
- * outside a syscall associated to its file
- * descriptor.
- */
- struct i915_perf_stream *exclusive_stream;
-
- /**
- * @sseu: sseu configuration selected to run while perf is active,
- * applies to all contexts.
- */
- struct intel_sseu sseu;
-
/**
* For rate limiting any notifications of spurious
* invalid OA reports
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2a887cdd7c1b..1c0da50c0dc7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1796,6 +1796,15 @@
#define XEHPSDV_RP_STATE_CAP _MMIO(0x250014)
#define PVC_RP_STATE_CAP _MMIO(0x281014)
+#define MTL_RP_STATE_CAP _MMIO(0x138000)
+#define MTL_MEDIAP_STATE_CAP _MMIO(0x138020)
+#define MTL_RP0_CAP_MASK REG_GENMASK(8, 0)
+#define MTL_RPN_CAP_MASK REG_GENMASK(24, 16)
+
+#define MTL_GT_RPE_FREQUENCY _MMIO(0x13800c)
+#define MTL_MPE_FREQUENCY _MMIO(0x13802c)
+#define MTL_RPE_MASK REG_GENMASK(8, 0)
+
#define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8)
#define GT0_PERF_LIMIT_REASONS_MASK 0xde3
#define PROCHOT_MASK REG_BIT(0)
@@ -1806,6 +1815,8 @@
#define POWER_LIMIT_4_MASK REG_BIT(8)
#define POWER_LIMIT_1_MASK REG_BIT(10)
#define POWER_LIMIT_2_MASK REG_BIT(11)
+#define GT0_PERF_LIMIT_REASONS_LOG_MASK REG_GENMASK(31, 16)
+#define MTL_MEDIA_PERF_LIMIT_REASONS _MMIO(0x138030)
#define CHV_CLK_CTL1 _MMIO(0x101100)
#define VLV_CLK_CTL2 _MMIO(0x101104)
@@ -6652,6 +6663,12 @@
#define DG1_PCODE_STATUS 0x7E
#define DG1_UNCORE_GET_INIT_STATUS 0x0
#define DG1_UNCORE_INIT_STATUS_COMPLETE 0x1
+#define PCODE_POWER_SETUP 0x7C
+#define POWER_SETUP_SUBCOMMAND_READ_I1 0x4
+#define POWER_SETUP_SUBCOMMAND_WRITE_I1 0x5
+#define POWER_SETUP_I1_WATTS REG_BIT(31)
+#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */
+#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0)
#define GEN12_PCODE_READ_SAGV_BLOCK_TIME_US 0x23
#define XEHP_PCODE_FREQUENCY_CONFIG 0x6e /* xehpsdv, pvc */
/* XEHP_PCODE_FREQUENCY_CONFIG sub-commands (param1) */
@@ -7788,8 +7805,13 @@ enum skl_power_gate {
_ICL_PIPE_DSS_CTL2_PB, \
_ICL_PIPE_DSS_CTL2_PC)
+#define GGC _MMIO(0x108040)
+#define GMS_MASK REG_GENMASK(15, 8)
+#define GGMS_MASK REG_GENMASK(7, 6)
+
#define GEN12_GSMBASE _MMIO(0x108100)
#define GEN12_DSMBASE _MMIO(0x1080C0)
+#define GEN12_BDSM_MASK REG_GENMASK64(63, 20)
#define XEHP_CLOCK_GATE_DIS _MMIO(0x101014)
#define SGSI_SIDECLK_DIS REG_BIT(17)
diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h
index 8f486f77609f..f1859046a9c4 100644
--- a/drivers/gpu/drm/i915/i915_reg_defs.h
+++ b/drivers/gpu/drm/i915/i915_reg_defs.h
@@ -104,22 +104,21 @@ typedef struct {
#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })
-#define INVALID_MMIO_REG _MMIO(0)
-
-static __always_inline u32 i915_mmio_reg_offset(i915_reg_t reg)
-{
- return reg.reg;
-}
+typedef struct {
+ u32 reg;
+} i915_mcr_reg_t;
-static inline bool i915_mmio_reg_equal(i915_reg_t a, i915_reg_t b)
-{
- return i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b);
-}
+#define INVALID_MMIO_REG _MMIO(0)
-static inline bool i915_mmio_reg_valid(i915_reg_t reg)
-{
- return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG);
-}
+/*
+ * These macros can be used on either i915_reg_t or i915_mcr_reg_t since they're
+ * simply operations on the register's offset and don't care about the MCR vs
+ * non-MCR nature of the register.
+ */
+#define i915_mmio_reg_offset(r) \
+ _Generic((r), i915_reg_t: (r).reg, i915_mcr_reg_t: (r).reg)
+#define i915_mmio_reg_equal(a, b) (i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b))
+#define i915_mmio_reg_valid(r) (!i915_mmio_reg_equal(r, INVALID_MMIO_REG))
#define VLV_DISPLAY_BASE 0x180000
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 62fad16a55e8..f949a9495758 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1621,6 +1621,20 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
+static void i915_request_await_huc(struct i915_request *rq)
+{
+ struct intel_huc *huc = &rq->context->engine->gt->uc.huc;
+
+ /*
+ * don't stall kernel submissions! A request whose context has no
+ * gem_context returns here without waiting. Otherwise, when
+ * intel_huc_wait_required() says so, rq->submit is made to await the
+ * HuC delayed_load fence, using rq->hucq as the wait queue entry.
+ */
+ if (!rcu_access_pointer(rq->context->gem_context))
+ return;
+
+ if (intel_huc_wait_required(huc))
+ i915_sw_fence_await_sw_fence(&rq->submit,
+ &huc->delayed_load.fence,
+ &rq->hucq);
+}
+
static struct i915_request *
__i915_request_ensure_parallel_ordering(struct i915_request *rq,
struct intel_timeline *timeline)
@@ -1703,6 +1717,16 @@ __i915_request_add_to_timeline(struct i915_request *rq)
struct i915_request *prev;
/*
+ * Media workloads may require HuC, so stall them until HuC loading is
+ * complete. Note that HuC not being loaded when a user submission
+ * arrives can only happen when HuC is loaded via GSC and in that case
+ * we still expect the window between us starting to accept submissions
+ * and HuC loading completion to be small (a few hundred ms).
+ */
+ if (rq->engine->class == VIDEO_DECODE_CLASS)
+ i915_request_await_huc(rq);
+
+ /*
* Dependency tracking and request ordering along the timeline
* is special cased so that we can eliminate redundant ordering
* operations while building the request (we know that the timeline
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 47041ec68df8..f5e1bb5e857a 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -348,6 +348,11 @@ struct i915_request {
#define GUC_PRIO_FINI 0xfe
u8 guc_prio;
+ /**
+ * @hucq: wait queue entry used to wait on the HuC load to complete
+ */
+ wait_queue_entry_t hucq;
+
I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;
diff --git a/drivers/gpu/drm/i915/i915_scatterlist.h b/drivers/gpu/drm/i915/i915_scatterlist.h
index 9ddb3e743a3e..b0a1db44f895 100644
--- a/drivers/gpu/drm/i915/i915_scatterlist.h
+++ b/drivers/gpu/drm/i915/i915_scatterlist.h
@@ -9,7 +9,8 @@
#include <linux/pfn.h>
#include <linux/scatterlist.h>
-#include <linux/swiotlb.h>
+#include <linux/dma-mapping.h>
+#include <xen/xen.h>
#include "i915_gem.h"
@@ -127,19 +128,26 @@ static inline unsigned int i915_sg_dma_sizes(struct scatterlist *sg)
return page_sizes;
}
-static inline unsigned int i915_sg_segment_size(void)
+static inline unsigned int i915_sg_segment_size(struct device *dev)
{
- unsigned int size = swiotlb_max_segment();
-
- if (size == 0)
- size = UINT_MAX;
-
- size = rounddown(size, PAGE_SIZE);
- /* swiotlb_max_segment_size can return 1 byte when it means one page. */
- if (size < PAGE_SIZE)
- size = PAGE_SIZE;
-
- return size;
+ size_t max = min_t(size_t, UINT_MAX, dma_max_mapping_size(dev));
+
+ /*
+ * For Xen PV guests pages aren't contiguous in DMA (machine) address
+ * space. The DMA API takes care of that both in dma_alloc_* (by
+ * calling into the hypervisor to make the pages contiguous) and in
+ * dma_map_* (by bounce buffering). But i915 ignores the
+ * coherency aspects of the DMA API and thus can't cope with bounce
+ * buffering actually happening, so add a hack here to force small
+ * allocations and mappings when running in PV mode on Xen.
+ *
+ * Note this will still break if bounce buffering is required for other
+ * reasons, like confidential computing hypervisors or PCIe root ports
+ * with addressing limitations.
+ */
+ if (xen_pv_domain())
+ max = PAGE_SIZE;
+ return round_down(max, PAGE_SIZE);
}
bool i915_sg_trim(struct sg_table *orig_st);
diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h
index f54de0499be7..bdf3e22c0a34 100644
--- a/drivers/gpu/drm/i915/i915_selftest.h
+++ b/drivers/gpu/drm/i915/i915_selftest.h
@@ -92,12 +92,14 @@ int __i915_subtests(const char *caller,
T, ARRAY_SIZE(T), data)
#define i915_live_subtests(T, data) ({ \
typecheck(struct drm_i915_private *, data); \
+ (data)->gt[0]->uc.guc.submission_state.sched_disable_delay_ms = 0; \
__i915_subtests(__func__, \
__i915_live_setup, __i915_live_teardown, \
T, ARRAY_SIZE(T), data); \
})
#define intel_gt_live_subtests(T, data) ({ \
typecheck(struct intel_gt *, data); \
+ (data)->uc.guc.submission_state.sched_disable_delay_ms = 0; \
__i915_subtests(__func__, \
__intel_gt_live_setup, __intel_gt_live_teardown, \
T, ARRAY_SIZE(T), data); \
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 37b5c9e9d260..c70a02517e02 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -671,21 +671,6 @@ TRACE_EVENT_CONDITION(i915_reg_rw,
(u32)(__entry->val >> 32))
);
-TRACE_EVENT(intel_gpu_freq_change,
- TP_PROTO(u32 freq),
- TP_ARGS(freq),
-
- TP_STRUCT__entry(
- __field(u32, freq)
- ),
-
- TP_fast_assign(
- __entry->freq = freq;
- ),
-
- TP_printk("new_freq=%u", __entry->freq)
-);
-
/**
* DOC: i915_ppgtt_create and i915_ppgtt_release tracepoints
*
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index f17c09ead7d7..c39488eb9eeb 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -776,12 +776,6 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
alignment = max(alignment, i915_vm_obj_min_alignment(vma->vm, vma->obj));
- /*
- * for compact-pt we round up the reservation to prevent
- * any smaller pages being used within the same PDE
- */
- if (NEEDS_COMPACT_PT(vma->vm->i915))
- size = round_up(size, alignment);
/* If binding the object/GGTT view requires more space than the entire
* aperture has, reject it early before evicting everything in a vain
@@ -820,7 +814,8 @@ i915_vma_insert(struct i915_vma *vma, struct i915_gem_ww_ctx *ww,
* forseeable future. See also i915_ggtt_offset().
*/
if (upper_32_bits(end - 1) &&
- vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+ vma->page_sizes.sg > I915_GTT_PAGE_SIZE &&
+ !HAS_64K_PAGES(vma->vm->i915)) {
/*
* We can't mix 64K and 4K PTEs in the same page-table
* (2M block), and so to avoid the ugliness and
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 7b5dd8e21d7a..d588e5fd2eea 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -146,7 +146,6 @@ enum intel_ppgtt_type {
/* Keep has_* in alphabetical order */ \
func(has_64bit_reloc); \
func(has_64k_pages); \
- func(needs_compact_pt); \
func(gpu_reset_clobbers_display); \
func(has_reset_engine); \
func(has_3d_pipeline); \
@@ -165,6 +164,8 @@ enum intel_ppgtt_type {
func(has_logical_ring_elsq); \
func(has_media_ratio_mode); \
func(has_mslice_steering); \
+ func(has_oa_bpc_reporting); \
+ func(has_oa_slice_contrib_limits); \
func(has_one_eu_per_fuse_bit); \
func(has_pxp); \
func(has_rc6); \
diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
index 8279dc580a3e..638b77d64bf4 100644
--- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
+++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
@@ -102,7 +102,7 @@ static int iterate_generic_mmio(struct intel_gvt_mmio_table_iter *iter)
MMIO_D(_MMIO(0x2438));
MMIO_D(_MMIO(0x243c));
MMIO_D(_MMIO(0x7018));
- MMIO_D(HALF_SLICE_CHICKEN3);
+ MMIO_D(HSW_HALF_SLICE_CHICKEN3);
MMIO_D(GEN7_HALF_SLICE_CHICKEN1);
/* display */
MMIO_F(_MMIO(0x60220), 0x20);
diff --git a/drivers/gpu/drm/i915/intel_mchbar_regs.h b/drivers/gpu/drm/i915/intel_mchbar_regs.h
index ffc702b79579..f93e9af43ac3 100644
--- a/drivers/gpu/drm/i915/intel_mchbar_regs.h
+++ b/drivers/gpu/drm/i915/intel_mchbar_regs.h
@@ -189,6 +189,21 @@
#define DG1_QCLK_RATIO_MASK REG_GENMASK(9, 2)
#define DG1_QCLK_REFERENCE REG_BIT(10)
+/*
+ * *_PACKAGE_POWER_SKU - SKU power and timing parameters.
+ */
+#define PCU_PACKAGE_POWER_SKU _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5930)
+#define PKG_PKG_TDP GENMASK_ULL(14, 0)
+#define PKG_MAX_WIN GENMASK_ULL(54, 48)
+#define PKG_MAX_WIN_X GENMASK_ULL(54, 53)
+#define PKG_MAX_WIN_Y GENMASK_ULL(52, 48)
+
+#define PCU_PACKAGE_POWER_SKU_UNIT _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5938)
+#define PKG_PWR_UNIT REG_GENMASK(3, 0)
+#define PKG_ENERGY_UNIT REG_GENMASK(12, 8)
+#define PKG_TIME_UNIT REG_GENMASK(19, 16)
+#define PCU_PACKAGE_ENERGY_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x593c)
+
#define GEN6_GT_PERF_STATUS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5948)
#define GEN6_RP_STATE_LIMITS _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5994)
#define GEN6_RP_STATE_CAP _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5998)
@@ -198,6 +213,12 @@
#define GEN10_FREQ_INFO_REC _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5ef0)
#define RPE_MASK REG_GENMASK(15, 8)
+#define PCU_PACKAGE_RAPL_LIMIT _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
+#define PKG_PWR_LIM_1 REG_GENMASK(14, 0)
+#define PKG_PWR_LIM_1_EN REG_BIT(15)
+#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17)
+#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22)
+#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17)
/* snb MCH registers for priority tuning */
#define MCH_SSKPD _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5d10)
diff --git a/drivers/gpu/drm/i915/intel_pci_config.h b/drivers/gpu/drm/i915/intel_pci_config.h
index 4977a524ce6f..23b8e519f333 100644
--- a/drivers/gpu/drm/i915/intel_pci_config.h
+++ b/drivers/gpu/drm/i915/intel_pci_config.h
@@ -7,11 +7,29 @@
#define __INTEL_PCI_CONFIG_H__
/* PCI BARs */
-#define GTTMMADR_BAR 0
-#define GEN2_GTTMMADR_BAR 1
-#define GFXMEM_BAR 2
-#define GTT_APERTURE_BAR GFXMEM_BAR
-#define GEN12_LMEM_BAR GFXMEM_BAR
+#define GEN2_GMADR_BAR 0
+#define GEN2_MMADR_BAR 1 /* MMIO+GTT, despite the name */
+#define GEN2_IO_BAR 2 /* 85x/865 */
+
+#define GEN3_MMADR_BAR 0 /* MMIO only */
+#define GEN3_IO_BAR 1
+#define GEN3_GMADR_BAR 2
+#define GEN3_GTTADR_BAR 3 /* GTT only */
+
+#define GEN4_GTTMMADR_BAR 0 /* MMIO+GTT */
+#define GEN4_GMADR_BAR 2
+#define GEN4_IO_BAR 4
+
+#define GEN12_LMEM_BAR 2
+
+static inline int intel_mmio_bar(int graphics_ver)
+{
+ switch (graphics_ver) {
+ case 2: return GEN2_MMADR_BAR;
+ case 3: return GEN3_MMADR_BAR;
+ default: return GEN4_GTTMMADR_BAR;
+ }
+}
/* BSM in include/drm/i915_drm.h */
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 19d4a88184d7..ee34e2785636 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -30,6 +30,8 @@
#include "display/skl_watermark.h"
#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
#include "i915_drv.h"
@@ -58,25 +60,20 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv)
* Must match Sampler, Pixel Back End, and Media. See
* WaCompressedResourceSamplerPbeMediaNewHashMode.
*/
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
- intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) |
- SKL_DE_COMPRESSED_HASH_MODE);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, SKL_DE_COMPRESSED_HASH_MODE);
}
/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl,cfl */
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
- intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, SKL_EDP_PSR_FIX_RDWRAP);
/* WaEnableChickenDCPR:skl,bxt,kbl,glk,cfl */
- intel_uncore_write(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1,
- intel_uncore_read(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM);
+ intel_uncore_rmw(&dev_priv->uncore, GEN8_CHICKEN_DCPR_1, 0, MASK_WAKEMEM);
/*
* WaFbcWakeMemOn:skl,bxt,kbl,glk,cfl
* Display WA #0859: skl,bxt,kbl,glk,cfl
*/
- intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
- DISP_FBC_MEMORY_WAKE);
+ intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_MEMORY_WAKE);
}
static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -84,15 +81,13 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
gen9_init_clock_gating(dev_priv);
/* WaDisableSDEUnitClockGating:bxt */
- intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
- GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
/*
* FIXME:
* GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ applies on 3x6 GT SKUs only.
*/
- intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
- GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
+ intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_HDCUNIT_CLOCK_GATE_DISABLE_HDCREQ);
/*
* Wa: Backlight PWM may stop in the asserted state, causing backlight
@@ -113,16 +108,13 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv)
* WaFbcTurnOffFbcWatermark:bxt
* Display WA #0562: bxt
*/
- intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
- DISP_FBC_WM_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
/*
* WaFbcHighMemBwCorruptionAvoidance:bxt
* Display WA #0883: bxt
*/
- intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
- intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
- DPFC_DISABLE_DUMMY0);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), 0, DPFC_DISABLE_DUMMY0);
}
static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4053,9 +4045,9 @@ void vlv_wm_sanitize(struct drm_i915_private *dev_priv)
*/
static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv)
{
- intel_uncore_write(&dev_priv->uncore, WM3_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM3_LP_ILK) & ~WM_LP_ENABLE);
- intel_uncore_write(&dev_priv->uncore, WM2_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM2_LP_ILK) & ~WM_LP_ENABLE);
- intel_uncore_write(&dev_priv->uncore, WM1_LP_ILK, intel_uncore_read(&dev_priv->uncore, WM1_LP_ILK) & ~WM_LP_ENABLE);
+ intel_uncore_rmw(&dev_priv->uncore, WM3_LP_ILK, WM_LP_ENABLE, 0);
+ intel_uncore_rmw(&dev_priv->uncore, WM2_LP_ILK, WM_LP_ENABLE, 0);
+ intel_uncore_rmw(&dev_priv->uncore, WM1_LP_ILK, WM_LP_ENABLE, 0);
/*
* Don't touch WM_LP_SPRITE_ENABLE here.
@@ -4109,9 +4101,7 @@ static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv)
enum pipe pipe;
for_each_pipe(dev_priv, pipe) {
- intel_uncore_write(&dev_priv->uncore, DSPCNTR(pipe),
- intel_uncore_read(&dev_priv->uncore, DSPCNTR(pipe)) |
- DISP_TRICKLE_FEED_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, DSPCNTR(pipe), 0, DISP_TRICKLE_FEED_DISABLE);
intel_uncore_rmw(&dev_priv->uncore, DSPSURF(pipe), 0, 0);
intel_uncore_posting_read(&dev_priv->uncore, DSPSURF(pipe));
@@ -4160,19 +4150,13 @@ static void ilk_init_clock_gating(struct drm_i915_private *dev_priv)
*/
if (IS_IRONLAKE_M(dev_priv)) {
/* WaFbcAsynchFlipDisableFbcQueue:ilk */
- intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1,
- intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1) |
- ILK_FBCQ_DIS);
- intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2,
- intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) |
- ILK_DPARB_GATE);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, 0, ILK_FBCQ_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_DPARB_GATE);
}
intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, dspclk_gate);
- intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2,
- intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) |
- ILK_ELPIN_409_SELECT);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_ELPIN_409_SELECT);
g4x_disable_trickle_feed(dev_priv);
@@ -4192,8 +4176,7 @@ static void cpt_init_clock_gating(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE |
PCH_DPLUNIT_CLOCK_GATE_DISABLE |
PCH_CPUNIT_CLOCK_GATE_DISABLE);
- intel_uncore_write(&dev_priv->uncore, SOUTH_CHICKEN2, intel_uncore_read(&dev_priv->uncore, SOUTH_CHICKEN2) |
- DPLS_EDP_PPS_FIX_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, SOUTH_CHICKEN2, 0, DPLS_EDP_PPS_FIX_DIS);
/* The below fixes the weird display corruption, a few pixels shifted
* downward, on (only) LVDS of some HP laptops with IVY.
*/
@@ -4231,9 +4214,7 @@ static void gen6_init_clock_gating(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, dspclk_gate);
- intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2,
- intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2) |
- ILK_ELPIN_409_SELECT);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN2, 0, ILK_ELPIN_409_SELECT);
intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1,
intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
@@ -4293,14 +4274,12 @@ static void lpt_init_clock_gating(struct drm_i915_private *dev_priv)
* disabled when not needed anymore in order to save power.
*/
if (HAS_PCH_LPT_LP(dev_priv))
- intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D,
- intel_uncore_read(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D) |
- PCH_LP_PARTITION_LEVEL_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D,
+ 0, PCH_LP_PARTITION_LEVEL_DISABLE);
/* WADPOClockGatingDisable:hsw */
- intel_uncore_write(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A),
- intel_uncore_read(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A)) |
- TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, TRANS_CHICKEN1(PIPE_A),
+ 0, TRANS_CHICKEN1_DP0UNIT_GC_DISABLE);
}
static void lpt_suspend_hw(struct drm_i915_private *dev_priv)
@@ -4321,22 +4300,22 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
u32 val;
/* WaTempDisableDOPClkGating:bdw */
- misccpctl = intel_uncore_rmw(&dev_priv->uncore, GEN7_MISCCPCTL,
- GEN7_DOP_CLOCK_GATE_ENABLE, 0);
+ misccpctl = intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
+ GEN8_DOP_CLOCK_GATE_ENABLE, 0);
- val = intel_uncore_read(&dev_priv->uncore, GEN8_L3SQCREG1);
+ val = intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
val &= ~L3_PRIO_CREDITS_MASK;
val |= L3_GENERAL_PRIO_CREDITS(general_prio_credits);
val |= L3_HIGH_PRIO_CREDITS(high_prio_credits);
- intel_uncore_write(&dev_priv->uncore, GEN8_L3SQCREG1, val);
+ intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_L3SQCREG1, val);
/*
* Wait at least 100 clocks before re-enabling clock gating.
* See the definition of L3SQCREG1 in BSpec.
*/
- intel_uncore_posting_read(&dev_priv->uncore, GEN8_L3SQCREG1);
+ intel_gt_mcr_read_any(to_gt(dev_priv), GEN8_L3SQCREG1);
udelay(1);
- intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, misccpctl);
+ intel_gt_mcr_multicast_write(to_gt(dev_priv), GEN8_MISCCPCTL, misccpctl);
}
static void icl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4359,8 +4338,7 @@ static void gen12lp_init_clock_gating(struct drm_i915_private *dev_priv)
/* Wa_1409825376:tgl (pre-prod)*/
if (IS_TGL_DISPLAY_STEP(dev_priv, STEP_A0, STEP_C0))
- intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
- TGL_VRH_GATING_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, 0, TGL_VRH_GATING_DIS);
/* Wa_14013723622:tgl,rkl,dg1,adl-s */
if (DISPLAY_VER(dev_priv) == 12)
@@ -4385,8 +4363,7 @@ static void dg1_init_clock_gating(struct drm_i915_private *dev_priv)
/* Wa_1409836686:dg1[a0] */
if (IS_DG1_GRAPHICS_STEP(dev_priv, STEP_A0, STEP_B0))
- intel_uncore_write(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, intel_uncore_read(&dev_priv->uncore, GEN9_CLKGATE_DIS_3) |
- DPT_GATING_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, GEN9_CLKGATE_DIS_3, 0, DPT_GATING_DIS);
}
static void xehpsdv_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4428,8 +4405,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv)
return;
/* Display WA #1181 WaSouthDisplayDisablePWMCGEGating: cnp */
- intel_uncore_write(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, intel_uncore_read(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D) |
- CNP_PWM_CGE_GATING_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, SOUTH_DSPCLK_GATE_D, 0, CNP_PWM_CGE_GATING_DISABLE);
}
static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4438,23 +4414,20 @@ static void cfl_init_clock_gating(struct drm_i915_private *dev_priv)
gen9_init_clock_gating(dev_priv);
/* WAC6entrylatency:cfl */
- intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
- FBC_LLC_FULLY_OPEN);
+ intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN);
/*
* WaFbcTurnOffFbcWatermark:cfl
* Display WA #0562: cfl
*/
- intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
- DISP_FBC_WM_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
/*
* WaFbcNukeOnHostModify:cfl
* Display WA #0873: cfl
*/
- intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
- intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
- DPFC_NUKE_ON_ANY_MODIFICATION);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
+ 0, DPFC_NUKE_ON_ANY_MODIFICATION);
}
static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4462,33 +4435,30 @@ static void kbl_init_clock_gating(struct drm_i915_private *dev_priv)
gen9_init_clock_gating(dev_priv);
/* WAC6entrylatency:kbl */
- intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
- FBC_LLC_FULLY_OPEN);
+ intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN);
/* WaDisableSDEUnitClockGating:kbl */
if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
- intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
- GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6,
+ 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
/* WaDisableGamClockGating:kbl */
if (IS_KBL_GRAPHICS_STEP(dev_priv, 0, STEP_C0))
- intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
- GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1,
+ 0, GEN6_GAMUNIT_CLOCK_GATE_DISABLE);
/*
* WaFbcTurnOffFbcWatermark:kbl
* Display WA #0562: kbl
*/
- intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
- DISP_FBC_WM_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
/*
* WaFbcNukeOnHostModify:kbl
* Display WA #0873: kbl
*/
- intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
- intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
- DPFC_NUKE_ON_ANY_MODIFICATION);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
+ 0, DPFC_NUKE_ON_ANY_MODIFICATION);
}
static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4496,35 +4466,30 @@ static void skl_init_clock_gating(struct drm_i915_private *dev_priv)
gen9_init_clock_gating(dev_priv);
/* WaDisableDopClockGating:skl */
- intel_uncore_write(&dev_priv->uncore, GEN7_MISCCPCTL, intel_uncore_read(&dev_priv->uncore, GEN7_MISCCPCTL) &
- ~GEN7_DOP_CLOCK_GATE_ENABLE);
+ intel_gt_mcr_multicast_rmw(to_gt(dev_priv), GEN8_MISCCPCTL,
+ GEN8_DOP_CLOCK_GATE_ENABLE, 0);
/* WAC6entrylatency:skl */
- intel_uncore_write(&dev_priv->uncore, FBC_LLC_READ_CTRL, intel_uncore_read(&dev_priv->uncore, FBC_LLC_READ_CTRL) |
- FBC_LLC_FULLY_OPEN);
+ intel_uncore_rmw(&dev_priv->uncore, FBC_LLC_READ_CTRL, 0, FBC_LLC_FULLY_OPEN);
/*
* WaFbcTurnOffFbcWatermark:skl
* Display WA #0562: skl
*/
- intel_uncore_write(&dev_priv->uncore, DISP_ARB_CTL, intel_uncore_read(&dev_priv->uncore, DISP_ARB_CTL) |
- DISP_FBC_WM_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, DISP_ARB_CTL, 0, DISP_FBC_WM_DIS);
/*
* WaFbcNukeOnHostModify:skl
* Display WA #0873: skl
*/
- intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
- intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
- DPFC_NUKE_ON_ANY_MODIFICATION);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
+ 0, DPFC_NUKE_ON_ANY_MODIFICATION);
/*
* WaFbcHighMemBwCorruptionAvoidance:skl
* Display WA #0883: skl
*/
- intel_uncore_write(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A),
- intel_uncore_read(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A)) |
- DPFC_DISABLE_DUMMY0);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DPFC_CHICKEN(INTEL_FBC_A), 0, DPFC_DISABLE_DUMMY0);
}
static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
@@ -4532,43 +4497,37 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
enum pipe pipe;
/* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A),
- intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) |
- HSW_FBCQ_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), 0, HSW_FBCQ_DIS);
/* WaSwitchSolVfFArbitrationPriority:bdw */
- intel_uncore_write(&dev_priv->uncore, GAM_ECOCHK, intel_uncore_read(&dev_priv->uncore, GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+ intel_uncore_rmw(&dev_priv->uncore, GAM_ECOCHK, 0, HSW_ECOCHK_ARB_PRIO_SOL);
/* WaPsrDPAMaskVBlankInSRD:bdw */
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
- intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR1_1, 0, DPA_MASK_VBLANK_SRD);
for_each_pipe(dev_priv, pipe) {
/* WaPsrDPRSUnmaskVBlankInSRD:bdw */
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
- intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) |
- BDW_DPRS_MASK_VBLANK_SRD);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
+ 0, BDW_DPRS_MASK_VBLANK_SRD);
}
/* WaVSRefCountFullforceMissDisable:bdw */
/* WaDSRefCountFullforceMissDisable:bdw */
- intel_uncore_write(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
- intel_uncore_read(&dev_priv->uncore, GEN7_FF_THREAD_MODE) &
- ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+ intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
+ GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME, 0);
intel_uncore_write(&dev_priv->uncore, RING_PSMI_CTL(RENDER_RING_BASE),
_MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
/* WaDisableSDEUnitClockGating:bdw */
- intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
- GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
/* WaProgramL3SqcReg1Default:bdw */
gen8_set_l3sqc_credits(dev_priv, 30, 2);
/* WaKVMNotificationOnConfigChange:bdw */
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR2_1, intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR2_1)
- | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PAR2_1,
+ 0, KVM_CONFIG_CHANGE_NOTIFICATION_SELECT);
lpt_init_clock_gating(dev_priv);
@@ -4577,24 +4536,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
* Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP
* clock gating.
*/
- intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1,
- intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, 0, GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE);
}
static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
{
/* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
- intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A),
- intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) |
- HSW_FBCQ_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A), 0, HSW_FBCQ_DIS);
/* This is required by WaCatErrorRejectionIssue:hsw */
- intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+ intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+ 0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
/* WaSwitchSolVfFArbitrationPriority:hsw */
- intel_uncore_write(&dev_priv->uncore, GAM_ECOCHK, intel_uncore_read(&dev_priv->uncore, GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
+ intel_uncore_rmw(&dev_priv->uncore, GAM_ECOCHK, 0, HSW_ECOCHK_ARB_PRIO_SOL);
lpt_init_clock_gating(dev_priv);
}
@@ -4604,9 +4559,7 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
intel_uncore_write(&dev_priv->uncore, ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE);
/* WaFbcAsynchFlipDisableFbcQueue:ivb */
- intel_uncore_write(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1,
- intel_uncore_read(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1) |
- ILK_FBCQ_DIS);
+ intel_uncore_rmw(&dev_priv->uncore, ILK_DISPLAY_CHICKEN1, 0, ILK_FBCQ_DIS);
/* WaDisableBackToBackFlipFix:ivb */
intel_uncore_write(&dev_priv->uncore, IVB_CHICKEN3,
@@ -4632,9 +4585,8 @@ static void ivb_init_clock_gating(struct drm_i915_private *dev_priv)
GEN6_RCZUNIT_CLOCK_GATE_DISABLE);
/* This is required by WaCatErrorRejectionIssue:ivb */
- intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+ intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+ 0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
g4x_disable_trickle_feed(dev_priv);
@@ -4659,9 +4611,8 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
_MASKED_BIT_ENABLE(DOP_CLOCK_GATING_DISABLE));
/* This is required by WaCatErrorRejectionIssue:vlv */
- intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
- intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
- GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
+ intel_uncore_rmw(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
+ 0, GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
/*
* According to the spec, bit 13 (RCZUNIT) must be set on IVB.
@@ -4673,8 +4624,7 @@ static void vlv_init_clock_gating(struct drm_i915_private *dev_priv)
/* WaDisableL3Bank2xClockGate:vlv
* Disabling L3 clock gating- MMIO 940c[25] = 1
* Set bit 25, to disable L3_BANK_2x_CLK_GATING */
- intel_uncore_write(&dev_priv->uncore, GEN7_UCGCTL4,
- intel_uncore_read(&dev_priv->uncore, GEN7_UCGCTL4) | GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN7_UCGCTL4, 0, GEN7_L3BANK2X_CLOCK_GATE_DISABLE);
/*
* WaDisableVLVClockGating_VBIIssue:vlv
@@ -4688,21 +4638,18 @@ static void chv_init_clock_gating(struct drm_i915_private *dev_priv)
{
/* WaVSRefCountFullforceMissDisable:chv */
/* WaDSRefCountFullforceMissDisable:chv */
- intel_uncore_write(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
- intel_uncore_read(&dev_priv->uncore, GEN7_FF_THREAD_MODE) &
- ~(GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME));
+ intel_uncore_rmw(&dev_priv->uncore, GEN7_FF_THREAD_MODE,
+ GEN8_FF_DS_REF_CNT_FFME | GEN7_FF_VS_REF_CNT_FFME, 0);
/* WaDisableSemaphoreAndSyncFlipWait:chv */
intel_uncore_write(&dev_priv->uncore, RING_PSMI_CTL(RENDER_RING_BASE),
_MASKED_BIT_ENABLE(GEN8_RC_SEMA_IDLE_MSG_DISABLE));
/* WaDisableCSUnitClockGating:chv */
- intel_uncore_write(&dev_priv->uncore, GEN6_UCGCTL1, intel_uncore_read(&dev_priv->uncore, GEN6_UCGCTL1) |
- GEN6_CSUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN6_UCGCTL1, 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
/* WaDisableSDEUnitClockGating:chv */
- intel_uncore_write(&dev_priv->uncore, GEN8_UCGCTL6, intel_uncore_read(&dev_priv->uncore, GEN8_UCGCTL6) |
- GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
+ intel_uncore_rmw(&dev_priv->uncore, GEN8_UCGCTL6, 0, GEN8_SDEUNIT_CLOCK_GATE_DISABLE);
/*
* WaProgramL3SqcReg1Default:chv
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 744cca507946..129746713d07 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -633,6 +633,8 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm)
runtime_pm);
int count = atomic_read(&rpm->wakeref_count);
+ intel_wakeref_auto_fini(&rpm->userfault_wakeref);
+
drm_WARN(&i915->drm, count,
"i915 raw-wakerefs=%d wakelocks=%d on cleanup\n",
intel_rpm_raw_wakeref_count(count),
@@ -652,4 +654,7 @@ void intel_runtime_pm_init_early(struct intel_runtime_pm *rpm)
rpm->available = HAS_RUNTIME_PM(i915);
init_intel_runtime_pm_wakeref(rpm);
+ INIT_LIST_HEAD(&rpm->lmem_userfault_list);
+ spin_lock_init(&rpm->lmem_userfault_lock);
+ intel_wakeref_auto_init(&rpm->userfault_wakeref, rpm);
}
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h
index d9160e3ff4af..98b8b28baaa1 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.h
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.h
@@ -53,6 +53,28 @@ struct intel_runtime_pm {
bool irqs_enabled;
bool no_wakeref_tracking;
+ /*
+ * Protects access to lmem userfault list.
+ * If we are outside of the runtime suspend path, access to
+ * @lmem_userfault_list always requires first grabbing the
+ * runtime pm, to ensure we can't race against runtime suspend.
+ * Once we have that we also need to grab @lmem_userfault_lock,
+ * at which point we have exclusive access.
+ * The runtime suspend path is special since it doesn't really hold any locks,
+ * but instead has exclusive access by virtue of all other accesses requiring
+ * holding the runtime pm wakeref.
+ */
+ spinlock_t lmem_userfault_lock;
+
+ /*
+ * Keep a list of userfaulted gem objects, which need to release their
+ * mmap mappings in the runtime suspend path.
+ */
+ struct list_head lmem_userfault_list;
+
+ /* Manual runtime pm autosuspend delay for user GGTT/lmem mmaps */
+ struct intel_wakeref_auto userfault_wakeref;
+
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
/*
* To aide detection of wakeref leaks and general misuse, we
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 5cd423c7b646..2a3e2869fe71 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -104,6 +104,7 @@ static const char * const forcewake_domain_names[] = {
"vebox1",
"vebox2",
"vebox3",
+ "gsc",
};
const char *
@@ -888,10 +889,13 @@ void assert_forcewakes_active(struct intel_uncore *uncore,
spin_unlock_irq(&uncore->lock);
}
-/* We give fast paths for the really cool registers */
+/*
+ * We give fast paths for the really cool registers. The second range includes
+ * media domains (and the GSC starting from Xe_LPM+)
+ */
#define NEEDS_FORCE_WAKE(reg) ({ \
u32 __reg = (reg); \
- __reg < 0x40000 || __reg >= GEN11_BSD_RING_BASE; \
+ __reg < 0x40000 || __reg >= 0x116000; \
})
static int fw_range_cmp(u32 offset, const struct intel_forcewake_range *entry)
@@ -1131,6 +1135,45 @@ static const struct i915_range pvc_shadowed_regs[] = {
{ .start = 0x1F8510, .end = 0x1F8550 },
};
+static const struct i915_range mtl_shadowed_regs[] = {
+ { .start = 0x2030, .end = 0x2030 },
+ { .start = 0x2510, .end = 0x2550 },
+ { .start = 0xA008, .end = 0xA00C },
+ { .start = 0xA188, .end = 0xA188 },
+ { .start = 0xA278, .end = 0xA278 },
+ { .start = 0xA540, .end = 0xA56C },
+ { .start = 0xC050, .end = 0xC050 },
+ { .start = 0xC340, .end = 0xC340 },
+ { .start = 0xC4C8, .end = 0xC4C8 },
+ { .start = 0xC4E0, .end = 0xC4E0 },
+ { .start = 0xC600, .end = 0xC600 },
+ { .start = 0xC658, .end = 0xC658 },
+ { .start = 0xCFD4, .end = 0xCFDC },
+ { .start = 0x22030, .end = 0x22030 },
+ { .start = 0x22510, .end = 0x22550 },
+};
+
+static const struct i915_range xelpmp_shadowed_regs[] = {
+ { .start = 0x1C0030, .end = 0x1C0030 },
+ { .start = 0x1C0510, .end = 0x1C0550 },
+ { .start = 0x1C8030, .end = 0x1C8030 },
+ { .start = 0x1C8510, .end = 0x1C8550 },
+ { .start = 0x1D0030, .end = 0x1D0030 },
+ { .start = 0x1D0510, .end = 0x1D0550 },
+ { .start = 0x38A008, .end = 0x38A00C },
+ { .start = 0x38A188, .end = 0x38A188 },
+ { .start = 0x38A278, .end = 0x38A278 },
+ { .start = 0x38A540, .end = 0x38A56C },
+ { .start = 0x38A618, .end = 0x38A618 },
+ { .start = 0x38C050, .end = 0x38C050 },
+ { .start = 0x38C340, .end = 0x38C340 },
+ { .start = 0x38C4C8, .end = 0x38C4C8 },
+ { .start = 0x38C4E0, .end = 0x38C4E4 },
+ { .start = 0x38C600, .end = 0x38C600 },
+ { .start = 0x38C658, .end = 0x38C658 },
+ { .start = 0x38CFD4, .end = 0x38CFDC },
+};
+
static int mmio_range_cmp(u32 key, const struct i915_range *range)
{
if (key < range->start)
@@ -1639,25 +1682,27 @@ static const struct intel_forcewake_range __pvc_fw_ranges[] = {
GEN_FW_RANGE(0x12000, 0x12fff, 0), /*
0x12000 - 0x127ff: always on
0x12800 - 0x12fff: reserved */
- GEN_FW_RANGE(0x13000, 0x23fff, FORCEWAKE_GT), /*
+ GEN_FW_RANGE(0x13000, 0x19fff, FORCEWAKE_GT), /*
0x13000 - 0x135ff: gt
0x13600 - 0x147ff: reserved
0x14800 - 0x153ff: gt
- 0x15400 - 0x19fff: reserved
- 0x1a000 - 0x1ffff: gt
- 0x20000 - 0x21fff: reserved
- 0x22000 - 0x23fff: gt */
+ 0x15400 - 0x19fff: reserved */
+ GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /*
+ 0x1a000 - 0x1ffff: render
+ 0x20000 - 0x21fff: reserved */
+ GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT),
GEN_FW_RANGE(0x24000, 0x2417f, 0), /*
24000 - 0x2407f: always on
24080 - 0x2417f: reserved */
- GEN_FW_RANGE(0x24180, 0x3ffff, FORCEWAKE_GT), /*
+ GEN_FW_RANGE(0x24180, 0x25fff, FORCEWAKE_GT), /*
0x24180 - 0x241ff: gt
0x24200 - 0x251ff: reserved
0x25200 - 0x252ff: gt
- 0x25300 - 0x25fff: reserved
- 0x26000 - 0x27fff: gt
- 0x28000 - 0x2ffff: reserved
- 0x30000 - 0x3ffff: gt */
+ 0x25300 - 0x25fff: reserved */
+ GEN_FW_RANGE(0x26000, 0x2ffff, FORCEWAKE_RENDER), /*
+ 0x26000 - 0x27fff: render
+ 0x28000 - 0x2ffff: reserved */
+ GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT),
GEN_FW_RANGE(0x40000, 0x1bffff, 0),
GEN_FW_RANGE(0x1c0000, 0x1c3fff, FORCEWAKE_MEDIA_VDBOX0), /*
0x1c0000 - 0x1c2bff: VD0
@@ -1679,6 +1724,162 @@ static const struct intel_forcewake_range __pvc_fw_ranges[] = {
GEN_FW_RANGE(0x3e0000, 0x3effff, FORCEWAKE_GT),
};
+static const struct intel_forcewake_range __mtl_fw_ranges[] = {
+ GEN_FW_RANGE(0x0, 0xaff, 0),
+ GEN_FW_RANGE(0xb00, 0xbff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0xc00, 0xfff, 0),
+ GEN_FW_RANGE(0x1000, 0x1fff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x2000, 0x26ff, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0x2700, 0x2fff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x3000, 0x3fff, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0x4000, 0x51ff, FORCEWAKE_GT), /*
+ 0x4000 - 0x48ff: render
+ 0x4900 - 0x51ff: reserved */
+ GEN_FW_RANGE(0x5200, 0x7fff, FORCEWAKE_RENDER), /*
+ 0x5200 - 0x53ff: render
+ 0x5400 - 0x54ff: reserved
+ 0x5500 - 0x7fff: render */
+ GEN_FW_RANGE(0x8000, 0x813f, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x8140, 0x817f, FORCEWAKE_RENDER), /*
+ 0x8140 - 0x815f: render
+ 0x8160 - 0x817f: reserved */
+ GEN_FW_RANGE(0x8180, 0x81ff, 0),
+ GEN_FW_RANGE(0x8200, 0x94cf, FORCEWAKE_GT), /*
+ 0x8200 - 0x87ff: gt
+ 0x8800 - 0x8dff: reserved
+ 0x8e00 - 0x8f7f: gt
+ 0x8f80 - 0x8fff: reserved
+ 0x9000 - 0x947f: gt
+ 0x9480 - 0x94cf: reserved */
+ GEN_FW_RANGE(0x94d0, 0x955f, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0x9560, 0x967f, 0), /*
+ 0x9560 - 0x95ff: always on
+ 0x9600 - 0x967f: reserved */
+ GEN_FW_RANGE(0x9680, 0x97ff, FORCEWAKE_RENDER), /*
+ 0x9680 - 0x96ff: render
+ 0x9700 - 0x97ff: reserved */
+ GEN_FW_RANGE(0x9800, 0xcfff, FORCEWAKE_GT), /*
+ 0x9800 - 0xb4ff: gt
+ 0xb500 - 0xbfff: reserved
+ 0xc000 - 0xcfff: gt */
+ GEN_FW_RANGE(0xd000, 0xd7ff, 0), /*
+ 0xd000 - 0xd3ff: always on
+ 0xd400 - 0xd7ff: reserved */
+ GEN_FW_RANGE(0xd800, 0xd87f, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0xd880, 0xdbff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0xdc00, 0xdcff, FORCEWAKE_RENDER),
+ GEN_FW_RANGE(0xdd00, 0xde7f, FORCEWAKE_GT), /*
+ 0xdd00 - 0xddff: gt
+ 0xde00 - 0xde7f: reserved */
+ GEN_FW_RANGE(0xde80, 0xe8ff, FORCEWAKE_RENDER), /*
+ 0xde80 - 0xdfff: render
+ 0xe000 - 0xe0ff: reserved
+ 0xe100 - 0xe8ff: render */
+ GEN_FW_RANGE(0xe900, 0xe9ff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0xea00, 0x147ff, 0), /*
+ 0xea00 - 0x11fff: reserved
+ 0x12000 - 0x127ff: always on
+ 0x12800 - 0x147ff: reserved */
+ GEN_FW_RANGE(0x14800, 0x19fff, FORCEWAKE_GT), /*
+ 0x14800 - 0x153ff: gt
+ 0x15400 - 0x19fff: reserved */
+ GEN_FW_RANGE(0x1a000, 0x21fff, FORCEWAKE_RENDER), /*
+ 0x1a000 - 0x1bfff: render
+ 0x1c000 - 0x21fff: reserved */
+ GEN_FW_RANGE(0x22000, 0x23fff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x24000, 0x2ffff, 0), /*
+ 0x24000 - 0x2407f: always on
+ 0x24080 - 0x2ffff: reserved */
+ GEN_FW_RANGE(0x30000, 0x3ffff, FORCEWAKE_GT)
+};
+
+/*
+ * Note that the register ranges here are the final offsets after
+ * translation of the GSI block to the 0x380000 offset.
+ *
+ * NOTE: There are a couple MCR ranges near the bottom of this table
+ * that need to power up either VD0 or VD2 depending on which replicated
+ * instance of the register we're trying to access. Our forcewake logic
+ * at the moment doesn't have a good way to take steering into consideration,
+ * and the driver doesn't even access any registers in those ranges today,
+ * so for now we just mark those ranges as FORCEWAKE_ALL. That will ensure
+ * proper operation if we do start using the ranges in the future, and we
+ * can determine at that time whether it's worth adding extra complexity to
+ * the forcewake handling to take steering into consideration.
+ */
+static const struct intel_forcewake_range __xelpmp_fw_ranges[] = {
+ GEN_FW_RANGE(0x0, 0x115fff, 0), /* render GT range */
+ GEN_FW_RANGE(0x116000, 0x11ffff, FORCEWAKE_GSC), /*
+ 0x116000 - 0x117fff: gsc
+ 0x118000 - 0x119fff: reserved
+ 0x11a000 - 0x11efff: gsc
+ 0x11f000 - 0x11ffff: reserved */
+ GEN_FW_RANGE(0x120000, 0x1bffff, 0), /* non-GT range */
+ GEN_FW_RANGE(0x1c0000, 0x1c7fff, FORCEWAKE_MEDIA_VDBOX0), /*
+ 0x1c0000 - 0x1c3dff: VD0
+ 0x1c3e00 - 0x1c3eff: reserved
+ 0x1c3f00 - 0x1c3fff: VD0
+ 0x1c4000 - 0x1c7fff: reserved */
+ GEN_FW_RANGE(0x1c8000, 0x1cbfff, FORCEWAKE_MEDIA_VEBOX0), /*
+ 0x1c8000 - 0x1ca0ff: VE0
+ 0x1ca100 - 0x1cbfff: reserved */
+ GEN_FW_RANGE(0x1cc000, 0x1cffff, FORCEWAKE_MEDIA_VDBOX0), /*
+ 0x1cc000 - 0x1cdfff: VD0
+ 0x1ce000 - 0x1cffff: reserved */
+ GEN_FW_RANGE(0x1d0000, 0x1d7fff, FORCEWAKE_MEDIA_VDBOX2), /*
+ 0x1d0000 - 0x1d3dff: VD2
+ 0x1d3e00 - 0x1d3eff: reserved
+ 0x1d4000 - 0x1d7fff: VD2 */
+ GEN_FW_RANGE(0x1d8000, 0x1da0ff, FORCEWAKE_MEDIA_VEBOX1),
+ GEN_FW_RANGE(0x1da100, 0x380aff, 0), /*
+ 0x1da100 - 0x23ffff: reserved
+ 0x240000 - 0x37ffff: non-GT range
+ 0x380000 - 0x380aff: reserved */
+ GEN_FW_RANGE(0x380b00, 0x380bff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x380c00, 0x380fff, 0),
+ GEN_FW_RANGE(0x381000, 0x38817f, FORCEWAKE_GT), /*
+ 0x381000 - 0x381fff: gt
+ 0x382000 - 0x383fff: reserved
+ 0x384000 - 0x384aff: gt
+ 0x384b00 - 0x3851ff: reserved
+ 0x385200 - 0x3871ff: gt
+ 0x387200 - 0x387fff: reserved
+ 0x388000 - 0x38813f: gt
+ 0x388140 - 0x38817f: reserved */
+ GEN_FW_RANGE(0x388180, 0x3882ff, 0), /*
+ 0x388180 - 0x3881ff: always on
+ 0x388200 - 0x3882ff: reserved */
+ GEN_FW_RANGE(0x388300, 0x38955f, FORCEWAKE_GT), /*
+ 0x388300 - 0x38887f: gt
+ 0x388880 - 0x388fff: reserved
+ 0x389000 - 0x38947f: gt
+ 0x389480 - 0x38955f: reserved */
+ GEN_FW_RANGE(0x389560, 0x389fff, 0), /*
+ 0x389560 - 0x3895ff: always on
+ 0x389600 - 0x389fff: reserved */
+ GEN_FW_RANGE(0x38a000, 0x38cfff, FORCEWAKE_GT), /*
+ 0x38a000 - 0x38afff: gt
+ 0x38b000 - 0x38bfff: reserved
+ 0x38c000 - 0x38cfff: gt */
+ GEN_FW_RANGE(0x38d000, 0x38d11f, 0),
+ GEN_FW_RANGE(0x38d120, 0x391fff, FORCEWAKE_GT), /*
+ 0x38d120 - 0x38dfff: gt
+ 0x38e000 - 0x38efff: reserved
+ 0x38f000 - 0x38ffff: gt
+ 0x390000 - 0x391fff: reserved */
+ GEN_FW_RANGE(0x392000, 0x392fff, 0), /*
+ 0x392000 - 0x3927ff: always on
+ 0x392800 - 0x392fff: reserved */
+ GEN_FW_RANGE(0x393000, 0x3931ff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x393200, 0x39323f, FORCEWAKE_ALL), /* instance-based, see note above */
+ GEN_FW_RANGE(0x393240, 0x3933ff, FORCEWAKE_GT),
+ GEN_FW_RANGE(0x393400, 0x3934ff, FORCEWAKE_ALL), /* instance-based, see note above */
+ GEN_FW_RANGE(0x393500, 0x393c7f, 0), /*
+ 0x393500 - 0x393bff: reserved
+ 0x393c00 - 0x393c7f: always on */
+ GEN_FW_RANGE(0x393c80, 0x393dff, FORCEWAKE_GT),
+};
+
static void
ilk_dummy_write(struct intel_uncore *uncore)
{
@@ -2021,6 +2222,7 @@ static int __fw_domain_init(struct intel_uncore *uncore,
BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX1 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX1));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX2 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX2));
BUILD_BUG_ON(FORCEWAKE_MEDIA_VEBOX3 != (1 << FW_DOMAIN_ID_MEDIA_VEBOX3));
+ BUILD_BUG_ON(FORCEWAKE_GSC != (1 << FW_DOMAIN_ID_GSC));
d->mask = BIT(domain_id);
@@ -2085,17 +2287,26 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
(ret ?: (ret = __fw_domain_init((uncore__), (id__), (set__), (ack__))))
if (GRAPHICS_VER(i915) >= 11) {
- /* we'll prune the domains of missing engines later */
- intel_engine_mask_t emask = RUNTIME_INFO(i915)->platform_engine_mask;
+ intel_engine_mask_t emask;
int i;
+ /* we'll prune the domains of missing engines later */
+ emask = uncore->gt->info.engine_mask;
+
uncore->fw_get_funcs = &uncore_get_fallback;
- fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
- FORCEWAKE_RENDER_GEN9,
- FORCEWAKE_ACK_RENDER_GEN9);
- fw_domain_init(uncore, FW_DOMAIN_ID_GT,
- FORCEWAKE_GT_GEN9,
- FORCEWAKE_ACK_GT_GEN9);
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ fw_domain_init(uncore, FW_DOMAIN_ID_GT,
+ FORCEWAKE_GT_GEN9,
+ FORCEWAKE_ACK_GT_MTL);
+ else
+ fw_domain_init(uncore, FW_DOMAIN_ID_GT,
+ FORCEWAKE_GT_GEN9,
+ FORCEWAKE_ACK_GT_GEN9);
+
+ if (RCS_MASK(uncore->gt) || CCS_MASK(uncore->gt))
+ fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
+ FORCEWAKE_RENDER_GEN9,
+ FORCEWAKE_ACK_RENDER_GEN9);
for (i = 0; i < I915_MAX_VCS; i++) {
if (!__HAS_ENGINE(emask, _VCS(i)))
@@ -2113,6 +2324,10 @@ static int intel_uncore_fw_domains_init(struct intel_uncore *uncore)
FORCEWAKE_MEDIA_VEBOX_GEN11(i),
FORCEWAKE_ACK_MEDIA_VEBOX_GEN11(i));
}
+
+ if (uncore->gt->type == GT_MEDIA)
+ fw_domain_init(uncore, FW_DOMAIN_ID_GSC,
+ FORCEWAKE_REQ_GSC, FORCEWAKE_ACK_GSC);
} else if (IS_GRAPHICS_VER(i915, 9, 10)) {
uncore->fw_get_funcs = &uncore_get_fallback;
fw_domain_init(uncore, FW_DOMAIN_ID_RENDER,
@@ -2300,6 +2515,22 @@ static void uncore_raw_init(struct intel_uncore *uncore)
}
}
+static int uncore_media_forcewake_init(struct intel_uncore *uncore)
+{
+ struct drm_i915_private *i915 = uncore->i915;
+
+ if (MEDIA_VER(i915) >= 13) {
+ ASSIGN_FW_DOMAINS_TABLE(uncore, __xelpmp_fw_ranges);
+ ASSIGN_SHADOW_TABLE(uncore, xelpmp_shadowed_regs);
+ ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
+ } else {
+ MISSING_CASE(MEDIA_VER(i915));
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
static int uncore_forcewake_init(struct intel_uncore *uncore)
{
struct drm_i915_private *i915 = uncore->i915;
@@ -2314,7 +2545,14 @@ static int uncore_forcewake_init(struct intel_uncore *uncore)
ASSIGN_READ_MMIO_VFUNCS(uncore, fwtable);
- if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) {
+ if (uncore->gt->type == GT_MEDIA)
+ return uncore_media_forcewake_init(uncore);
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) {
+ ASSIGN_FW_DOMAINS_TABLE(uncore, __mtl_fw_ranges);
+ ASSIGN_SHADOW_TABLE(uncore, mtl_shadowed_regs);
+ ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
+ } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 60)) {
ASSIGN_FW_DOMAINS_TABLE(uncore, __pvc_fw_ranges);
ASSIGN_SHADOW_TABLE(uncore, pvc_shadowed_regs);
ASSIGN_WRITE_MMIO_VFUNCS(uncore, fwtable);
diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h
index ddafa4a7ed71..5449146a0624 100644
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
@@ -62,6 +62,7 @@ enum forcewake_domain_id {
FW_DOMAIN_ID_MEDIA_VEBOX1,
FW_DOMAIN_ID_MEDIA_VEBOX2,
FW_DOMAIN_ID_MEDIA_VEBOX3,
+ FW_DOMAIN_ID_GSC,
FW_DOMAIN_ID_COUNT
};
@@ -82,6 +83,7 @@ enum forcewake_domains {
FORCEWAKE_MEDIA_VEBOX1 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX1),
FORCEWAKE_MEDIA_VEBOX2 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX2),
FORCEWAKE_MEDIA_VEBOX3 = BIT(FW_DOMAIN_ID_MEDIA_VEBOX3),
+ FORCEWAKE_GSC = BIT(FW_DOMAIN_ID_GSC),
FORCEWAKE_ALL = BIT(FW_DOMAIN_ID_COUNT) - 1,
};
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.c b/drivers/gpu/drm/i915/pxp/intel_pxp.c
index 69cdaaddc4a9..5efe61f67546 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.c
@@ -103,19 +103,15 @@ static int create_vcs_context(struct intel_pxp *pxp)
static void destroy_vcs_context(struct intel_pxp *pxp)
{
- intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce));
+ if (pxp->ce)
+ intel_engine_destroy_pinned_context(fetch_and_zero(&pxp->ce));
}
-void intel_pxp_init(struct intel_pxp *pxp)
+static void pxp_init_full(struct intel_pxp *pxp)
{
struct intel_gt *gt = pxp_to_gt(pxp);
int ret;
- if (!HAS_PXP(gt->i915))
- return;
-
- mutex_init(&pxp->tee_mutex);
-
/*
* we'll use the completion to check if there is a termination pending,
* so we start it as completed and we reinit it when a termination
@@ -124,8 +120,7 @@ void intel_pxp_init(struct intel_pxp *pxp)
init_completion(&pxp->termination);
complete_all(&pxp->termination);
- mutex_init(&pxp->arb_mutex);
- INIT_WORK(&pxp->session_work, intel_pxp_session_work);
+ intel_pxp_session_management_init(pxp);
ret = create_vcs_context(pxp);
if (ret)
@@ -143,11 +138,26 @@ out_context:
destroy_vcs_context(pxp);
}
-void intel_pxp_fini(struct intel_pxp *pxp)
+void intel_pxp_init(struct intel_pxp *pxp)
{
- if (!intel_pxp_is_enabled(pxp))
+ struct intel_gt *gt = pxp_to_gt(pxp);
+
+ /* we rely on the mei PXP module */
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_PXP))
return;
+ /*
+ * If HuC is loaded by GSC but PXP is disabled, we can skip the init of
+ * the full PXP session/object management and just init the tee channel.
+ */
+ if (HAS_PXP(gt->i915))
+ pxp_init_full(pxp);
+ else if (intel_huc_is_loaded_by_gsc(&gt->uc.huc) && intel_uc_uses_huc(&gt->uc))
+ intel_pxp_tee_component_init(pxp);
+}
+
+void intel_pxp_fini(struct intel_pxp *pxp)
+{
pxp->arb_is_valid = false;
intel_pxp_tee_component_fini(pxp);
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp.h b/drivers/gpu/drm/i915/pxp/intel_pxp.h
index 73847e535cab..2da309088c6d 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp.h
@@ -12,7 +12,6 @@
struct intel_pxp;
struct drm_i915_gem_object;
-#ifdef CONFIG_DRM_I915_PXP
struct intel_gt *pxp_to_gt(const struct intel_pxp *pxp);
bool intel_pxp_is_enabled(const struct intel_pxp *pxp);
bool intel_pxp_is_active(const struct intel_pxp *pxp);
@@ -32,36 +31,5 @@ int intel_pxp_key_check(struct intel_pxp *pxp,
bool assign);
void intel_pxp_invalidate(struct intel_pxp *pxp);
-#else
-static inline void intel_pxp_init(struct intel_pxp *pxp)
-{
-}
-
-static inline void intel_pxp_fini(struct intel_pxp *pxp)
-{
-}
-
-static inline int intel_pxp_start(struct intel_pxp *pxp)
-{
- return -ENODEV;
-}
-
-static inline bool intel_pxp_is_enabled(const struct intel_pxp *pxp)
-{
- return false;
-}
-
-static inline bool intel_pxp_is_active(const struct intel_pxp *pxp)
-{
- return false;
-}
-
-static inline int intel_pxp_key_check(struct intel_pxp *pxp,
- struct drm_i915_gem_object *obj,
- bool assign)
-{
- return -ENODEV;
-}
-#endif
#endif /* __INTEL_PXP_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c
new file mode 100644
index 000000000000..7ec36d94e758
--- /dev/null
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2021-2022, Intel Corporation. All rights reserved.
+ */
+
+#include "drm/i915_drm.h"
+#include "i915_drv.h"
+
+#include "gem/i915_gem_region.h"
+#include "gt/intel_gt.h"
+
+#include "intel_pxp.h"
+#include "intel_pxp_huc.h"
+#include "intel_pxp_tee.h"
+#include "intel_pxp_types.h"
+#include "intel_pxp_tee_interface.h"
+
+int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp)
+{
+ struct intel_gt *gt = pxp_to_gt(pxp);
+ struct intel_huc *huc = &gt->uc.huc;
+ struct pxp_tee_start_huc_auth_in huc_in = {0};
+ struct pxp_tee_start_huc_auth_out huc_out = {0};
+ dma_addr_t huc_phys_addr;
+ u8 client_id = 0;
+ u8 fence_id = 0;
+ int err;
+
+ if (!pxp->pxp_component)
+ return -ENODEV;
+
+ huc_phys_addr = i915_gem_object_get_dma_address(huc->fw.obj, 0);
+
+ /* write the PXP message into the lmem (the sg list) */
+ huc_in.header.api_version = PXP_TEE_43_APIVER;
+ huc_in.header.command_id = PXP_TEE_43_START_HUC_AUTH;
+ huc_in.header.status = 0;
+ huc_in.header.buffer_len = sizeof(huc_in.huc_base_address);
+ huc_in.huc_base_address = huc_phys_addr;
+
+ err = intel_pxp_tee_stream_message(pxp, client_id, fence_id,
+ &huc_in, sizeof(huc_in),
+ &huc_out, sizeof(huc_out));
+ if (err < 0) {
+ drm_err(&gt->i915->drm,
+ "Failed to send HuC load and auth command to GSC [%d]!\n",
+ err);
+ return err;
+ }
+
+ /*
+ * HuC does sometimes survive suspend/resume (it depends on how "deep"
+ * a sleep state the device reaches) so we can end up here on resume
+ * with HuC already loaded, in which case the GSC will return
+ * PXP_STATUS_OP_NOT_PERMITTED. We can therefore consider the HuC
+ * correctly transferred in this scenario; if the same error is ever
+ * returned with HuC not loaded we'll still catch it when we check the
+ * authentication bit later.
+ */
+ if (huc_out.header.status != PXP_STATUS_SUCCESS &&
+ huc_out.header.status != PXP_STATUS_OP_NOT_PERMITTED) {
+ drm_err(&gt->i915->drm,
+ "HuC load failed with GSC error = 0x%x\n",
+ huc_out.header.status);
+ return -EPROTO;
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h
new file mode 100644
index 000000000000..e40847a91c39
--- /dev/null
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_huc.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2021-2022, Intel Corporation. All rights reserved.
+ */
+
+#ifndef __INTEL_PXP_HUC_H__
+#define __INTEL_PXP_HUC_H__
+
+struct intel_pxp;
+
+int intel_pxp_huc_load_and_auth(struct intel_pxp *pxp);
+
+#endif /* __INTEL_PXP_HUC_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h
index 8b5793654844..8c292dc86f68 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_irq.h
@@ -27,6 +27,14 @@ void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir);
static inline void intel_pxp_irq_handler(struct intel_pxp *pxp, u16 iir)
{
}
+
+static inline void intel_pxp_irq_enable(struct intel_pxp *pxp)
+{
+}
+
+static inline void intel_pxp_irq_disable(struct intel_pxp *pxp)
+{
+}
#endif
#endif /* __INTEL_PXP_IRQ_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
index c4f5c994ca51..85572360c71a 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.c
@@ -138,7 +138,7 @@ static void pxp_terminate_complete(struct intel_pxp *pxp)
complete_all(&pxp->termination);
}
-void intel_pxp_session_work(struct work_struct *work)
+static void pxp_session_work(struct work_struct *work)
{
struct intel_pxp *pxp = container_of(work, typeof(*pxp), session_work);
struct intel_gt *gt = pxp_to_gt(pxp);
@@ -173,3 +173,9 @@ void intel_pxp_session_work(struct work_struct *work)
intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
+
+void intel_pxp_session_management_init(struct intel_pxp *pxp)
+{
+ mutex_init(&pxp->arb_mutex);
+ INIT_WORK(&pxp->session_work, pxp_session_work);
+}
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h
index ba4c9d2b94b7..903ac52cffa1 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_session.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_session.h
@@ -8,8 +8,13 @@
#include <linux/types.h>
-struct work_struct;
-
-void intel_pxp_session_work(struct work_struct *work);
+struct intel_pxp;
+#ifdef CONFIG_DRM_I915_PXP
+void intel_pxp_session_management_init(struct intel_pxp *pxp);
+#else
+static inline void intel_pxp_session_management_init(struct intel_pxp *pxp)
+{
+}
+#endif
#endif /* __INTEL_PXP_SESSION_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
index a90905039216..052fd2f9a583 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.c
@@ -8,11 +8,14 @@
#include <drm/i915_pxp_tee_interface.h>
#include <drm/i915_component.h>
+#include "gem/i915_gem_lmem.h"
+
#include "i915_drv.h"
#include "intel_pxp.h"
#include "intel_pxp_session.h"
#include "intel_pxp_tee.h"
#include "intel_pxp_tee_interface.h"
+#include "intel_pxp_huc.h"
static inline struct intel_pxp *i915_dev_to_pxp(struct device *i915_kdev)
{
@@ -69,6 +72,47 @@ unlock:
return ret;
}
+int intel_pxp_tee_stream_message(struct intel_pxp *pxp,
+ u8 client_id, u32 fence_id,
+ void *msg_in, size_t msg_in_len,
+ void *msg_out, size_t msg_out_len)
+{
+ /* TODO: for bigger objects we need to use a sg of 4k pages */
+ const size_t max_msg_size = PAGE_SIZE;
+ struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915;
+ struct i915_pxp_component *pxp_component = pxp->pxp_component;
+ unsigned int offset = 0;
+ struct scatterlist *sg;
+ int ret;
+
+ if (msg_in_len > max_msg_size || msg_out_len > max_msg_size)
+ return -ENOSPC;
+
+ mutex_lock(&pxp->tee_mutex);
+
+ if (unlikely(!pxp_component || !pxp_component->ops->gsc_command)) {
+ ret = -ENODEV;
+ goto unlock;
+ }
+
+ GEM_BUG_ON(!pxp->stream_cmd.obj);
+
+ sg = i915_gem_object_get_sg_dma(pxp->stream_cmd.obj, 0, &offset);
+
+ memcpy(pxp->stream_cmd.vaddr, msg_in, msg_in_len);
+
+ ret = pxp_component->ops->gsc_command(pxp_component->tee_dev, client_id,
+ fence_id, sg, msg_in_len, sg);
+ if (ret < 0)
+ drm_err(&i915->drm, "Failed to send PXP TEE gsc command\n");
+ else
+ memcpy(msg_out, pxp->stream_cmd.vaddr, msg_out_len);
+
+unlock:
+ mutex_unlock(&pxp->tee_mutex);
+ return ret;
+}
+
/**
* i915_pxp_tee_component_bind - bind function to pass the function pointers to pxp_tee
* @i915_kdev: pointer to i915 kernel device
@@ -84,24 +128,36 @@ static int i915_pxp_tee_component_bind(struct device *i915_kdev,
{
struct drm_i915_private *i915 = kdev_to_i915(i915_kdev);
struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev);
+ struct intel_uc *uc = &pxp_to_gt(pxp)->uc;
intel_wakeref_t wakeref;
+ int ret = 0;
mutex_lock(&pxp->tee_mutex);
pxp->pxp_component = data;
pxp->pxp_component->tee_dev = tee_kdev;
mutex_unlock(&pxp->tee_mutex);
+ if (intel_uc_uses_huc(uc) && intel_huc_is_loaded_by_gsc(&uc->huc)) {
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ /* load huc via pxp */
+ ret = intel_huc_fw_load_and_auth_via_gsc(&uc->huc);
+ if (ret < 0)
+ drm_err(&i915->drm, "failed to load huc via gsc %d\n", ret);
+ }
+ }
+
/* if we are suspended, the HW will be re-initialized on resume */
wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
if (!wakeref)
return 0;
/* the component is required to fully start the PXP HW */
- intel_pxp_init_hw(pxp);
+ if (intel_pxp_is_enabled(pxp))
+ intel_pxp_init_hw(pxp);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
- return 0;
+ return ret;
}
static void i915_pxp_tee_component_unbind(struct device *i915_kdev,
@@ -111,8 +167,9 @@ static void i915_pxp_tee_component_unbind(struct device *i915_kdev,
struct intel_pxp *pxp = i915_dev_to_pxp(i915_kdev);
intel_wakeref_t wakeref;
- with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref)
- intel_pxp_fini_hw(pxp);
+ if (intel_pxp_is_enabled(pxp))
+ with_intel_runtime_pm_if_in_use(&i915->runtime_pm, wakeref)
+ intel_pxp_fini_hw(pxp);
mutex_lock(&pxp->tee_mutex);
pxp->pxp_component = NULL;
@@ -124,22 +181,92 @@ static const struct component_ops i915_pxp_tee_component_ops = {
.unbind = i915_pxp_tee_component_unbind,
};
+static int alloc_streaming_command(struct intel_pxp *pxp)
+{
+ struct drm_i915_private *i915 = pxp_to_gt(pxp)->i915;
+ struct drm_i915_gem_object *obj = NULL;
+ void *cmd;
+ int err;
+
+ pxp->stream_cmd.obj = NULL;
+ pxp->stream_cmd.vaddr = NULL;
+
+ if (!IS_DGFX(i915))
+ return 0;
+
+ /* allocate lmem object of one page for PXP command memory and store it */
+ obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj)) {
+ drm_err(&i915->drm, "Failed to allocate pxp streaming command!\n");
+ return PTR_ERR(obj);
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ drm_err(&i915->drm, "Failed to pin gsc message page!\n");
+ goto out_put;
+ }
+
+ /* map the lmem into the virtual memory pointer */
+ cmd = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(i915, obj, true));
+ if (IS_ERR(cmd)) {
+ drm_err(&i915->drm, "Failed to map gsc message page!\n");
+ err = PTR_ERR(cmd);
+ goto out_unpin;
+ }
+
+ memset(cmd, 0, obj->base.size);
+
+ pxp->stream_cmd.obj = obj;
+ pxp->stream_cmd.vaddr = cmd;
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_put:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+static void free_streaming_command(struct intel_pxp *pxp)
+{
+ struct drm_i915_gem_object *obj = fetch_and_zero(&pxp->stream_cmd.obj);
+
+ if (!obj)
+ return;
+
+ i915_gem_object_unpin_map(obj);
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_put(obj);
+}
+
int intel_pxp_tee_component_init(struct intel_pxp *pxp)
{
int ret;
struct intel_gt *gt = pxp_to_gt(pxp);
struct drm_i915_private *i915 = gt->i915;
+ mutex_init(&pxp->tee_mutex);
+
+ ret = alloc_streaming_command(pxp);
+ if (ret)
+ return ret;
+
ret = component_add_typed(i915->drm.dev, &i915_pxp_tee_component_ops,
I915_COMPONENT_PXP);
if (ret < 0) {
drm_err(&i915->drm, "Failed to add PXP component (%d)\n", ret);
- return ret;
+ goto out_free;
}
pxp->pxp_component_added = true;
return 0;
+
+out_free:
+ free_streaming_command(pxp);
+ return ret;
}
void intel_pxp_tee_component_fini(struct intel_pxp *pxp)
@@ -151,6 +278,8 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp)
component_del(i915->drm.dev, &i915_pxp_tee_component_ops);
pxp->pxp_component_added = false;
+
+ free_streaming_command(pxp);
}
int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp,
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h
index c136053ce340..aeb3dfe7ce96 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee.h
@@ -14,4 +14,9 @@ void intel_pxp_tee_component_fini(struct intel_pxp *pxp);
int intel_pxp_tee_cmd_create_arb_session(struct intel_pxp *pxp,
int arb_session_id);
+int intel_pxp_tee_stream_message(struct intel_pxp *pxp,
+ u8 client_id, u32 fence_id,
+ void *msg_in, size_t msg_in_len,
+ void *msg_out, size_t msg_out_len);
+
#endif /* __INTEL_PXP_TEE_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h
index 36e9b0868f5c..7edc1760f142 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_tee_interface.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright(c) 2020, Intel Corporation. All rights reserved.
+ * Copyright(c) 2020-2022, Intel Corporation. All rights reserved.
*/
#ifndef __INTEL_PXP_TEE_INTERFACE_H__
@@ -9,8 +9,20 @@
#include <linux/types.h>
#define PXP_TEE_APIVER 0x40002
+#define PXP_TEE_43_APIVER 0x00040003
#define PXP_TEE_ARB_CMDID 0x1e
#define PXP_TEE_ARB_PROTECTION_MODE 0x2
+#define PXP_TEE_43_START_HUC_AUTH 0x0000003A
+
+/*
+ * There are a lot of status codes for PXP, but we only define the ones we
+ * can actually handle in the driver. Other failure codes will be printed in
+ * the error message for debugging.
+ */
+enum pxp_status {
+ PXP_STATUS_SUCCESS = 0x0,
+ PXP_STATUS_OP_NOT_PERMITTED = 0x4013
+};
/* PXP TEE message header */
struct pxp_tee_cmd_header {
@@ -33,4 +45,13 @@ struct pxp_tee_create_arb_out {
struct pxp_tee_cmd_header header;
} __packed;
+struct pxp_tee_start_huc_auth_in {
+ struct pxp_tee_cmd_header header;
+ __le64 huc_base_address;
+};
+
+struct pxp_tee_start_huc_auth_out {
+ struct pxp_tee_cmd_header header;
+};
+
#endif /* __INTEL_PXP_TEE_INTERFACE_H__ */
diff --git a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
index 7ce5f37ee12e..f74b1e11a505 100644
--- a/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
+++ b/drivers/gpu/drm/i915/pxp/intel_pxp_types.h
@@ -53,6 +53,12 @@ struct intel_pxp {
/** @tee_mutex: protects the tee channel binding and messaging. */
struct mutex tee_mutex;
+ /** @stream_cmd: LMEM obj used to send stream PXP commands to the GSC */
+ struct {
+ struct drm_i915_gem_object *obj; /* contains PXP command memory */
+ void *vaddr; /* virtual memory for PXP command */
+ } stream_cmd;
+
/**
* @hw_state_invalidated: if the HW perceives an attack on the integrity
* of the encryption it will invalidate the keys and expect SW to
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index e050a2de5fd1..27c733b00976 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -27,6 +27,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_context.h"
@@ -1113,15 +1114,8 @@ static int misaligned_case(struct i915_address_space *vm, struct intel_memory_re
expected_node_size = expected_vma_size;
if (HAS_64K_PAGES(vm->i915) && i915_gem_object_is_lmem(obj)) {
- /*
- * The compact-pt should expand lmem node to 2MB for the ppGTT,
- * for all other cases we should only expect 64K.
- */
expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
- if (NEEDS_COMPACT_PT(vm->i915) && !i915_is_ggtt(vm))
- expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_2M);
- else
- expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
+ expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
}
if (vma->size != expected_vma_size || vma->node.size != expected_node_size) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c
index 429c6d73b159..24dde5531423 100644
--- a/drivers/gpu/drm/i915/selftests/i915_perf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_perf.c
@@ -102,6 +102,12 @@ test_stream(struct i915_perf *perf)
I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8,
};
struct i915_perf_stream *stream;
+ struct intel_gt *gt;
+
+ if (!props.engine)
+ return NULL;
+
+ gt = props.engine->gt;
if (!oa_config)
return NULL;
@@ -116,12 +122,12 @@ test_stream(struct i915_perf *perf)
stream->perf = perf;
- mutex_lock(&perf->lock);
+ mutex_lock(&gt->perf.lock);
if (i915_oa_stream_init(stream, &param, &props)) {
kfree(stream);
stream = NULL;
}
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
i915_oa_config_put(oa_config);
@@ -130,11 +136,11 @@ test_stream(struct i915_perf *perf)
static void stream_destroy(struct i915_perf_stream *stream)
{
- struct i915_perf *perf = stream->perf;
+ struct intel_gt *gt = stream->engine->gt;
- mutex_lock(&perf->lock);
+ mutex_lock(&gt->perf.lock);
i915_perf_destroy_locked(stream);
- mutex_unlock(&perf->lock);
+ mutex_unlock(&gt->perf.lock);
}
static int live_sanitycheck(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c
index 818a4909c1f3..a46350c37e9d 100644
--- a/drivers/gpu/drm/i915/selftests/i915_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_request.c
@@ -299,9 +299,18 @@ __live_request_alloc(struct intel_context *ce)
return intel_context_create_request(ce);
}
-static int __igt_breadcrumbs_smoketest(void *arg)
+struct smoke_thread {
+ struct kthread_worker *worker;
+ struct kthread_work work;
+ struct smoketest *t;
+ bool stop;
+ int result;
+};
+
+static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
{
- struct smoketest *t = arg;
+ struct smoke_thread *thread = container_of(work, typeof(*thread), work);
+ struct smoketest *t = thread->t;
const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
const unsigned int total = 4 * t->ncontexts + 1;
unsigned int num_waits = 0, num_fences = 0;
@@ -320,8 +329,10 @@ static int __igt_breadcrumbs_smoketest(void *arg)
*/
requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
- if (!requests)
- return -ENOMEM;
+ if (!requests) {
+ thread->result = -ENOMEM;
+ return;
+ }
order = i915_random_order(total, &prng);
if (!order) {
@@ -329,7 +340,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
goto out_requests;
}
- while (!kthread_should_stop()) {
+ while (!READ_ONCE(thread->stop)) {
struct i915_sw_fence *submit, *wait;
unsigned int n, count;
@@ -437,7 +448,7 @@ static int __igt_breadcrumbs_smoketest(void *arg)
kfree(order);
out_requests:
kfree(requests);
- return err;
+ thread->result = err;
}
static int mock_breadcrumbs_smoketest(void *arg)
@@ -450,7 +461,7 @@ static int mock_breadcrumbs_smoketest(void *arg)
.request_alloc = __mock_request_alloc
};
unsigned int ncpus = num_online_cpus();
- struct task_struct **threads;
+ struct smoke_thread *threads;
unsigned int n;
int ret = 0;
@@ -479,28 +490,37 @@ static int mock_breadcrumbs_smoketest(void *arg)
}
for (n = 0; n < ncpus; n++) {
- threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
- &t, "igt/%d", n);
- if (IS_ERR(threads[n])) {
- ret = PTR_ERR(threads[n]);
+ struct kthread_worker *worker;
+
+ worker = kthread_create_worker(0, "igt/%d", n);
+ if (IS_ERR(worker)) {
+ ret = PTR_ERR(worker);
ncpus = n;
break;
}
- get_task_struct(threads[n]);
+ threads[n].worker = worker;
+ threads[n].t = &t;
+ threads[n].stop = false;
+ threads[n].result = 0;
+
+ kthread_init_work(&threads[n].work,
+ __igt_breadcrumbs_smoketest);
+ kthread_queue_work(worker, &threads[n].work);
}
- yield(); /* start all threads before we begin */
msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
for (n = 0; n < ncpus; n++) {
int err;
- err = kthread_stop(threads[n]);
+ WRITE_ONCE(threads[n].stop, true);
+ kthread_flush_work(&threads[n].work);
+ err = READ_ONCE(threads[n].result);
if (err < 0 && !ret)
ret = err;
- put_task_struct(threads[n]);
+ kthread_destroy_worker(threads[n].worker);
}
pr_info("Completed %lu waits for %lu fence across %d cpus\n",
atomic_long_read(&t.num_waits),
@@ -1419,9 +1439,18 @@ out_free:
return err;
}
-static int __live_parallel_engine1(void *arg)
+struct parallel_thread {
+ struct kthread_worker *worker;
+ struct kthread_work work;
+ struct intel_engine_cs *engine;
+ int result;
+};
+
+static void __live_parallel_engine1(struct kthread_work *work)
{
- struct intel_engine_cs *engine = arg;
+ struct parallel_thread *thread =
+ container_of(work, typeof(*thread), work);
+ struct intel_engine_cs *engine = thread->engine;
IGT_TIMEOUT(end_time);
unsigned long count;
int err = 0;
@@ -1452,12 +1481,14 @@ static int __live_parallel_engine1(void *arg)
intel_engine_pm_put(engine);
pr_info("%s: %lu request + sync\n", engine->name, count);
- return err;
+ thread->result = err;
}
-static int __live_parallel_engineN(void *arg)
+static void __live_parallel_engineN(struct kthread_work *work)
{
- struct intel_engine_cs *engine = arg;
+ struct parallel_thread *thread =
+ container_of(work, typeof(*thread), work);
+ struct intel_engine_cs *engine = thread->engine;
IGT_TIMEOUT(end_time);
unsigned long count;
int err = 0;
@@ -1479,7 +1510,7 @@ static int __live_parallel_engineN(void *arg)
intel_engine_pm_put(engine);
pr_info("%s: %lu requests\n", engine->name, count);
- return err;
+ thread->result = err;
}
static bool wake_all(struct drm_i915_private *i915)
@@ -1505,9 +1536,11 @@ static int wait_for_all(struct drm_i915_private *i915)
return -ETIME;
}
-static int __live_parallel_spin(void *arg)
+static void __live_parallel_spin(struct kthread_work *work)
{
- struct intel_engine_cs *engine = arg;
+ struct parallel_thread *thread =
+ container_of(work, typeof(*thread), work);
+ struct intel_engine_cs *engine = thread->engine;
struct igt_spinner spin;
struct i915_request *rq;
int err = 0;
@@ -1520,7 +1553,8 @@ static int __live_parallel_spin(void *arg)
if (igt_spinner_init(&spin, engine->gt)) {
wake_all(engine->i915);
- return -ENOMEM;
+ thread->result = -ENOMEM;
+ return;
}
intel_engine_pm_get(engine);
@@ -1553,22 +1587,22 @@ static int __live_parallel_spin(void *arg)
out_spin:
igt_spinner_fini(&spin);
- return err;
+ thread->result = err;
}
static int live_parallel_engines(void *arg)
{
struct drm_i915_private *i915 = arg;
- static int (* const func[])(void *arg) = {
+ static void (* const func[])(struct kthread_work *) = {
__live_parallel_engine1,
__live_parallel_engineN,
__live_parallel_spin,
NULL,
};
const unsigned int nengines = num_uabi_engines(i915);
+ struct parallel_thread *threads;
struct intel_engine_cs *engine;
- int (* const *fn)(void *arg);
- struct task_struct **tsk;
+ void (* const *fn)(struct kthread_work *);
int err = 0;
/*
@@ -1576,8 +1610,8 @@ static int live_parallel_engines(void *arg)
* tests that we load up the system maximally.
*/
- tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
- if (!tsk)
+ threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL);
+ if (!threads)
return -ENOMEM;
for (fn = func; !err && *fn; fn++) {
@@ -1594,37 +1628,44 @@ static int live_parallel_engines(void *arg)
idx = 0;
for_each_uabi_engine(engine, i915) {
- tsk[idx] = kthread_run(*fn, engine,
- "igt/parallel:%s",
- engine->name);
- if (IS_ERR(tsk[idx])) {
- err = PTR_ERR(tsk[idx]);
+ struct kthread_worker *worker;
+
+ worker = kthread_create_worker(0, "igt/parallel:%s",
+ engine->name);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
break;
}
- get_task_struct(tsk[idx++]);
- }
- yield(); /* start all threads before we kthread_stop() */
+ threads[idx].worker = worker;
+ threads[idx].result = 0;
+ threads[idx].engine = engine;
+
+ kthread_init_work(&threads[idx].work, *fn);
+ kthread_queue_work(worker, &threads[idx].work);
+ idx++;
+ }
idx = 0;
for_each_uabi_engine(engine, i915) {
int status;
- if (IS_ERR(tsk[idx]))
+ if (!threads[idx].worker)
break;
- status = kthread_stop(tsk[idx]);
+ kthread_flush_work(&threads[idx].work);
+ status = READ_ONCE(threads[idx].result);
if (status && !err)
err = status;
- put_task_struct(tsk[idx++]);
+ kthread_destroy_worker(threads[idx++].worker);
}
if (igt_live_test_end(&t))
err = -EIO;
}
- kfree(tsk);
+ kfree(threads);
return err;
}
@@ -1672,7 +1713,7 @@ static int live_breadcrumbs_smoketest(void *arg)
const unsigned int ncpus = num_online_cpus();
unsigned long num_waits, num_fences;
struct intel_engine_cs *engine;
- struct task_struct **threads;
+ struct smoke_thread *threads;
struct igt_live_test live;
intel_wakeref_t wakeref;
struct smoketest *smoke;
@@ -1746,23 +1787,26 @@ static int live_breadcrumbs_smoketest(void *arg)
smoke[idx].max_batch, engine->name);
for (n = 0; n < ncpus; n++) {
- struct task_struct *tsk;
+ unsigned int i = idx * ncpus + n;
+ struct kthread_worker *worker;
- tsk = kthread_run(__igt_breadcrumbs_smoketest,
- &smoke[idx], "igt/%d.%d", idx, n);
- if (IS_ERR(tsk)) {
- ret = PTR_ERR(tsk);
+ worker = kthread_create_worker(0, "igt/%d.%d", idx, n);
+ if (IS_ERR(worker)) {
+ ret = PTR_ERR(worker);
goto out_flush;
}
- get_task_struct(tsk);
- threads[idx * ncpus + n] = tsk;
+ threads[i].worker = worker;
+ threads[i].t = &smoke[idx];
+
+ kthread_init_work(&threads[i].work,
+ __igt_breadcrumbs_smoketest);
+ kthread_queue_work(worker, &threads[i].work);
}
idx++;
}
- yield(); /* start all threads before we begin */
msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
out_flush:
@@ -1771,17 +1815,19 @@ out_flush:
num_fences = 0;
for_each_uabi_engine(engine, i915) {
for (n = 0; n < ncpus; n++) {
- struct task_struct *tsk = threads[idx * ncpus + n];
+ unsigned int i = idx * ncpus + n;
int err;
- if (!tsk)
+ if (!threads[i].worker)
continue;
- err = kthread_stop(tsk);
+ WRITE_ONCE(threads[i].stop, true);
+ kthread_flush_work(&threads[i].work);
+ err = READ_ONCE(threads[i].result);
if (err < 0 && !ret)
ret = err;
- put_task_struct(tsk);
+ kthread_destroy_worker(threads[i].worker);
}
num_waits += atomic_long_read(&smoke[idx].num_waits);
@@ -2891,9 +2937,18 @@ out:
return err;
}
-static int p_sync0(void *arg)
+struct p_thread {
+ struct perf_stats p;
+ struct kthread_worker *worker;
+ struct kthread_work work;
+ struct intel_engine_cs *engine;
+ int result;
+};
+
+static void p_sync0(struct kthread_work *work)
{
- struct perf_stats *p = arg;
+ struct p_thread *thread = container_of(work, typeof(*thread), work);
+ struct perf_stats *p = &thread->p;
struct intel_engine_cs *engine = p->engine;
struct intel_context *ce;
IGT_TIMEOUT(end_time);
@@ -2902,13 +2957,16 @@ static int p_sync0(void *arg)
int err = 0;
ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
+ if (IS_ERR(ce)) {
+ thread->result = PTR_ERR(ce);
+ return;
+ }
err = intel_context_pin(ce);
if (err) {
intel_context_put(ce);
- return err;
+ thread->result = err;
+ return;
}
if (intel_engine_supports_stats(engine)) {
@@ -2958,12 +3016,13 @@ static int p_sync0(void *arg)
intel_context_unpin(ce);
intel_context_put(ce);
- return err;
+ thread->result = err;
}
-static int p_sync1(void *arg)
+static void p_sync1(struct kthread_work *work)
{
- struct perf_stats *p = arg;
+ struct p_thread *thread = container_of(work, typeof(*thread), work);
+ struct perf_stats *p = &thread->p;
struct intel_engine_cs *engine = p->engine;
struct i915_request *prev = NULL;
struct intel_context *ce;
@@ -2973,13 +3032,16 @@ static int p_sync1(void *arg)
int err = 0;
ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
+ if (IS_ERR(ce)) {
+ thread->result = PTR_ERR(ce);
+ return;
+ }
err = intel_context_pin(ce);
if (err) {
intel_context_put(ce);
- return err;
+ thread->result = err;
+ return;
}
if (intel_engine_supports_stats(engine)) {
@@ -3031,12 +3093,13 @@ static int p_sync1(void *arg)
intel_context_unpin(ce);
intel_context_put(ce);
- return err;
+ thread->result = err;
}
-static int p_many(void *arg)
+static void p_many(struct kthread_work *work)
{
- struct perf_stats *p = arg;
+ struct p_thread *thread = container_of(work, typeof(*thread), work);
+ struct perf_stats *p = &thread->p;
struct intel_engine_cs *engine = p->engine;
struct intel_context *ce;
IGT_TIMEOUT(end_time);
@@ -3045,13 +3108,16 @@ static int p_many(void *arg)
bool busy;
ce = intel_context_create(engine);
- if (IS_ERR(ce))
- return PTR_ERR(ce);
+ if (IS_ERR(ce)) {
+ thread->result = PTR_ERR(ce);
+ return;
+ }
err = intel_context_pin(ce);
if (err) {
intel_context_put(ce);
- return err;
+ thread->result = err;
+ return;
}
if (intel_engine_supports_stats(engine)) {
@@ -3092,26 +3158,23 @@ static int p_many(void *arg)
intel_context_unpin(ce);
intel_context_put(ce);
- return err;
+ thread->result = err;
}
static int perf_parallel_engines(void *arg)
{
struct drm_i915_private *i915 = arg;
- static int (* const func[])(void *arg) = {
+ static void (* const func[])(struct kthread_work *) = {
p_sync0,
p_sync1,
p_many,
NULL,
};
const unsigned int nengines = num_uabi_engines(i915);
+ void (* const *fn)(struct kthread_work *);
struct intel_engine_cs *engine;
- int (* const *fn)(void *arg);
struct pm_qos_request qos;
- struct {
- struct perf_stats p;
- struct task_struct *tsk;
- } *engines;
+ struct p_thread *engines;
int err = 0;
engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
@@ -3134,36 +3197,45 @@ static int perf_parallel_engines(void *arg)
idx = 0;
for_each_uabi_engine(engine, i915) {
+ struct kthread_worker *worker;
+
intel_engine_pm_get(engine);
memset(&engines[idx].p, 0, sizeof(engines[idx].p));
- engines[idx].p.engine = engine;
- engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
- "igt:%s", engine->name);
- if (IS_ERR(engines[idx].tsk)) {
- err = PTR_ERR(engines[idx].tsk);
+ worker = kthread_create_worker(0, "igt:%s",
+ engine->name);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
intel_engine_pm_put(engine);
break;
}
- get_task_struct(engines[idx++].tsk);
- }
+ engines[idx].worker = worker;
+ engines[idx].result = 0;
+ engines[idx].p.engine = engine;
+ engines[idx].engine = engine;
- yield(); /* start all threads before we kthread_stop() */
+ kthread_init_work(&engines[idx].work, *fn);
+ kthread_queue_work(worker, &engines[idx].work);
+ idx++;
+ }
idx = 0;
for_each_uabi_engine(engine, i915) {
int status;
- if (IS_ERR(engines[idx].tsk))
+ if (!engines[idx].worker)
break;
- status = kthread_stop(engines[idx].tsk);
+ kthread_flush_work(&engines[idx].work);
+ status = READ_ONCE(engines[idx].result);
if (status && !err)
err = status;
intel_engine_pm_put(engine);
- put_task_struct(engines[idx++].tsk);
+
+ kthread_destroy_worker(engines[idx].worker);
+ idx++;
}
if (igt_live_test_end(&t))
diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c
index fda9bb79c049..e4281508d580 100644
--- a/drivers/gpu/drm/i915/selftests/intel_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c
@@ -70,6 +70,8 @@ static int intel_shadow_table_check(void)
{ gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) },
{ dg2_shadowed_regs, ARRAY_SIZE(dg2_shadowed_regs) },
{ pvc_shadowed_regs, ARRAY_SIZE(pvc_shadowed_regs) },
+ { mtl_shadowed_regs, ARRAY_SIZE(mtl_shadowed_regs) },
+ { xelpmp_shadowed_regs, ARRAY_SIZE(xelpmp_shadowed_regs) },
};
const struct i915_range *range;
unsigned int i, j;
@@ -117,6 +119,8 @@ int intel_uncore_mock_selftests(void)
{ __gen12_fw_ranges, ARRAY_SIZE(__gen12_fw_ranges), true },
{ __xehp_fw_ranges, ARRAY_SIZE(__xehp_fw_ranges), true },
{ __pvc_fw_ranges, ARRAY_SIZE(__pvc_fw_ranges), true },
+ { __mtl_fw_ranges, ARRAY_SIZE(__mtl_fw_ranges), true },
+ { __xelpmp_fw_ranges, ARRAY_SIZE(__xelpmp_fw_ranges), true },
};
int err, i;
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index fff11c90f1fa..f6a7c0bd2955 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -67,7 +67,6 @@ static void mock_device_release(struct drm_device *dev)
intel_gt_driver_remove(to_gt(i915));
i915_gem_drain_workqueue(i915);
- i915_gem_drain_freed_objects(i915);
mock_fini_ggtt(to_gt(i915)->ggtt);
destroy_workqueue(i915->wq);
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 46aa3554e97b..1fbe127ff633 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
+#include <linux/scatterlist.h>
#include <linux/mei_cl_bus.h>
#include "mei_dev.h"
@@ -100,9 +101,18 @@ ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
cb->internal = !!(mode & MEI_CL_IO_TX_INTERNAL);
cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING);
memcpy(cb->buf.data, buf, length);
+ /* hack: for SGL the copied buffer is the extended header, so move it from buf.data to ext_hdr */
+ if (mode & MEI_CL_IO_SGL) {
+ cb->ext_hdr = (struct mei_ext_hdr *)cb->buf.data;
+ cb->buf.data = NULL;
+ cb->buf.size = 0;
+ }
rets = mei_cl_write(cl, cb);
+ if (mode & MEI_CL_IO_SGL && rets == 0)
+ rets = length;
+
out:
mutex_unlock(&bus->device_lock);
@@ -205,9 +215,16 @@ copy:
goto free;
}
- r_length = min_t(size_t, length, cb->buf_idx);
- memcpy(buf, cb->buf.data, r_length);
+ /* for the GSC type - copy the extended header to the buffer */
+ if (cb->ext_hdr && cb->ext_hdr->type == MEI_EXT_HDR_GSC) {
+ r_length = min_t(size_t, length, cb->ext_hdr->length * sizeof(u32));
+ memcpy(buf, cb->ext_hdr, r_length);
+ } else {
+ r_length = min_t(size_t, length, cb->buf_idx);
+ memcpy(buf, cb->buf.data, r_length);
+ }
rets = r_length;
+
if (vtag)
*vtag = cb->vtag;
@@ -823,6 +840,131 @@ out:
EXPORT_SYMBOL_GPL(mei_cldev_disable);
/**
+ * mei_cldev_send_gsc_command - sends a gsc command, by sending
+ * a sgl mei message to gsc and receiving reply from gsc
+ *
+ * @cldev: me client device
+ * @client_id: client id to send the command to
+ * @fence_id: fence id (synchronization marker) to place in the command
+ * @sg_in: scatter gather list containing addresses for rx message buffer
+ * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes
+ * @sg_out: scatter gather list containing addresses for tx message buffer
+ *
+ * Return:
+ * * written size in bytes
+ * * < 0 on error
+ */
+ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev,
+ u8 client_id, u32 fence_id,
+ struct scatterlist *sg_in,
+ size_t total_in_len,
+ struct scatterlist *sg_out)
+{
+ struct mei_cl *cl;
+ struct mei_device *bus;
+ ssize_t ret = 0;
+
+ struct mei_ext_hdr_gsc_h2f *ext_hdr;
+ size_t buf_sz = sizeof(struct mei_ext_hdr_gsc_h2f);
+ int sg_out_nents, sg_in_nents;
+ int i;
+ struct scatterlist *sg;
+ struct mei_ext_hdr_gsc_f2h rx_msg;
+ unsigned int sg_len;
+
+ if (!cldev || !sg_in || !sg_out)
+ return -EINVAL;
+
+ cl = cldev->cl;
+ bus = cldev->bus;
+
+ dev_dbg(bus->dev, "client_id %u, fence_id %u\n", client_id, fence_id);
+
+ if (!bus->hbm_f_gsc_supported)
+ return -EOPNOTSUPP;
+
+ sg_out_nents = sg_nents(sg_out);
+ sg_in_nents = sg_nents(sg_in);
+ /* at least one entry in tx and rx sgls must be present */
+ if (sg_out_nents <= 0 || sg_in_nents <= 0)
+ return -EINVAL;
+
+ buf_sz += (sg_out_nents + sg_in_nents) * sizeof(struct mei_gsc_sgl);
+ ext_hdr = kzalloc(buf_sz, GFP_KERNEL);
+ if (!ext_hdr)
+ return -ENOMEM;
+
+ /* construct the GSC message */
+ ext_hdr->hdr.type = MEI_EXT_HDR_GSC;
+ ext_hdr->hdr.length = buf_sz / sizeof(u32); /* length is in dw */
+
+ ext_hdr->client_id = client_id;
+ ext_hdr->addr_type = GSC_ADDRESS_TYPE_PHYSICAL_SGL;
+ ext_hdr->fence_id = fence_id;
+ ext_hdr->input_address_count = sg_in_nents;
+ ext_hdr->output_address_count = sg_out_nents;
+ ext_hdr->reserved[0] = 0;
+ ext_hdr->reserved[1] = 0;
+
+ /* copy in-sgl to the message */
+ for (i = 0, sg = sg_in; i < sg_in_nents; i++, sg++) {
+ ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg));
+ ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg));
+ sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE);
+ ext_hdr->sgl[i].length = (sg_len <= total_in_len) ? sg_len : total_in_len;
+ total_in_len -= ext_hdr->sgl[i].length;
+ }
+
+ /* copy out-sgl to the message */
+ for (i = sg_in_nents, sg = sg_out; i < sg_in_nents + sg_out_nents; i++, sg++) {
+ ext_hdr->sgl[i].low = lower_32_bits(sg_dma_address(sg));
+ ext_hdr->sgl[i].high = upper_32_bits(sg_dma_address(sg));
+ sg_len = min_t(unsigned int, sg_dma_len(sg), PAGE_SIZE);
+ ext_hdr->sgl[i].length = sg_len;
+ }
+
+ /* send the message to GSC */
+ ret = __mei_cl_send(cl, (u8 *)ext_hdr, buf_sz, 0, MEI_CL_IO_SGL);
+ if (ret < 0) {
+ dev_err(bus->dev, "__mei_cl_send failed, returned %zd\n", ret);
+ goto end;
+ }
+ if (ret != buf_sz) {
+ dev_err(bus->dev, "__mei_cl_send returned %zd instead of expected %zd\n",
+ ret, buf_sz);
+ ret = -EIO;
+ goto end;
+ }
+
+ /* receive the reply from GSC, note that at this point sg_in should contain the reply */
+ ret = __mei_cl_recv(cl, (u8 *)&rx_msg, sizeof(rx_msg), NULL, MEI_CL_IO_SGL, 0);
+
+ if (ret != sizeof(rx_msg)) {
+ dev_err(bus->dev, "__mei_cl_recv returned %zd instead of expected %zd\n",
+ ret, sizeof(rx_msg));
+ if (ret >= 0)
+ ret = -EIO;
+ goto end;
+ }
+
+ /* check that rx_msg.client_id and rx_msg.fence_id match the ones we sent */
+ if (rx_msg.client_id != client_id || rx_msg.fence_id != fence_id) {
+ dev_err(bus->dev, "received client_id/fence_id %u/%u instead of %u/%u sent\n",
+ rx_msg.client_id, rx_msg.fence_id, client_id, fence_id);
+ ret = -EFAULT;
+ goto end;
+ }
+
+ dev_dbg(bus->dev, "gsc command: successfully written %u bytes\n", rx_msg.written);
+ ret = rx_msg.written;
+
+end:
+ kfree(ext_hdr);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mei_cldev_send_gsc_command);
+
+/**
* mei_cl_device_find - find matching entry in the driver id table
*
* @cldev: me client device
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 0b2fbe1335a7..6c8b71ae32c8 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -322,6 +322,7 @@ void mei_io_cb_free(struct mei_cl_cb *cb)
list_del(&cb->list);
kfree(cb->buf.data);
+ kfree(cb->ext_hdr);
kfree(cb);
}
@@ -401,6 +402,7 @@ static struct mei_cl_cb *mei_io_cb_init(struct mei_cl *cl,
cb->buf_idx = 0;
cb->fop_type = type;
cb->vtag = 0;
+ cb->ext_hdr = NULL;
return cb;
}
@@ -1740,6 +1742,17 @@ static inline u8 mei_ext_hdr_set_vtag(void *ext, u8 vtag)
return vtag_hdr->hdr.length;
}
+static inline bool mei_ext_hdr_is_gsc(struct mei_ext_hdr *ext)
+{
+ return ext && ext->type == MEI_EXT_HDR_GSC;
+}
+
+static inline u8 mei_ext_hdr_set_gsc(struct mei_ext_hdr *ext, struct mei_ext_hdr *gsc_hdr)
+{
+ memcpy(ext, gsc_hdr, mei_ext_hdr_len(gsc_hdr));
+ return ext->length;
+}
+
/**
* mei_msg_hdr_init - allocate and initialize mei message header
*
@@ -1752,14 +1765,17 @@ static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb)
size_t hdr_len;
struct mei_ext_meta_hdr *meta;
struct mei_msg_hdr *mei_hdr;
- bool is_ext, is_vtag;
+ bool is_ext, is_hbm, is_gsc, is_vtag;
+ struct mei_ext_hdr *next_ext;
if (!cb)
return ERR_PTR(-EINVAL);
/* Extended header for vtag is attached only on the first fragment */
is_vtag = (cb->vtag && cb->buf_idx == 0);
- is_ext = is_vtag;
+ is_hbm = cb->cl->me_cl->client_id == 0;
+ is_gsc = ((!is_hbm) && cb->cl->dev->hbm_f_gsc_supported && mei_ext_hdr_is_gsc(cb->ext_hdr));
+ is_ext = is_vtag || is_gsc;
/* Compute extended header size */
hdr_len = sizeof(*mei_hdr);
@@ -1771,6 +1787,9 @@ static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb)
if (is_vtag)
hdr_len += sizeof(struct mei_ext_hdr_vtag);
+ if (is_gsc)
+ hdr_len += mei_ext_hdr_len(cb->ext_hdr);
+
setup_hdr:
mei_hdr = kzalloc(hdr_len, GFP_KERNEL);
if (!mei_hdr)
@@ -1785,10 +1804,20 @@ setup_hdr:
goto out;
meta = (struct mei_ext_meta_hdr *)mei_hdr->extension;
+ meta->size = 0;
+ next_ext = (struct mei_ext_hdr *)meta->hdrs;
if (is_vtag) {
meta->count++;
- meta->size += mei_ext_hdr_set_vtag(meta->hdrs, cb->vtag);
+ meta->size += mei_ext_hdr_set_vtag(next_ext, cb->vtag);
+ next_ext = mei_ext_next(next_ext);
+ }
+
+ if (is_gsc) {
+ meta->count++;
+ meta->size += mei_ext_hdr_set_gsc(next_ext, cb->ext_hdr);
+ next_ext = mei_ext_next(next_ext);
}
+
out:
mei_hdr->length = hdr_len - sizeof(*mei_hdr);
return mei_hdr;
@@ -1812,14 +1841,14 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
struct mei_msg_hdr *mei_hdr = NULL;
size_t hdr_len;
size_t hbuf_len, dr_len;
- size_t buf_len;
+ size_t buf_len = 0;
size_t data_len;
int hbuf_slots;
u32 dr_slots;
u32 dma_len;
int rets;
bool first_chunk;
- const void *data;
+ const void *data = NULL;
if (WARN_ON(!cl || !cl->dev))
return -ENODEV;
@@ -1839,8 +1868,10 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
return 0;
}
- buf_len = buf->size - cb->buf_idx;
- data = buf->data + cb->buf_idx;
+ if (buf->data) {
+ buf_len = buf->size - cb->buf_idx;
+ data = buf->data + cb->buf_idx;
+ }
hbuf_slots = mei_hbuf_empty_slots(dev);
if (hbuf_slots < 0) {
rets = -EOVERFLOW;
@@ -1858,9 +1889,6 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
goto err;
}
- cl_dbg(dev, cl, "Extended Header %d vtag = %d\n",
- mei_hdr->extended, cb->vtag);
-
hdr_len = sizeof(*mei_hdr) + mei_hdr->length;
/**
@@ -1889,7 +1917,7 @@ int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
}
mei_hdr->length += data_len;
- if (mei_hdr->dma_ring)
+ if (mei_hdr->dma_ring && buf->data)
mei_dma_ring_write(dev, buf->data + cb->buf_idx, buf_len);
rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len);
@@ -1983,9 +2011,6 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
goto err;
}
- cl_dbg(dev, cl, "Extended Header %d vtag = %d\n",
- mei_hdr->extended, cb->vtag);
-
hdr_len = sizeof(*mei_hdr) + mei_hdr->length;
if (rets == 0) {
@@ -2030,7 +2055,7 @@ ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
mei_hdr->length += data_len;
- if (mei_hdr->dma_ring)
+ if (mei_hdr->dma_ring && buf->data)
mei_dma_ring_write(dev, buf->data, buf_len);
rets = mei_write_message(dev, mei_hdr, hdr_len, data, data_len);
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index de712cbf5d07..12a62a911e42 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -340,9 +340,13 @@ static int mei_hbm_capabilities_req(struct mei_device *dev)
req.hbm_cmd = MEI_HBM_CAPABILITIES_REQ_CMD;
if (dev->hbm_f_vt_supported)
req.capability_requested[0] |= HBM_CAP_VT;
+
if (dev->hbm_f_cd_supported)
req.capability_requested[0] |= HBM_CAP_CD;
+ if (dev->hbm_f_gsc_supported)
+ req.capability_requested[0] |= HBM_CAP_GSC;
+
ret = mei_hbm_write_message(dev, &mei_hdr, &req);
if (ret) {
dev_err(dev->dev,
@@ -1200,6 +1204,12 @@ static void mei_hbm_config_features(struct mei_device *dev)
dev->version.minor_version >= HBM_MINOR_VERSION_VT))
dev->hbm_f_vt_supported = 1;
+ /* GSC support */
+ if (dev->version.major_version > HBM_MAJOR_VERSION_GSC ||
+ (dev->version.major_version == HBM_MAJOR_VERSION_GSC &&
+ dev->version.minor_version >= HBM_MINOR_VERSION_GSC))
+ dev->hbm_f_gsc_supported = 1;
+
/* Capability message Support */
dev->hbm_f_cap_supported = 0;
if (dev->version.major_version > HBM_MAJOR_VERSION_CAP ||
@@ -1367,6 +1377,9 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
if (!(capability_res->capability_granted[0] & HBM_CAP_CD))
dev->hbm_f_cd_supported = 0;
+ if (!(capability_res->capability_granted[0] & HBM_CAP_GSC))
+ dev->hbm_f_gsc_supported = 0;
+
if (dev->hbm_f_dr_supported) {
if (mei_dmam_ring_alloc(dev))
dev_info(dev->dev, "running w/o dma ring\n");
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index 9e2f781c6ed5..da4ef0b51954 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -590,9 +590,14 @@ static int mei_me_hbuf_write(struct mei_device *dev,
u32 dw_cnt;
int empty_slots;
- if (WARN_ON(!hdr || !data || hdr_len & 0x3))
+ if (WARN_ON(!hdr || hdr_len & 0x3))
return -EINVAL;
+ if (!data && data_len) {
+ dev_err(dev->dev, "wrong parameters null data with data_len = %zu\n", data_len);
+ return -EINVAL;
+ }
+
dev_dbg(dev->dev, MEI_HDR_FMT, MEI_HDR_PRM((struct mei_msg_hdr *)hdr));
empty_slots = mei_hbuf_empty_slots(dev);
diff --git a/drivers/misc/mei/hw.h b/drivers/misc/mei/hw.h
index e7e020dba6b1..319418ddf4fb 100644
--- a/drivers/misc/mei/hw.h
+++ b/drivers/misc/mei/hw.h
@@ -93,6 +93,12 @@
#define HBM_MAJOR_VERSION_VT 2
/*
+ * MEI version with GSC support
+ */
+#define HBM_MINOR_VERSION_GSC 2
+#define HBM_MAJOR_VERSION_GSC 2
+
+/*
* MEI version with capabilities message support
*/
#define HBM_MINOR_VERSION_CAP 2
@@ -229,18 +235,19 @@ enum mei_cl_disconnect_status {
*
* @MEI_EXT_HDR_NONE: sentinel
* @MEI_EXT_HDR_VTAG: vtag header
+ * @MEI_EXT_HDR_GSC: gsc header
*/
enum mei_ext_hdr_type {
MEI_EXT_HDR_NONE = 0,
MEI_EXT_HDR_VTAG = 1,
+ MEI_EXT_HDR_GSC = 2,
};
/**
* struct mei_ext_hdr - extend header descriptor (TLV)
* @type: enum mei_ext_hdr_type
* @length: length excluding descriptor
- * @ext_payload: payload of the specific extended header
- * @hdr: place holder for actual header
+ * @data: the extended header payload
*/
struct mei_ext_hdr {
u8 type;
@@ -279,12 +286,11 @@ struct mei_ext_hdr_vtag {
* Extended header iterator functions
*/
/**
- * mei_ext_hdr - extended header iterator begin
+ * mei_ext_begin - extended header iterator begin
*
* @meta: meta header of the extended header list
*
- * Return:
- * The first extended header
+ * Return: The first extended header
*/
static inline struct mei_ext_hdr *mei_ext_begin(struct mei_ext_meta_hdr *meta)
{
@@ -305,6 +311,60 @@ static inline bool mei_ext_last(struct mei_ext_meta_hdr *meta,
return (u8 *)ext >= (u8 *)meta + sizeof(*meta) + (meta->size * 4);
}
+struct mei_gsc_sgl {
+ u32 low;
+ u32 high;
+ u32 length;
+} __packed;
+
+#define GSC_HECI_MSG_KERNEL 0
+#define GSC_HECI_MSG_USER 1
+
+#define GSC_ADDRESS_TYPE_GTT 0
+#define GSC_ADDRESS_TYPE_PPGTT 1
+#define GSC_ADDRESS_TYPE_PHYSICAL_CONTINUOUS 2 /* max of 64K */
+#define GSC_ADDRESS_TYPE_PHYSICAL_SGL 3
+
+/**
+ * struct mei_ext_hdr_gsc_h2f - extended header: gsc host to firmware interface
+ *
+ * @hdr: extended header
+ * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER
+ * @addr_type: GSC_ADDRESS_TYPE_{GTT, PPGTT, PHYSICAL_CONTINUOUS, PHYSICAL_SGL}
+ * @fence_id: synchronization marker
+ * @input_address_count: number of input sgl buffers
+ * @output_address_count: number of output sgl buffers
+ * @reserved: reserved
+ * @sgl: sg list
+ */
+struct mei_ext_hdr_gsc_h2f {
+ struct mei_ext_hdr hdr;
+ u8 client_id;
+ u8 addr_type;
+ u32 fence_id;
+ u8 input_address_count;
+ u8 output_address_count;
+ u8 reserved[2];
+ struct mei_gsc_sgl sgl[];
+} __packed;
+
+/**
+ * struct mei_ext_hdr_gsc_f2h - gsc firmware to host interface
+ *
+ * @hdr: extended header
+ * @client_id: GSC_HECI_MSG_KERNEL or GSC_HECI_MSG_USER
+ * @reserved: reserved
+ * @fence_id: synchronization marker
+ * @written: number of bytes written to firmware
+ */
+struct mei_ext_hdr_gsc_f2h {
+ struct mei_ext_hdr hdr;
+ u8 client_id;
+ u8 reserved;
+ u32 fence_id;
+ u32 written;
+} __packed;
+
/**
* mei_ext_next - following extended header on the TLV list
*
@@ -321,6 +381,21 @@ static inline struct mei_ext_hdr *mei_ext_next(struct mei_ext_hdr *ext)
}
/**
+ * mei_ext_hdr_len - get extended header length in bytes
+ *
+ * @ext: extended header
+ *
+ * Return: extended header length in bytes, 0 if @ext is NULL
+ */
+static inline u32 mei_ext_hdr_len(const struct mei_ext_hdr *ext)
+{
+ if (!ext)
+ return 0;
+
+ return ext->length * sizeof(u32);
+}
+
+/**
* struct mei_msg_hdr - MEI BUS Interface Section
*
* @me_addr: device address
@@ -682,6 +757,10 @@ struct hbm_dma_ring_ctrl {
/* virtual tag supported */
#define HBM_CAP_VT BIT(0)
+
+/* gsc extended header support */
+#define HBM_CAP_GSC BIT(1)
+
/* client dma supported */
#define HBM_CAP_CD BIT(2)
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index 0706322154cb..0a0e984e5673 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -98,9 +98,12 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
struct mei_device *dev = cl->dev;
struct mei_cl_cb *cb;
+ struct mei_ext_hdr_vtag *vtag_hdr = NULL;
+ struct mei_ext_hdr_gsc_f2h *gsc_f2h = NULL;
+
size_t buf_sz;
u32 length;
- int ext_len;
+ u32 ext_len;
length = mei_hdr->length;
ext_len = 0;
@@ -122,18 +125,24 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
}
if (mei_hdr->extended) {
- struct mei_ext_hdr *ext;
- struct mei_ext_hdr_vtag *vtag_hdr = NULL;
-
- ext = mei_ext_begin(meta);
+ struct mei_ext_hdr *ext = mei_ext_begin(meta);
do {
switch (ext->type) {
case MEI_EXT_HDR_VTAG:
vtag_hdr = (struct mei_ext_hdr_vtag *)ext;
break;
+ case MEI_EXT_HDR_GSC:
+ gsc_f2h = (struct mei_ext_hdr_gsc_f2h *)ext;
+ cb->ext_hdr = kzalloc(sizeof(*gsc_f2h), GFP_KERNEL);
+ if (!cb->ext_hdr) {
+ cb->status = -ENOMEM;
+ goto discard;
+ }
+ break;
case MEI_EXT_HDR_NONE:
fallthrough;
default:
+ cl_err(dev, cl, "unknown extended header\n");
cb->status = -EPROTO;
break;
}
@@ -141,12 +150,14 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
ext = mei_ext_next(ext);
} while (!mei_ext_last(meta, ext));
- if (!vtag_hdr) {
- cl_dbg(dev, cl, "vtag not found in extended header.\n");
+ if (!vtag_hdr && !gsc_f2h) {
+ cl_dbg(dev, cl, "no vtag or gsc found in extended header.\n");
cb->status = -EPROTO;
goto discard;
}
+ }
+ if (vtag_hdr) {
cl_dbg(dev, cl, "vtag: %d\n", vtag_hdr->vtag);
if (cb->vtag && cb->vtag != vtag_hdr->vtag) {
cl_err(dev, cl, "mismatched tag: %d != %d\n",
@@ -157,6 +168,28 @@ static int mei_cl_irq_read_msg(struct mei_cl *cl,
cb->vtag = vtag_hdr->vtag;
}
+ if (gsc_f2h) {
+ u32 ext_hdr_len = mei_ext_hdr_len(&gsc_f2h->hdr);
+
+ if (!dev->hbm_f_gsc_supported) {
+ cl_err(dev, cl, "gsc extended header is not supported\n");
+ cb->status = -EPROTO;
+ goto discard;
+ }
+
+ if (length) {
+ cl_err(dev, cl, "no data allowed in cb with gsc\n");
+ cb->status = -EPROTO;
+ goto discard;
+ }
+ if (ext_hdr_len > sizeof(*gsc_f2h)) {
+ cl_err(dev, cl, "gsc extended header is too big %u\n", ext_hdr_len);
+ cb->status = -EPROTO;
+ goto discard;
+ }
+ memcpy(cb->ext_hdr, gsc_f2h, ext_hdr_len);
+ }
+
if (!mei_cl_is_connected(cl)) {
cl_dbg(dev, cl, "not connected\n");
cb->status = -ENODEV;
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 6bb3e1ba9ded..8d8018428d9d 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -116,12 +116,16 @@ enum mei_cb_file_ops {
* @MEI_CL_IO_TX_INTERNAL: internal communication between driver and FW
*
* @MEI_CL_IO_RX_NONBLOCK: recv is non-blocking
+ *
+ * @MEI_CL_IO_SGL: send command with sgl list.
*/
enum mei_cl_io_mode {
MEI_CL_IO_TX_BLOCKING = BIT(0),
MEI_CL_IO_TX_INTERNAL = BIT(1),
MEI_CL_IO_RX_NONBLOCK = BIT(2),
+
+ MEI_CL_IO_SGL = BIT(3),
};
/*
@@ -206,6 +210,7 @@ struct mei_cl;
* @status: io status of the cb
* @internal: communication between driver and FW flag
* @blocking: transmission blocking mode
+ * @ext_hdr: extended header
*/
struct mei_cl_cb {
struct list_head list;
@@ -218,6 +223,7 @@ struct mei_cl_cb {
int status;
u32 internal:1;
u32 blocking:1;
+ struct mei_ext_hdr *ext_hdr;
};
/**
@@ -494,6 +500,7 @@ struct mei_dev_timeouts {
* @hbm_f_vt_supported : hbm feature vtag supported
* @hbm_f_cap_supported : hbm feature capabilities message supported
* @hbm_f_cd_supported : hbm feature client dma supported
+ * @hbm_f_gsc_supported : hbm feature gsc supported
*
* @fw_ver : FW versions
*
@@ -585,6 +592,7 @@ struct mei_device {
unsigned int hbm_f_vt_supported:1;
unsigned int hbm_f_cap_supported:1;
unsigned int hbm_f_cd_supported:1;
+ unsigned int hbm_f_gsc_supported:1;
struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
diff --git a/drivers/misc/mei/pxp/mei_pxp.c b/drivers/misc/mei/pxp/mei_pxp.c
index 5c39457e3f53..8dd09b1722eb 100644
--- a/drivers/misc/mei/pxp/mei_pxp.c
+++ b/drivers/misc/mei/pxp/mei_pxp.c
@@ -77,10 +77,35 @@ mei_pxp_receive_message(struct device *dev, void *buffer, size_t size)
return byte;
}
+/**
+ * mei_pxp_gsc_command() - sends a gsc command, by sending
+ * a sgl mei message to gsc and receiving reply from gsc
+ *
+ * @dev: device corresponding to the mei_cl_device
+ * @client_id: client id to send the command to
+ * @fence_id: fence id (synchronization marker) to place in the command
+ * @sg_in: scatter gather list containing addresses for rx message buffer
+ * @total_in_len: total length of data in 'in' sg, can be less than the sum of buffers sizes
+ * @sg_out: scatter gather list containing addresses for tx message buffer
+ *
+ * Return: bytes sent on Success, <0 on Failure
+ */
+static ssize_t mei_pxp_gsc_command(struct device *dev, u8 client_id, u32 fence_id,
+ struct scatterlist *sg_in, size_t total_in_len,
+ struct scatterlist *sg_out)
+{
+ struct mei_cl_device *cldev;
+
+ cldev = to_mei_cl_device(dev);
+
+ return mei_cldev_send_gsc_command(cldev, client_id, fence_id, sg_in, total_in_len, sg_out);
+}
+
static const struct i915_pxp_component_ops mei_pxp_ops = {
.owner = THIS_MODULE,
.send = mei_pxp_send_message,
.recv = mei_pxp_receive_message,
+ .gsc_command = mei_pxp_gsc_command,
};
static int mei_component_master_bind(struct device *dev)
@@ -131,17 +156,24 @@ static int mei_pxp_component_match(struct device *dev, int subcomponent,
{
struct device *base = data;
+ if (!dev)
+ return 0;
+
if (!dev->driver || strcmp(dev->driver->name, "i915") ||
subcomponent != I915_COMPONENT_PXP)
return 0;
base = base->parent;
- if (!base)
+ if (!base) /* mei device */
return 0;
- base = base->parent;
- dev = dev->parent;
+ base = base->parent; /* pci device */
+ /* for dgfx */
+ if (base && dev == base)
+ return 1;
+ /* for pch */
+ dev = dev->parent;
return (base && dev && dev == base);
}
diff --git a/include/drm/i915_pxp_tee_interface.h b/include/drm/i915_pxp_tee_interface.h
index af593ec64469..a702b6ec17f7 100644
--- a/include/drm/i915_pxp_tee_interface.h
+++ b/include/drm/i915_pxp_tee_interface.h
@@ -8,6 +8,7 @@
#include <linux/mutex.h>
#include <linux/device.h>
+struct scatterlist;
/**
* struct i915_pxp_component_ops - ops for PXP services.
@@ -23,6 +24,10 @@ struct i915_pxp_component_ops {
int (*send)(struct device *dev, const void *message, size_t size);
int (*recv)(struct device *dev, void *buffer, size_t size);
+ ssize_t (*gsc_command)(struct device *dev, u8 client_id, u32 fence_id,
+ struct scatterlist *sg_in, size_t total_in_len,
+ struct scatterlist *sg_out);
+
};
/**
diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h
index df1fab44ea5c..fd6e0620658d 100644
--- a/include/linux/mei_cl_bus.h
+++ b/include/linux/mei_cl_bus.h
@@ -11,6 +11,7 @@
struct mei_cl_device;
struct mei_device;
+struct scatterlist;
typedef void (*mei_cldev_cb_t)(struct mei_cl_device *cldev);
@@ -116,6 +117,11 @@ void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data);
int mei_cldev_enable(struct mei_cl_device *cldev);
int mei_cldev_disable(struct mei_cl_device *cldev);
bool mei_cldev_enabled(const struct mei_cl_device *cldev);
+ssize_t mei_cldev_send_gsc_command(struct mei_cl_device *cldev,
+ u8 client_id, u32 fence_id,
+ struct scatterlist *sg_in,
+ size_t total_in_len,
+ struct scatterlist *sg_out);
void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size);
int mei_cldev_dma_unmap(struct mei_cl_device *cldev);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 520ad2691a99..8df261c5ab9b 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -645,6 +645,22 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5)
+/*
+ * Query the status of HuC load.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ * -ENODEV if HuC is not present on this platform,
+ * -EOPNOTSUPP if HuC firmware usage is disabled,
+ * -ENOPKG if HuC firmware fetch failed,
+ * -ENOEXEC if HuC firmware is invalid or mismatched,
+ * -ENOMEM if i915 failed to prepare the FW objects for transfer to the uC,
+ * -EIO if the FW transfer or the FW authentication failed.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of the
+ * following values:
+ * * 0 if HuC firmware load is not complete,
+ * * 1 if HuC firmware is authenticated and running.
+ */
#define I915_PARAM_HUC_STATUS 42
/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
@@ -749,6 +765,12 @@ typedef struct drm_i915_irq_wait {
/* Query if the kernel supports the I915_USERPTR_PROBE flag. */
#define I915_PARAM_HAS_USERPTR_PROBE 56
+/*
+ * Frequency of the timestamps in OA reports. This used to be the same as the CS
+ * timestamp frequency, but differs on some platforms.
+ */
+#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
+
/* Must be kept compact -- no holes and well documented */
/**
@@ -2650,6 +2672,10 @@ enum drm_i915_oa_format {
I915_OA_FORMAT_A12_B8_C8,
I915_OA_FORMAT_A32u40_A4u32_B8_C8,
+ /* DG2 */
+ I915_OAR_FORMAT_A32u40_A4u32_B8_C8,
+ I915_OA_FORMAT_A24u40_A14u32_B8_C8,
+
I915_OA_FORMAT_MAX /* non-ABI */
};
@@ -3493,27 +3519,13 @@ struct drm_i915_gem_create_ext {
*
* The (page-aligned) allocated size for the object will be returned.
*
- * DG2 64K min page size implications:
- *
- * On discrete platforms, starting from DG2, we have to contend with GTT
- * page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE
- * objects. Specifically the hardware only supports 64K or larger GTT
- * page sizes for such memory. The kernel will already ensure that all
- * I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page
- * sizes underneath.
- *
- * Note that the returned size here will always reflect any required
- * rounding up done by the kernel, i.e 4K will now become 64K on devices
- * such as DG2. The kernel will always select the largest minimum
- * page-size for the set of possible placements as the value to use when
- * rounding up the @size.
- *
- * Special DG2 GTT address alignment requirement:
- *
- * The GTT alignment will also need to be at least 2M for such objects.
+ * On platforms like DG2/ATS the kernel will always use 64K or larger
+ * pages for I915_MEMORY_CLASS_DEVICE. The kernel also requires a
+ * minimum of 64K GTT alignment for such objects.
*
- * Note that due to how the hardware implements 64K GTT page support, we
- * have some further complications:
+ * NOTE: Previously the ABI here required a minimum GTT alignment of 2M
+ * on DG2/ATS, due to how the hardware implemented 64K GTT page support,
+ * where we had the following complications:
*
* 1) The entire PDE (which covers a 2MB virtual address range), must
* contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same
@@ -3522,12 +3534,10 @@ struct drm_i915_gem_create_ext {
* 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
* objects.
*
- * To keep things simple for userland, we mandate that any GTT mappings
- * must be aligned to and rounded up to 2MB. The kernel will internally
- * pad them out to the next 2MB boundary. As this only wastes virtual
- * address space and avoids userland having to copy any needlessly
- * complicated PDE sharing scheme (coloring) and only affects DG2, this
- * is deemed to be a good compromise.
+ * However on actual production HW this was completely changed to now
+ * allow setting a TLB hint at the PTE level (see PS64), which is a lot
+ * more flexible than the above. With this the 2M restriction was
+ * dropped where we now only require 64K.
*/
__u64 size;